diff --git a/.asf.yaml b/.asf.yaml
index dd4975435cf0..36f01b88a724 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -46,6 +46,9 @@ github:
         strict: true
         # don't require any jobs to pass
         contexts: []
+  pull_requests:
+    # enable updating head branches of pull requests
+    allow_update_branch: true
 
 # publishes the content of the `asf-site` branch to
 # https://arrow.apache.org/rust/
diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml
index 20da777ec0e5..209d58e2d86e 100644
--- a/.github/actions/setup-builder/action.yaml
+++ b/.github/actions/setup-builder/action.yaml
@@ -16,16 +16,7 @@
 # under the License.
 
 name: Prepare Rust Builder
-description: 'Prepare Rust Build Environment'
-inputs:
-  rust-version:
-    description: 'version of rust to install (e.g. stable)'
-    required: false
-    default: 'stable'
-  target:
-    description: 'target architecture(s)'
-    required: false
-    default: 'x86_64-unknown-linux-gnu'
+description: "Prepare Rust Build Environment"
 runs:
   using: "composite"
   steps:
@@ -43,6 +34,9 @@ runs:
           /usr/local/cargo/git/db/
         key: cargo-cache3-${{ hashFiles('**/Cargo.toml') }}
         restore-keys: cargo-cache3-
+    - name: Setup Rust toolchain
+      shell: bash
+      run: rustup install
     - name: Generate lockfile
       shell: bash
       run: cargo fetch
@@ -51,12 +45,6 @@ runs:
       run: |
         apt-get update
         apt-get install -y protobuf-compiler
-    - name: Setup Rust toolchain
-      shell: bash
-      run: |
-        echo "Installing ${{ inputs.rust-version }}"
-        rustup toolchain install ${{ inputs.rust-version }} --target ${{ inputs.target }}
-        rustup default ${{ inputs.rust-version }}
     - name: Disable debuginfo generation
       # Disable full debug symbol generation to speed up CI build and keep memory down
       # "1" means line tables only, which is useful for panic tracebacks.
@@ -65,6 +53,9 @@ runs:
     - name: Enable backtraces
       shell: bash
       run: echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
+    - name: Disable incremental compilation
+      shell: bash
+      run: echo "CARGO_INCREMENTAL=0" >> $GITHUB_ENV
     - name: Fixup git permissions
       # https://github.com/actions/checkout/issues/766
       shell: bash
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 7ccf01fed2bd..2da398d7d861 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -14,6 +14,10 @@ updates:
         applies-to: version-updates
         patterns:
           - "prost*"
+      tonic:
+        applies-to: version-updates
+        patterns:
+          - "tonic*"
   - package-ecosystem: "github-actions"
     directory: "/"
     schedule:
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index e999f505bca1..c2d07f49ab88 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,20 +1,38 @@
 # Which issue does this PR close?
 
+<!--
 We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax.
+-->
 
-Closes #NNN.
+- Closes #NNN.
 
 # Rationale for this change
 
+<!--
 Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed.
 Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes.
+-->
 
 # What changes are included in this PR?
 
+<!--
 There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR.
+-->
+
+# Are these changes tested?
+
+<!--
+We typically require tests for all PRs in order to:
+1. Prevent the code from being accidentally broken by subsequent changes
+2. Serve as another way to document the expected behavior of the code
+
+If tests are not included in your PR, please explain why (for example, they are already covered by existing tests).
+-->
 
 # Are there any user-facing changes?
 
+<!--
 If there are user-facing changes then we may require documentation to be updated before approving the PR.
 
 If there are any breaking changes to public APIs, please call them out.
+-->
diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml
index 0b90a78577e5..3a0b28d2d101 100644
--- a/.github/workflows/arrow.yml
+++ b/.github/workflows/arrow.yml
@@ -56,7 +56,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -68,7 +68,10 @@ jobs:
       - name: Test arrow-schema
         run: cargo test -p arrow-schema --all-features
       - name: Test arrow-array
-        run: cargo test -p arrow-array --all-features
+        run: |
+          cargo test -p arrow-array --all-features
+          # Disable feature `force_validate`
+          cargo test -p arrow-array --features=ffi
       - name: Test arrow-select
         run: cargo test -p arrow-select --all-features
       - name: Test arrow-cast
@@ -112,7 +115,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -140,13 +143,15 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-        with:
-          target: wasm32-unknown-unknown,wasm32-wasip1
+      - name: Install wasm32 targets
+        run: |
+          rustup target add wasm32-unknown-unknown
+          rustup target add wasm32-wasip1
       - name: Build wasm32-unknown-unknown
         run: cargo build -p arrow --no-default-features --features=json,csv,ipc,ffi --target wasm32-unknown-unknown
       - name: Build wasm32-wasip1
@@ -158,7 +163,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
diff --git a/.github/workflows/arrow_flight.yml b/.github/workflows/arrow_flight.yml
index 2659a0d987b8..426255f0f3c3 100644
--- a/.github/workflows/arrow_flight.yml
+++ b/.github/workflows/arrow_flight.yml
@@ -47,7 +47,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -60,7 +60,7 @@ jobs:
           cargo test -p arrow-flight --all-features
       - name: Test --examples
         run: |
-          cargo test -p arrow-flight  --features=flight-sql,tls --examples
+          cargo test -p arrow-flight  --features=flight-sql,tls-ring --examples
 
   vendor:
     name: Verify Vendored Code
@@ -68,7 +68,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Run gen
@@ -82,7 +82,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index e6254ea24a58..d568fcc0f069 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -36,7 +36,7 @@ jobs:
     name: Audit
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Install cargo-audit
         run: cargo install cargo-audit
       - name: Run audit check
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index b28e8c20cfe7..f20f0b143696 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -38,9 +38,9 @@ jobs:
     name: Release Audit Tool (RAT)
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: 3.8
       - name: Audit licenses
@@ -50,8 +50,8 @@ jobs:
     name: Markdown format
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-node@v4
+      - uses: actions/checkout@v6
+      - uses: actions/setup-node@v6
         with:
           node-version: "14"
       - name: Prettier check
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 0d60ae006796..7b0c2566a3bf 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -37,14 +37,14 @@ jobs:
       contents: read
       pull-requests: write
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Assign GitHub labels
         if: |
           github.event_name == 'pull_request_target' &&
             (github.event.action == 'opened' ||
              github.event.action == 'synchronize')
-        uses: actions/labeler@v5.0.0
+        uses: actions/labeler@v6.0.1
         with:
           repo-token: ${{ secrets.GITHUB_TOKEN }}
           configuration-path: .github/workflows/dev_pr/labeler.yml
diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml
index 64299bd507d3..edb6d036174c 100644
--- a/.github/workflows/dev_pr/labeler.yml
+++ b/.github/workflows/dev_pr/labeler.yml
@@ -37,6 +37,11 @@ arrow:
       - 'arrow-string/**/*'
       - 'arrow/**/*'
 
+arrow-avro:
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'arrow-avro/**/*'
+
 arrow-flight:
   - changed-files:
     - any-glob-to-any-file:
@@ -46,7 +51,13 @@ parquet:
   - changed-files:
     - any-glob-to-any-file:
       - 'parquet/**/*'
-      - 'parquet-variant/**/*'
+
+parquet-variant:
+  - changed-files:
+    - any-glob-to-any-file:
+      - 'parquet-variant/**/*'
+      - 'parquet-variant-compute/**/*'
+      - 'parquet-variant-json/**/*'
 
 parquet-derive:
   - changed-files:
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index d6ec0622f6ed..12e22abce06d 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -34,28 +34,20 @@ jobs:
   docs:
     name: Rustdocs are clean
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        arch: [ amd64 ]
-        rust: [ nightly ]
     container:
-      image: ${{ matrix.arch }}/rust
+      image: amd64/rust
       env:
         RUSTDOCFLAGS: "-Dwarnings --enable-index-page -Zunstable-options"
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
-      - name: Install python dev
-        run: |
-          apt update
-          apt install -y libpython3.11-dev
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-        with:
-          rust-version: ${{ matrix.rust }}
+      - name: Install Nightly Rust
+        run: rustup install nightly
       - name: Run cargo doc
-        run: cargo doc --document-private-items --no-deps --workspace --all-features
+        run: cargo +nightly doc --document-private-items --no-deps --workspace --all-features
       - name: Fix file permissions
         shell: sh
         run: |
@@ -64,7 +56,7 @@ jobs:
               echo "::warning title=Invalid file permissions automatically fixed::$line"
           done
       - name: Upload artifacts
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@v4
         with:
           name: crate-docs
           path: target/doc
@@ -77,9 +69,9 @@ jobs:
       contents: write
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Download crate docs
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
         with:
           name: crate-docs
           path: website/build
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 1b6eeb15dca4..cc74650812e9 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -63,6 +63,7 @@ jobs:
       ARROW_INTEGRATION_CPP: ON
       ARROW_INTEGRATION_CSHARP: ON
       ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust"
+      ARCHERY_INTEGRATION_WITH_DOTNET: "1"
       ARCHERY_INTEGRATION_WITH_GO: "1"
       ARCHERY_INTEGRATION_WITH_JAVA: "1"
       ARCHERY_INTEGRATION_WITH_JS: "1"
@@ -77,52 +78,112 @@ jobs:
       run:
         shell: bash
     steps:
+      - name: Monitor disk usage - Initial
+        run: |
+          echo "=== Initial Disk Usage ==="
+          df -h /
+          echo ""
+
+      - name: Remove unnecessary preinstalled software
+        run: |
+          echo "=== Cleaning up host disk space ==="
+          echo "Disk space before cleanup:"
+          df -h /
+
+          # Clean apt cache
+          apt-get clean || true
+
+          # Remove GitHub Actions tool cache
+          rm -rf /__t/* || true
+
+          # Remove large packages from host filesystem (mounted at /host/)
+          rm -rf /host/usr/share/dotnet || true
+          rm -rf /host/usr/local/lib/android || true
+          rm -rf /host/usr/local/.ghcup || true
+          rm -rf /host/opt/hostedtoolcache/CodeQL || true
+
+          echo ""
+          echo "Disk space after cleanup:"
+          df -h /
+          echo ""
+
       # This is necessary so that actions/checkout can find git
       - name: Export conda path
         run: echo "/opt/conda/envs/arrow/bin" >> $GITHUB_PATH
       # This is necessary so that Rust can find cargo
       - name: Export cargo path
         run: echo "/root/.cargo/bin" >> $GITHUB_PATH
-      - name: Check rustup
-        run: which rustup
-      - name: Check cmake
-        run: which cmake
+
+      # Checkout repos (using shallow clones with fetch-depth: 1)
       - name: Checkout Arrow
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           repository: apache/arrow
           submodules: true
-          fetch-depth: 0
+          fetch-depth: 1
       - name: Checkout Arrow Rust
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           path: rust
-          fetch-depth: 0
+          submodules: true
+          fetch-depth: 1
+      - name: Checkout Arrow .NET
+        uses: actions/checkout@v6
+        with:
+          repository: apache/arrow-dotnet
+          path: dotnet
+          fetch-depth: 1
       - name: Checkout Arrow Go
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           repository: apache/arrow-go
           path: go
+          fetch-depth: 1
       - name: Checkout Arrow Java
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           repository: apache/arrow-java
           path: java
+          fetch-depth: 1
       - name: Checkout Arrow JavaScript
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           repository: apache/arrow-js
           path: js
+          fetch-depth: 1
       - name: Checkout Arrow nanoarrow
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           repository: apache/arrow-nanoarrow
           path: nanoarrow
+          fetch-depth: 1
+
+      - name: Monitor disk usage - After checkouts
+        run: |
+          echo "=== After Checkouts ==="
+          df -h /
+          echo ""
+
       - name: Build
         run: conda run --no-capture-output ci/scripts/integration_arrow_build.sh $PWD /build
+
+      - name: Monitor disk usage - After build
+        if: always()
+        run: |
+          echo "=== After Build ==="
+          df -h /
+          echo ""
+
       - name: Run
         run: conda run --no-capture-output ci/scripts/integration_arrow.sh $PWD /build
 
+      - name: Monitor disk usage - After tests
+        if: always()
+        run: |
+          echo "=== After Tests ==="
+          df -h /
+          echo ""
+
   # test FFI against the C-Data interface exposed by pyarrow
   pyarrow-integration-test:
     name: Pyarrow C Data Interface
@@ -133,7 +194,7 @@ jobs:
         # PyArrow 15 was the first version to introduce StringView/BinaryView support
         pyarrow: ["15", "16", "17"]
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -142,17 +203,17 @@ jobs:
           rustup default ${{ matrix.rust }}
           rustup component add rustfmt clippy
       - name: Cache Cargo
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: /home/runner/.cargo
           key: cargo-maturin-cache-
       - name: Cache Rust dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: /home/runner/target
           # this key is not equal because maturin uses different compilation flags.
           key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}-
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
           python-version: '3.8'
       - name: Upgrade pip and setuptools
@@ -165,8 +226,9 @@ jobs:
       - name: Run Rust tests
         run: |
           source venv/bin/activate
-          cargo test -p arrow-pyarrow
-      - name: Run tests
+          cd arrow-pyarrow-testing
+          cargo test
+      - name: Run Python tests
         run: |
           source venv/bin/activate
           cd arrow-pyarrow-integration-testing
diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml
index ce67546a104b..f7269f535249 100644
--- a/.github/workflows/miri.yaml
+++ b/.github/workflows/miri.yaml
@@ -47,7 +47,7 @@ jobs:
     name: MIRI
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
diff --git a/.github/workflows/parquet-geospatial.yml b/.github/workflows/parquet-geospatial.yml
new file mode 100644
index 000000000000..77bd8f97b4f7
--- /dev/null
+++ b/.github/workflows/parquet-geospatial.yml
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+# tests for parquet-geospatial crate
+name: "parquet-geospatial"
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+# trigger for all PRs that touch certain files and changes to main
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - parquet-geospatial/**
+      - .github/**
+
+jobs:
+  # test the crate
+  linux-test:
+    name: Test
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: true
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+      - name: Test parquet-geospatial
+        run: cargo test -p parquet-geospatial
+
+  # test compilation
+  linux-features:
+    name: Check Compilation
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          submodules: true
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+      - name: Check compilation (parquet-geospatial)
+        run: cargo check -p parquet-geospatial
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v6
+      - name: Setup Rust toolchain
+        uses: ./.github/actions/setup-builder
+      - name: Setup Clippy
+        run: rustup component add clippy
+      - name: Run clippy (parquet-geospatial)
+        run: cargo clippy -p parquet-geospatial --all-targets --all-features -- -D warnings
diff --git a/.github/workflows/parquet-variant.yml b/.github/workflows/parquet-variant.yml
index 6fc5c3a8cd00..3e4563286b22 100644
--- a/.github/workflows/parquet-variant.yml
+++ b/.github/workflows/parquet-variant.yml
@@ -31,6 +31,8 @@ on:
   pull_request:
     paths:
       - parquet-variant/**
+      - parquet-variant-json/**
+      - parquet-variant-compute/**
       - .github/**
 
 jobs:
@@ -41,13 +43,17 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-      - name: Test
+      - name: Test parquet-variant
         run: cargo test -p parquet-variant
+      - name: Test parquet-variant-json
+        run: cargo test -p parquet-variant-json
+      - name: Test parquet-variant-compute
+        run: cargo test -p parquet-variant-compute
 
   # test compilation
   linux-features:
@@ -56,13 +62,17 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-      - name: Check compilation
+      - name: Check compilation (parquet-variant)
         run: cargo check -p parquet-variant
+      - name: Check compilation (parquet-variant-json)
+        run: cargo check -p parquet-variant-json
+      - name: Check compilation (parquet-variant-compute)
+        run: cargo check -p parquet-variant-compute
 
   clippy:
     name: Clippy
@@ -70,10 +80,14 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
         run: rustup component add clippy
-      - name: Run clippy
+      - name: Run clippy (parquet-variant)
         run: cargo clippy -p parquet-variant --all-targets --all-features -- -D warnings
+      - name: Run clippy (parquet-variant-json)
+        run: cargo clippy -p parquet-variant-json --all-targets --all-features -- -D warnings
+      - name: Run clippy (parquet-variant-compute)
+        run: cargo clippy -p parquet-variant-compute --all-targets --all-features -- -D warnings
diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index 96c7ab8f4e3a..8b94efd91f90 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -42,6 +42,9 @@ on:
       - arrow-json/**
       - arrow-avro/**
       - parquet/**
+      - parquet-variant/**
+      - parquet-variant-compute/**
+      - parquet-variant-json/**
       - .github/**
 
 jobs:
@@ -52,7 +55,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -75,7 +78,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -113,6 +116,15 @@ jobs:
         run: cargo check -p parquet --all-targets --no-default-features --features json
       - name: Check compilation --no-default-features --features encryption --features async
         run: cargo check -p parquet --no-default-features --features encryption --features async
+      - name: Check compilation --no-default-features --features flate2, this is expected to fail
+        run: if cargo check -p parquet --no-default-features --features flate2 2>/dev/null; then false; else true; fi
+      - name: Check compilation --no-default-features --features flate2 --features flate2-rust_backened
+        run: cargo check -p parquet --no-default-features --features flate2 --features flate2-rust_backened
+      - name: Check compilation --no-default-features --features flate2 --features flate2-zlib-rs
+        run: cargo check -p parquet --no-default-features --features flate2 --features flate2-zlib-rs
+      - name: Check compilation --no-default-features --features variant_experimental
+        run: cargo check -p parquet --no-default-features --features variant_experimental
+
 
   # test the parquet crate builds against wasm32 in stable rust
   wasm32-build:
@@ -121,13 +133,15 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-        with:
-          target: wasm32-unknown-unknown,wasm32-wasip1
+      - name: Install wasm32 targets
+        run: |
+          rustup target add wasm32-unknown-unknown
+          rustup target add wasm32-wasip1
       - name: Install clang # Needed for zlib compilation
         run: apt-get update && apt-get install -y clang gcc-multilib
       - name: Build wasm32-unknown-unknown
@@ -142,9 +156,9 @@ jobs:
       matrix:
         rust: [ stable ]
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: "3.10"
           cache: "pip"
@@ -175,7 +189,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
diff --git a/.github/workflows/parquet_derive.yml b/.github/workflows/parquet_derive.yml
index 17aec724a820..b1541b5dfb0b 100644
--- a/.github/workflows/parquet_derive.yml
+++ b/.github/workflows/parquet_derive.yml
@@ -43,7 +43,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Setup Rust toolchain
@@ -57,7 +57,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup Clippy
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8f87c50649d3..6e0d10106cbe 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -33,7 +33,7 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 5
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Create GitHub Releases
         run: |
           version=${GITHUB_REF_NAME}
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index a20575391b48..77fccdbebc46 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -30,14 +30,13 @@ on:
   pull_request:
 
 jobs:
-
   # Check workspace wide compile and test with default features for
   # mac
   macos:
     name: Test on Mac
     runs-on: macos-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Install protoc with brew
@@ -52,8 +51,7 @@ jobs:
           # do not produce debug symbols to keep memory usage down
           export RUSTFLAGS="-C debuginfo=0"
           # PyArrow tests happen in integration.yml.
-          cargo test --workspace --exclude arrow-pyarrow
-
+          cargo test --workspace
 
   # Check workspace wide compile and test with default features for
   # windows
@@ -61,7 +59,7 @@ jobs:
     name: Test on Windows
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
         with:
           submodules: true
       - name: Install protobuf compiler in /d/protoc
@@ -84,9 +82,7 @@ jobs:
           # do not produce debug symbols to keep memory usage down
           export RUSTFLAGS="-C debuginfo=0"
           export PATH=$PATH:/d/protoc/bin
-          # PyArrow tests happen in integration.yml.
-          cargo test --workspace --exclude arrow-pyarrow
-
+          cargo test --workspace
 
   # Run cargo fmt for all crates
   lint:
@@ -95,7 +91,7 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Setup rustfmt
@@ -117,20 +113,12 @@ jobs:
     container:
       image: amd64/rust
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
-      - name: Install cargo-msrv
-        run: cargo install cargo-msrv
-      - name: Downgrade arrow-pyarrow-integration-testing dependencies
-        working-directory: arrow-pyarrow-integration-testing
-        # Necessary because half 2.5 requires rust 1.81 or newer
-        run: |
-          cargo update -p half --precise 2.4.0
-      - name: Downgrade workspace dependencies
-        # Necessary because half 2.5 requires rust 1.81 or newer
-        run: |
-          cargo update -p half --precise 2.4.0
+      - name: Install cargo-msrv (if needed)
+        # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed
+        run: if command -v cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies
diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml
index dd21c794960e..94a95f6e31a2 100644
--- a/.github/workflows/take.yml
+++ b/.github/workflows/take.yml
@@ -28,7 +28,7 @@ jobs:
     if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/github-script@v7
+      - uses: actions/github-script@v8
         with:
           script: |
             github.rest.issues.addAssignees({
diff --git a/.gitignore b/.gitignore
index 05091a4e975d..127182a8f99e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,9 @@ __blobstorage__
 *.bak2
 # OS-specific .gitignores
 
+# cargo insta temp files
+*.pending-snap
+
 # Mac .gitignore
 # General
 .DS_Store
@@ -99,4 +102,4 @@ parquet/pytest/venv/
 __pycache__/
 
 # Parquet file from arrow_reader_clickbench
-hits_1.parquet
\ No newline at end of file
+hits_1.parquet
diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md
index 941c9f26382c..a651a860f893 100644
--- a/CHANGELOG-old.md
+++ b/CHANGELOG-old.md
@@ -19,6 +19,1318 @@
 
 # Historical Changelog
 
+
+## [57.1.0](https://github.com/apache/arrow-rs/tree/57.1.0) (2025-11-20)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/57.0.0...57.1.0)
+
+**Implemented enhancements:**
+
+- Eliminate bound checks in filter kernels [\#8865](https://github.com/apache/arrow-rs/issues/8865) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Respect page index policy option for ParquetObjectReader when it's not skip [\#8856](https://github.com/apache/arrow-rs/issues/8856) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Speed up collect\_bool and remove `unsafe` [\#8848](https://github.com/apache/arrow-rs/issues/8848) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Error reading parquet FileMetaData with empty lists encoded as element-type=0 [\#8826](https://github.com/apache/arrow-rs/issues/8826) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- ValueStatistics methods can't be used from generic context in external crate [\#8823](https://github.com/apache/arrow-rs/issues/8823) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Custom Pretty-Printing Implementation for Column when Formatting Record Batches [\#8821](https://github.com/apache/arrow-rs/issues/8821) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Parquet-concat: supports bloom filter and page index [\#8804](https://github.com/apache/arrow-rs/issues/8804) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Parquet\] virtual row number support [\#7299](https://github.com/apache/arrow-rs/issues/7299)
+- \[Variant\] Enforce shredded-type validation in `shred_variant` [\#8795](https://github.com/apache/arrow-rs/issues/8795) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Simplify decision logic to call `FilterBuilder::optimize` or not [\#8781](https://github.com/apache/arrow-rs/issues/8781) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add variant to arrow for DataType::{Binary, LargeBinary, BinaryView} [\#8767](https://github.com/apache/arrow-rs/issues/8767) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Provide algorithm that allows zipping arrays whose values are not prealigned [\#8752](https://github.com/apache/arrow-rs/issues/8752) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] ParquetMetadataReader decodes too much metadata under point-get scenario [\#8751](https://github.com/apache/arrow-rs/issues/8751) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `arrow-json` supports encoding binary arrays, but not decoding [\#8736](https://github.com/apache/arrow-rs/issues/8736) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Allow `FilterPredicate` instances to be reused for RecordBatches [\#8692](https://github.com/apache/arrow-rs/issues/8692) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- ArrowJsonBatch::from\_batch is incomplete [\#8684](https://github.com/apache/arrow-rs/issues/8684) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- parquet-layout: More info about layout including footer size, page index, bloom filter? [\#8682](https://github.com/apache/arrow-rs/issues/8682) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Rewrite `ParquetRecordBatchStream` \(async API\) in terms of the PushDecoder [\#8677](https://github.com/apache/arrow-rs/issues/8677) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[JSON\] Add encoding for binary view [\#8674](https://github.com/apache/arrow-rs/issues/8674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Refactor arrow-cast decimal casting to unify the rescale logic used in Parquet variant casts [\#8670](https://github.com/apache/arrow-rs/issues/8670) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support Uuid/`FixedSizeBinary(16)` shredding [\#8665](https://github.com/apache/arrow-rs/issues/8665)
+- \[Parquet\]There should be an encoding counter to know how many encodings the repo supports in total [\#8662](https://github.com/apache/arrow-rs/issues/8662) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Improve `parse_data_type` for `List`, `ListView`, `LargeList`, `LargeListView`, `FixedSizeList`, `Union`, `Map`, `RunEndCoded`. [\#8648](https://github.com/apache/arrow-rs/issues/8648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support variant to arrow primitive support null/time/decimal\_\* [\#8637](https://github.com/apache/arrow-rs/issues/8637)
+- Return error from `RleDecoder::reset` rather than panic [\#8632](https://github.com/apache/arrow-rs/issues/8632) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Add bitwise ops on `BooleanBufferBuilder` and `MutableBuffer` that mutate directly the buffer [\#8618](https://github.com/apache/arrow-rs/issues/8618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add variant\_to\_arrow Utf-8, LargeUtf8, Utf8View types support [\#8567](https://github.com/apache/arrow-rs/issues/8567) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+
+**Fixed bugs:**
+
+- Regression: Parsing `List(Int64)` results in nullable list in 57.0.0 and a non-nullable list in 57.1.0 [\#8883](https://github.com/apache/arrow-rs/issues/8883)
+- Regression: FixedSizeList data type parsing fails on 57.1.0 [\#8880](https://github.com/apache/arrow-rs/issues/8880)
+- \(dyn ArrayFormatterFactory + 'static\) can't be safely shared between threads [\#8875](https://github.com/apache/arrow-rs/issues/8875)
+- RowNumber reader has wrong row group ordering [\#8864](https://github.com/apache/arrow-rs/issues/8864) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `ThriftMetadataWriter::write_column_indexes` cannot handle a `ColumnIndexMetaData::NONE` [\#8815](https://github.com/apache/arrow-rs/issues/8815) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- "Archery test With other arrows" Integration test failing on main: [\#8813](https://github.com/apache/arrow-rs/issues/8813) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] Writing in 57.0.0 seems 10% slower than 56.0.0 [\#8783](https://github.com/apache/arrow-rs/issues/8783) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Parquet reader cannot handle files with unknown logical types [\#8776](https://github.com/apache/arrow-rs/issues/8776) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- zip now treats nulls as false in provided mask regardless of the underlying bit value [\#8721](https://github.com/apache/arrow-rs/issues/8721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[avro\] Incorrect version in crate.io landing page [\#8691](https://github.com/apache/arrow-rs/issues/8691) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Array: ViewType gc\(\) has bug when array sum length exceed i32::MAX [\#8681](https://github.com/apache/arrow-rs/issues/8681) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Parquet 56: encounter `error: item_reader def levels are None` when reading nested field with row filter [\#8657](https://github.com/apache/arrow-rs/issues/8657) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Degenerate and non-nullable `FixedSizeListArray`s are not handled [\#8623](https://github.com/apache/arrow-rs/issues/8623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\]Performance Degradation with RowFilter on Unsorted Columns due to Fragmented ReadPlan [\#8565](https://github.com/apache/arrow-rs/issues/8565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Documentation updates:**
+
+- docs: Add example for creating a `MutableBuffer` from `Buffer` [\#8853](https://github.com/apache/arrow-rs/pull/8853) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: Add examples for creating MutableBuffer from Vec [\#8852](https://github.com/apache/arrow-rs/pull/8852) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve ParquetDecoder docs [\#8802](https://github.com/apache/arrow-rs/pull/8802) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Update docs for zero copy conversion of ScalarBuffer [\#8772](https://github.com/apache/arrow-rs/pull/8772) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add example to convert `PrimitiveArray` to a `Vec` [\#8771](https://github.com/apache/arrow-rs/pull/8771) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: Add links for arrow-avro [\#8770](https://github.com/apache/arrow-rs/pull/8770) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Parquet\] Minor: Update comments in page decompressor [\#8764](https://github.com/apache/arrow-rs/pull/8764) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Document limitations of the `arrow_integration_test` crate [\#8738](https://github.com/apache/arrow-rs/pull/8738) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([phil-opp](https://github.com/phil-opp))
+- docs: Add link to the Arrow implementation status page [\#8732](https://github.com/apache/arrow-rs/pull/8732) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: Update Parquet readme implementation status [\#8731](https://github.com/apache/arrow-rs/pull/8731) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+
+**Performance improvements:**
+
+- `RowConverter::from_binary` should opportunistically take ownership of the buffer [\#8685](https://github.com/apache/arrow-rs/issues/8685) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Speed up filter some more \(up to 2x\) [\#8868](https://github.com/apache/arrow-rs/pull/8868) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Speed up `collect_bool` and remove `unsafe`, optimize `take_bits`, `take_native` for null values [\#8849](https://github.com/apache/arrow-rs/pull/8849) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Change `BooleanBuffer::append_packed_range` to use `apply_bitwise_binary_op` [\#8812](https://github.com/apache/arrow-rs/pull/8812) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Parquet\] Avoid copying `LogicalType` in `ColumnOrder::get_sort_order`, deprecate `get_logical_type` [\#8789](https://github.com/apache/arrow-rs/pull/8789) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- perf: Speed up Parquet file writing \(10%, back to speed of 56\) [\#8786](https://github.com/apache/arrow-rs/pull/8786) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- perf: override `ArrayIter` default impl for `nth`, `nth_back`, `last` and `count` [\#8785](https://github.com/apache/arrow-rs/pull/8785) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Parquet\] Reduce one copy in `SerializedPageReader` [\#8745](https://github.com/apache/arrow-rs/pull/8745) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- Small optimization in Parquet varint decoder [\#8742](https://github.com/apache/arrow-rs/pull/8742) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- perf: override `count`, `nth`, `nth_back`, `last` and `max` for BitIterator [\#8696](https://github.com/apache/arrow-rs/pull/8696) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Add `FilterPredicate::filter_record_batch` [\#8693](https://github.com/apache/arrow-rs/pull/8693) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pepijnve](https://github.com/pepijnve))
+- perf: zero-copy path in `RowConverter::from_binary` [\#8686](https://github.com/apache/arrow-rs/pull/8686) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev))
+- perf: add optimized zip implementation for scalars [\#8653](https://github.com/apache/arrow-rs/pull/8653) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- feat: add `apply_unary_op` and `apply_binary_op` bitwise operations [\#8619](https://github.com/apache/arrow-rs/pull/8619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Parquet\]Optimize the performance in record reader [\#8607](https://github.com/apache/arrow-rs/pull/8607) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([hhhizzz](https://github.com/hhhizzz))
+
+**Closed issues:**
+
+- Variant to NullType conversion ignores strict casting [\#8810](https://github.com/apache/arrow-rs/issues/8810)
+- Unify display representation for `Field` [\#8784](https://github.com/apache/arrow-rs/issues/8784)
+- Misleading configuration name: skip\_arrow\_metadata [\#8780](https://github.com/apache/arrow-rs/issues/8780)
+- Inconsistent display for types with Metadata [\#8761](https://github.com/apache/arrow-rs/issues/8761) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Internal `arrow-integration-test` crate is linked from `arrow` docs [\#8739](https://github.com/apache/arrow-rs/issues/8739) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add benchmark for RunEndEncoded casting [\#8709](https://github.com/apache/arrow-rs/issues/8709) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support `VariantArray::value` to return a `Result<Variant>` [\#8672](https://github.com/apache/arrow-rs/issues/8672)
+
+**Merged pull requests:**
+
+- Fix regression caused by changes in Display for DataType - display \(`List(non-null Int64)` instead of `List(nullable Int64)`\) [\#8890](https://github.com/apache/arrow-rs/pull/8890) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([etseidl](https://github.com/etseidl))
+- Support parsing for old style FixedSizeList [\#8882](https://github.com/apache/arrow-rs/pull/8882) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Make ArrayFormatterFactory Send + Sync and add a test [\#8878](https://github.com/apache/arrow-rs/pull/8878) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- Make `ArrowReaderOptions::with_virtual_columns` error rather than panic on invalid input [\#8867](https://github.com/apache/arrow-rs/pull/8867) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix errors when reading nested Lists with pushdown predicates. [\#8866](https://github.com/apache/arrow-rs/pull/8866) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix `RowNumberReader` when not all row groups are selected [\#8863](https://github.com/apache/arrow-rs/pull/8863) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vustef](https://github.com/vustef))
+- Respect page index policy option for ParquetObjectReader when it's not skip [\#8857](https://github.com/apache/arrow-rs/pull/8857) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- build\(deps\): update apache-avro requirement from 0.20.0 to 0.21.0 [\#8832](https://github.com/apache/arrow-rs/pull/8832) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Allow Users to Provide Custom `ArrayFormatter`s when Pretty-Printing Record Batches [\#8829](https://github.com/apache/arrow-rs/pull/8829) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- Allow reading of improperly constructed empty lists in Parquet metadata [\#8827](https://github.com/apache/arrow-rs/pull/8827) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[Variant\] Fix cast logic for Variant to Arrow for DataType::Null [\#8825](https://github.com/apache/arrow-rs/pull/8825) ([klion26](https://github.com/klion26))
+- remove T: ParquetValueType bound on ValueStatistics [\#8824](https://github.com/apache/arrow-rs/pull/8824) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([pmarks](https://github.com/pmarks))
+- build\(deps\): update lz4\_flex requirement from 0.11 to 0.12 [\#8820](https://github.com/apache/arrow-rs/pull/8820) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix bug in handling of empty Parquet page index structures [\#8817](https://github.com/apache/arrow-rs/pull/8817) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Parquet-concat: supports page index and bloom filter [\#8811](https://github.com/apache/arrow-rs/pull/8811) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- \[Doc\] Correct `ListArray` documentation [\#8803](https://github.com/apache/arrow-rs/pull/8803) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- \[Parquet\] Add additional docs for `ArrowReaderOptions` and `ArrowReaderMetadata` [\#8798](https://github.com/apache/arrow-rs/pull/8798) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Enforce shredded-type validation in `shred_variant` [\#8796](https://github.com/apache/arrow-rs/pull/8796) ([liamzwbao](https://github.com/liamzwbao))
+- Add `VariantPath::is_empty` [\#8791](https://github.com/apache/arrow-rs/pull/8791) ([friendlymatthew](https://github.com/friendlymatthew))
+- Add FilterBuilder::is\_optimize\_beneficial [\#8782](https://github.com/apache/arrow-rs/pull/8782) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pepijnve](https://github.com/pepijnve))
+- \[Parquet\] Allow reading of files with unknown logical types [\#8777](https://github.com/apache/arrow-rs/pull/8777) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- bench: add `ArrayIter` benchmarks [\#8774](https://github.com/apache/arrow-rs/pull/8774) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Update Rust toolchain to 1.91 [\#8769](https://github.com/apache/arrow-rs/pull/8769) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Add variant to arrow for `DataType::{Binary/LargeBinary/BinaryView}` [\#8768](https://github.com/apache/arrow-rs/pull/8768) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([klion26](https://github.com/klion26))
+-  feat: parse `DataType::Union`, `DataType::Map`, `DataType::RunEndEncoded` [\#8765](https://github.com/apache/arrow-rs/pull/8765) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dqkqd](https://github.com/dqkqd))
+- Add options to control various aspects of Parquet metadata decoding [\#8763](https://github.com/apache/arrow-rs/pull/8763) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- feat: Ensure consistent metadata display for data types [\#8760](https://github.com/apache/arrow-rs/pull/8760) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mhilton](https://github.com/mhilton))
+- Clean up predicate\_cache tests [\#8755](https://github.com/apache/arrow-rs/pull/8755) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- refactor `test_cache_projection_excludes_nested_columns` to use high level APIs [\#8754](https://github.com/apache/arrow-rs/pull/8754) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add `merge` and `merge_n` kernels [\#8753](https://github.com/apache/arrow-rs/pull/8753) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pepijnve](https://github.com/pepijnve))
+- Fix lint in arrow-flight by updating assert\_cmd after it upgraded [\#8741](https://github.com/apache/arrow-rs/pull/8741) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([vegarsti](https://github.com/vegarsti))
+- Remove link to internal `arrow-integration-test` crate from main `arrow` crate [\#8740](https://github.com/apache/arrow-rs/pull/8740) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([phil-opp](https://github.com/phil-opp))
+- Implement hex decoding of JSON strings to binary arrays [\#8737](https://github.com/apache/arrow-rs/pull/8737) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([phil-opp](https://github.com/phil-opp))
+- \[Parquet\] Adaptive Parquet Predicate Pushdown [\#8733](https://github.com/apache/arrow-rs/pull/8733) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([hhhizzz](https://github.com/hhhizzz))
+- \[Parquet\] Return error from `RleDecoder::reload` rather than panic [\#8729](https://github.com/apache/arrow-rs/pull/8729) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([liamzwbao](https://github.com/liamzwbao))
+- fix: `ArrayIter` does not report size hint correctly after advancing from the iterator back [\#8728](https://github.com/apache/arrow-rs/pull/8728) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: Use Vec::with\_capacity in cast\_to\_run\_end\_encoded [\#8726](https://github.com/apache/arrow-rs/pull/8726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
+- \[Variant\] Fix the index of an item in VariantArray in a unit test [\#8725](https://github.com/apache/arrow-rs/pull/8725) ([martin-g](https://github.com/martin-g))
+- build\(deps\): bump actions/download-artifact from 5 to 6 [\#8720](https://github.com/apache/arrow-rs/pull/8720) ([dependabot[bot]](https://github.com/apps/dependabot))
+- \[Variant\] Add try\_value/value for VariantArray [\#8719](https://github.com/apache/arrow-rs/pull/8719) ([klion26](https://github.com/klion26))
+- General virtual columns support + row numbers as a first use-case [\#8715](https://github.com/apache/arrow-rs/pull/8715) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vustef](https://github.com/vustef))
+- feat: Parquet-layout add Index and Footer info [\#8712](https://github.com/apache/arrow-rs/pull/8712) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- fix: `zip` now treats nulls as false in provided mask regardless of the underlying bit value [\#8711](https://github.com/apache/arrow-rs/pull/8711) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Add benchmark for casting to RunEndEncoded \(REE\) [\#8710](https://github.com/apache/arrow-rs/pull/8710) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
+- \[Minor\]: Document visibility for enums produced by Thrift macros [\#8706](https://github.com/apache/arrow-rs/pull/8706) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update `arrow-avro` `README.md` version to 57 [\#8695](https://github.com/apache/arrow-rs/pull/8695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Fix: ViewType gc on huge batch would produce bad output [\#8694](https://github.com/apache/arrow-rs/pull/8694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mapleFU](https://github.com/mapleFU))
+- Refactor arrow-cast decimal casting to unify the rescale logic used in Parquet variant casts [\#8689](https://github.com/apache/arrow-rs/pull/8689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- check bit width to avoid panic in DeltaBitPackDecoder [\#8688](https://github.com/apache/arrow-rs/pull/8688) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rambleraptor](https://github.com/rambleraptor))
+- \[thrift-remodel\] Use `thrift_enum` macro for `ConvertedType` [\#8680](https://github.com/apache/arrow-rs/pull/8680) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[JSON\] Map key supports utf8 view [\#8679](https://github.com/apache/arrow-rs/pull/8679) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mapleFU](https://github.com/mapleFU))
+- \[JSON\] Add encoding for binary view [\#8675](https://github.com/apache/arrow-rs/pull/8675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mapleFU](https://github.com/mapleFU))
+- \[Parquet\] Account for FileDecryptor in ParquetMetaData heap size calculation [\#8671](https://github.com/apache/arrow-rs/pull/8671) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- chore: update `OffsetBuffer::from_lengths(std::iter::repeat_n(<val>, <repeat>));` with `OffsetBuffer::from_repeated_length(<val>, <repeat>);` [\#8669](https://github.com/apache/arrow-rs/pull/8669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Variant\] Support `shred_variant` for Uuids [\#8666](https://github.com/apache/arrow-rs/pull/8666) ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Remove `create_test_variant_array` helper method [\#8664](https://github.com/apache/arrow-rs/pull/8664) ([friendlymatthew](https://github.com/friendlymatthew))
+- \[parquet\] Adding counting method in thrift\_enum macro to support  ENCODING\_SLOTS [\#8663](https://github.com/apache/arrow-rs/pull/8663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([hhhizzz](https://github.com/hhhizzz))
+- chore: add test case of RowSelection::trim [\#8660](https://github.com/apache/arrow-rs/pull/8660) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lichuang](https://github.com/lichuang))
+- feat: add `new_repeated` to `ByteArray` [\#8659](https://github.com/apache/arrow-rs/pull/8659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: add `repeat_slice_n_times` to `MutableBuffer` [\#8658](https://github.com/apache/arrow-rs/pull/8658) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: add optimized function to create offset with same length [\#8656](https://github.com/apache/arrow-rs/pull/8656) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Variant\] `rescale_decimal` followup [\#8655](https://github.com/apache/arrow-rs/pull/8655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- feat: parse DataType `List`, `ListView`, `LargeList`, `LargeListView`, `FixedSizeList` [\#8649](https://github.com/apache/arrow-rs/pull/8649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dqkqd](https://github.com/dqkqd))
+- Support more operations on ListView [\#8645](https://github.com/apache/arrow-rs/pull/8645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([a10y](https://github.com/a10y))
+- \[Variant\] Implement primitive type access for null/time/decimal\* [\#8638](https://github.com/apache/arrow-rs/pull/8638) ([klion26](https://github.com/klion26))
+- \[Variant\] refactor: Split builder.rs into several smaller files [\#8635](https://github.com/apache/arrow-rs/pull/8635) ([Weijun-H](https://github.com/Weijun-H))
+- add `try_new_with_length` constructor to `FixedSizeList` [\#8624](https://github.com/apache/arrow-rs/pull/8624) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([connortsui20](https://github.com/connortsui20))
+- Change some panics to errors in parquet decoder [\#8602](https://github.com/apache/arrow-rs/pull/8602) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rambleraptor](https://github.com/rambleraptor))
+- Support `variant_to_arrow` for utf8 [\#8600](https://github.com/apache/arrow-rs/pull/8600) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sdf-jkl](https://github.com/sdf-jkl))
+- Cast support for RunEndEncoded arrays [\#8589](https://github.com/apache/arrow-rs/pull/8589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
+
+
+
+## [57.0.0](https://github.com/apache/arrow-rs/tree/57.0.0) (2025-10-19)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/56.2.0...57.0.0)
+
+**Breaking changes:**
+
+- Use `Arc<FileEncryptionProperties>` everywhere to be consistent with `FileDecryptionProperties` [\#8626](https://github.com/apache/arrow-rs/pull/8626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- feat: Improve DataType display for `RunEndEncoded` [\#8596](https://github.com/apache/arrow-rs/pull/8596) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Add `ArrowError::AvroError`, remaining types and roundtrip tests to `arrow-avro` [\#8595](https://github.com/apache/arrow-rs/pull/8595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[thrift-remodel\] Refactor Thrift encryption and store encodings as bitmask [\#8587](https://github.com/apache/arrow-rs/pull/8587) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- feat: Enhance `Map` display formatting in DataType [\#8570](https://github.com/apache/arrow-rs/pull/8570) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- feat: Enhance DataType display formatting for `ListView` and `LargeListView` variants [\#8569](https://github.com/apache/arrow-rs/pull/8569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Use custom thrift parser for parquet metadata \(phase 1 of Thrift remodel\) [\#8530](https://github.com/apache/arrow-rs/pull/8530) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- refactor: improve display formatting for Union [\#8529](https://github.com/apache/arrow-rs/pull/8529) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Use `Arc<FileDecryptionProperties>` to reduce size of ParquetMetadata and avoid copying when `encryption` is enabled [\#8470](https://github.com/apache/arrow-rs/pull/8470) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix for column name based projection mask creation [\#8447](https://github.com/apache/arrow-rs/pull/8447) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Improve Display formatting of DataType::Timestamp [\#8425](https://github.com/apache/arrow-rs/pull/8425) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- Use more compact Debug formatting of Field [\#8424](https://github.com/apache/arrow-rs/pull/8424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- Reuse zstd compression context when writing IPC [\#8405](https://github.com/apache/arrow-rs/pull/8405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([albertlockett](https://github.com/albertlockett))
+- \[Decimal\] Add scale argument to validation functions to ensure accurate error logging [\#8396](https://github.com/apache/arrow-rs/pull/8396) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Quote `DataType::Struct` field names in `Display` formatting [\#8291](https://github.com/apache/arrow-rs/pull/8291) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- Improve `Display` for `DataType` and `Field` [\#8290](https://github.com/apache/arrow-rs/pull/8290) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- Bump pyo3 to 0.26.0 [\#8286](https://github.com/apache/arrow-rs/pull/8286) ([mbrobbel](https://github.com/mbrobbel))
+
+**Implemented enhancements:**
+
+- Added Avro support (new `arrow-avro` crate) [\#4886](https://github.com/apache/arrow-rs/issues/4886)
+- parquet-rewrite: supports compression level and write batch size [\#8639](https://github.com/apache/arrow-rs/issues/8639)
+- Error not panic when int96 statistics aren't size 12 [\#8614](https://github.com/apache/arrow-rs/issues/8614) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Make `VariantArray` iterable [\#8612](https://github.com/apache/arrow-rs/issues/8612)
+- \[Variant\] impl `PartialEq` for `VariantArray` [\#8610](https://github.com/apache/arrow-rs/issues/8610)
+- \[Variant\] Remove potential panics when probing `VariantArray` [\#8609](https://github.com/apache/arrow-rs/issues/8609)
+- \[Variant\] Remove ceremony of going from list of `Variant` to `VariantArray` [\#8606](https://github.com/apache/arrow-rs/issues/8606)
+- Eliminate redundant validation in `RecordBatch::project` [\#8591](https://github.com/apache/arrow-rs/issues/8591) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[PARQUET\]\[BENCH\] Arrow writer bench with compression and/or page v2 [\#8559](https://github.com/apache/arrow-rs/issues/8559) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] casting functions are confusingly named [\#8531](https://github.com/apache/arrow-rs/issues/8531) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support writing GeospatialStatistics in Parquet writer [\#8523](https://github.com/apache/arrow-rs/issues/8523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[thrift-remodel\] Optimize `convert_row_groups` [\#8517](https://github.com/apache/arrow-rs/issues/8517) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Add variant to arrow primitive support for boolean/timestamp/time [\#8515](https://github.com/apache/arrow-rs/issues/8515)
+- Test `thrift-remodel` branch with DataFusion [\#8513](https://github.com/apache/arrow-rs/issues/8513) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Make `UnionArray::is_dense` Method Public [\#8503](https://github.com/apache/arrow-rs/issues/8503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add `append_n` method to `FixedSizeBinaryDictionaryBuilder` [\#8497](https://github.com/apache/arrow-rs/issues/8497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] Reduce size of ParquetMetadata when encryption feature is enabled [\#8469](https://github.com/apache/arrow-rs/issues/8469) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Parquet\] Remove useless mut requirements in getting bloom filter function [\#8461](https://github.com/apache/arrow-rs/issues/8461) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Change `serde` dependency to `serde_core` where applicable [\#8451](https://github.com/apache/arrow-rs/issues/8451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] Split `ParquetMetadataReader` into IO/decoder state machine and thrift parsing [\#8439](https://github.com/apache/arrow-rs/issues/8439) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Remove compiler warning for redundant config enablement [\#8412](https://github.com/apache/arrow-rs/issues/8412) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add geospatial statistics creation support for GEOMETRY/GEOGRAPHY Parquet logical types [\#8411](https://github.com/apache/arrow-rs/issues/8411) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `arrow_json` lacks `with_timestamp_format` functions like `arrow_csv` had offered [\#8398](https://github.com/apache/arrow-rs/issues/8398) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Unify API for writing column chunks / row groups in parallel [\#8389](https://github.com/apache/arrow-rs/issues/8389) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Reuse zstd context in arrow IPC writer [\#8386](https://github.com/apache/arrow-rs/issues/8386) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- \[Variant\] Support reading/writing Parquet Variant LogicalType [\#8370](https://github.com/apache/arrow-rs/issues/8370) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Implement a `shred_variant` function [\#8361](https://github.com/apache/arrow-rs/issues/8361)
+- \[Parquet\] Expose ReadPlan and ReadPlanBuilder [\#8347](https://github.com/apache/arrow-rs/issues/8347) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `List` [\#8337](https://github.com/apache/arrow-rs/issues/8337) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `Struct` [\#8336](https://github.com/apache/arrow-rs/issues/8336) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `Time64(Microsecond)` [\#8334](https://github.com/apache/arrow-rs/issues/8334) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `Decimal128` [\#8332](https://github.com/apache/arrow-rs/issues/8332) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `Timestamp(Microsecond, _)` and `Timestamp(Nanosecond, _)` [\#8331](https://github.com/apache/arrow-rs/issues/8331) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] \[Shredding\] Support typed\_access for `Date32` [\#8330](https://github.com/apache/arrow-rs/issues/8330) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Support strict casting for all data types [\#8303](https://github.com/apache/arrow-rs/issues/8303)
+- \[Variant\] Support typed access for string types in variant\_get [\#8285](https://github.com/apache/arrow-rs/issues/8285)
+- \[Variant\]: Implement `DataType::FixedSizeList` support for `cast_to_variant` kernel [\#8281](https://github.com/apache/arrow-rs/issues/8281)
+
+**Fixed bugs:**
+
+- Fix arrow-avro Writer Documentation related to AvroBinaryFormat [\#8631](https://github.com/apache/arrow-rs/issues/8631) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Decimal -\> Decimal cast wrongly fails for large scale reduction [\#8579](https://github.com/apache/arrow-rs/issues/8579) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] Avoid fetching multiple pages when `max_predicate_cache_size` is 0 [\#8542](https://github.com/apache/arrow-rs/issues/8542) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- DataType parsing no longer works correctly for old formatted timestamps [\#8539](https://github.com/apache/arrow-rs/issues/8539) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] ArrowWriter flush does not work [\#8534](https://github.com/apache/arrow-rs/issues/8534) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `arrow::compute::interleave` fails with struct arrays with no fields [\#8533](https://github.com/apache/arrow-rs/issues/8533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] Over memory consumption for writer page v1 compressed [\#8526](https://github.com/apache/arrow-rs/issues/8526) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Incorrect Behavior of Collecting a filtered iterator to a BooleanArray [\#8505](https://github.com/apache/arrow-rs/issues/8505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Parquet\] ProjectionMask::columns name handling is bug prone [\#8443](https://github.com/apache/arrow-rs/issues/8443) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Shredded typed\_value columns must have valid variant types [\#8435](https://github.com/apache/arrow-rs/issues/8435) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- cargo test -p parquet fails with default `ulimit` [\#8406](https://github.com/apache/arrow-rs/issues/8406) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Column with List\(Struct\) causes failed to decode level data for struct array [\#8404](https://github.com/apache/arrow-rs/issues/8404) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Binaryview Utf8 Cast Issue [\#8403](https://github.com/apache/arrow-rs/issues/8403) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Decimal precision validation displays value without accounting for scale [\#8382](https://github.com/apache/arrow-rs/issues/8382) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] `VariantArray::data_type` returns `StructType`, causing `Array::as_struct` to panic [\#8319](https://github.com/apache/arrow-rs/issues/8319) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] writing a VariantArray to parquet panics [\#8296](https://github.com/apache/arrow-rs/issues/8296) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Documentation updates:**
+
+- Docs: Add more comments to the Parquet writer code [\#8383](https://github.com/apache/arrow-rs/pull/8383) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+
+**Performance improvements:**
+
+- \[parquet\] Improve encoding mask API \(wrap bare i32 in a struct w/ docs\) [\#8588](https://github.com/apache/arrow-rs/issues/8588) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- bench: create `zip` kernel benchmarks [\#8654](https://github.com/apache/arrow-rs/pull/8654) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Skip redundant validation checks in RecordBatch\#project [\#8583](https://github.com/apache/arrow-rs/pull/8583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pepijnve](https://github.com/pepijnve))
+- \[thrift-remodel\] Remove conversion functions for row group and column metadata [\#8574](https://github.com/apache/arrow-rs/pull/8574) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[PARQUET\] Improve memory efficiency for compressed writer parquet 1.0 [\#8527](https://github.com/apache/arrow-rs/pull/8527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lilianm](https://github.com/lilianm))
+- perf: improve `GenericByteBuilder::append_array` to use SIMD for extending the offsets [\#8388](https://github.com/apache/arrow-rs/pull/8388) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+
+**Closed issues:**
+
+- Utf-8, LargeUtf8, Utf8View [\#8601](https://github.com/apache/arrow-rs/issues/8601)
+- \[Variant\] Improve the get type logic for DataType in variant to arrow row builder [\#8538](https://github.com/apache/arrow-rs/issues/8538)
+- Add a README.md for arrow-avro [\#8504](https://github.com/apache/arrow-rs/issues/8504) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Fix UnionArray references to "positive" values [\#8418](https://github.com/apache/arrow-rs/issues/8418) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] `metadata` field should be marked as non-nullable [\#8410](https://github.com/apache/arrow-rs/issues/8410) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Avro\] Example read\_with\_utf8view.rs fails to run with error "Error: ParseError\("Unexpected EOF while reading Avro header"\)" [\#8380](https://github.com/apache/arrow-rs/issues/8380) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Geospatial\]: Add CI checks for `parquet-geospatial` crate [\#8377](https://github.com/apache/arrow-rs/issues/8377)
+- \[Geospatial\] Create new `parquet-geometry` crate [\#8374](https://github.com/apache/arrow-rs/issues/8374)
+
+**Merged pull requests:**
+
+- parquet-rewrite: add write\_batch\_size and compression\_level config [\#8642](https://github.com/apache/arrow-rs/pull/8642) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- Introduce a ThriftProtocolError to avoid allocating and formatting strings for error messages [\#8636](https://github.com/apache/arrow-rs/pull/8636) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- \[thrift-remodel\] Add macro to reduce boilerplate necessary to implement Thrift serialization [\#8634](https://github.com/apache/arrow-rs/pull/8634) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix Writer docs and rename `AvroBinaryFormat` to `AvroSoeFormat` [\#8633](https://github.com/apache/arrow-rs/pull/8633) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] Bulk insert elements into List and Object Builders [\#8629](https://github.com/apache/arrow-rs/pull/8629) ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] impl `PartialEq` and `FromIterator<Option<..>>` for `VariantArray` [\#8627](https://github.com/apache/arrow-rs/pull/8627) ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Remove ceremony from iterator of variants into VariantArray [\#8625](https://github.com/apache/arrow-rs/pull/8625) ([friendlymatthew](https://github.com/friendlymatthew))
+- Undeprecate `ArrowWriter::into_serialized_writer` and add docs [\#8621](https://github.com/apache/arrow-rs/pull/8621) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- fix: incorrect assertion in `BitChunks::new` [\#8620](https://github.com/apache/arrow-rs/pull/8620) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Variant\] Clean up redundant `get_type_name` [\#8617](https://github.com/apache/arrow-rs/pull/8617) ([liamzwbao](https://github.com/liamzwbao))
+- \[Minor\] Hide thrift macros [\#8616](https://github.com/apache/arrow-rs/pull/8616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Deprecate `parquet::format` module [\#8615](https://github.com/apache/arrow-rs/pull/8615) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[Variant\] Make `VariantArray` iterable [\#8613](https://github.com/apache/arrow-rs/pull/8613) ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Impl `Extend` for `VariantArrayBuilder` [\#8611](https://github.com/apache/arrow-rs/pull/8611) ([friendlymatthew](https://github.com/friendlymatthew))
+- build\(deps\): bump actions/setup-node from 5 to 6 [\#8604](https://github.com/apache/arrow-rs/pull/8604) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Check int96 min/max instead of panicking [\#8603](https://github.com/apache/arrow-rs/pull/8603) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rambleraptor](https://github.com/rambleraptor))
+- \[thrift-remodel\] Refactor Parquet Thrift code into new `thrift` module [\#8599](https://github.com/apache/arrow-rs/pull/8599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[Parquet\] Remove use of `parquet::format` in metadata bench code [\#8598](https://github.com/apache/arrow-rs/pull/8598) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lichuang](https://github.com/lichuang))
+- Remove experimental warning from `extension` module [\#8597](https://github.com/apache/arrow-rs/pull/8597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Adding `try_append_value` implementation to `ByteViewBuilder` [\#8594](https://github.com/apache/arrow-rs/pull/8594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([samueleresca](https://github.com/samueleresca))
+- Add RecordBatch::project microbenchmark [\#8592](https://github.com/apache/arrow-rs/pull/8592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pepijnve](https://github.com/pepijnve))
+- \[parquet\] Add a sync fn to ArrowWriter that flushes Writer [\#8586](https://github.com/apache/arrow-rs/pull/8586) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PiotrSrebrny](https://github.com/PiotrSrebrny))
+- chore: use magic number `FOOTER_SIZE` instead of hard-coded number [\#8585](https://github.com/apache/arrow-rs/pull/8585) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lichuang](https://github.com/lichuang))
+- Add support for run-end encoded \(REE\) arrays in arrow-avro [\#8584](https://github.com/apache/arrow-rs/pull/8584) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Unify API for writing column chunks / row groups in parallel [\#8582](https://github.com/apache/arrow-rs/pull/8582) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- Fix linting issues missed by \#8506 [\#8581](https://github.com/apache/arrow-rs/pull/8581) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix broken decimal-\>decimal casting with large scale reduction [\#8580](https://github.com/apache/arrow-rs/pull/8580) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- Migrate `arrow` and workspace to Rust 2024 [\#8578](https://github.com/apache/arrow-rs/pull/8578) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([mbrobbel](https://github.com/mbrobbel))
+- Fix doctests of parquet push decoder without default features [\#8577](https://github.com/apache/arrow-rs/pull/8577) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mbrobbel](https://github.com/mbrobbel))
+- Avoid panics and warnings when building avro without default features [\#8576](https://github.com/apache/arrow-rs/pull/8576) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Add support for 64-bit Schema Registry IDs \(Id64\) in arrow-avro [\#8575](https://github.com/apache/arrow-rs/pull/8575) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- fix: bug when struct nullability determined from `Dict<_, ByteArray>>` column [\#8573](https://github.com/apache/arrow-rs/pull/8573) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
+- fix: Support `interleave_struct` to handle empty fields [\#8563](https://github.com/apache/arrow-rs/pull/8563) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- \[Variant\] Define and use VariantDecimalType trait [\#8562](https://github.com/apache/arrow-rs/pull/8562) ([scovich](https://github.com/scovich))
+- \[PARQUET\] Update parquet writer bench with compression and pagev2 [\#8560](https://github.com/apache/arrow-rs/pull/8560) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lilianm](https://github.com/lilianm))
+- Replace serde with `serde_core` when possible [\#8558](https://github.com/apache/arrow-rs/pull/8558) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- fix: use default field name when name is None in Field conversion [\#8557](https://github.com/apache/arrow-rs/pull/8557) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Add arrow-avro README.md file [\#8556](https://github.com/apache/arrow-rs/pull/8556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- minor\(parquet\): Fix test\_not\_found on Windows [\#8555](https://github.com/apache/arrow-rs/pull/8555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nuno-faria](https://github.com/nuno-faria))
+- \[Parquet\] Avoid fetching multiple pages when the predicate cache is disabled [\#8554](https://github.com/apache/arrow-rs/pull/8554) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nuno-faria](https://github.com/nuno-faria))
+- \[Variant\] Support variant to `Decimal32/64/128/256` [\#8552](https://github.com/apache/arrow-rs/pull/8552) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- Arrow-avro Writer Dense Union support [\#8550](https://github.com/apache/arrow-rs/pull/8550) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Arrow-Avro: Resolve named field discrepancies [\#8546](https://github.com/apache/arrow-rs/pull/8546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Migrate `arrow-avro` to Rust 2024 [\#8545](https://github.com/apache/arrow-rs/pull/8545) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- feat: Export `is_dense` public [\#8544](https://github.com/apache/arrow-rs/pull/8544) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Fix "Incorrect Behavior of Collecting a filtered iterator to a BooleanArray" [\#8543](https://github.com/apache/arrow-rs/pull/8543) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- Support old syntax for DataType parsing [\#8541](https://github.com/apache/arrow-rs/pull/8541) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Variant\] Decimal unshredding support [\#8540](https://github.com/apache/arrow-rs/pull/8540) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- \[Variant\] Improve documentation and make kernels consistent [\#8536](https://github.com/apache/arrow-rs/pull/8536) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- feat: support casting from null to float16 [\#8535](https://github.com/apache/arrow-rs/pull/8535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([chenkovsky](https://github.com/chenkovsky))
+- Add benchmarks for FromIter \(PrimitiveArray and BooleanArray\) [\#8525](https://github.com/apache/arrow-rs/pull/8525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- Support writing GeospatialStatistics in Parquet writer [\#8524](https://github.com/apache/arrow-rs/pull/8524) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([paleolimbot](https://github.com/paleolimbot))
+- Fix some new rustdoc warnings [\#8522](https://github.com/apache/arrow-rs/pull/8522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[Variant\] Reverse VariantAsPrimitive trait to PrimitiveFromVariant [\#8519](https://github.com/apache/arrow-rs/pull/8519) ([scovich](https://github.com/scovich))
+- \[Variant\] Add variant to arrow primitive support for boolean/timestamp/time [\#8516](https://github.com/apache/arrow-rs/pull/8516) ([klion26](https://github.com/klion26))
+- \[Variant\] Add list support to unshred\_variant [\#8514](https://github.com/apache/arrow-rs/pull/8514) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Migrate `parquet-variant-json` to Rust 2024 [\#8512](https://github.com/apache/arrow-rs/pull/8512) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet-variant-compute` to Rust 2024 [\#8511](https://github.com/apache/arrow-rs/pull/8511) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet-variant` to Rust 2024 [\#8510](https://github.com/apache/arrow-rs/pull/8510) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet-geospatial` to Rust 2024 [\#8509](https://github.com/apache/arrow-rs/pull/8509) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet_derive_test` to Rust 2024 [\#8508](https://github.com/apache/arrow-rs/pull/8508) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet_derive` to Rust 2024 [\#8507](https://github.com/apache/arrow-rs/pull/8507) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `parquet` to Rust 2024 [\#8506](https://github.com/apache/arrow-rs/pull/8506) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] ReadOnlyMetadataBuilder borrows its underlying VariantMetadata [\#8502](https://github.com/apache/arrow-rs/pull/8502) ([scovich](https://github.com/scovich))
+- \[Variant\] Add a VariantBuilderExt impl for VariantValueArrayBuilder [\#8501](https://github.com/apache/arrow-rs/pull/8501) ([scovich](https://github.com/scovich))
+- build\(deps\): update sysinfo requirement from 0.36.0 to 0.37.1 [\#8500](https://github.com/apache/arrow-rs/pull/8500) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- \[Variant\] Introduce new BorrowedShreddingState concept [\#8499](https://github.com/apache/arrow-rs/pull/8499) ([scovich](https://github.com/scovich))
+- Add `append_n` method to `FixedSizeBinaryDictionaryBuilder` [\#8498](https://github.com/apache/arrow-rs/pull/8498) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- Fix docs.rs build: Use `doc_cfg` instead of removed `doc_auto_cfg` [\#8494](https://github.com/apache/arrow-rs/pull/8494) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([mbrobbel](https://github.com/mbrobbel))
+- Remove allow unused from arrow-avro lib.rs file [\#8493](https://github.com/apache/arrow-rs/pull/8493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Regression Testing, Bug Fixes, and Public API Tightening for arrow-avro [\#8492](https://github.com/apache/arrow-rs/pull/8492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Migrate `arrow-string` to Rust 2024 [\#8491](https://github.com/apache/arrow-rs/pull/8491) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-select` to Rust 2024 [\#8490](https://github.com/apache/arrow-rs/pull/8490) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-schema` to Rust 2024 [\#8489](https://github.com/apache/arrow-rs/pull/8489) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-row` to Rust 2024 [\#8488](https://github.com/apache/arrow-rs/pull/8488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-pyarrow-testing` to Rust 2024 [\#8487](https://github.com/apache/arrow-rs/pull/8487) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-pyarrow-integration-testing` to Rust 2024 [\#8486](https://github.com/apache/arrow-rs/pull/8486) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-pyarrow` to Rust 2024 [\#8485](https://github.com/apache/arrow-rs/pull/8485) ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-ord` to Rust 2024 [\#8484](https://github.com/apache/arrow-rs/pull/8484) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Support strict casting for Decimals [\#8483](https://github.com/apache/arrow-rs/pull/8483) ([liamzwbao](https://github.com/liamzwbao))
+- feat\(json\): Add temporal formatting options when write to JSON [\#8482](https://github.com/apache/arrow-rs/pull/8482) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([linyihai](https://github.com/linyihai))
+- \[Variant\] Define and use unshred\_variant function [\#8481](https://github.com/apache/arrow-rs/pull/8481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- \[Minor\] Remove private APIs from Parquet metadata benchmark [\#8478](https://github.com/apache/arrow-rs/pull/8478) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Add examples of using `Field::try_extension_type` [\#8475](https://github.com/apache/arrow-rs/pull/8475) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix Rustfmt in arrow-cast [\#8473](https://github.com/apache/arrow-rs/pull/8473) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Disable incremental builds in CI [\#8471](https://github.com/apache/arrow-rs/pull/8471) ([mbrobbel](https://github.com/mbrobbel))
+- Update Rust toolchain to 1.90 [\#8468](https://github.com/apache/arrow-rs/pull/8468) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Parquet\] Minor: Remove mut ref for getting row-group bloom filter [\#8462](https://github.com/apache/arrow-rs/pull/8462) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- refactor: split `num` dependency [\#8459](https://github.com/apache/arrow-rs/pull/8459) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([crepererum](https://github.com/crepererum))
+- Migrate `arrow-json` to Rust 2024 [\#8458](https://github.com/apache/arrow-rs/pull/8458) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-ipc` to Rust 2024 [\#8457](https://github.com/apache/arrow-rs/pull/8457) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-flight` to Rust 2024 [\#8456](https://github.com/apache/arrow-rs/pull/8456) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-data` to Rust 2024 [\#8455](https://github.com/apache/arrow-rs/pull/8455) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-csv` to Rust 2024 [\#8454](https://github.com/apache/arrow-rs/pull/8454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-cast` to Rust 2024 [\#8453](https://github.com/apache/arrow-rs/pull/8453) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-buffer` to Rust 2024 [\#8452](https://github.com/apache/arrow-rs/pull/8452) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-array` to Rust 2024 [\#8450](https://github.com/apache/arrow-rs/pull/8450) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Migrate `arrow-arith` to Rust 2024 [\#8449](https://github.com/apache/arrow-rs/pull/8449) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Expose `fields` in `StructBuilder` [\#8448](https://github.com/apache/arrow-rs/pull/8448) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lewiszlw](https://github.com/lewiszlw))
+- \[Variant\] Simpler shredding state [\#8444](https://github.com/apache/arrow-rs/pull/8444) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Unpin comfytable [\#8440](https://github.com/apache/arrow-rs/pull/8440) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Variant integration fixes [\#8438](https://github.com/apache/arrow-rs/pull/8438) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Refactor: extract FooterTail from ParquetMetadataReader [\#8437](https://github.com/apache/arrow-rs/pull/8437) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Refactor: Move parquet metadata parsing code into its own module [\#8436](https://github.com/apache/arrow-rs/pull/8436) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Update `UnionArray` wording to 'non-negative' [\#8434](https://github.com/apache/arrow-rs/pull/8434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jdockerty](https://github.com/jdockerty))
+- Adds Duration\(TimeUnit\) support to arrow-avro reader and writer [\#8433](https://github.com/apache/arrow-rs/pull/8433) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Update release schedule [\#8432](https://github.com/apache/arrow-rs/pull/8432) ([mbrobbel](https://github.com/mbrobbel))
+- expose read plan and plan builder via mod [\#8431](https://github.com/apache/arrow-rs/pull/8431) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yeya24](https://github.com/yeya24))
+- Bump MSRV to 1.85 [\#8429](https://github.com/apache/arrow-rs/pull/8429) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Fix clippy [\#8426](https://github.com/apache/arrow-rs/pull/8426) ([alamb](https://github.com/alamb))
+- Fix red main by updating test [\#8421](https://github.com/apache/arrow-rs/pull/8421) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([emilk](https://github.com/emilk))
+- Implement AsRef for Schema and Field [\#8417](https://github.com/apache/arrow-rs/pull/8417) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- \[Variant\] mark metadata field as non-nullable [\#8416](https://github.com/apache/arrow-rs/pull/8416) ([ding-young](https://github.com/ding-young))
+- Respect `CastOptions.safe` when casting `BinaryView` → `Utf8View` \(return `null` for invalid UTF‑8\) [\#8415](https://github.com/apache/arrow-rs/pull/8415) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kosiew](https://github.com/kosiew))
+- Add Parquet geospatial statistics utility [\#8414](https://github.com/apache/arrow-rs/pull/8414) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([paleolimbot](https://github.com/paleolimbot))
+- Remove explicit default cfg option [\#8413](https://github.com/apache/arrow-rs/pull/8413) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([abacef](https://github.com/abacef))
+- Support parquet canonical extension type roundtrip [\#8409](https://github.com/apache/arrow-rs/pull/8409) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Support reading/writing `VariantArray` to parquet with Variant LogicalType [\#8408](https://github.com/apache/arrow-rs/pull/8408) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Follow-up on arrow-avro Documentation [\#8402](https://github.com/apache/arrow-rs/pull/8402) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\]\[Shredding\] Support typed\_access for timestamp\_micro/timestamp\_nano [\#8401](https://github.com/apache/arrow-rs/pull/8401) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([klion26](https://github.com/klion26))
+- Expose ReadPlan and ReadPlanBuilder [\#8399](https://github.com/apache/arrow-rs/pull/8399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yeya24](https://github.com/yeya24))
+- Propagate errors instead of panics: Replace usages of `new` with `try_new` for Array types [\#8397](https://github.com/apache/arrow-rs/pull/8397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- \[Variant\] Fix NULL handling for shredded object fields [\#8395](https://github.com/apache/arrow-rs/pull/8395) ([scovich](https://github.com/scovich))
+- Add Arrow Variant Extension Type, remove `Array` impl for `VariantArray` and `ShreddedVariantFieldArray` [\#8392](https://github.com/apache/arrow-rs/pull/8392) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Minor cleanup creating Schema [\#8391](https://github.com/apache/arrow-rs/pull/8391) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Geospatial\]: Add CI checks for `parquet-geospatial` crate [\#8390](https://github.com/apache/arrow-rs/pull/8390) ([kylebarron](https://github.com/kylebarron))
+- Follow-up Improvements to Avro union handling [\#8385](https://github.com/apache/arrow-rs/pull/8385) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- fix: reset the offset of 'file\_for\_view' [\#8381](https://github.com/apache/arrow-rs/pull/8381) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([TrevorADHD](https://github.com/TrevorADHD))
+- \[Variant\] \[Shredding\] feat: Support typed\_access for Date32 [\#8379](https://github.com/apache/arrow-rs/pull/8379) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PinkCrow007](https://github.com/PinkCrow007))
+- \[Geospatial\]: Scaffolding for new `parquet-geospatial` crate [\#8375](https://github.com/apache/arrow-rs/pull/8375) ([kylebarron](https://github.com/kylebarron))
+- Avro writer prefix support [\#8371](https://github.com/apache/arrow-rs/pull/8371) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- \[Variant\] Define new shred\_variant function [\#8366](https://github.com/apache/arrow-rs/pull/8366) ([scovich](https://github.com/scovich))
+- Add arrow-avro Reader support for Dense Union and Union resolution \(Part 2\) [\#8349](https://github.com/apache/arrow-rs/pull/8349) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Move ParquetMetadata decoder state machine into ParquetMetadataPushDecoder [\#8340](https://github.com/apache/arrow-rs/pull/8340) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\]: Implement `DataType::FixedSizeList` support for `cast_to_variant` kernel [\#8282](https://github.com/apache/arrow-rs/pull/8282) ([liamzwbao](https://github.com/liamzwbao))
+
+## [56.2.0](https://github.com/apache/arrow-rs/tree/56.2.0) (2025-09-19)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/56.1.0...56.2.0)
+
+- \[Variant\] \[Shredding\] Support typed\_access for Utf8 and BinaryView [\#8364](https://github.com/apache/arrow-rs/pull/8364) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([petern48](https://github.com/petern48))
+- Fix casting floats to Decimal64 [\#8363](https://github.com/apache/arrow-rs/pull/8363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- \[Variant\] Implement new VariantValueArrayBuilder [\#8360](https://github.com/apache/arrow-rs/pull/8360) ([scovich](https://github.com/scovich))
+- \[Variant\] Add constants for empty variant metadata [\#8359](https://github.com/apache/arrow-rs/pull/8359) ([scovich](https://github.com/scovich))
+- \[Variant\] Allow lossless casting from integer to floating point [\#8357](https://github.com/apache/arrow-rs/pull/8357) ([scovich](https://github.com/scovich))
+- \[Variant\] Minor code cleanups [\#8356](https://github.com/apache/arrow-rs/pull/8356) ([scovich](https://github.com/scovich))
+- \[Variant\] Remove unused metadata from variant ShreddingState [\#8355](https://github.com/apache/arrow-rs/pull/8355) ([scovich](https://github.com/scovich))
+- Adds Map & Enum support, round-trip & benchmark tests [\#8353](https://github.com/apache/arrow-rs/pull/8353) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- \[Variant\] \[Shredding\] feat: Support typed\_access for FixedSizeBinary [\#8352](https://github.com/apache/arrow-rs/pull/8352) ([petern48](https://github.com/petern48))
+- Add arrow-avro Reader support for Dense Union and Union resolution \(Part 1\) [\#8348](https://github.com/apache/arrow-rs/pull/8348) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] feat: Support typed\_access for Boolean [\#8346](https://github.com/apache/arrow-rs/pull/8346) ([Weijun-H](https://github.com/Weijun-H))
+- \[Variant\] Make VariantToArrowRowBuilder an enum [\#8345](https://github.com/apache/arrow-rs/pull/8345) ([scovich](https://github.com/scovich))
+- \[Variant\] Rename VariantShreddingRowBuilder to VariantToArrowRowBuilder [\#8344](https://github.com/apache/arrow-rs/pull/8344) ([scovich](https://github.com/scovich))
+- \[Variant\] Add tests for variant\_get requesting Some struct [\#8343](https://github.com/apache/arrow-rs/pull/8343) ([scovich](https://github.com/scovich))
+- \[Variant\] Add nullable arg to StructArrayBuilder::with\_field [\#8342](https://github.com/apache/arrow-rs/pull/8342) ([scovich](https://github.com/scovich))
+- Minor: avoid an `Arc::clone` in CacheOptions for Parquet PredicateCache [\#8338](https://github.com/apache/arrow-rs/pull/8338) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix `can_cast_types` for temporal to `Utf8View` [\#8328](https://github.com/apache/arrow-rs/pull/8328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Update `variant_integration` test to use final approved `parquet-testing` data [\#8325](https://github.com/apache/arrow-rs/pull/8325) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] ParentState tracks builder-specific state in a uniform way [\#8324](https://github.com/apache/arrow-rs/pull/8324) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- \[Variant\] Remove boilerplate from make\_shredding\_row\_builder [\#8322](https://github.com/apache/arrow-rs/pull/8322) ([scovich](https://github.com/scovich))
+- \[Variant\] Move VariantAsPrimitive to type\_conversions.rs [\#8321](https://github.com/apache/arrow-rs/pull/8321) ([scovich](https://github.com/scovich))
+- \[Variant\] Remove unused output builder files [\#8320](https://github.com/apache/arrow-rs/pull/8320) ([scovich](https://github.com/scovich))
+- Add arrow-avro examples and Reader documentation [\#8316](https://github.com/apache/arrow-rs/pull/8316) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Expose predicates from RowFilter [\#8315](https://github.com/apache/arrow-rs/pull/8315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yeya24](https://github.com/yeya24))
+- \[Variant\] Implement row builders for cast\_to\_variant [\#8299](https://github.com/apache/arrow-rs/pull/8299) ([scovich](https://github.com/scovich))
+- Adds additional type support to arrow-avro writer [\#8298](https://github.com/apache/arrow-rs/pull/8298) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Use apache/arrow-dotnet for integration test [\#8295](https://github.com/apache/arrow-rs/pull/8295) ([kou](https://github.com/kou))
+- Add projection with default values support to `RecordDecoder` [\#8293](https://github.com/apache/arrow-rs/pull/8293) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add array/map/fixed schema resolution and default value support to arrow-avro codec [\#8292](https://github.com/apache/arrow-rs/pull/8292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Bump actions/labeler from 6.0.0 to 6.0.1 [\#8288](https://github.com/apache/arrow-rs/pull/8288) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/github-script from 7 to 8 [\#8287](https://github.com/apache/arrow-rs/pull/8287) ([dependabot[bot]](https://github.com/apps/dependabot))
+- \[Variant\] Add as\_u\* for Variant [\#8284](https://github.com/apache/arrow-rs/pull/8284) ([klion26](https://github.com/klion26))
+- \[Variant\] Support Shredded Objects in variant\_get \(take 2\) [\#8280](https://github.com/apache/arrow-rs/pull/8280) ([scovich](https://github.com/scovich))
+- Bump actions/setup-node from 4 to 5 [\#8279](https://github.com/apache/arrow-rs/pull/8279) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/setup-python from 5 to 6 [\#8278](https://github.com/apache/arrow-rs/pull/8278) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/labeler from 5.0.0 to 6.0.0 [\#8276](https://github.com/apache/arrow-rs/pull/8276) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Impl `Display` for `Tz` [\#8275](https://github.com/apache/arrow-rs/pull/8275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron))
+- Added List and Struct Encoding to arrow-avro Writer [\#8274](https://github.com/apache/arrow-rs/pull/8274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add into\_builder method for WriterProperties [\#8272](https://github.com/apache/arrow-rs/pull/8272) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([corwinjoy](https://github.com/corwinjoy))
+- chore\(parquet/record/field\): dont truncate timestamps on display [\#8266](https://github.com/apache/arrow-rs/pull/8266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Erigara](https://github.com/Erigara))
+- \[Parquet\] Write row group with async writer [\#8262](https://github.com/apache/arrow-rs/pull/8262) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lilianm](https://github.com/lilianm))
+- Parquet: Do not compress v2 data page when compress is bad quality [\#8257](https://github.com/apache/arrow-rs/pull/8257) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- Add Decimal32 and Decimal64 support to arrow-avro Reader [\#8255](https://github.com/apache/arrow-rs/pull/8255) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Minor\] Backport changes to metadata benchmark [\#8251](https://github.com/apache/arrow-rs/pull/8251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update hashbrown requirement from 0.15.1 to 0.16.0 [\#8248](https://github.com/apache/arrow-rs/pull/8248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Sort: Change lexsort comment from stable to unstable [\#8245](https://github.com/apache/arrow-rs/pull/8245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mapleFU](https://github.com/mapleFU))
+- pin comfy-table to 7.1.2 [\#8244](https://github.com/apache/arrow-rs/pull/8244) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zachschuermann](https://github.com/zachschuermann))
+- Adds Confluent wire format handling to arrow-avro crate [\#8242](https://github.com/apache/arrow-rs/pull/8242) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- feat: gRPC compression support for flight CLI [\#8240](https://github.com/apache/arrow-rs/pull/8240) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- feat: `SSLKEYLOGFILE` support for flight CLI [\#8239](https://github.com/apache/arrow-rs/pull/8239) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- \[Variant\] Refactor `cast_to_variant` [\#8235](https://github.com/apache/arrow-rs/pull/8235) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] add strict mode to cast\_to\_variant [\#8233](https://github.com/apache/arrow-rs/pull/8233) ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] Add Variant::as\_f16 [\#8232](https://github.com/apache/arrow-rs/pull/8232) ([klion26](https://github.com/klion26))
+- Unpin nightly rust version \(MIRI job\) [\#8229](https://github.com/apache/arrow-rs/pull/8229) ([mbrobbel](https://github.com/mbrobbel))
+- Update apache-avro requirement from 0.14.0 to 0.20.0 [\#8226](https://github.com/apache/arrow-rs/pull/8226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/upload-pages-artifact from 3 to 4 [\#8224](https://github.com/apache/arrow-rs/pull/8224) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Added arrow-avro enum mapping support for schema resolution [\#8223](https://github.com/apache/arrow-rs/pull/8223) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Added arrow-avro schema resolution value skipping [\#8220](https://github.com/apache/arrow-rs/pull/8220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Fix error condition in doc comment of `Field::try_canonical_extension_type` [\#8216](https://github.com/apache/arrow-rs/pull/8216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\]: Implement `DataType::Duration` support for `cast_to_variant` kernel [\#8215](https://github.com/apache/arrow-rs/pull/8215) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] feat: remove unnecessary unwraps in `Object::finish` [\#8214](https://github.com/apache/arrow-rs/pull/8214) ([Weijun-H](https://github.com/Weijun-H))
+- \[avro\] Fix Avro decoder bitmap corruption when nullable field decoding fails [\#8213](https://github.com/apache/arrow-rs/pull/8213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yongkyunlee](https://github.com/yongkyunlee))
+- Restore accidentally removed method Block::to\_ne\_bytes [\#8211](https://github.com/apache/arrow-rs/pull/8211) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- \[avro\] Support all default types for avro schema's record field [\#8210](https://github.com/apache/arrow-rs/pull/8210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yongkyunlee](https://github.com/yongkyunlee))
+- \[Variant\] Support read-only metadata builders [\#8208](https://github.com/apache/arrow-rs/pull/8208) ([scovich](https://github.com/scovich))
+- Avro to arrow schema conversion fails when a field has a default type that is not string [\#8209](https://github.com/apache/arrow-rs/issues/8209)
+- parquet: No method named `to_ne_bytes` found for struct `bloom_filter::Block` for target `s390x-unknown-linux-gnu` [\#8207](https://github.com/apache/arrow-rs/issues/8207)
+- \[Variant\] cast\_to\_variant will panic on certain `Date64` or Timestamp values [\#8155](https://github.com/apache/arrow-rs/issues/8155)
+- Parquet: Avoid page-size overflows i32 [\#8264](https://github.com/apache/arrow-rs/pull/8264) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+
+**Documentation updates:**
+
+- Update docstring comment for Writer::write\(\) in writer.rs [\#8267](https://github.com/apache/arrow-rs/pull/8267) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([YKoustubhRao](https://github.com/YKoustubhRao))
+
+**Closed issues:**
+
+- comfy-table release 7.2.0 breaks MSRV [\#8243](https://github.com/apache/arrow-rs/issues/8243)
+- \[Variant\] Add `Variant::as_f16` [\#8228](https://github.com/apache/arrow-rs/issues/8228)
+- Support appending raw bytes to variant objects and lists [\#8217](https://github.com/apache/arrow-rs/issues/8217)
+- `VariantArrayBuilder` uses `ParentState` for simpler rollbacks [\#8205](https://github.com/apache/arrow-rs/issues/8205)
+- Make `ObjectBuilder::finish` signature infallible [\#8184](https://github.com/apache/arrow-rs/issues/8184)
+- Improve performance of `i256` to `f64` [\#8013](https://github.com/apache/arrow-rs/issues/8013)
+
+**Merged pull requests:**
+
+- \[Variant\] Support Variant to PrimitiveArrow for unsigned integer [\#8369](https://github.com/apache/arrow-rs/pull/8369) ([klion26](https://github.com/klion26))
+- \[Variant\] \[Shredding\] Support typed\_access for Utf8 and BinaryView [\#8364](https://github.com/apache/arrow-rs/pull/8364) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([petern48](https://github.com/petern48))
+- Fix casting floats to Decimal64 [\#8363](https://github.com/apache/arrow-rs/pull/8363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- \[Variant\] Implement new VariantValueArrayBuilder [\#8360](https://github.com/apache/arrow-rs/pull/8360) ([scovich](https://github.com/scovich))
+- \[Variant\] Add constants for empty variant metadata [\#8359](https://github.com/apache/arrow-rs/pull/8359) ([scovich](https://github.com/scovich))
+- \[Variant\] Allow lossless casting from integer to floating point [\#8357](https://github.com/apache/arrow-rs/pull/8357) ([scovich](https://github.com/scovich))
+- \[Variant\] Minor code cleanups [\#8356](https://github.com/apache/arrow-rs/pull/8356) ([scovich](https://github.com/scovich))
+- \[Variant\] Remove unused metadata from variant ShreddingState [\#8355](https://github.com/apache/arrow-rs/pull/8355) ([scovich](https://github.com/scovich))
+- Adds Map & Enum support, round-trip & benchmark tests [\#8353](https://github.com/apache/arrow-rs/pull/8353) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- \[Variant\] \[Shredding\] feat: Support typed\_access for FixedSizeBinary [\#8352](https://github.com/apache/arrow-rs/pull/8352) ([petern48](https://github.com/petern48))
+- Add arrow-avro Reader support for Dense Union and Union resolution \(Part 1\) [\#8348](https://github.com/apache/arrow-rs/pull/8348) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] feat: Support typed\_access for Boolean [\#8346](https://github.com/apache/arrow-rs/pull/8346) ([Weijun-H](https://github.com/Weijun-H))
+- \[Variant\] Make VariantToArrowRowBuilder an enum [\#8345](https://github.com/apache/arrow-rs/pull/8345) ([scovich](https://github.com/scovich))
+- \[Variant\] Rename VariantShreddingRowBuilder to VariantToArrowRowBuilder [\#8344](https://github.com/apache/arrow-rs/pull/8344) ([scovich](https://github.com/scovich))
+- \[Variant\] Add tests for variant\_get requesting Some struct [\#8343](https://github.com/apache/arrow-rs/pull/8343) ([scovich](https://github.com/scovich))
+- \[Variant\] Add nullable arg to StructArrayBuilder::with\_field [\#8342](https://github.com/apache/arrow-rs/pull/8342) ([scovich](https://github.com/scovich))
+- Minor: avoid an `Arc::clone` in CacheOptions for Parquet PredicateCache [\#8338](https://github.com/apache/arrow-rs/pull/8338) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix `can_cast_types` for temporal to `Utf8View` [\#8328](https://github.com/apache/arrow-rs/pull/8328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Update `variant_integration` test to use final approved `parquet-testing` data [\#8325](https://github.com/apache/arrow-rs/pull/8325) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] ParentState tracks builder-specific state in a uniform way [\#8324](https://github.com/apache/arrow-rs/pull/8324) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- \[Variant\] Remove boilerplate from make\_shredding\_row\_builder [\#8322](https://github.com/apache/arrow-rs/pull/8322) ([scovich](https://github.com/scovich))
+- \[Variant\] Move VariantAsPrimitive to type\_conversions.rs [\#8321](https://github.com/apache/arrow-rs/pull/8321) ([scovich](https://github.com/scovich))
+- \[Variant\] Remove unused output builder files [\#8320](https://github.com/apache/arrow-rs/pull/8320) ([scovich](https://github.com/scovich))
+- Add arrow-avro examples and Reader documentation [\#8316](https://github.com/apache/arrow-rs/pull/8316) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Expose predicates from RowFilter [\#8315](https://github.com/apache/arrow-rs/pull/8315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yeya24](https://github.com/yeya24))
+- \[Variant\] Implement row builders for cast\_to\_variant [\#8299](https://github.com/apache/arrow-rs/pull/8299) ([scovich](https://github.com/scovich))
+- Adds additional type support to arrow-avro writer [\#8298](https://github.com/apache/arrow-rs/pull/8298) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Use apache/arrow-dotnet for integration test [\#8295](https://github.com/apache/arrow-rs/pull/8295) ([kou](https://github.com/kou))
+- Add projection with default values support to `RecordDecoder` [\#8293](https://github.com/apache/arrow-rs/pull/8293) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add array/map/fixed schema resolution and default value support to arrow-avro codec [\#8292](https://github.com/apache/arrow-rs/pull/8292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Bump actions/labeler from 6.0.0 to 6.0.1 [\#8288](https://github.com/apache/arrow-rs/pull/8288) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/github-script from 7 to 8 [\#8287](https://github.com/apache/arrow-rs/pull/8287) ([dependabot[bot]](https://github.com/apps/dependabot))
+- \[Variant\] Add as\_u\* for Variant [\#8284](https://github.com/apache/arrow-rs/pull/8284) ([klion26](https://github.com/klion26))
+- \[Variant\] Support Shredded Objects in variant\_get \(take 2\) [\#8280](https://github.com/apache/arrow-rs/pull/8280) ([scovich](https://github.com/scovich))
+- Bump actions/setup-node from 4 to 5 [\#8279](https://github.com/apache/arrow-rs/pull/8279) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/setup-python from 5 to 6 [\#8278](https://github.com/apache/arrow-rs/pull/8278) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/labeler from 5.0.0 to 6.0.0 [\#8276](https://github.com/apache/arrow-rs/pull/8276) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Impl `Display` for `Tz` [\#8275](https://github.com/apache/arrow-rs/pull/8275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron))
+- Added List and Struct Encoding to arrow-avro Writer [\#8274](https://github.com/apache/arrow-rs/pull/8274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add into\_builder method for WriterProperties [\#8272](https://github.com/apache/arrow-rs/pull/8272) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([corwinjoy](https://github.com/corwinjoy))
+- chore\(parquet/record/field\): dont truncate timestamps on display [\#8266](https://github.com/apache/arrow-rs/pull/8266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Erigara](https://github.com/Erigara))
+- \[Parquet\] Write row group with async writer [\#8262](https://github.com/apache/arrow-rs/pull/8262) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lilianm](https://github.com/lilianm))
+- Parquet: Do not compress v2 data page when compress is bad quality [\#8257](https://github.com/apache/arrow-rs/pull/8257) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- Add Decimal32 and Decimal64 support to arrow-avro Reader [\#8255](https://github.com/apache/arrow-rs/pull/8255) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Minor\] Backport changes to metadata benchmark [\#8251](https://github.com/apache/arrow-rs/pull/8251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update hashbrown requirement from 0.15.1 to 0.16.0 [\#8248](https://github.com/apache/arrow-rs/pull/8248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Sort: Change lexsort comment from stable to unstable [\#8245](https://github.com/apache/arrow-rs/pull/8245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mapleFU](https://github.com/mapleFU))
+- pin comfy-table to 7.1.2 [\#8244](https://github.com/apache/arrow-rs/pull/8244) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zachschuermann](https://github.com/zachschuermann))
+- Adds Confluent wire format handling to arrow-avro crate [\#8242](https://github.com/apache/arrow-rs/pull/8242) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- feat: gRPC compression support for flight CLI [\#8240](https://github.com/apache/arrow-rs/pull/8240) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- feat: `SSLKEYLOGFILE` support for flight CLI [\#8239](https://github.com/apache/arrow-rs/pull/8239) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum))
+- \[Variant\] Refactor `cast_to_variant` [\#8235](https://github.com/apache/arrow-rs/pull/8235) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] add strict mode to cast\_to\_variant [\#8233](https://github.com/apache/arrow-rs/pull/8233) ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] Add Variant::as\_f16 [\#8232](https://github.com/apache/arrow-rs/pull/8232) ([klion26](https://github.com/klion26))
+- Unpin nightly rust version \(MIRI job\) [\#8229](https://github.com/apache/arrow-rs/pull/8229) ([mbrobbel](https://github.com/mbrobbel))
+- Update apache-avro requirement from 0.14.0 to 0.20.0 [\#8226](https://github.com/apache/arrow-rs/pull/8226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Bump actions/upload-pages-artifact from 3 to 4 [\#8224](https://github.com/apache/arrow-rs/pull/8224) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Added arrow-avro enum mapping support for schema resolution [\#8223](https://github.com/apache/arrow-rs/pull/8223) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Added arrow-avro schema resolution value skipping [\#8220](https://github.com/apache/arrow-rs/pull/8220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Fix error condition in doc comment of `Field::try_canonical_extension_type` [\#8216](https://github.com/apache/arrow-rs/pull/8216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\]: Implement `DataType::Duration` support for `cast_to_variant` kernel [\#8215](https://github.com/apache/arrow-rs/pull/8215) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] feat: remove unnecessary unwraps in `Object::finish` [\#8214](https://github.com/apache/arrow-rs/pull/8214) ([Weijun-H](https://github.com/Weijun-H))
+- \[avro\] Fix Avro decoder bitmap corruption when nullable field decoding fails [\#8213](https://github.com/apache/arrow-rs/pull/8213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yongkyunlee](https://github.com/yongkyunlee))
+- Restore accidentally removed method Block::to\_ne\_bytes [\#8211](https://github.com/apache/arrow-rs/pull/8211) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- \[avro\] Support all default types for avro schema's record field [\#8210](https://github.com/apache/arrow-rs/pull/8210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yongkyunlee](https://github.com/yongkyunlee))
+- \[Variant\] Support read-only metadata builders [\#8208](https://github.com/apache/arrow-rs/pull/8208) ([scovich](https://github.com/scovich))
+- \[Variant\] VariantArrayBuilder uses MetadataBuilder and ValueBuilder [\#8206](https://github.com/apache/arrow-rs/pull/8206) ([scovich](https://github.com/scovich))
+- \[Variant\]: Implement DataType::List/LargeList support for cast\_to\_variant kernel [\#8201](https://github.com/apache/arrow-rs/pull/8201) ([sdf-jkl](https://github.com/sdf-jkl))
+- \[Variant\]: Implement `DataType::Union` support for `cast_to_variant` kernel [\#8196](https://github.com/apache/arrow-rs/pull/8196) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] Support typed access for numeric types in variant\_get [\#8179](https://github.com/apache/arrow-rs/pull/8179) ([superserious-dev](https://github.com/superserious-dev))
+- \[Variant\]: Implement `DataType::Union` support for `cast_to_variant` kernel [\#8195](https://github.com/apache/arrow-rs/issues/8195)
+- \[Variant\]: Implement `DataType::Duration` support for `cast_to_variant` kernel [\#8194](https://github.com/apache/arrow-rs/issues/8194)
+- \[Variant\] Support typed access for numeric types in variant\_get [\#8178](https://github.com/apache/arrow-rs/issues/8178)
+- \[Parquet\] Implement a "push style" API for decoding Parquet Metadata [\#8164](https://github.com/apache/arrow-rs/issues/8164)
+- \[Variant\] Support creating Variants with pre-existing Metadata [\#8152](https://github.com/apache/arrow-rs/issues/8152)
+- \[Variant\] Support Shredded Objects in `variant_get`: typed path access \(STEP 1\) [\#8150](https://github.com/apache/arrow-rs/issues/8150)
+- \[Variant\] Add `variant` feature to `parquet` crate [\#8132](https://github.com/apache/arrow-rs/issues/8132)
+- \[Parquet\] Concurrent writes with ArrowWriter.get\_column\_writers should parallelize across row groups [\#8115](https://github.com/apache/arrow-rs/issues/8115)
+- \[Variant\] Implement `VariantArray::value` for shredded variants [\#8091](https://github.com/apache/arrow-rs/issues/8091)
+- \[Variant\] Integration tests for reading parquet w/ Variants [\#8084](https://github.com/apache/arrow-rs/issues/8084)
+- \[Variant\]: Implement `DataType::Map` support for `cast_to_variant` kernel [\#8063](https://github.com/apache/arrow-rs/issues/8063)
+- \[Variant\]: Implement `DataType::List/LargeList` support for `cast_to_variant` kernel [\#8060](https://github.com/apache/arrow-rs/issues/8060)
+
+## [56.1.0](https://github.com/apache/arrow-rs/tree/56.1.0) (2025-08-21)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/56.0.0...56.1.0)
+
+**Implemented enhancements:**
+
+- Implement cast and other operations on decimal32 and decimal64 \#7815 [\#8204](https://github.com/apache/arrow-rs/issues/8204) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Speed up Parquet filter pushdown with predicate cache [\#8203](https://github.com/apache/arrow-rs/issues/8203) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Optionally read parquet page indexes [\#8070](https://github.com/apache/arrow-rs/issues/8070) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Parquet reader: add method for sync reader read bloom filter [\#8023](https://github.com/apache/arrow-rs/issues/8023) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[parquet\] Support writing logically equivalent types to `ArrowWriter` [\#8012](https://github.com/apache/arrow-rs/issues/8012) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Improve StringArray\(Utf8\) sort performance [\#7847](https://github.com/apache/arrow-rs/issues/7847) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- feat: arrow-ipc delta dictionary support [\#8001](https://github.com/apache/arrow-rs/pull/8001) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([JakeDern](https://github.com/JakeDern))
+
+**Fixed bugs:**
+
+- The Rustdocs are clean CI job is failing [\#8175](https://github.com/apache/arrow-rs/issues/8175)
+- \[avro\] Bug in resolving avro schema with named type [\#8045](https://github.com/apache/arrow-rs/issues/8045) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Doc test failure \(test arrow-avro/src/lib.rs - reader\) when verifying avro 56.0.0 RC1 release [\#8018](https://github.com/apache/arrow-rs/issues/8018) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+
+**Documentation updates:**
+
+- arrow-row: Document dictionary handling [\#8168](https://github.com/apache/arrow-rs/pull/8168) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Docs: Clarify that Array::value does not check for nulls [\#8065](https://github.com/apache/arrow-rs/pull/8065) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: Fix a typo in README [\#8036](https://github.com/apache/arrow-rs/pull/8036) ([EricccTaiwan](https://github.com/EricccTaiwan))
+- Add more comments to the internal parquet reader [\#7932](https://github.com/apache/arrow-rs/pull/7932) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+
+**Performance improvements:**
+
+- perf\(arrow-ipc\): avoid counting nulls in `RecordBatchDecoder` [\#8127](https://github.com/apache/arrow-rs/pull/8127) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Use `Vec` directly in builders [\#7984](https://github.com/apache/arrow-rs/pull/7984) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- Improve StringArray\(Utf8\) sort performance \(~2-4x faster\) [\#7860](https://github.com/apache/arrow-rs/pull/7860) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+
+**Closed issues:**
+
+- \[Variant\] Improve fuzz test for Variant [\#8199](https://github.com/apache/arrow-rs/issues/8199)
+- \[Variant\] Improve fuzz test for Variant [\#8198](https://github.com/apache/arrow-rs/issues/8198)
+- `VariantArrayBuilder` tracks starting offsets instead of \(offset, len\) pairs [\#8192](https://github.com/apache/arrow-rs/issues/8192)
+- Rework `ValueBuilder` API to work with `ParentState` for reliable nested rollbacks [\#8188](https://github.com/apache/arrow-rs/issues/8188)
+- \[Variant\] Rename `ValueBuffer` as `ValueBuilder` [\#8186](https://github.com/apache/arrow-rs/issues/8186)
+- \[Variant\] Refactor `ParentState` to track and rollback state on behalf of its owning builder [\#8182](https://github.com/apache/arrow-rs/issues/8182)
+- \[Variant\] `ObjectBuilder` should detect duplicates at insertion time, not at finish [\#8180](https://github.com/apache/arrow-rs/issues/8180)
+- \[Variant\] ObjectBuilder does not reliably check for duplicates [\#8170](https://github.com/apache/arrow-rs/issues/8170)
+- \[Variant\] Support `StringView` and `LargeString` in `batch_json_string_to_variant` [\#8145](https://github.com/apache/arrow-rs/issues/8145) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Rename `batch_json_string_to_variant` and `batch_variant_to_json_string` json\_to\_variant [\#8144](https://github.com/apache/arrow-rs/issues/8144) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[avro\] Use `tempfile` crate rather than custom temporary file generator in tests [\#8143](https://github.com/apache/arrow-rs/issues/8143) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Avro\] Use `Write` rather than `dyn Write` in Decoder [\#8142](https://github.com/apache/arrow-rs/issues/8142) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Nested builder rollback is broken [\#8136](https://github.com/apache/arrow-rs/issues/8136)
+- \[Variant\] Add support for the remaining primitive types \(timestamp\_nanos/timestampntz\_nanos/uuid\) for parquet variant [\#8126](https://github.com/apache/arrow-rs/issues/8126)
+- Meta: Implement missing Arrow 56.0 lint rules - Sequential workflow [\#8121](https://github.com/apache/arrow-rs/issues/8121)
+- ARROW-012-015: Add linter rules for remaining Arrow 56.0 breaking changes [\#8120](https://github.com/apache/arrow-rs/issues/8120)
+- ARROW-010 & ARROW-011: Add linter rules for Parquet Statistics and Metadata API removals [\#8119](https://github.com/apache/arrow-rs/issues/8119)
+- ARROW-009: Add linter rules for IPC Dictionary API removals in Arrow 56.0 [\#8118](https://github.com/apache/arrow-rs/issues/8118)
+- ARROW-008: Add linter rule for SerializedPageReaderState usize→u64 breaking change [\#8117](https://github.com/apache/arrow-rs/issues/8117)
+- ARROW-007: Add linter rule for Schema.all\_fields\(\) removal in Arrow 56.0 [\#8116](https://github.com/apache/arrow-rs/issues/8116)
+- \[Variant\] Implement `ShreddingState::AllNull` variant  [\#8088](https://github.com/apache/arrow-rs/issues/8088) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Support Shredded Objects in `variant_get` [\#8083](https://github.com/apache/arrow-rs/issues/8083) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::RunEndEncoded` support for `cast_to_variant` kernel [\#8064](https://github.com/apache/arrow-rs/issues/8064) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Dictionary` support for `cast_to_variant` kernel [\#8062](https://github.com/apache/arrow-rs/issues/8062) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Struct` support for `cast_to_variant` kernel [\#8061](https://github.com/apache/arrow-rs/issues/8061) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Decimal32/Decimal64/Decimal128/Decimal256` support for `cast_to_variant` kernel [\#8059](https://github.com/apache/arrow-rs/issues/8059) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Timestamp(..)` support for `cast_to_variant` kernel [\#8058](https://github.com/apache/arrow-rs/issues/8058) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Float16` support for `cast_to_variant` kernel [\#8057](https://github.com/apache/arrow-rs/issues/8057) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Interval` support for `cast_to_variant` kernel [\#8056](https://github.com/apache/arrow-rs/issues/8056) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Time32/Time64` support for `cast_to_variant` kernel [\#8055](https://github.com/apache/arrow-rs/issues/8055) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Date32 / DataType::Date64` support for `cast_to_variant` kernel [\#8054](https://github.com/apache/arrow-rs/issues/8054) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Null` support for `cast_to_variant` kernel [\#8053](https://github.com/apache/arrow-rs/issues/8053) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Boolean` support for `cast_to_variant` kernel [\#8052](https://github.com/apache/arrow-rs/issues/8052) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::FixedSizeBinary` support for `cast_to_variant` kernel [\#8051](https://github.com/apache/arrow-rs/issues/8051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Binary/LargeBinary/BinaryView` support for `cast_to_variant` kernel [\#8050](https://github.com/apache/arrow-rs/issues/8050) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]: Implement `DataType::Utf8/LargeUtf8/Utf8View` support for `cast_to_variant` kernel [\#8049](https://github.com/apache/arrow-rs/issues/8049) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Implement `cast_to_variant` kernel [\#8043](https://github.com/apache/arrow-rs/issues/8043) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Support `variant_get` kernel for shredded variants [\#7941](https://github.com/apache/arrow-rs/issues/7941) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Add test for casting `Decimal128` \(`i128::MIN` and `i128::MAX`\) to `f64` with overflow handling [\#7939](https://github.com/apache/arrow-rs/issues/7939) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+
+**Merged pull requests:**
+
+- \[Variant\] Enhance the variant fuzz test to cover time/timestamp/uuid primitive types [\#8200](https://github.com/apache/arrow-rs/pull/8200) ([klion26](https://github.com/klion26))
+- \[Variant\] VariantArrayBuilder tracks only offsets [\#8193](https://github.com/apache/arrow-rs/pull/8193) ([scovich](https://github.com/scovich))
+- \[Variant\] Caller provides ParentState to ValueBuilder methods [\#8189](https://github.com/apache/arrow-rs/pull/8189) ([scovich](https://github.com/scovich))
+- \[Variant\] Rename ValueBuffer as ValueBuilder [\#8187](https://github.com/apache/arrow-rs/pull/8187) ([scovich](https://github.com/scovich))
+- \[Variant\] ParentState handles finish/rollback for builders [\#8185](https://github.com/apache/arrow-rs/pull/8185) ([scovich](https://github.com/scovich))
+- \[Variant\]: Implement `DataType::RunEndEncoded` support for `cast_to_variant` kernel [\#8174](https://github.com/apache/arrow-rs/pull/8174) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\]: Implement `DataType::Dictionary` support for `cast_to_variant` kernel [\#8173](https://github.com/apache/arrow-rs/pull/8173) ([liamzwbao](https://github.com/liamzwbao))
+- Implement `ArrayBuilder` for `UnionBuilder` [\#8169](https://github.com/apache/arrow-rs/pull/8169) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([grtlr](https://github.com/grtlr))
+- \[Variant\] Support `LargeString` and `StringView` in `batch_json_string_to_variant` [\#8163](https://github.com/apache/arrow-rs/pull/8163) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] Rename `batch_json_string_to_variant` and `batch_variant_to_json_string` [\#8161](https://github.com/apache/arrow-rs/pull/8161) ([liamzwbao](https://github.com/liamzwbao))
+- \[Variant\] Add primitive type timestamp\_nanos\(with&without timezone\) and uuid [\#8149](https://github.com/apache/arrow-rs/pull/8149) ([klion26](https://github.com/klion26))
+- refactor\(avro\): Use impl Write instead of dyn Write in encoder [\#8148](https://github.com/apache/arrow-rs/pull/8148) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Xuanwo](https://github.com/Xuanwo))
+- chore: Use tempfile to replace hand-written utils functions [\#8147](https://github.com/apache/arrow-rs/pull/8147) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Xuanwo](https://github.com/Xuanwo))
+- feat: support push batch direct to completed and add biggest coalesce batch support [\#8146](https://github.com/apache/arrow-rs/pull/8146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- \[Variant\] Add human-readable impl Debug for Variant [\#8140](https://github.com/apache/arrow-rs/pull/8140) ([scovich](https://github.com/scovich))
+- \[Variant\] Fix broken metadata builder rollback [\#8135](https://github.com/apache/arrow-rs/pull/8135) ([scovich](https://github.com/scovich))
+- \[Variant\]: Implement DataType::Interval support for cast\_to\_variant kernel [\#8125](https://github.com/apache/arrow-rs/pull/8125) ([codephage2020](https://github.com/codephage2020))
+- Add schema resolution and type promotion support to arrow-avro Decoder [\#8124](https://github.com/apache/arrow-rs/pull/8124) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add Initial `arrow-avro` writer implementation with basic type support [\#8123](https://github.com/apache/arrow-rs/pull/8123) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] Add Variant::Time primitive and cast logic [\#8114](https://github.com/apache/arrow-rs/pull/8114) ([klion26](https://github.com/klion26))
+- \[Variant\] Support Timestamp to variant for `cast_to_variant` kernel [\#8113](https://github.com/apache/arrow-rs/pull/8113) ([abacef](https://github.com/abacef))
+- Bump actions/checkout from 4 to 5 [\#8110](https://github.com/apache/arrow-rs/pull/8110) ([dependabot[bot]](https://github.com/apps/dependabot))
+- \[Variant\]: add `DataType::Null` support to cast\_to\_variant [\#8107](https://github.com/apache/arrow-rs/pull/8107) ([feniljain](https://github.com/feniljain))
+- \[Variant\] Adding fixed size byte array to variant and test [\#8106](https://github.com/apache/arrow-rs/pull/8106) ([abacef](https://github.com/abacef))
+- \[VARIANT\] Initial integration tests for variant reads [\#8104](https://github.com/apache/arrow-rs/pull/8104) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([carpecodeum](https://github.com/carpecodeum))
+- \[Variant\]: Implement `DataType::Decimal32/Decimal64/Decimal128/Decimal256` support for `cast_to_variant` kernel [\#8101](https://github.com/apache/arrow-rs/pull/8101) ([liamzwbao](https://github.com/liamzwbao))
+- Refactor arrow-avro `Decoder` to support partial decoding [\#8100](https://github.com/apache/arrow-rs/pull/8100) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- fix: Validate metadata len in IPC reader  [\#8097](https://github.com/apache/arrow-rs/pull/8097) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([JakeDern](https://github.com/JakeDern))
+- \[parquet\] further improve logical type compatibility in ArrowWriter [\#8095](https://github.com/apache/arrow-rs/pull/8095) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
+- \[Variant\] Implement ShreddingState::AllNull variant [\#8093](https://github.com/apache/arrow-rs/pull/8093) ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] Minor: Add comments to tickets for follow on items [\#8092](https://github.com/apache/arrow-rs/pull/8092) ([alamb](https://github.com/alamb))
+- \[VARIANT\] Add support for DataType::Struct for cast\_to\_variant [\#8090](https://github.com/apache/arrow-rs/pull/8090) ([carpecodeum](https://github.com/carpecodeum))
+- \[VARIANT\] Add support for DataType::Utf8/LargeUtf8/Utf8View for cast\_to\_variant [\#8089](https://github.com/apache/arrow-rs/pull/8089) ([carpecodeum](https://github.com/carpecodeum))
+- \[Variant\] Implement `DataType::Boolean` support for `cast_to_variant` kernel [\#8085](https://github.com/apache/arrow-rs/pull/8085) ([sdf-jkl](https://github.com/sdf-jkl))
+- \[Variant\] Implement `DataType::{Date32,Date64}` =\> `Variant::Date` [\#8081](https://github.com/apache/arrow-rs/pull/8081) ([superserious-dev](https://github.com/superserious-dev))
+- Fix new clippy lints from Rust 1.89 [\#8078](https://github.com/apache/arrow-rs/pull/8078) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
+- Implement ArrowSchema to AvroSchema conversion logic in arrow-avro [\#8075](https://github.com/apache/arrow-rs/pull/8075) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Implement `DataType::{Binary, LargeBinary, BinaryView}` =\> `Variant::Binary` [\#8074](https://github.com/apache/arrow-rs/pull/8074) ([superserious-dev](https://github.com/superserious-dev))
+- \[Variant\] Implement `DataType::Float16` =\> `Variant::Float` [\#8073](https://github.com/apache/arrow-rs/pull/8073) ([superserious-dev](https://github.com/superserious-dev))
+- create PageIndexPolicy to allow optional indexes [\#8071](https://github.com/apache/arrow-rs/pull/8071) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kczimm](https://github.com/kczimm))
+- \[Variant\] Minor: use From impl to make conversion infallible [\#8068](https://github.com/apache/arrow-rs/pull/8068) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Bump actions/download-artifact from 4 to 5 [\#8066](https://github.com/apache/arrow-rs/pull/8066) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Added arrow-avro schema resolution foundations and type promotion [\#8047](https://github.com/apache/arrow-rs/pull/8047) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Fix arrow-avro type resolver register bug [\#8046](https://github.com/apache/arrow-rs/pull/8046) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yongkyunlee](https://github.com/yongkyunlee))
+- implement `cast_to_variant` kernel to cast native types to `VariantArray` [\#8044](https://github.com/apache/arrow-rs/pull/8044) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add arrow-avro `SchemaStore` and fingerprinting [\#8039](https://github.com/apache/arrow-rs/pull/8039) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add more benchmarks for Parquet thrift decoding [\#8037](https://github.com/apache/arrow-rs/pull/8037) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Support multi-threaded writing of Parquet files with modular encryption [\#8029](https://github.com/apache/arrow-rs/pull/8029) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rok](https://github.com/rok))
+- Add arrow-avro Decoder Benchmarks  [\#8025](https://github.com/apache/arrow-rs/pull/8025) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- feat: add method for sync Parquet reader read bloom filter [\#8024](https://github.com/apache/arrow-rs/pull/8024) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([mapleFU](https://github.com/mapleFU))
+- \[Variant\] Add `variant_get` and Shredded `VariantArray` [\#8021](https://github.com/apache/arrow-rs/pull/8021) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Implement arrow-avro SchemaStore and Fingerprinting To Enable Schema Resolution [\#8006](https://github.com/apache/arrow-rs/pull/8006) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Parquet\] Add tests for IO/CPU access in parquet reader [\#7971](https://github.com/apache/arrow-rs/pull/7971) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Speed up Parquet filter pushdown v4 \(Predicate evaluation cache for async\_reader\) [\#7850](https://github.com/apache/arrow-rs/pull/7850) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- Implement cast and other operations on decimal32 and decimal64 [\#7815](https://github.com/apache/arrow-rs/pull/7815) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([CurtHagenlocher](https://github.com/CurtHagenlocher))
+## [56.0.0](https://github.com/apache/arrow-rs/tree/56.0.0) (2025-07-29)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/55.2.0...56.0.0)
+
+**Breaking changes:**
+
+- arrow-schema: Remove dict\_id from being required equal for merging [\#7968](https://github.com/apache/arrow-rs/pull/7968) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- \[Parquet\] Use `u64` for `SerializedPageReaderState.offset` & `remaining_bytes`, instead of `usize` [\#7918](https://github.com/apache/arrow-rs/pull/7918) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- Upgrade tonic dependencies to 0.13.0 version \(try 2\) [\#7839](https://github.com/apache/arrow-rs/pull/7839) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
+- Remove deprecated Arrow functions [\#7830](https://github.com/apache/arrow-rs/pull/7830) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([etseidl](https://github.com/etseidl))
+- Remove deprecated temporal functions [\#7813](https://github.com/apache/arrow-rs/pull/7813) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([etseidl](https://github.com/etseidl))
+- Remove functions from parquet crate deprecated in or before 54.0.0 [\#7811](https://github.com/apache/arrow-rs/pull/7811) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- GH-7686: \[Parquet\] Fix int96 min/max stats [\#7687](https://github.com/apache/arrow-rs/pull/7687) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rahulketch](https://github.com/rahulketch))
+
+**Implemented enhancements:**
+
+- \[parquet\] Relax type restriction to allow writing dictionary/native batches for same column [\#8004](https://github.com/apache/arrow-rs/issues/8004)
+- Support casting int64 to interval [\#7988](https://github.com/apache/arrow-rs/issues/7988) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add `ListBuilder::with_value` for convenience [\#7951](https://github.com/apache/arrow-rs/issues/7951) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Add `ObjectBuilder::with_field` for convenience [\#7949](https://github.com/apache/arrow-rs/issues/7949) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Impl PartialEq for VariantObject \#7943 [\#7948](https://github.com/apache/arrow-rs/issues/7948)
+- \[Variant\] Offer `simdutf8` as an optional dependency when validating metadata [\#7902](https://github.com/apache/arrow-rs/issues/7902) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Avoid collecting offset iterator [\#7901](https://github.com/apache/arrow-rs/issues/7901) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Remove superfluous check when validating monotonic offsets [\#7900](https://github.com/apache/arrow-rs/issues/7900) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Avoid extra allocation in `ObjectBuilder` [\#7899](https://github.com/apache/arrow-rs/issues/7899) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]\[Compute\] `variant_get` kernel [\#7893](https://github.com/apache/arrow-rs/issues/7893) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\]\[Compute\] Add batch processing for Variant-JSON String conversion [\#7883](https://github.com/apache/arrow-rs/issues/7883) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support `MapArray` in lexsort [\#7881](https://github.com/apache/arrow-rs/issues/7881) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add testing for invalid variants \(fuzz testing??\) [\#7842](https://github.com/apache/arrow-rs/issues/7842) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] VariantMetadata, VariantList and VariantObject are too big for Copy [\#7831](https://github.com/apache/arrow-rs/issues/7831) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Allow choosing flate2 backend [\#7826](https://github.com/apache/arrow-rs/issues/7826) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Tests for creating "large" `VariantObject`s [\#7821](https://github.com/apache/arrow-rs/issues/7821) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Tests for creating "large" `VariantList`s [\#7820](https://github.com/apache/arrow-rs/issues/7820) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Support VariantBuilder to write to buffers owned by the caller [\#7805](https://github.com/apache/arrow-rs/issues/7805) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Move JSON related functionality to different crate. [\#7800](https://github.com/apache/arrow-rs/issues/7800) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Add flag in `ObjectBuilder` to control validation behavior on duplicate field write [\#7777](https://github.com/apache/arrow-rs/issues/7777) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] make `serde_json` an optional dependency of `parquet-variant` [\#7775](https://github.com/apache/arrow-rs/issues/7775) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[coalesce\] Implement specialized `BatchCoalescer::push_batch` for `PrimitiveArray` [\#7763](https://github.com/apache/arrow-rs/issues/7763) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add sort\_kernel benchmark for StringViewArray case [\#7758](https://github.com/apache/arrow-rs/issues/7758) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Improved API for accessing Variant Objects and lists [\#7756](https://github.com/apache/arrow-rs/issues/7756) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Buildable reproducible release builds [\#7751](https://github.com/apache/arrow-rs/issues/7751)
+- Allow per-column parquet dictionary page size limit [\#7723](https://github.com/apache/arrow-rs/issues/7723) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Test and implement efficient building for "large" Arrays [\#7699](https://github.com/apache/arrow-rs/issues/7699) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Improve VariantBuilder when creating field name dictionaries / sorted dictionaries [\#7698](https://github.com/apache/arrow-rs/issues/7698) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Add input validation in `VariantBuilder` [\#7697](https://github.com/apache/arrow-rs/issues/7697) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Support Nested Data in `VariantBuilder` [\#7696](https://github.com/apache/arrow-rs/issues/7696) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Parquet: Incorrect min/max stats for int96 columns [\#7686](https://github.com/apache/arrow-rs/issues/7686) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Add `DictionaryArray::gc` method [\#7683](https://github.com/apache/arrow-rs/issues/7683) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add negative tests for reading invalid primitive variant values [\#7645](https://github.com/apache/arrow-rs/issues/7645) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Fixed bugs:**
+
+- \[Variant\] Panic when appending nested objects to VariantBuilder [\#7907](https://github.com/apache/arrow-rs/issues/7907) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Panic when casting large Decimal256 to f64 due to unchecked `unwrap()`  [\#7886](https://github.com/apache/arrow-rs/issues/7886) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Incorrect inlined string view comparison after "Add prefix compare for inlined" [\#7874](https://github.com/apache/arrow-rs/issues/7874) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] `test_json_to_variant_object_very_large` takes over 20s [\#7872](https://github.com/apache/arrow-rs/issues/7872) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] If `ObjectBuilder::finalize` is not called, the resulting Variant object is malformed. [\#7863](https://github.com/apache/arrow-rs/issues/7863) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- CSV error message has values transposed [\#7848](https://github.com/apache/arrow-rs/issues/7848) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Concating struct arrays with no fields unnecessarily errors [\#7828](https://github.com/apache/arrow-rs/issues/7828) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Clippy CI is failing on main after Rust `1.88` upgrade [\#7796](https://github.com/apache/arrow-rs/issues/7796) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- \[Variant\] Field lookup with out of bounds index causes unwanted behavior [\#7784](https://github.com/apache/arrow-rs/issues/7784) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Error verifying `parquet-variant` crate on 55.2.0 with `verify-release-candidate.sh` [\#7746](https://github.com/apache/arrow-rs/issues/7746)
+- `test_to_pyarrow` tests fail during release verification [\#7736](https://github.com/apache/arrow-rs/issues/7736) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[parquet\_derive\] Example for ParquetRecordWriter is broken. [\#7732](https://github.com/apache/arrow-rs/issues/7732)
+- \[Variant\] `Variant::Object` can contain two fields with the same field name [\#7730](https://github.com/apache/arrow-rs/issues/7730) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Panic when appending Object or List to VariantBuilder [\#7701](https://github.com/apache/arrow-rs/issues/7701) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Slicing a single-field dense union array creates an array with incorrect `logical_nulls` length  [\#7647](https://github.com/apache/arrow-rs/issues/7647) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Ensure page encoding statistics are written to Parquet file [\#7643](https://github.com/apache/arrow-rs/pull/7643) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Documentation updates:**
+
+- Minor: Update `cast_with_options` docs about casting integers --\> intervals [\#8002](https://github.com/apache/arrow-rs/pull/8002) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: More docs to `BatchCoalescer` [\#7891](https://github.com/apache/arrow-rs/pull/7891) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([2010YOUY01](https://github.com/2010YOUY01))
+- chore: fix a typo in `ExtensionType::supports_data_type` docs [\#7682](https://github.com/apache/arrow-rs/pull/7682) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Add variant docs and examples [\#7661](https://github.com/apache/arrow-rs/pull/7661) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Minor: Add version to deprecation notice for `ParquetMetaDataReader::decode_footer` [\#7639](https://github.com/apache/arrow-rs/pull/7639) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Performance improvements:**
+
+- `RowConverter` on list should only encode the sliced list values and not the entire data [\#7993](https://github.com/apache/arrow-rs/issues/7993) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Avoid extra allocation in list builder [\#7977](https://github.com/apache/arrow-rs/issues/7977) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Convert JSON to Variant with fewer copies [\#7964](https://github.com/apache/arrow-rs/issues/7964) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Optimize sort kernels partition\_validity method [\#7936](https://github.com/apache/arrow-rs/issues/7936) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Speedup sorting for inline views [\#7857](https://github.com/apache/arrow-rs/issues/7857) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Perf: Investigate and improve parquet writing performance [\#7822](https://github.com/apache/arrow-rs/issues/7822) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Perf: optimize sort string\_view performance [\#7790](https://github.com/apache/arrow-rs/issues/7790) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Clickbench microbenchmark spends significant time in memcmp for not\_empty predicate [\#7766](https://github.com/apache/arrow-rs/issues/7766) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Use prefix first for comparisons, resort to data buffer for remaining data on equal values [\#7744](https://github.com/apache/arrow-rs/issues/7744) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Change use of `inline_value`  to inline it to a u128 [\#7743](https://github.com/apache/arrow-rs/issues/7743) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add efficient way to upgrade keys for additional dictionary builders [\#7654](https://github.com/apache/arrow-rs/issues/7654) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Perf: Make sort string view fast\(1.5X ~ 3X faster\) [\#7792](https://github.com/apache/arrow-rs/pull/7792) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Add specialized coalesce path for PrimitiveArrays [\#7772](https://github.com/apache/arrow-rs/pull/7772) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+
+**Closed issues:**
+
+- Implement full-range `i256::to_f64` to replace current ±∞ saturation for Decimal256 → Float64 [\#7985](https://github.com/apache/arrow-rs/issues/7985)
+- \[Variant\] `impl FromIterator` for `VariantPath` [\#7955](https://github.com/apache/arrow-rs/issues/7955)
+- `validated` and `is_fully_validated` flags don't need to be part of PartialEq [\#7952](https://github.com/apache/arrow-rs/issues/7952) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] remove VariantMetadata::dictionary\_size [\#7947](https://github.com/apache/arrow-rs/issues/7947) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Improve `VariantArray` performance by storing the index of the metadata and value arrays [\#7920](https://github.com/apache/arrow-rs/issues/7920)
+- \[Variant\] Converting variant to JSON string seems slow [\#7869](https://github.com/apache/arrow-rs/issues/7869) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Present Variant at Iceberg Summit NYC July 10, 2025 [\#7858](https://github.com/apache/arrow-rs/issues/7858)
+- \[Variant\] Avoid second copy of field name in MetadataBuilder [\#7814](https://github.com/apache/arrow-rs/issues/7814) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Remove APIs deprecated in or before 54.0.0 [\#7810](https://github.com/apache/arrow-rs/issues/7810) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- \[Variant\] Make it harder to forget to finish a pending parent in ObjectBuilder [\#7798](https://github.com/apache/arrow-rs/issues/7798) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Remove explicit ObjectBuilder::finish\(\) and ListBuilder::finish and move to `Drop` impl [\#7780](https://github.com/apache/arrow-rs/issues/7780) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Reduce repetition in tests for arrow-row/src/run.rs [\#7692](https://github.com/apache/arrow-rs/issues/7692) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Add tests for invalid variant values \(aka verify invalid inputs\) [\#7681](https://github.com/apache/arrow-rs/issues/7681) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Introduce structs for Variant::Decimal types  [\#7660](https://github.com/apache/arrow-rs/issues/7660) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Merged pull requests:**
+
+- Add benchmark for converting StringViewArray with mixed short and long strings [\#8015](https://github.com/apache/arrow-rs/pull/8015) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ding-young](https://github.com/ding-young))
+- \[Variant\] impl FromIterator for VariantPath [\#8011](https://github.com/apache/arrow-rs/pull/8011) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- Create empty buffer for a buffer specified in the C Data Interface with length zero [\#8009](https://github.com/apache/arrow-rs/pull/8009) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya))
+- bench: add benchmark for converting list and sliced list to row format [\#8008](https://github.com/apache/arrow-rs/pull/8008) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- bench: benchmark interleave structs [\#8007](https://github.com/apache/arrow-rs/pull/8007) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Parquet\] Allow writing compatible DictionaryArrays to parquet writer [\#8005](https://github.com/apache/arrow-rs/pull/8005) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
+- doc: remove outdated info from CONTRIBUTING doc in project root dir. [\#7998](https://github.com/apache/arrow-rs/pull/7998) ([sonhmai](https://github.com/sonhmai))
+- perf: only encode actual list values in `RowConverter` \(16-26 times faster for small sliced list\) [\#7996](https://github.com/apache/arrow-rs/pull/7996) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- test: add tests for converting sliced list to row based [\#7994](https://github.com/apache/arrow-rs/pull/7994) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: Improve `interleave` performance for struct \(3-6 times faster\) [\#7991](https://github.com/apache/arrow-rs/pull/7991) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Variant\] Avoid extra buffer allocation in ListBuilder [\#7987](https://github.com/apache/arrow-rs/pull/7987) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([klion26](https://github.com/klion26))
+- Implement full-range `i256::to_f64` to eliminate ±∞ saturation for Decimal256 → Float64 casts [\#7986](https://github.com/apache/arrow-rs/pull/7986) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kosiew](https://github.com/kosiew))
+- Minor: Restore warning comment on Int96 statistics read [\#7975](https://github.com/apache/arrow-rs/pull/7975) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add additional integration tests to arrow-avro [\#7974](https://github.com/apache/arrow-rs/pull/7974) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nathaniel-d-ef](https://github.com/nathaniel-d-ef))
+- Perf: optimize actual\_buffer\_size to use only data buffer capacity for coalesce [\#7967](https://github.com/apache/arrow-rs/pull/7967) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Implement Improved arrow-avro Reader Zero-Byte Record Handling [\#7966](https://github.com/apache/arrow-rs/pull/7966) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Perf: improve sort via `partition_validity` to use fast path for bit map scan \(up to 30% faster\) [\#7962](https://github.com/apache/arrow-rs/pull/7962) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- \[Variant\] Revisit VariantMetadata and Object equality [\#7961](https://github.com/apache/arrow-rs/pull/7961) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Add ListBuilder::with\_value for convenience [\#7959](https://github.com/apache/arrow-rs/pull/7959) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] remove VariantMetadata::dictionary\_size [\#7958](https://github.com/apache/arrow-rs/pull/7958) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] VariantMetadata is allowed to contain the empty string [\#7956](https://github.com/apache/arrow-rs/pull/7956) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add arrow-avro support for Impala Nullability [\#7954](https://github.com/apache/arrow-rs/pull/7954) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([veronica-m-ef](https://github.com/veronica-m-ef))
+- \[Test\] Add tests for VariantList equality [\#7953](https://github.com/apache/arrow-rs/pull/7953) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Add ObjectBuilder::with\_field for convenience [\#7950](https://github.com/apache/arrow-rs/pull/7950) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Adding code to store metadata and value references in VariantArray [\#7945](https://github.com/apache/arrow-rs/pull/7945) ([abacef](https://github.com/abacef))
+- \[Variant\] Add `variant_kernels` benchmark [\#7944](https://github.com/apache/arrow-rs/pull/7944) ([alamb](https://github.com/alamb))
+- \[Variant\] Impl `PartialEq` for VariantObject [\#7943](https://github.com/apache/arrow-rs/pull/7943) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Add documentation, tests and cleaner api for Variant::get\_path [\#7942](https://github.com/apache/arrow-rs/pull/7942) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- arrow-ipc: Remove all abilities to preserve dict IDs [\#7940](https://github.com/apache/arrow-rs/pull/7940) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([brancz](https://github.com/brancz))
+- Optimize partition\_validity function used in sort kernels [\#7937](https://github.com/apache/arrow-rs/pull/7937) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
+- \[Variant\] Avoid extra allocation in object builder [\#7935](https://github.com/apache/arrow-rs/pull/7935) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([klion26](https://github.com/klion26))
+- \[Variant\] Avoid collecting offset iterator [\#7934](https://github.com/apache/arrow-rs/pull/7934) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020))
+- Minor: Support BinaryView and StringView builders in `make_builder` [\#7931](https://github.com/apache/arrow-rs/pull/7931) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron))
+- chore: bump MSRV to 1.84 [\#7926](https://github.com/apache/arrow-rs/pull/7926) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([mbrobbel](https://github.com/mbrobbel))
+- Update bzip2 requirement from 0.4.4 to 0.6.0 [\#7924](https://github.com/apache/arrow-rs/pull/7924) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Reserve capacity beforehand during large object building [\#7922](https://github.com/apache/arrow-rs/pull/7922) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Add `variant_get` compute kernel [\#7919](https://github.com/apache/arrow-rs/pull/7919) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Samyak2](https://github.com/Samyak2))
+- Improve memory usage for `arrow-row -> String/BinaryView` when utf8 validation disabled [\#7917](https://github.com/apache/arrow-rs/pull/7917) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ding-young](https://github.com/ding-young))
+- Restructure compare\_greater function used in parquet statistics for better performance [\#7916](https://github.com/apache/arrow-rs/pull/7916) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- \[Variant\] Support appending complex variants in `VariantBuilder` [\#7914](https://github.com/apache/arrow-rs/pull/7914) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Add `VariantBuilder::new_with_buffers` to write to existing buffers [\#7912](https://github.com/apache/arrow-rs/pull/7912) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Convert JSON to VariantArray without copying \(8 - 32% faster\) [\#7911](https://github.com/apache/arrow-rs/pull/7911) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Use simdutf8 for UTF-8 validation [\#7908](https://github.com/apache/arrow-rs/pull/7908) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- \[Variant\] Avoid superfluous validation checks [\#7906](https://github.com/apache/arrow-rs/pull/7906) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add `VariantArray` and `VariantArrayBuilder` for constructing Arrow Arrays of Variants [\#7905](https://github.com/apache/arrow-rs/pull/7905) ([alamb](https://github.com/alamb))
+- Update sysinfo requirement from 0.35.0 to 0.36.0 [\#7904](https://github.com/apache/arrow-rs/pull/7904) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix current CI failure [\#7898](https://github.com/apache/arrow-rs/pull/7898) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya))
+- Remove redundant is\_err checks in Variant tests [\#7897](https://github.com/apache/arrow-rs/pull/7897) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya))
+- \[Variant\] test: add variant object tests with different sizes [\#7896](https://github.com/apache/arrow-rs/pull/7896) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([odysa](https://github.com/odysa))
+- \[Variant\] Define basic convenience methods for variant pathing [\#7894](https://github.com/apache/arrow-rs/pull/7894) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- fix: `view_types` benchmark slice should follow by correct len array [\#7892](https://github.com/apache/arrow-rs/pull/7892) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Add arrow-avro support for bzip2 and xz compression [\#7890](https://github.com/apache/arrow-rs/pull/7890) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Add arrow-avro support for Duration type and minor fixes for UUID decoding [\#7889](https://github.com/apache/arrow-rs/pull/7889) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] Reduce variant-related struct sizes [\#7888](https://github.com/apache/arrow-rs/pull/7888) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Fix panic on lossy decimal to float casting: round to saturation for overflows  [\#7887](https://github.com/apache/arrow-rs/pull/7887) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kosiew](https://github.com/kosiew))
+- Add tests for invalid variant metadata and value [\#7885](https://github.com/apache/arrow-rs/pull/7885) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya))
+- \[Variant\] Introduce parquet-variant-compute crate to transform batches of JSON strings to and from Variants [\#7884](https://github.com/apache/arrow-rs/pull/7884) ([harshmotw-db](https://github.com/harshmotw-db))
+- feat: support `MapArray` in lexsort [\#7882](https://github.com/apache/arrow-rs/pull/7882) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- fix: mark `DataType::Map` as unsupported in `RowConverter` [\#7880](https://github.com/apache/arrow-rs/pull/7880) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- \[Variant\] Speedup validation [\#7878](https://github.com/apache/arrow-rs/pull/7878) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- benchmark: Add StringViewArray gc benchmark with not null cases [\#7877](https://github.com/apache/arrow-rs/pull/7877) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- \[ARROW-RS-7820\]\[Variant\] Add tests for large variant lists [\#7876](https://github.com/apache/arrow-rs/pull/7876) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([klion26](https://github.com/klion26))
+- fix: Incorrect inlined string view comparison after Add prefix compar… [\#7875](https://github.com/apache/arrow-rs/pull/7875) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- perf: speed up StringViewArray gc 1.4 ~5.x faster [\#7873](https://github.com/apache/arrow-rs/pull/7873) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- \[Variant\] Remove superfluous validate call and rename methods [\#7871](https://github.com/apache/arrow-rs/pull/7871) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Benchmark: Add rich testing cases for sort string\(utf8\) [\#7867](https://github.com/apache/arrow-rs/pull/7867) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- chore: update link for `row_filter.rs` [\#7866](https://github.com/apache/arrow-rs/pull/7866) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([haohuaijin](https://github.com/haohuaijin))
+- \[Variant\] List and object builders have no effect until finalized [\#7865](https://github.com/apache/arrow-rs/pull/7865) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Added number to string benches for json\_writer [\#7864](https://github.com/apache/arrow-rs/pull/7864) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([abacef](https://github.com/abacef))
+- \[Variant\] Introduce `parquet-variant-json` crate [\#7862](https://github.com/apache/arrow-rs/pull/7862) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Remove dead code, add comments [\#7861](https://github.com/apache/arrow-rs/pull/7861) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Speedup sorting for inline views: 1.4x - 1.7x improvement [\#7856](https://github.com/apache/arrow-rs/pull/7856) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Fix union slice logical\_nulls length [\#7855](https://github.com/apache/arrow-rs/pull/7855) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- Add `get_ref/get_mut` to JSON Writer [\#7854](https://github.com/apache/arrow-rs/pull/7854) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cetra3](https://github.com/cetra3))
+- \[Minor\] Add Benchmark for RowConverter::append [\#7853](https://github.com/apache/arrow-rs/pull/7853) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Add Enum type support to arrow-avro and Minor Decimal type fix [\#7852](https://github.com/apache/arrow-rs/pull/7852) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- CSV error message has values transposed [\#7851](https://github.com/apache/arrow-rs/pull/7851) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Omega359](https://github.com/Omega359))
+- \[Variant\] Fuzz testing and benchmarks for validation [\#7849](https://github.com/apache/arrow-rs/pull/7849) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([carpecodeum](https://github.com/carpecodeum))
+- \[Variant\] Follow up nits and uncomment test cases [\#7846](https://github.com/apache/arrow-rs/pull/7846) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Make sure ObjectBuilder and ListBuilder are finalized before their parent builder [\#7843](https://github.com/apache/arrow-rs/pull/7843) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya))
+- Add decimal32 and decimal64 support to Parquet, JSON and CSV readers and writers [\#7841](https://github.com/apache/arrow-rs/pull/7841) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([CurtHagenlocher](https://github.com/CurtHagenlocher))
+- Implement arrow-avro Reader and ReaderBuilder [\#7834](https://github.com/apache/arrow-rs/pull/7834) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- \[Variant\] Support creating sorted dictionaries [\#7833](https://github.com/apache/arrow-rs/pull/7833) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add Decimal type support to arrow-avro  [\#7832](https://github.com/apache/arrow-rs/pull/7832) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Allow concating struct arrays with no fields [\#7829](https://github.com/apache/arrow-rs/pull/7829) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- Add features to configure flate2 [\#7827](https://github.com/apache/arrow-rs/pull/7827) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm))
+- make builder public under experimental [\#7825](https://github.com/apache/arrow-rs/pull/7825) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- Improvements for parquet writing performance \(25%-44%\) [\#7824](https://github.com/apache/arrow-rs/pull/7824) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
+- Use in-memory buffer for arrow\_writer benchmark [\#7823](https://github.com/apache/arrow-rs/pull/7823) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- \[Variant\] impl \[Try\]From for VariantDecimalXX types [\#7809](https://github.com/apache/arrow-rs/pull/7809) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- \[Variant\] Speedup `ObjectBuilder` \(62x faster\) [\#7808](https://github.com/apache/arrow-rs/pull/7808) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[VARIANT\] Support both fallible and infallible access to variants [\#7807](https://github.com/apache/arrow-rs/pull/7807) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Minor: fix clippy in parquet-variant after logical conflict [\#7803](https://github.com/apache/arrow-rs/pull/7803) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Add flag in `ObjectBuilder` to control validation behavior on duplicate field write [\#7801](https://github.com/apache/arrow-rs/pull/7801) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([micoo227](https://github.com/micoo227))
+- Fix clippy for Rust 1.88 release [\#7797](https://github.com/apache/arrow-rs/pull/7797) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
+- \[Variant\] Simplify `Builder` buffer operations [\#7795](https://github.com/apache/arrow-rs/pull/7795) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- fix: Change panic to error in `take` kernel for StringArray/BinaryArray on overflow [\#7793](https://github.com/apache/arrow-rs/pull/7793) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([chenkovsky](https://github.com/chenkovsky))
+- Update base64 requirement from 0.21 to 0.22 [\#7791](https://github.com/apache/arrow-rs/pull/7791) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- Fix RowConverter when FixedSizeList is not the last [\#7789](https://github.com/apache/arrow-rs/pull/7789) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Add schema with only primitive arrays to `coalesce_kernel` benchmark [\#7788](https://github.com/apache/arrow-rs/pull/7788) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add sort\_kernel benchmark for StringViewArray case [\#7787](https://github.com/apache/arrow-rs/pull/7787) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- \[Variant\] Check pending before `VariantObject::insert` [\#7786](https://github.com/apache/arrow-rs/pull/7786) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[VARIANT\] impl Display for VariantDecimalXX [\#7785](https://github.com/apache/arrow-rs/pull/7785) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- \[VARIANT\] Add support for the json\_to\_variant API [\#7783](https://github.com/apache/arrow-rs/pull/7783) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([harshmotw-db](https://github.com/harshmotw-db))
+- \[Variant\] Consolidate examples for json writing [\#7782](https://github.com/apache/arrow-rs/pull/7782) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add benchmark for view array slice [\#7781](https://github.com/apache/arrow-rs/pull/7781) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- \[Variant\] Add negative tests for reading invalid primitive variant values [\#7779](https://github.com/apache/arrow-rs/pull/7779) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([superserious-dev](https://github.com/superserious-dev))
+- \[Variant\] Support creating nested objects and object with lists [\#7778](https://github.com/apache/arrow-rs/pull/7778) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[VARIANT\] Validate precision in VariantDecimalXX structs and add missing tests [\#7776](https://github.com/apache/arrow-rs/pull/7776) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add tests for `BatchCoalescer::push_batch_with_filter`, fix bug [\#7774](https://github.com/apache/arrow-rs/pull/7774) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Variant\] Minor: make fields in `VariantDecimal*` private, add examples [\#7770](https://github.com/apache/arrow-rs/pull/7770) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Extend the fast path in GenericByteViewArray::is\_eq for comparing against empty strings [\#7767](https://github.com/apache/arrow-rs/pull/7767) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
+- \[Variant\] Improve getter API for `VariantList` and `VariantObject` [\#7757](https://github.com/apache/arrow-rs/pull/7757) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Add Variant::as\_object and Variant::as\_list [\#7755](https://github.com/apache/arrow-rs/pull/7755) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Variant\] Fix several overflow panic risks for 32-bit arch [\#7752](https://github.com/apache/arrow-rs/pull/7752) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add testing section to pull request template [\#7749](https://github.com/apache/arrow-rs/pull/7749) ([alamb](https://github.com/alamb))
+- Perf: Add prefix compare for inlined compare and change use of inline\_value to inline it to a u128  [\#7748](https://github.com/apache/arrow-rs/pull/7748) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Move arrow-pyarrow tests that require `pyarrow` to be installed into `arrow-pyarrow-testing` crate [\#7742](https://github.com/apache/arrow-rs/pull/7742) ([alamb](https://github.com/alamb))
+- \[Variant\] Improve write API in `Variant::Object` [\#7741](https://github.com/apache/arrow-rs/pull/7741) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Support nested lists and object lists [\#7740](https://github.com/apache/arrow-rs/pull/7740) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- feat: \[Variant\] Add Validation for Variant Decimal [\#7738](https://github.com/apache/arrow-rs/pull/7738) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Weijun-H](https://github.com/Weijun-H))
+- Add fallible versions of temporal functions that may panic [\#7737](https://github.com/apache/arrow-rs/pull/7737) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
+- fix: Implement support for appending Object and List variants in VariantBuilder [\#7735](https://github.com/apache/arrow-rs/pull/7735) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Weijun-H](https://github.com/Weijun-H))
+- parquet\_derive: update in working example for ParquetRecordWriter [\#7733](https://github.com/apache/arrow-rs/pull/7733) ([LanHikari22](https://github.com/LanHikari22))
+- Perf: Optimize comparison kernels for inlined views [\#7731](https://github.com/apache/arrow-rs/pull/7731) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- arrow-row: Refactor arrow-row REE roundtrip tests [\#7729](https://github.com/apache/arrow-rs/pull/7729) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- arrow-array: Implement PartialEq for RunArray [\#7727](https://github.com/apache/arrow-rs/pull/7727) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- fix: Do not add null buffer for `NullArray` in MutableArrayData [\#7726](https://github.com/apache/arrow-rs/pull/7726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead))
+- Allow per-column parquet dictionary page size limit [\#7724](https://github.com/apache/arrow-rs/pull/7724) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- fix JSON decoder error checking for UTF16 / surrogate parsing panic [\#7721](https://github.com/apache/arrow-rs/pull/7721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nicklan](https://github.com/nicklan))
+- \[Variant\] Use `BTreeMap` for `VariantBuilder.dict` and `ObjectBuilder.fields` to maintain invariants upon entry writes [\#7720](https://github.com/apache/arrow-rs/pull/7720) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Introduce `MAX_INLINE_VIEW_LEN` constant for string/byte views [\#7719](https://github.com/apache/arrow-rs/pull/7719) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Variant\] Introduce new type over &str for ShortString [\#7718](https://github.com/apache/arrow-rs/pull/7718) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Split out variant code into several new sub-modules [\#7717](https://github.com/apache/arrow-rs/pull/7717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- add `garbage_collect_dictionary` to `arrow-select` [\#7716](https://github.com/apache/arrow-rs/pull/7716) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([davidhewitt](https://github.com/davidhewitt))
+- Support write to buffer api for SerializedFileWriter [\#7714](https://github.com/apache/arrow-rs/pull/7714) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Support `FixedSizeList` RowConverter [\#7705](https://github.com/apache/arrow-rs/pull/7705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Make variant iterators safely infallible [\#7704](https://github.com/apache/arrow-rs/pull/7704) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Speedup `interleave_views` \(4-7x faster\) [\#7695](https://github.com/apache/arrow-rs/pull/7695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Define an "arrow-pyarrow" crate to implement the "pyarrow" feature. [\#7694](https://github.com/apache/arrow-rs/pull/7694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
+- feat: add constructor to efficiently upgrade dict key type to remaining builders [\#7689](https://github.com/apache/arrow-rs/pull/7689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- Document REE row format and add some more tests [\#7680](https://github.com/apache/arrow-rs/pull/7680) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- feat: add min max aggregate support for FixedSizeBinary [\#7675](https://github.com/apache/arrow-rs/pull/7675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- arrow-data: Add REE support for `build_extend` and `build_extend_nulls` [\#7671](https://github.com/apache/arrow-rs/pull/7671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Variant: Write Variant Values as JSON [\#7670](https://github.com/apache/arrow-rs/pull/7670) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([carpecodeum](https://github.com/carpecodeum))
+- Remove `lazy_static` dependency [\#7669](https://github.com/apache/arrow-rs/pull/7669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Expyron](https://github.com/Expyron))
+- Finish implementing Variant::Object and Variant::List [\#7666](https://github.com/apache/arrow-rs/pull/7666) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add `RecordBatch::schema_metadata_mut` and `Field::metadata_mut` [\#7664](https://github.com/apache/arrow-rs/pull/7664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- \[Variant\] Simplify creation of Variants from metadata and value [\#7663](https://github.com/apache/arrow-rs/pull/7663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- chore: group prost dependabot updates [\#7659](https://github.com/apache/arrow-rs/pull/7659) ([mbrobbel](https://github.com/mbrobbel))
+- Initial Builder API for Creating Variant Values [\#7653](https://github.com/apache/arrow-rs/pull/7653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PinkCrow007](https://github.com/PinkCrow007))
+- Add `BatchCoalescer::push_filtered_batch` and docs [\#7652](https://github.com/apache/arrow-rs/pull/7652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Optimize coalesce kernel for StringView \(10-50% faster\) [\#7650](https://github.com/apache/arrow-rs/pull/7650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- arrow-row: Add support for REE [\#7649](https://github.com/apache/arrow-rs/pull/7649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Use approximate comparisons for pow tests [\#7646](https://github.com/apache/arrow-rs/pull/7646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Implement read support for remaining primitive types [\#7644](https://github.com/apache/arrow-rs/pull/7644) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([superserious-dev](https://github.com/superserious-dev))
+- Add `pretty_format_batches_with_schema` function [\#7642](https://github.com/apache/arrow-rs/pull/7642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lewiszlw](https://github.com/lewiszlw))
+- Deprecate old Parquet page index parsing functions [\#7640](https://github.com/apache/arrow-rs/pull/7640) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update FlightSQL `GetDbSchemas` and `GetTables` schemas to fully match the protocol [\#7638](https://github.com/apache/arrow-rs/pull/7638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([sgrebnov](https://github.com/sgrebnov))
+- Minor: Remove outdated FIXME from `ParquetMetaDataReader` [\#7635](https://github.com/apache/arrow-rs/pull/7635) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix the error info of `StructArray::try_new` [\#7634](https://github.com/apache/arrow-rs/pull/7634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963))
+- Fix reading encrypted Parquet pages when using the page index [\#7633](https://github.com/apache/arrow-rs/pull/7633) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Add commented out primitive test cases [\#7631](https://github.com/apache/arrow-rs/pull/7631) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+## [55.2.0](https://github.com/apache/arrow-rs/tree/55.2.0) (2025-06-22)
+
+- Add a `strong_count` method to `Buffer` [\#7568](https://github.com/apache/arrow-rs/issues/7568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Create version of LexicographicalComparator that compares fixed number of columns [\#7531](https://github.com/apache/arrow-rs/issues/7531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- parquet-show-bloom-filter should work with integer typed columns [\#7528](https://github.com/apache/arrow-rs/issues/7528) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Allow merging primitive dictionary values in concat and interleave kernels [\#7518](https://github.com/apache/arrow-rs/issues/7518) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add efficient concatenation of StructArrays [\#7516](https://github.com/apache/arrow-rs/issues/7516) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Rename `flight-sql-experimental` to `flight-sql` [\#7498](https://github.com/apache/arrow-rs/issues/7498) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Consider moving from ryu to lexical-core for string formatting / casting floats to string. [\#7496](https://github.com/apache/arrow-rs/issues/7496)
+- Arithmetic kernels can be safer and faster [\#7494](https://github.com/apache/arrow-rs/issues/7494) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Speedup `filter_bytes` by precalculating capacity [\#7465](https://github.com/apache/arrow-rs/issues/7465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\]: Rust API to Create Variant Values [\#7424](https://github.com/apache/arrow-rs/issues/7424) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Rust API to Read Variant Values [\#7423](https://github.com/apache/arrow-rs/issues/7423) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Release arrow-rs / parquet Minor version `55.1.0` \(May 2025\) [\#7393](https://github.com/apache/arrow-rs/issues/7393) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support create\_random\_array for Decimal data types [\#7343](https://github.com/apache/arrow-rs/issues/7343) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Truncate Parquet page data page statistics [\#7555](https://github.com/apache/arrow-rs/pull/7555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Fixed bugs:**
+
+- In arrow\_json, Decoder::decode can panic if it encounters two high surrogates in a row. [\#7712](https://github.com/apache/arrow-rs/issues/7712)
+- FlightSQL "GetDbSchemas" and "GetTables" schemas do not fully match the protocol [\#7637](https://github.com/apache/arrow-rs/issues/7637) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Cannot read encrypted Parquet file if page index reading is enabled [\#7629](https://github.com/apache/arrow-rs/issues/7629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `encoding_stats` not present in Parquet generated by `parquet-rewrite` [\#7616](https://github.com/apache/arrow-rs/issues/7616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- When writing parquet plaintext footer files `footer_signing_key_metadata` is not included, encryption algorithm is always written in the footer [\#7599](https://github.com/apache/arrow-rs/issues/7599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `new_null_array` panics when constructing a struct of a dictionary [\#7571](https://github.com/apache/arrow-rs/issues/7571)
+- Parquet derive fails to build when Result is aliased [\#7547](https://github.com/apache/arrow-rs/issues/7547)
+- Unable to read `Dictionary(u8, FixedSizeBinary(_))` using datafusion. [\#7545](https://github.com/apache/arrow-rs/issues/7545) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- filter\_record\_batch panics with empty struct array. [\#7538](https://github.com/apache/arrow-rs/issues/7538) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Panic in `pretty_format` function when displaying DurationSecondsArray with `i64::MIN` / `i64::MAX` [\#7533](https://github.com/apache/arrow-rs/issues/7533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Record API unable to parse TIME\_MILLIS when encoded as INT32 [\#7510](https://github.com/apache/arrow-rs/issues/7510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- The `read_record_batch` func of the `RecordBatchDecoder` does not respect the `skip_validation` property [\#7508](https://github.com/apache/arrow-rs/issues/7508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `arrow-55.1.0` breaks `filter_record_batch` [\#7500](https://github.com/apache/arrow-rs/issues/7500)
+- Files containing binary data with \>=8\_388\_855 bytes per row written with `arrow-rs` can't be read with `pyarrow` [\#7489](https://github.com/apache/arrow-rs/issues/7489) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Bug\] Ingestion with Arrow Flight Sql panic when the input stream is empty or fallible [\#7329](https://github.com/apache/arrow-rs/issues/7329) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Ensure page encoding statistics are written to Parquet file [\#7643](https://github.com/apache/arrow-rs/pull/7643) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Documentation updates:**
+
+- arrow\_reader\_row\_filter benchmark doesn't capture page cache improvements [\#7460](https://github.com/apache/arrow-rs/issues/7460) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- chore: fix a typo in `ExtensionType::supports_data_type` docs [\#7682](https://github.com/apache/arrow-rs/pull/7682) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Add variant docs and examples [\#7661](https://github.com/apache/arrow-rs/pull/7661) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Minor: Add version to deprecation notice for `ParquetMetaDataReader::decode_footer` [\#7639](https://github.com/apache/arrow-rs/pull/7639) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Add references for defaults in `WriterPropertiesBuilder` [\#7558](https://github.com/apache/arrow-rs/pull/7558) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Clarify Docs: NullBuffer::len is in bits [\#7556](https://github.com/apache/arrow-rs/pull/7556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: fix typo for `Decimal128Array` [\#7525](https://github.com/apache/arrow-rs/pull/7525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([burmecia](https://github.com/burmecia))
+- Minor: Add examples to ProjectionMask documentation [\#7523](https://github.com/apache/arrow-rs/pull/7523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve documentation for Parquet `WriterProperties` [\#7491](https://github.com/apache/arrow-rs/pull/7491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+
+**Closed issues:**
+
+- \[Variant\] More efficient determination of String vs ShortString [\#7700](https://github.com/apache/arrow-rs/issues/7700)
+- \[Variant\] Improve API for iterating over values of a VariantList [\#7685](https://github.com/apache/arrow-rs/issues/7685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Consider validating variants on creation \(rather than read\) [\#7684](https://github.com/apache/arrow-rs/issues/7684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Miri test\_native\_type\_pow test failing [\#7641](https://github.com/apache/arrow-rs/issues/7641) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Improve performance of `coalesce` and `concat` for views [\#7615](https://github.com/apache/arrow-rs/issues/7615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Bad min value in row group statistics in some special cases [\#7593](https://github.com/apache/arrow-rs/issues/7593)
+- Feature Request: BloomFilter Position Flexibility in `parquet-rewrite` [\#7552](https://github.com/apache/arrow-rs/issues/7552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Merged pull requests:**
+
+- arrow-array: Implement PartialEq for RunArray [\#7727](https://github.com/apache/arrow-rs/pull/7727) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- fix: Do not add null buffer for `NullArray` in MutableArrayData [\#7726](https://github.com/apache/arrow-rs/pull/7726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead))
+- fix JSON decoder error checking for UTF16 / surrogate parsing panic [\#7721](https://github.com/apache/arrow-rs/pull/7721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nicklan](https://github.com/nicklan))
+- \[Variant\] Introduce new type over &str for ShortString [\#7718](https://github.com/apache/arrow-rs/pull/7718) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Split out variant code into several new sub-modules [\#7717](https://github.com/apache/arrow-rs/pull/7717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Support write to buffer api for SerializedFileWriter [\#7714](https://github.com/apache/arrow-rs/pull/7714) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Make variant iterators safely infallible [\#7704](https://github.com/apache/arrow-rs/pull/7704) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Speedup `interleave_views` \(4-7x faster\) [\#7695](https://github.com/apache/arrow-rs/pull/7695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Define an "arrow-pyarrow" crate to implement the "pyarrow" feature. [\#7694](https://github.com/apache/arrow-rs/pull/7694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
+- Document REE row format and add some more tests [\#7680](https://github.com/apache/arrow-rs/pull/7680) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- feat: add min max aggregate support for FixedSizeBinary [\#7675](https://github.com/apache/arrow-rs/pull/7675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- arrow-data: Add REE support for `build_extend` and `build_extend_nulls` [\#7671](https://github.com/apache/arrow-rs/pull/7671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Remove `lazy_static` dependency [\#7669](https://github.com/apache/arrow-rs/pull/7669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Expyron](https://github.com/Expyron))
+- Finish implementing Variant::Object and Variant::List [\#7666](https://github.com/apache/arrow-rs/pull/7666) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add `RecordBatch::schema_metadata_mut` and `Field::metadata_mut` [\#7664](https://github.com/apache/arrow-rs/pull/7664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- \[Variant\] Simplify creation of Variants from metadata and value [\#7663](https://github.com/apache/arrow-rs/pull/7663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- chore: group prost dependabot updates [\#7659](https://github.com/apache/arrow-rs/pull/7659) ([mbrobbel](https://github.com/mbrobbel))
+- Initial Builder API for Creating Variant Values [\#7653](https://github.com/apache/arrow-rs/pull/7653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PinkCrow007](https://github.com/PinkCrow007))
+- Add `BatchCoalescer::push_filtered_batch` and docs [\#7652](https://github.com/apache/arrow-rs/pull/7652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Optimize coalesce kernel for StringView \(10-50% faster\) [\#7650](https://github.com/apache/arrow-rs/pull/7650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- arrow-row: Add support for REE [\#7649](https://github.com/apache/arrow-rs/pull/7649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Use approximate comparisons for pow tests [\#7646](https://github.com/apache/arrow-rs/pull/7646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Implement read support for remaining primitive types [\#7644](https://github.com/apache/arrow-rs/pull/7644) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([superserious-dev](https://github.com/superserious-dev))
+- Add `pretty_format_batches_with_schema` function [\#7642](https://github.com/apache/arrow-rs/pull/7642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lewiszlw](https://github.com/lewiszlw))
+- Deprecate old Parquet page index parsing functions [\#7640](https://github.com/apache/arrow-rs/pull/7640) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update FlightSQL `GetDbSchemas` and `GetTables` schemas to fully match the protocol [\#7638](https://github.com/apache/arrow-rs/pull/7638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([sgrebnov](https://github.com/sgrebnov))
+- Minor: Remove outdated FIXME from `ParquetMetaDataReader` [\#7635](https://github.com/apache/arrow-rs/pull/7635) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix the error info of `StructArray::try_new` [\#7634](https://github.com/apache/arrow-rs/pull/7634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963))
+- Fix reading encrypted Parquet pages when using the page index [\#7633](https://github.com/apache/arrow-rs/pull/7633) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Add commented out primitive test cases [\#7631](https://github.com/apache/arrow-rs/pull/7631) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve `coalesce` kernel tests [\#7626](https://github.com/apache/arrow-rs/pull/7626) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Revert "Revert "Improve `coalesce` and `concat` performance for views \(\#7614\)"" [\#7625](https://github.com/apache/arrow-rs/pull/7625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Revert "Improve `coalesce` and `concat` performance for views \(\#7614\)" [\#7623](https://github.com/apache/arrow-rs/pull/7623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Improve coalesce\_kernel benchmark to capture inline vs non inline views [\#7619](https://github.com/apache/arrow-rs/pull/7619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve `coalesce` and `concat` performance for views [\#7614](https://github.com/apache/arrow-rs/pull/7614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- feat: add constructor to help efficiently upgrade key for GenericBytesDictionaryBuilder [\#7611](https://github.com/apache/arrow-rs/pull/7611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- feat: support append\_nulls on additional builders [\#7606](https://github.com/apache/arrow-rs/pull/7606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- feat: add AsyncArrowWriter::into\_inner [\#7604](https://github.com/apache/arrow-rs/pull/7604) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jpopesculian](https://github.com/jpopesculian))
+- Move variant interop test to Rust integration test [\#7602](https://github.com/apache/arrow-rs/pull/7602) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Include footer key metadata when writing encrypted Parquet with a plaintext footer [\#7600](https://github.com/apache/arrow-rs/pull/7600) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rok](https://github.com/rok))
+- Add `coalesce` kernel and `BatchCoalescer` for statefully combining selected batches [\#7597](https://github.com/apache/arrow-rs/pull/7597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add FixedSizeBinary to `take_kernel` benchmark [\#7592](https://github.com/apache/arrow-rs/pull/7592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix GenericBinaryArray docstring. [\#7588](https://github.com/apache/arrow-rs/pull/7588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
+- fix: error reading multiple batches of `Dict(_, FixedSizeBinary(_))` [\#7585](https://github.com/apache/arrow-rs/pull/7585) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
+- Revert "Minor: remove filter code deprecated in 2023 \(\#7554\)" [\#7583](https://github.com/apache/arrow-rs/pull/7583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fixed a build warning: function never used. [\#7577](https://github.com/apache/arrow-rs/pull/7577) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- Adding Encoding argument in `parquet-rewrite` [\#7576](https://github.com/apache/arrow-rs/pull/7576) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter [\#7574](https://github.com/apache/arrow-rs/pull/7574) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([CookiePieWw](https://github.com/CookiePieWw))
+- \[array\] Remove unwrap checks from GenericByteArray::value\_unchecked [\#7573](https://github.com/apache/arrow-rs/pull/7573) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- \[benches/row\_format\] fix typo in array lengths [\#7572](https://github.com/apache/arrow-rs/pull/7572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- Add a strong\_count method to Buffer [\#7569](https://github.com/apache/arrow-rs/pull/7569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([westonpace](https://github.com/westonpace))
+- Minor: Enable byte view for clickbench benchmark [\#7565](https://github.com/apache/arrow-rs/pull/7565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Optimize length calculation in row encoding for fixed-length columns [\#7564](https://github.com/apache/arrow-rs/pull/7564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- Use PR title and description for commit message [\#7563](https://github.com/apache/arrow-rs/pull/7563) ([kou](https://github.com/kou))
+- Use apache/arrow-{go,java,js} in integration test [\#7561](https://github.com/apache/arrow-rs/pull/7561) ([kou](https://github.com/kou))
+- Implement Array Decoding in arrow-avro [\#7559](https://github.com/apache/arrow-rs/pull/7559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Minor: remove filter code deprecated in 2023 [\#7554](https://github.com/apache/arrow-rs/pull/7554) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- fix: Correct docs for `WriterPropertiesBuilder::set_column_index_truncate_length` [\#7553](https://github.com/apache/arrow-rs/pull/7553) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Adding Bloom Filter Position argument in parquet-rewrite [\#7550](https://github.com/apache/arrow-rs/pull/7550) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- Fix `Result` name collision in parquet\_derive [\#7548](https://github.com/apache/arrow-rs/pull/7548) ([jspaezp](https://github.com/jspaezp))
+- Fix: Converted feature flight-sql-experimental to flight-sql [\#7546](https://github.com/apache/arrow-rs/pull/7546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal))
+- Fix CI on main due to logical conflict [\#7542](https://github.com/apache/arrow-rs/pull/7542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix `filter_record_batch` panics with empty struct array [\#7539](https://github.com/apache/arrow-rs/pull/7539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour))
+- \[Variant\] Initial API for reading Variant data and metadata [\#7535](https://github.com/apache/arrow-rs/pull/7535) ([mkarbo](https://github.com/mkarbo))
+- fix: Panic in pretty\_format function when displaying DurationSecondsArray with `i64::MIN` / `i64::MAX` [\#7534](https://github.com/apache/arrow-rs/pull/7534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Create version of LexicographicalComparator that compares fixed number of columns \(~ -15%\) [\#7530](https://github.com/apache/arrow-rs/pull/7530) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Make parquet-show-bloom-filter work with integer typed columns [\#7529](https://github.com/apache/arrow-rs/pull/7529) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- chore\(deps\): update criterion requirement from 0.5 to 0.6 [\#7527](https://github.com/apache/arrow-rs/pull/7527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Minor: Add a parquet row\_filter test, reduce some test boilerplate [\#7522](https://github.com/apache/arrow-rs/pull/7522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Refactor `build_array_reader` into a struct [\#7521](https://github.com/apache/arrow-rs/pull/7521) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- arrow: add concat structs benchmark [\#7520](https://github.com/apache/arrow-rs/pull/7520) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- arrow-select: add support for merging primitive dictionary values [\#7519](https://github.com/apache/arrow-rs/pull/7519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- arrow-select: add support for optimized concatenation of struct arrays [\#7517](https://github.com/apache/arrow-rs/pull/7517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- Fix Clippy in CI for Rust 1.87 release [\#7514](https://github.com/apache/arrow-rs/pull/7514) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
+- Simplify `ParquetRecordBatchReader::next` control logic [\#7512](https://github.com/apache/arrow-rs/pull/7512) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix record API support for reading INT32 encoded TIME\_MILLIS [\#7511](https://github.com/apache/arrow-rs/pull/7511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([njaremko](https://github.com/njaremko))
+- RecordBatchDecoder: skip RecordBatch validation when `skip_validation` property is enabled [\#7509](https://github.com/apache/arrow-rs/pull/7509) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nilskch](https://github.com/nilskch))
+- Introduce `ReadPlan` to encapsulate the calculation of what parquet rows to decode [\#7502](https://github.com/apache/arrow-rs/pull/7502) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Update documentation for ParquetReader [\#7501](https://github.com/apache/arrow-rs/pull/7501) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve `Field` docs, add missing `Field::set_*` methods [\#7497](https://github.com/apache/arrow-rs/pull/7497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Speed up arithmetic kernels, reduce `unsafe` usage [\#7493](https://github.com/apache/arrow-rs/pull/7493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Prevent FlightSQL server panics for `do_put` when stream is empty or 1st stream element is an Err [\#7492](https://github.com/apache/arrow-rs/pull/7492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([superserious-dev](https://github.com/superserious-dev))
+- arrow-ipc: add `StreamDecoder::schema` [\#7488](https://github.com/apache/arrow-rs/pull/7488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm))
+- arrow-select: Implement concat for `RunArray`s [\#7487](https://github.com/apache/arrow-rs/pull/7487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- \[Variant\] Add \(empty\) `parquet-variant` crate, update `parquet-testing` pin [\#7485](https://github.com/apache/arrow-rs/pull/7485) ([alamb](https://github.com/alamb))
+- Improve error messages if schema hint mismatches with parquet schema [\#7481](https://github.com/apache/arrow-rs/pull/7481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add `arrow_reader_clickbench` benchmark [\#7470](https://github.com/apache/arrow-rs/pull/7470) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Speedup `filter_bytes` ~-20-40%, `filter_native` low selectivity \(~-37%\) [\#7463](https://github.com/apache/arrow-rs/pull/7463) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+## [55.2.0](https://github.com/apache/arrow-rs/tree/55.2.0) (2025-06-22)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/55.1.0...55.2.0)
+
+**Implemented enhancements:**
+
+- Do not populate nulls for `NullArray` for `MutableArrayData` [\#7725](https://github.com/apache/arrow-rs/issues/7725)
+- Implement `PartialEq` for RunArray [\#7691](https://github.com/apache/arrow-rs/issues/7691)
+- `interleave_views` is really slow [\#7688](https://github.com/apache/arrow-rs/issues/7688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add min max aggregates for FixedSizeBinary [\#7674](https://github.com/apache/arrow-rs/issues/7674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Deliver pyarrow as a standalone crate [\#7668](https://github.com/apache/arrow-rs/issues/7668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Implement `VariantObject::field` and `VariantObject::fields` [\#7665](https://github.com/apache/arrow-rs/issues/7665) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Implement read support for remaining primitive types [\#7630](https://github.com/apache/arrow-rs/issues/7630) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Fast and ergonomic method to add metadata to a `RecordBatch` [\#7628](https://github.com/apache/arrow-rs/issues/7628) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add efficient way to change the keys of string dictionary builder [\#7610](https://github.com/apache/arrow-rs/issues/7610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support `add_nulls` on additional builder types [\#7605](https://github.com/apache/arrow-rs/issues/7605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add `into_inner` for `AsyncArrowWriter` [\#7603](https://github.com/apache/arrow-rs/issues/7603) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Optimize `PrimitiveBuilder::append_trusted_len_iter` [\#7591](https://github.com/apache/arrow-rs/issues/7591) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Benchmark for filter+concat and take+concat into even sized record batches [\#7589](https://github.com/apache/arrow-rs/issues/7589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `max_statistics_truncate_length` is ignored when writing statistics to data page headers [\#7579](https://github.com/apache/arrow-rs/issues/7579) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Feature Request: Encoding in `parquet-rewrite` [\#7575](https://github.com/apache/arrow-rs/issues/7575) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Add a `strong_count` method to `Buffer` [\#7568](https://github.com/apache/arrow-rs/issues/7568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Create version of LexicographicalComparator that compares fixed number of columns [\#7531](https://github.com/apache/arrow-rs/issues/7531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- parquet-show-bloom-filter should work with integer typed columns [\#7528](https://github.com/apache/arrow-rs/issues/7528) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Allow merging primitive dictionary values in concat and interleave kernels [\#7518](https://github.com/apache/arrow-rs/issues/7518) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add efficient concatenation of StructArrays [\#7516](https://github.com/apache/arrow-rs/issues/7516) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Rename `flight-sql-experimental` to `flight-sql` [\#7498](https://github.com/apache/arrow-rs/issues/7498) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Consider moving from ryu to lexical-core for string formatting / casting floats to string. [\#7496](https://github.com/apache/arrow-rs/issues/7496)
+- Arithmetic kernels can be safer and faster [\#7494](https://github.com/apache/arrow-rs/issues/7494) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Speedup `filter_bytes` by precalculating capacity [\#7465](https://github.com/apache/arrow-rs/issues/7465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\]: Rust API to Create Variant Values [\#7424](https://github.com/apache/arrow-rs/issues/7424) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Rust API to Read Variant Values [\#7423](https://github.com/apache/arrow-rs/issues/7423) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Release arrow-rs / parquet Minor version `55.1.0` \(May 2025\) [\#7393](https://github.com/apache/arrow-rs/issues/7393) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support create\_random\_array for Decimal data types [\#7343](https://github.com/apache/arrow-rs/issues/7343) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Truncate Parquet page data page statistics [\#7555](https://github.com/apache/arrow-rs/pull/7555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Fixed bugs:**
+
+- In arrow\_json, Decoder::decode can panic if it encounters two high surrogates in a row. [\#7712](https://github.com/apache/arrow-rs/issues/7712)
+- FlightSQL "GetDbSchemas" and "GetTables" schemas do not fully match the protocol [\#7637](https://github.com/apache/arrow-rs/issues/7637) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Cannot read encrypted Parquet file if page index reading is enabled [\#7629](https://github.com/apache/arrow-rs/issues/7629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `encoding_stats` not present in Parquet generated by `parquet-rewrite` [\#7616](https://github.com/apache/arrow-rs/issues/7616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- When writing parquet plaintext footer files `footer_signing_key_metadata` is not included, encryption algorithm is always written in footer [\#7599](https://github.com/apache/arrow-rs/issues/7599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- `new_null_array` panics when constructing a struct of a dictionary [\#7571](https://github.com/apache/arrow-rs/issues/7571)
+- Parquet derive fails to build when Result is aliased [\#7547](https://github.com/apache/arrow-rs/issues/7547)
+- Unable to read `Dictionary(u8, FixedSizeBinary(_))` using datafusion. [\#7545](https://github.com/apache/arrow-rs/issues/7545) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- filter\_record\_batch panics with empty struct array. [\#7538](https://github.com/apache/arrow-rs/issues/7538) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Panic in `pretty_format` function when displaying DurationSecondsArray with `i64::MIN` / `i64::MAX` [\#7533](https://github.com/apache/arrow-rs/issues/7533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Record API unable to parse TIME\_MILLIS when encoded as INT32 [\#7510](https://github.com/apache/arrow-rs/issues/7510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- The `read_record_batch` func of the `RecordBatchDecoder` does not respect the `skip_validation` property [\#7508](https://github.com/apache/arrow-rs/issues/7508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `arrow-55.1.0` breaks `filter_record_batch` [\#7500](https://github.com/apache/arrow-rs/issues/7500)
+- Files containing binary data with \>=8\_388\_855 bytes per row written with `arrow-rs` can't be read with `pyarrow` [\#7489](https://github.com/apache/arrow-rs/issues/7489) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Bug\] Ingestion with Arrow Flight Sql panic when the input stream is empty or fallible [\#7329](https://github.com/apache/arrow-rs/issues/7329) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Ensure page encoding statistics are written to Parquet file [\#7643](https://github.com/apache/arrow-rs/pull/7643) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
+**Documentation updates:**
+
+- arrow\_reader\_row\_filter benchmark doesn't capture page cache improvements [\#7460](https://github.com/apache/arrow-rs/issues/7460) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- chore: fix a typo in `ExtensionType::supports_data_type` docs [\#7682](https://github.com/apache/arrow-rs/pull/7682) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- \[Variant\] Add variant docs and examples [\#7661](https://github.com/apache/arrow-rs/pull/7661) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Minor: Add version to deprecation notice for `ParquetMetaDataReader::decode_footer` [\#7639](https://github.com/apache/arrow-rs/pull/7639) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Add references for defaults in `WriterPropertiesBuilder` [\#7558](https://github.com/apache/arrow-rs/pull/7558) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Clarify Docs: NullBuffer::len is in bits [\#7556](https://github.com/apache/arrow-rs/pull/7556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- docs: fix typo for `Decimal128Array` [\#7525](https://github.com/apache/arrow-rs/pull/7525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([burmecia](https://github.com/burmecia))
+- Minor: Add examples to ProjectionMask documentation [\#7523](https://github.com/apache/arrow-rs/pull/7523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve documentation for Parquet `WriterProperties` [\#7491](https://github.com/apache/arrow-rs/pull/7491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+
+**Closed issues:**
+
+- \[Variant\] More efficient determination of String vs ShortString [\#7700](https://github.com/apache/arrow-rs/issues/7700)
+- \[Variant\] Improve API for iterating over values of a VariantList [\#7685](https://github.com/apache/arrow-rs/issues/7685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- \[Variant\] Consider validating variants on creation \(rather than read\) [\#7684](https://github.com/apache/arrow-rs/issues/7684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Miri test\_native\_type\_pow test failing [\#7641](https://github.com/apache/arrow-rs/issues/7641) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Improve performance of `coalesce` and `concat` for views [\#7615](https://github.com/apache/arrow-rs/issues/7615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Bad min value in row group statistics in some special cases [\#7593](https://github.com/apache/arrow-rs/issues/7593)
+- Feature Request: BloomFilter Position Flexibility in `parquet-rewrite` [\#7552](https://github.com/apache/arrow-rs/issues/7552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Merged pull requests:**
+
+- arrow-array: Implement PartialEq for RunArray [\#7727](https://github.com/apache/arrow-rs/pull/7727) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- fix: Do not add null buffer for `NullArray` in MutableArrayData [\#7726](https://github.com/apache/arrow-rs/pull/7726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead))
+- fix JSON decoder error checking for UTF16 / surrogate parsing panic [\#7721](https://github.com/apache/arrow-rs/pull/7721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nicklan](https://github.com/nicklan))
+- \[Variant\] Introduce new type over &str for ShortString [\#7718](https://github.com/apache/arrow-rs/pull/7718) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Split out variant code into several new sub-modules [\#7717](https://github.com/apache/arrow-rs/pull/7717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Support write to buffer api for SerializedFileWriter [\#7714](https://github.com/apache/arrow-rs/pull/7714) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Make variant iterators safely infallible [\#7704](https://github.com/apache/arrow-rs/pull/7704) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Speedup `interleave_views` \(4-7x faster\) [\#7695](https://github.com/apache/arrow-rs/pull/7695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Define an "arrow-pyarrow" crate to implement the "pyarrow" feature. [\#7694](https://github.com/apache/arrow-rs/pull/7694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
+- Document REE row format and add some more tests [\#7680](https://github.com/apache/arrow-rs/pull/7680) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- feat: add min max aggregate support for FixedSizeBinary [\#7675](https://github.com/apache/arrow-rs/pull/7675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
+- arrow-data: Add REE support for `build_extend` and `build_extend_nulls` [\#7671](https://github.com/apache/arrow-rs/pull/7671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Remove `lazy_static` dependency [\#7669](https://github.com/apache/arrow-rs/pull/7669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Expyron](https://github.com/Expyron))
+- Finish implementing Variant::Object and Variant::List [\#7666](https://github.com/apache/arrow-rs/pull/7666) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
+- Add `RecordBatch::schema_metadata_mut` and `Field::metadata_mut` [\#7664](https://github.com/apache/arrow-rs/pull/7664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
+- \[Variant\] Simplify creation of Variants from metadata and value [\#7663](https://github.com/apache/arrow-rs/pull/7663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- chore: group prost dependabot updates [\#7659](https://github.com/apache/arrow-rs/pull/7659) ([mbrobbel](https://github.com/mbrobbel))
+- Initial Builder API for Creating Variant Values [\#7653](https://github.com/apache/arrow-rs/pull/7653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PinkCrow007](https://github.com/PinkCrow007))
+- Add `BatchCoalescer::push_filtered_batch` and docs [\#7652](https://github.com/apache/arrow-rs/pull/7652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Optimize coalesce kernel for StringView \(10-50% faster\) [\#7650](https://github.com/apache/arrow-rs/pull/7650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- arrow-row: Add support for REE [\#7649](https://github.com/apache/arrow-rs/pull/7649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Use approximate comparisons for pow tests [\#7646](https://github.com/apache/arrow-rs/pull/7646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Implement read support for remaining primitive types [\#7644](https://github.com/apache/arrow-rs/pull/7644) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([superserious-dev](https://github.com/superserious-dev))
+- Add `pretty_format_batches_with_schema` function [\#7642](https://github.com/apache/arrow-rs/pull/7642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lewiszlw](https://github.com/lewiszlw))
+- Deprecate old Parquet page index parsing functions [\#7640](https://github.com/apache/arrow-rs/pull/7640) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Update FlightSQL `GetDbSchemas` and `GetTables` schemas to fully match the protocol [\#7638](https://github.com/apache/arrow-rs/pull/7638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([sgrebnov](https://github.com/sgrebnov))
+- Minor: Remove outdated FIXME from `ParquetMetaDataReader` [\#7635](https://github.com/apache/arrow-rs/pull/7635) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Fix the error info of `StructArray::try_new` [\#7634](https://github.com/apache/arrow-rs/pull/7634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963))
+- Fix reading encrypted Parquet pages when using the page index [\#7633](https://github.com/apache/arrow-rs/pull/7633) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- \[Variant\] Add commented out primitive test cases [\#7631](https://github.com/apache/arrow-rs/pull/7631) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve `coalesce` kernel tests [\#7626](https://github.com/apache/arrow-rs/pull/7626) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Revert "Revert "Improve `coalesce` and `concat` performance for views \(\#7614\)"" [\#7625](https://github.com/apache/arrow-rs/pull/7625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Revert "Improve `coalesce` and `concat` performance for views \(\#7614\)" [\#7623](https://github.com/apache/arrow-rs/pull/7623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Improve coalesce\_kernel benchmark to capture inline vs non inline views [\#7619](https://github.com/apache/arrow-rs/pull/7619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve `coalesce` and `concat` performance for views [\#7614](https://github.com/apache/arrow-rs/pull/7614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- feat: add constructor to help efficiently upgrade key for GenericBytesDictionaryBuilder [\#7611](https://github.com/apache/arrow-rs/pull/7611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- feat: support append\_nulls on additional builders [\#7606](https://github.com/apache/arrow-rs/pull/7606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- feat: add AsyncArrowWriter::into\_inner [\#7604](https://github.com/apache/arrow-rs/pull/7604) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jpopesculian](https://github.com/jpopesculian))
+- Move variant interop test to Rust integration test [\#7602](https://github.com/apache/arrow-rs/pull/7602) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Include footer key metadata when writing encrypted Parquet with a plaintext footer [\#7600](https://github.com/apache/arrow-rs/pull/7600) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rok](https://github.com/rok))
+- Add `coalesce` kernel and `BatchCoalescer` for statefully combining selected batches [\#7597](https://github.com/apache/arrow-rs/pull/7597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add FixedSizeBinary to `take_kernel` benchmark [\#7592](https://github.com/apache/arrow-rs/pull/7592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix GenericBinaryArray docstring. [\#7588](https://github.com/apache/arrow-rs/pull/7588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
+- fix: error reading multiple batches of `Dict(_, FixedSizeBinary(_))` [\#7585](https://github.com/apache/arrow-rs/pull/7585) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
+- Revert "Minor: remove filter code deprecated in 2023 \(\#7554\)" [\#7583](https://github.com/apache/arrow-rs/pull/7583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fixed a build warning: function never used. [\#7577](https://github.com/apache/arrow-rs/pull/7577) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- Adding Encoding argument in `parquet-rewrite` [\#7576](https://github.com/apache/arrow-rs/pull/7576) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter [\#7574](https://github.com/apache/arrow-rs/pull/7574) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([CookiePieWw](https://github.com/CookiePieWw))
+- \[array\] Remove unwrap checks from GenericByteArray::value\_unchecked [\#7573](https://github.com/apache/arrow-rs/pull/7573) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- \[benches/row\_format\] fix typo in array lengths [\#7572](https://github.com/apache/arrow-rs/pull/7572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- Add a strong\_count method to Buffer [\#7569](https://github.com/apache/arrow-rs/pull/7569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([westonpace](https://github.com/westonpace))
+- Minor: Enable byte view for clickbench benchmark [\#7565](https://github.com/apache/arrow-rs/pull/7565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Optimize length calculation in row encoding for fixed-length columns [\#7564](https://github.com/apache/arrow-rs/pull/7564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
+- Use PR title and description for commit message [\#7563](https://github.com/apache/arrow-rs/pull/7563) ([kou](https://github.com/kou))
+- Use apache/arrow-{go,java,js} in integration test [\#7561](https://github.com/apache/arrow-rs/pull/7561) ([kou](https://github.com/kou))
+- Implement Array Decoding in arrow-avro [\#7559](https://github.com/apache/arrow-rs/pull/7559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Minor: remove filter code deprecated in 2023 [\#7554](https://github.com/apache/arrow-rs/pull/7554) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- fix: Correct docs for `WriterPropertiesBuilder::set_column_index_truncate_length` [\#7553](https://github.com/apache/arrow-rs/pull/7553) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Adding Bloom Filter Position argument in parquet-rewrite [\#7550](https://github.com/apache/arrow-rs/pull/7550) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
+- Fix `Result` name collision in parquet\_derive [\#7548](https://github.com/apache/arrow-rs/pull/7548) ([jspaezp](https://github.com/jspaezp))
+- Fix: Converted feature flight-sql-experimental to flight-sql [\#7546](https://github.com/apache/arrow-rs/pull/7546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal))
+- Fix CI on main due to logical conflict [\#7542](https://github.com/apache/arrow-rs/pull/7542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix `filter_record_batch` panics with empty struct array [\#7539](https://github.com/apache/arrow-rs/pull/7539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour))
+- \[Variant\] Initial API for reading Variant data and metadata [\#7535](https://github.com/apache/arrow-rs/pull/7535) ([mkarbo](https://github.com/mkarbo))
+- fix: Panic in pretty\_format function when displaying DurationSecondsA… [\#7534](https://github.com/apache/arrow-rs/pull/7534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
+- Create version of LexicographicalComparator that compares fixed number of columns \(~ -15%\) [\#7530](https://github.com/apache/arrow-rs/pull/7530) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Make parquet-show-bloom-filter work with integer typed columns [\#7529](https://github.com/apache/arrow-rs/pull/7529) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
+- chore\(deps\): update criterion requirement from 0.5 to 0.6 [\#7527](https://github.com/apache/arrow-rs/pull/7527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
+- Minor: Add a parquet row\_filter test, reduce some test boilerplate [\#7522](https://github.com/apache/arrow-rs/pull/7522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Refactor `build_array_reader` into a struct [\#7521](https://github.com/apache/arrow-rs/pull/7521) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- arrow: add concat structs benchmark [\#7520](https://github.com/apache/arrow-rs/pull/7520) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- arrow-select: add support for merging primitive dictionary values [\#7519](https://github.com/apache/arrow-rs/pull/7519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- arrow-select: add support for optimized concatenation of struct arrays [\#7517](https://github.com/apache/arrow-rs/pull/7517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- Fix Clippy in CI for Rust 1.87 release [\#7514](https://github.com/apache/arrow-rs/pull/7514) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
+- Simplify `ParquetRecordBatchReader::next` control logic [\#7512](https://github.com/apache/arrow-rs/pull/7512) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Fix record API support for reading INT32 encoded TIME\_MILLIS [\#7511](https://github.com/apache/arrow-rs/pull/7511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([njaremko](https://github.com/njaremko))
+- RecordBatchDecoder: skip RecordBatch validation when `skip_validation` property is enabled [\#7509](https://github.com/apache/arrow-rs/pull/7509) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nilskch](https://github.com/nilskch))
+- Introduce `ReadPlan` to encapsulate the calculation of what parquet rows to decode [\#7502](https://github.com/apache/arrow-rs/pull/7502) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Update documentation for ParquetReader [\#7501](https://github.com/apache/arrow-rs/pull/7501) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Improve `Field` docs, add missing `Field::set_*` methods [\#7497](https://github.com/apache/arrow-rs/pull/7497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Speed up arithmetic kernels, reduce `unsafe` usage [\#7493](https://github.com/apache/arrow-rs/pull/7493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Prevent FlightSQL server panics for `do_put` when stream is empty or 1st stream element is an Err [\#7492](https://github.com/apache/arrow-rs/pull/7492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([superserious-dev](https://github.com/superserious-dev))
+- arrow-ipc: add `StreamDecoder::schema` [\#7488](https://github.com/apache/arrow-rs/pull/7488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm))
+- arrow-select: Implement concat for `RunArray`s [\#7487](https://github.com/apache/arrow-rs/pull/7487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- \[Variant\] Add \(empty\) `parquet-variant` crate, update `parquet-testing` pin [\#7485](https://github.com/apache/arrow-rs/pull/7485) ([alamb](https://github.com/alamb))
+- Improve error messages if schema hint mismatches with parquet schema [\#7481](https://github.com/apache/arrow-rs/pull/7481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add `arrow_reader_clickbench` benchmark [\#7470](https://github.com/apache/arrow-rs/pull/7470) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Speedup `filter_bytes` ~-20-40%, `filter_native` low selectivity \(~-37%\) [\#7463](https://github.com/apache/arrow-rs/pull/7463) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Update arrow\_reader\_row\_filter benchmark to reflect ClickBench distribution [\#7461](https://github.com/apache/arrow-rs/pull/7461) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add Map support to arrow-avro [\#7451](https://github.com/apache/arrow-rs/pull/7451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Support Utf8View for Avro [\#7434](https://github.com/apache/arrow-rs/pull/7434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kumarlokesh](https://github.com/kumarlokesh))
+- Add support for creating random Decimal128 and Decimal256 arrays [\#7427](https://github.com/apache/arrow-rs/pull/7427) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+
 ## [55.1.0](https://github.com/apache/arrow-rs/tree/55.1.0) (2025-05-09)
 
 [Full Changelog](https://github.com/apache/arrow-rs/compare/55.0.0...55.1.0)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 03c5f6436fd5..fbbdba7d36ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,177 +19,172 @@
 
 # Changelog
 
-## [55.2.0](https://github.com/apache/arrow-rs/tree/55.2.0) (2025-06-22)
+## [57.2.0](https://github.com/apache/arrow-rs/tree/57.2.0) (2026-01-07)
 
-[Full Changelog](https://github.com/apache/arrow-rs/compare/55.1.0...55.2.0)
+[Full Changelog](https://github.com/apache/arrow-rs/compare/57.1.0...57.2.0)
+
+**Breaking changes:**
+
+- Seal Array trait [\#9092](https://github.com/apache/arrow-rs/pull/9092) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold))
+- \[Variant\] Unify the CastOptions usage in parquet-variant-compute [\#8984](https://github.com/apache/arrow-rs/pull/8984) ([klion26](https://github.com/klion26))
 
 **Implemented enhancements:**
 
-- Do not populate nulls for `NullArray` for `MutableArrayData` [\#7725](https://github.com/apache/arrow-rs/issues/7725)
-- Implement `PartialEq` for RunArray [\#7691](https://github.com/apache/arrow-rs/issues/7691)
-- `interleave_views` is really slow [\#7688](https://github.com/apache/arrow-rs/issues/7688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Add min max aggregates for FixedSizeBinary [\#7674](https://github.com/apache/arrow-rs/issues/7674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Deliver pyarrow as a standalone crate [\#7668](https://github.com/apache/arrow-rs/issues/7668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- \[Variant\] Implement `VariantObject::field` and `VariantObject::fields` [\#7665](https://github.com/apache/arrow-rs/issues/7665) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- \[Variant\] Implement read support for remaining primitive types [\#7630](https://github.com/apache/arrow-rs/issues/7630) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Fast and ergonomic method to add metadata to a `RecordBatch` [\#7628](https://github.com/apache/arrow-rs/issues/7628) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Add efficient way to change the keys of string dictionary builder [\#7610](https://github.com/apache/arrow-rs/issues/7610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Support `add_nulls` on additional builder types [\#7605](https://github.com/apache/arrow-rs/issues/7605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Add `into_inner` for `AsyncArrowWriter` [\#7603](https://github.com/apache/arrow-rs/issues/7603) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Optimize `PrimitiveBuilder::append_trusted_len_iter` [\#7591](https://github.com/apache/arrow-rs/issues/7591) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Benchmark for filter+concat and take+concat into even sized record batches [\#7589](https://github.com/apache/arrow-rs/issues/7589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- `max_statistics_truncate_length` is ignored when writing statistics to data page headers [\#7579](https://github.com/apache/arrow-rs/issues/7579) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Feature Request: Encoding in `parquet-rewrite` [\#7575](https://github.com/apache/arrow-rs/issues/7575) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Add a `strong_count` method to `Buffer` [\#7568](https://github.com/apache/arrow-rs/issues/7568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Create version of LexicographicalComparator that compares fixed number of columns [\#7531](https://github.com/apache/arrow-rs/issues/7531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- parquet-show-bloom-filter should work with integer typed columns [\#7528](https://github.com/apache/arrow-rs/issues/7528) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Allow merging primitive dictionary values in concat and interleave kernels [\#7518](https://github.com/apache/arrow-rs/issues/7518) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Add efficient concatenation of StructArrays [\#7516](https://github.com/apache/arrow-rs/issues/7516) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Rename `flight-sql-experimental` to `flight-sql` [\#7498](https://github.com/apache/arrow-rs/issues/7498) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
-- Consider moving from ryu to lexical-core for string formatting / casting floats to string. [\#7496](https://github.com/apache/arrow-rs/issues/7496)
-- Arithmetic kernels can be safer and faster [\#7494](https://github.com/apache/arrow-rs/issues/7494) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Speedup `filter_bytes` by precalculating capacity [\#7465](https://github.com/apache/arrow-rs/issues/7465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- \[Variant\]: Rust API to Create Variant Values [\#7424](https://github.com/apache/arrow-rs/issues/7424) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- \[Variant\] Rust API to Read Variant Values [\#7423](https://github.com/apache/arrow-rs/issues/7423) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Release arrow-rs / parquet Minor version `55.1.0` \(May 2025\) [\#7393](https://github.com/apache/arrow-rs/issues/7393) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Support create\_random\_array for Decimal data types [\#7343](https://github.com/apache/arrow-rs/issues/7343) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Truncate Parquet page data page statistics [\#7555](https://github.com/apache/arrow-rs/pull/7555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[parquet\] further relax `LevelInfoBuilder::types_compatible` for `ArrowWriter` [\#9098](https://github.com/apache/arrow-rs/issues/9098)
+- Update arrow-row documentation with Union encoding [\#9084](https://github.com/apache/arrow-rs/issues/9084)
+- Add code examples for min and max compute functions [\#9055](https://github.com/apache/arrow-rs/issues/9055)
+- Add `append_n` to bytes view builder API [\#9034](https://github.com/apache/arrow-rs/issues/9034) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Move `RunArray::get_physical_indices` to `RunEndBuffer` [\#9025](https://github.com/apache/arrow-rs/issues/9025) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Allow quote style in csv writer [\#9003](https://github.com/apache/arrow-rs/issues/9003) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- IPC support for ListView [\#9002](https://github.com/apache/arrow-rs/issues/9002) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement `BinaryArrayType` for `&FixedSizeBinaryArray`s [\#8992](https://github.com/apache/arrow-rs/issues/8992) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- arrow-buffer: implement num-traits for i256 [\#8976](https://github.com/apache/arrow-rs/issues/8976) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support for `Arc<str>` in `ParquetRecordWriter` derive macro [\#8972](https://github.com/apache/arrow-rs/issues/8972)
+- \[arrow-avro\] suggest switching from xz to liblzma [\#8970](https://github.com/apache/arrow-rs/issues/8970) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- arrow-buffer: add i256::trailing\_zeros [\#8968](https://github.com/apache/arrow-rs/issues/8968) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- arrow-buffer: make i256::leading\_zeros public [\#8965](https://github.com/apache/arrow-rs/issues/8965) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add spark like `ignoreLeadingWhiteSpace` and `ignoreTrailingWhiteSpace` options to the csv writer [\#8961](https://github.com/apache/arrow-rs/issues/8961) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add round trip benchmark for Parquet writer/reader [\#8955](https://github.com/apache/arrow-rs/issues/8955) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support performant `interleave` for List/LargeList [\#8952](https://github.com/apache/arrow-rs/issues/8952) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support array access when parsing `VariantPath` [\#8946](https://github.com/apache/arrow-rs/issues/8946)
+- Some panic!s could be represented as unimplemented!s [\#8932](https://github.com/apache/arrow-rs/issues/8932) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] easier way to construct a shredded schema [\#8922](https://github.com/apache/arrow-rs/issues/8922)
+- Support `DataType::ListView` and `DataType::LargeListView` in `ArrayData::new_null` [\#8908](https://github.com/apache/arrow-rs/issues/8908) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add `GenericListViewArray::from_iter_primitive` [\#8906](https://github.com/apache/arrow-rs/issues/8906) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Unify the cast option usage in ParquetVariant [\#8873](https://github.com/apache/arrow-rs/issues/8873)
+- Blog post about efficient filter representation in Parquet filter pushdown [\#8843](https://github.com/apache/arrow-rs/issues/8843) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Add comparison support for Union arrays in the `cmp` kernel [\#8837](https://github.com/apache/arrow-rs/issues/8837) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support array shredding into `List/LargeList/ListView/LargeListView` [\#8830](https://github.com/apache/arrow-rs/issues/8830)
+- Support `Union` data types for row format [\#8828](https://github.com/apache/arrow-rs/issues/8828) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- FFI support for ListView [\#8819](https://github.com/apache/arrow-rs/issues/8819) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[Variant\] Support more Arrow Datatypes from Variant primitive types [\#8805](https://github.com/apache/arrow-rs/issues/8805)
+- `FixedSizeBinaryBuilder` supports `append_array` [\#8750](https://github.com/apache/arrow-rs/issues/8750) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement special case `zip` with scalar for Utf8View [\#8724](https://github.com/apache/arrow-rs/issues/8724) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[geometry\] Wire up arrow reader/writer for `GEOMETRY` and `GEOGRAPHY` [\#8717](https://github.com/apache/arrow-rs/issues/8717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
 
 **Fixed bugs:**
 
-- In arrow\_json, Decoder::decode can panic if it encounters two high surrogates in a row. [\#7712](https://github.com/apache/arrow-rs/issues/7712)
-- FlightSQL "GetDbSchemas" and "GetTables" schemas do not fully match the protocol [\#7637](https://github.com/apache/arrow-rs/issues/7637) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
-- Cannot read encrypted Parquet file if page index reading is enabled [\#7629](https://github.com/apache/arrow-rs/issues/7629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- `encoding_stats` not present in Parquet generated by `parquet-rewrite` [\#7616](https://github.com/apache/arrow-rs/issues/7616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- When writing parquet plaintext footer files `footer_signing_key_metadata` is not included, encryption alghoritm is always written in footer [\#7599](https://github.com/apache/arrow-rs/issues/7599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- `new_null_array` panics when constructing a struct of a dictionary [\#7571](https://github.com/apache/arrow-rs/issues/7571)
-- Parquet derive fails to build when Result is aliased [\#7547](https://github.com/apache/arrow-rs/issues/7547)
-- Unable to read `Dictionary(u8, FixedSizeBinary(_))` using datafusion. [\#7545](https://github.com/apache/arrow-rs/issues/7545) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- filter\_record\_batch panics with empty struct array. [\#7538](https://github.com/apache/arrow-rs/issues/7538) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Panic in `pretty_format` function when displaying DurationSecondsArray with `i64::MIN` / `i64::MAX` [\#7533](https://github.com/apache/arrow-rs/issues/7533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Record API unable to parse TIME\_MILLIS when encoded as INT32 [\#7510](https://github.com/apache/arrow-rs/issues/7510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- The `read_record_batch` func of the `RecordBatchDecoder` does not respect the `skip_validation` property [\#7508](https://github.com/apache/arrow-rs/issues/7508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- `arrow-55.1.0` breaks `filter_record_batch` [\#7500](https://github.com/apache/arrow-rs/issues/7500)
-- Files containing binary data with \>=8\_388\_855 bytes per row written with `arrow-rs` can't be read with `pyarrow` [\#7489](https://github.com/apache/arrow-rs/issues/7489) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- \[Bug\] Ingestion with Arrow Flight Sql panic when the input stream is empty or fallible [\#7329](https://github.com/apache/arrow-rs/issues/7329) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
-- Ensure page encoding statistics are written to Parquet file [\#7643](https://github.com/apache/arrow-rs/pull/7643) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Soundness Bug in `try_binary` when `Array` is implemented incorrectly in external crate [\#9106](https://github.com/apache/arrow-rs/issues/9106)
+- casting `Dict(_, LargeUtf8)` to `Utf8View` \(`StringViewArray`\) panics [\#9101](https://github.com/apache/arrow-rs/issues/9101)
+- wrong results for null count of `nullif` kernel [\#9085](https://github.com/apache/arrow-rs/issues/9085) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Empty first line in some code examples [\#9063](https://github.com/apache/arrow-rs/issues/9063)
+- GenericByteViewArray::slice is not zero-copy but ought to be [\#9014](https://github.com/apache/arrow-rs/issues/9014)
+- Regression in struct casting in 57.2.0 \(not yet released\) [\#9005](https://github.com/apache/arrow-rs/issues/9005) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Fix panic when decoding multiple Union columns in RowConverter [\#8999](https://github.com/apache/arrow-rs/issues/8999) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `take_fixed_size_binary` Does Not Consider NULL Indices [\#8947](https://github.com/apache/arrow-rs/issues/8947) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- \[arrow-avro\] RecordEncoder Bugs [\#8934](https://github.com/apache/arrow-rs/issues/8934) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `FixedSizeBinaryArray::try_new(...)` Panics with Item Length of Zero [\#8926](https://github.com/apache/arrow-rs/issues/8926) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `cargo test -p arrow-cast` fails on main [\#8910](https://github.com/apache/arrow-rs/issues/8910) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `GenericListViewArray::new_null` ignores `len` and returns an empty array [\#8904](https://github.com/apache/arrow-rs/issues/8904) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `FixedSizeBinaryArray::new_null` Does Not Properly Set the Length of the Values Buffer [\#8900](https://github.com/apache/arrow-rs/issues/8900) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Struct casting requires same order of fields [\#8870](https://github.com/apache/arrow-rs/issues/8870) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Cannot cast string dictionary to binary view [\#8841](https://github.com/apache/arrow-rs/issues/8841) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
 
 **Documentation updates:**
 
-- arrow\_reader\_row\_filter benchmark doesn't capture page cache improvements [\#7460](https://github.com/apache/arrow-rs/issues/7460) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- chore: fix a typo in `ExtensionType::supports_data_type` docs [\#7682](https://github.com/apache/arrow-rs/pull/7682) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
-- \[Variant\] Add variant docs and examples [\#7661](https://github.com/apache/arrow-rs/pull/7661) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Minor: Add version to deprecation notice for `ParquetMetaDataReader::decode_footer` [\#7639](https://github.com/apache/arrow-rs/pull/7639) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Add references for defaults in `WriterPropertiesBuilder` [\#7558](https://github.com/apache/arrow-rs/pull/7558) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Clarify Docs: NullBuffer::len is in bits [\#7556](https://github.com/apache/arrow-rs/pull/7556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- docs: fix typo for `Decimal128Array` [\#7525](https://github.com/apache/arrow-rs/pull/7525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([burmecia](https://github.com/burmecia))
-- Minor: Add examples to ProjectionMask documentation [\#7523](https://github.com/apache/arrow-rs/pull/7523) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Improve documentation for Parquet `WriterProperties` [\#7491](https://github.com/apache/arrow-rs/pull/7491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Add Union encoding documentation [\#9102](https://github.com/apache/arrow-rs/pull/9102) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([EduardAkhmetshin](https://github.com/EduardAkhmetshin))
+- docs: fix misleading reserve documentation [\#9076](https://github.com/apache/arrow-rs/pull/9076) ([WaterWhisperer](https://github.com/WaterWhisperer))
+- Fix headers and empty lines in code examples [\#9064](https://github.com/apache/arrow-rs/pull/9064) ([EduardAkhmetshin](https://github.com/EduardAkhmetshin))
+- Add examples for min and max functions [\#9062](https://github.com/apache/arrow-rs/pull/9062) ([EduardAkhmetshin](https://github.com/EduardAkhmetshin))
+- Improve arrow-buffer documentation [\#9020](https://github.com/apache/arrow-rs/pull/9020) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Move examples in arrow-csv to docstrings, polish up docs [\#9001](https://github.com/apache/arrow-rs/pull/9001) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add example of parsing field names as VariantPath [\#8945](https://github.com/apache/arrow-rs/pull/8945) ([alamb](https://github.com/alamb))
+- Improve documentation for `prep_null_mask_filter` [\#8722](https://github.com/apache/arrow-rs/pull/8722) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+
+**Performance improvements:**
+
+- \[parquet\] Avoid a clone while resolving the read strategy [\#9056](https://github.com/apache/arrow-rs/pull/9056) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- perf: improve performance of encoding `GenericByteArray` by 8% [\#9054](https://github.com/apache/arrow-rs/pull/9054) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Speed up unary `not` kernel by 50%, add `BooleanBuffer::from_bitwise_unary` [\#8996](https://github.com/apache/arrow-rs/pull/8996) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- arrow-select: improve dictionary interleave fallback performance [\#8978](https://github.com/apache/arrow-rs/pull/8978) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- Add special implementation for zip for Utf8View/BinaryView scalars [\#8963](https://github.com/apache/arrow-rs/pull/8963) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mkleen](https://github.com/mkleen))
+- arrow-select: implement specialized interleave\_list [\#8953](https://github.com/apache/arrow-rs/pull/8953) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
 
 **Closed issues:**
 
-- \[Variant\] More efficient determination of String vs ShortString [\#7700](https://github.com/apache/arrow-rs/issues/7700)
-- \[Variant\] Improve API for iterating over values of a VariantList [\#7685](https://github.com/apache/arrow-rs/issues/7685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- \[Variant\] Consider validating variants on creation \(rather than read\) [\#7684](https://github.com/apache/arrow-rs/issues/7684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
-- Miri test\_native\_type\_pow test failing [\#7641](https://github.com/apache/arrow-rs/issues/7641) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Improve performance of `coalesce` and `concat` for views [\#7615](https://github.com/apache/arrow-rs/issues/7615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
-- Bad min value in row group statistics in some special cases [\#7593](https://github.com/apache/arrow-rs/issues/7593)
-- Feature Request: BloomFilter Position Flexibility in `parquet-rewrite` [\#7552](https://github.com/apache/arrow-rs/issues/7552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- impl `Index` for `UnionFields` [\#8958](https://github.com/apache/arrow-rs/issues/8958) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
 
 **Merged pull requests:**
 
-- arrow-array: Implement PartialEq for RunArray [\#7727](https://github.com/apache/arrow-rs/pull/7727) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
-- fix: Do not add null buffer for `NullArray` in MutableArrayData [\#7726](https://github.com/apache/arrow-rs/pull/7726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comphead](https://github.com/comphead))
-- fix JSON decoder error checking for UTF16 / surrogate parsing panic [\#7721](https://github.com/apache/arrow-rs/pull/7721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nicklan](https://github.com/nicklan))
-- \[Variant\] Introduce new type over &str for ShortString [\#7718](https://github.com/apache/arrow-rs/pull/7718) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
-- Split out variant code into several new sub-modules [\#7717](https://github.com/apache/arrow-rs/pull/7717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
-- Support write to buffer api for SerializedFileWriter [\#7714](https://github.com/apache/arrow-rs/pull/7714) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
-- Make variant iterators safely infallible [\#7704](https://github.com/apache/arrow-rs/pull/7704) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
-- Speedup `interleave_views` \(4-7x faster\) [\#7695](https://github.com/apache/arrow-rs/pull/7695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Define a "arrow-pyrarrow" crate to implement the "pyarrow" feature. [\#7694](https://github.com/apache/arrow-rs/pull/7694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
-- Document REE row format and add some more tests [\#7680](https://github.com/apache/arrow-rs/pull/7680) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- feat: add min max aggregate support for FixedSizeBinary [\#7675](https://github.com/apache/arrow-rs/pull/7675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
-- arrow-data: Add REE support for `build_extend` and `build_extend_nulls` [\#7671](https://github.com/apache/arrow-rs/pull/7671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
-- Remove `lazy_static` dependency [\#7669](https://github.com/apache/arrow-rs/pull/7669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Expyron](https://github.com/Expyron))
-- Finish implementing Variant::Object and Variant::List [\#7666](https://github.com/apache/arrow-rs/pull/7666) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich))
-- Add `RecordBatch::schema_metadata_mut` and `Field::metadata_mut` [\#7664](https://github.com/apache/arrow-rs/pull/7664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk))
-- \[Variant\] Simplify creation of Variants from metadata and value [\#7663](https://github.com/apache/arrow-rs/pull/7663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- chore: group prost dependabot updates [\#7659](https://github.com/apache/arrow-rs/pull/7659) ([mbrobbel](https://github.com/mbrobbel))
-- Initial Builder API for Creating Variant Values [\#7653](https://github.com/apache/arrow-rs/pull/7653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([PinkCrow007](https://github.com/PinkCrow007))
-- Add `BatchCoalescer::push_filtered_batch` and docs [\#7652](https://github.com/apache/arrow-rs/pull/7652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Optimize coalesce kernel for StringView \(10-50% faster\) [\#7650](https://github.com/apache/arrow-rs/pull/7650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- arrow-row: Add support for REE [\#7649](https://github.com/apache/arrow-rs/pull/7649) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
-- Use approximate comparisons for pow tests [\#7646](https://github.com/apache/arrow-rs/pull/7646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adamreeve](https://github.com/adamreeve))
-- \[Variant\] Implement read support for remaining primitive types [\#7644](https://github.com/apache/arrow-rs/pull/7644) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([superserious-dev](https://github.com/superserious-dev))
-- Add `pretty_format_batches_with_schema` function [\#7642](https://github.com/apache/arrow-rs/pull/7642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lewiszlw](https://github.com/lewiszlw))
-- Deprecate old Parquet page index parsing functions [\#7640](https://github.com/apache/arrow-rs/pull/7640) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Update FlightSQL `GetDbSchemas` and `GetTables` schemas to fully match the protocol [\#7638](https://github.com/apache/arrow-rs/pull/7638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([sgrebnov](https://github.com/sgrebnov))
-- Minor: Remove outdated FIXME from `ParquetMetaDataReader` [\#7635](https://github.com/apache/arrow-rs/pull/7635) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Fix the error info of `StructArray::try_new` [\#7634](https://github.com/apache/arrow-rs/pull/7634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963))
-- Fix reading encrypted Parquet pages when using the page index [\#7633](https://github.com/apache/arrow-rs/pull/7633) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
-- \[Variant\] Add commented out primitive test casees [\#7631](https://github.com/apache/arrow-rs/pull/7631) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Improve `coalesce` kernel tests [\#7626](https://github.com/apache/arrow-rs/pull/7626) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Revert "Revert "Improve `coalesce` and `concat` performance for views… [\#7625](https://github.com/apache/arrow-rs/pull/7625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Revert "Improve `coalesce` and `concat` performance for views \(\#7614\)" [\#7623](https://github.com/apache/arrow-rs/pull/7623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Improve coalesce\_kernel benchmark to capture inline vs non inline views [\#7619](https://github.com/apache/arrow-rs/pull/7619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Improve `coalesce` and `concat` performance for views [\#7614](https://github.com/apache/arrow-rs/pull/7614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- feat: add constructor to help efficiently upgrade key for GenericBytesDictionaryBuilder [\#7611](https://github.com/apache/arrow-rs/pull/7611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
-- feat: support append\_nulls on additional builders [\#7606](https://github.com/apache/arrow-rs/pull/7606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
-- feat: add AsyncArrowWriter::into\_inner [\#7604](https://github.com/apache/arrow-rs/pull/7604) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jpopesculian](https://github.com/jpopesculian))
-- Move variant interop test to Rust integration test [\#7602](https://github.com/apache/arrow-rs/pull/7602) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Include footer key metadata when writing encrypted Parquet with a plaintext footer [\#7600](https://github.com/apache/arrow-rs/pull/7600) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rok](https://github.com/rok))
-- Add `coalesce` kernel and`BatchCoalescer` for statefully combining selected b…atches: [\#7597](https://github.com/apache/arrow-rs/pull/7597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Add FixedSizeBinary to `take_kernel` benchmark [\#7592](https://github.com/apache/arrow-rs/pull/7592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Fix GenericBinaryArray docstring. [\#7588](https://github.com/apache/arrow-rs/pull/7588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal))
-- fix: error reading multiple batches of `Dict(_, FixedSizeBinary(_))` [\#7585](https://github.com/apache/arrow-rs/pull/7585) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([albertlockett](https://github.com/albertlockett))
-- Revert "Minor: remove filter code deprecated in 2023 \(\#7554\)" [\#7583](https://github.com/apache/arrow-rs/pull/7583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Fixed a warning build build: function never used. [\#7577](https://github.com/apache/arrow-rs/pull/7577) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
-- Adding Encoding argument in `parquet-rewrite` [\#7576](https://github.com/apache/arrow-rs/pull/7576) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
-- feat: add `row_group_is_[max/min]_value_exact` to StatisticsConverter [\#7574](https://github.com/apache/arrow-rs/pull/7574) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([CookiePieWw](https://github.com/CookiePieWw))
-- \[array\] Remove unwrap checks from GenericByteArray::value\_unchecked [\#7573](https://github.com/apache/arrow-rs/pull/7573) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
-- \[benches/row\_format\] fix typo in array lengths [\#7572](https://github.com/apache/arrow-rs/pull/7572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
-- Add a strong\_count method to Buffer [\#7569](https://github.com/apache/arrow-rs/pull/7569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([westonpace](https://github.com/westonpace))
-- Minor: Enable byte view for clickbench benchmark [\#7565](https://github.com/apache/arrow-rs/pull/7565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
-- Optimize length calculation in row encoding for fixed-length columns [\#7564](https://github.com/apache/arrow-rs/pull/7564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ctsk](https://github.com/ctsk))
-- Use PR title and description for commit message [\#7563](https://github.com/apache/arrow-rs/pull/7563) ([kou](https://github.com/kou))
-- Use apache/arrow-{go,java,js} in integration test [\#7561](https://github.com/apache/arrow-rs/pull/7561) ([kou](https://github.com/kou))
-- Implement Array Decoding in arrow-avro [\#7559](https://github.com/apache/arrow-rs/pull/7559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
-- Minor: remove filter code deprecated in 2023 [\#7554](https://github.com/apache/arrow-rs/pull/7554) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- fix: Correct docs for `WriterPropertiesBuilder::set_column_index_truncate_length` [\#7553](https://github.com/apache/arrow-rs/pull/7553) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
-- Adding Bloom Filter Position argument in parquet-rewrite [\#7550](https://github.com/apache/arrow-rs/pull/7550) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([JigaoLuo](https://github.com/JigaoLuo))
-- Fix `Result` name collision in parquet\_derive [\#7548](https://github.com/apache/arrow-rs/pull/7548) ([jspaezp](https://github.com/jspaezp))
-- Fix: Converted feature flight-sql-experimental to flight-sql [\#7546](https://github.com/apache/arrow-rs/pull/7546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal))
-- Fix CI on main due to logical conflict [\#7542](https://github.com/apache/arrow-rs/pull/7542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Fix `filter_record_batch` panics with empty struct array [\#7539](https://github.com/apache/arrow-rs/pull/7539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour))
-- \[Variant\] Initial API for reading Variant data and metadata [\#7535](https://github.com/apache/arrow-rs/pull/7535) ([mkarbo](https://github.com/mkarbo))
-- fix: Panic in pretty\_format function when displaying DurationSecondsA… [\#7534](https://github.com/apache/arrow-rs/pull/7534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas))
-- Create version of LexicographicalComparator that compares fixed number of columns \(~ -15%\) [\#7530](https://github.com/apache/arrow-rs/pull/7530) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Make parquet-show-bloom-filter work with integer typed columns [\#7529](https://github.com/apache/arrow-rs/pull/7529) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([adamreeve](https://github.com/adamreeve))
-- chore\(deps\): update criterion requirement from 0.5 to 0.6 [\#7527](https://github.com/apache/arrow-rs/pull/7527) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel))
-- Minor: Add a parquet row\_filter test, reduce some test boiler plate [\#7522](https://github.com/apache/arrow-rs/pull/7522) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Refactor `build_array_reader` into a struct [\#7521](https://github.com/apache/arrow-rs/pull/7521) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- arrow: add concat structs benchmark [\#7520](https://github.com/apache/arrow-rs/pull/7520) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
-- arrow-select: add support for merging primitive dictionary values [\#7519](https://github.com/apache/arrow-rs/pull/7519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
-- arrow-select: add support for optimized concatenation of struct arrays [\#7517](https://github.com/apache/arrow-rs/pull/7517) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
-- Fix Clippy in CI for Rust 1.87 release [\#7514](https://github.com/apache/arrow-rs/pull/7514) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb))
-- Simplify `ParquetRecordBatchReader::next` control logic [\#7512](https://github.com/apache/arrow-rs/pull/7512) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Fix record API support for reading INT32 encoded TIME\_MILLIS [\#7511](https://github.com/apache/arrow-rs/pull/7511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([njaremko](https://github.com/njaremko))
-- RecordBatchDecoder: skip RecordBatch validation when `skip_validation` property is enabled [\#7509](https://github.com/apache/arrow-rs/pull/7509) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nilskch](https://github.com/nilskch))
-- Introduce `ReadPlan` to encapsulate the calculation of what parquet rows to decode [\#7502](https://github.com/apache/arrow-rs/pull/7502) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Update documentation for ParquetReader [\#7501](https://github.com/apache/arrow-rs/pull/7501) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Improve `Field` docs, add missing `Field::set_*` methods [\#7497](https://github.com/apache/arrow-rs/pull/7497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Speed up arithmetic kernels, reduce `unsafe` usage [\#7493](https://github.com/apache/arrow-rs/pull/7493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Prevent FlightSQL server panics for `do_put` when stream is empty or 1st stream element is an Err [\#7492](https://github.com/apache/arrow-rs/pull/7492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([superserious-dev](https://github.com/superserious-dev))
-- arrow-ipc: add `StreamDecoder::schema` [\#7488](https://github.com/apache/arrow-rs/pull/7488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm))
-- arrow-select: Implement concat for `RunArray`s [\#7487](https://github.com/apache/arrow-rs/pull/7487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
-- \[Variant\] Add \(empty\) `parquet-variant` crate, update `parquet-testing` pin [\#7485](https://github.com/apache/arrow-rs/pull/7485) ([alamb](https://github.com/alamb))
-- Improve error messages if schema hint mismatches with parquet schema [\#7481](https://github.com/apache/arrow-rs/pull/7481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Add `arrow_reader_clickbench` benchmark [\#7470](https://github.com/apache/arrow-rs/pull/7470) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Speedup `filter_bytes` ~-20-40%, `filter_native` low selectivity \(~-37%\) [\#7463](https://github.com/apache/arrow-rs/pull/7463) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Update arrow\_reader\_row\_filter benchmark to reflect ClickBench distribution [\#7461](https://github.com/apache/arrow-rs/pull/7461) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Add Map support to arrow-avro [\#7451](https://github.com/apache/arrow-rs/pull/7451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
-- Support Utf8View for Avro [\#7434](https://github.com/apache/arrow-rs/pull/7434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kumarlokesh](https://github.com/kumarlokesh))
-- Add support for creating random Decimal128 and Decimal256 arrays [\#7427](https://github.com/apache/arrow-rs/pull/7427) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Add `DataType::is_decimal` [\#9100](https://github.com/apache/arrow-rs/pull/9100) ([AdamGS](https://github.com/AdamGS))
+- feat\(parquet\): relax type compatibility check in parquet ArrowWriter [\#9099](https://github.com/apache/arrow-rs/pull/9099) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([gruuya](https://github.com/gruuya))
+- \[Variant\] Move `ArrayVariantToArrowRowBuilder` to `variant_to_arrow` [\#9094](https://github.com/apache/arrow-rs/pull/9094) ([liamzwbao](https://github.com/liamzwbao))
+- chore: increase row count and batch size for more deterministic tests [\#9088](https://github.com/apache/arrow-rs/pull/9088) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Fix `nullif` kernel [\#9087](https://github.com/apache/arrow-rs/pull/9087) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add `FlightInfo::with_endpoints` method [\#9075](https://github.com/apache/arrow-rs/pull/9075) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([lewiszlw](https://github.com/lewiszlw))
+- chore: run validation when debug assertion enabled and not only for test [\#9073](https://github.com/apache/arrow-rs/pull/9073) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Minor: make it clear cache array reader is not cloning arrays [\#9057](https://github.com/apache/arrow-rs/pull/9057) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Minor: avoid clone in RunArray row decoding via buffer stealing [\#9052](https://github.com/apache/arrow-rs/pull/9052) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lyang24](https://github.com/lyang24))
+- Minor: avoid some clones when reading parquet [\#9048](https://github.com/apache/arrow-rs/pull/9048) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- fix: don't generate nulls for `Decimal128` and `Decimal256` when field is non-nullable and have non-zero `null_density` [\#9046](https://github.com/apache/arrow-rs/pull/9046) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- fix: `Rows` `size` should use `capacity` and not `len` [\#9044](https://github.com/apache/arrow-rs/pull/9044) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- fix: integration / Archery test With other arrows container ran out of space [\#9043](https://github.com/apache/arrow-rs/pull/9043) ([lyang24](https://github.com/lyang24))
+- feat: add new `try_append_value_n()` function to `GenericByteViewBuilder` [\#9040](https://github.com/apache/arrow-rs/pull/9040) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lyang24](https://github.com/lyang24))
+- Rename fields in BooleanBuffer for clarity [\#9039](https://github.com/apache/arrow-rs/pull/9039) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Allocate buffers before work in `boolean_kernels` benchmark [\#9035](https://github.com/apache/arrow-rs/pull/9035) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Move RunArray::get\_physical\_indices to RunEndBuffer [\#9027](https://github.com/apache/arrow-rs/pull/9027) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lyang24](https://github.com/lyang24))
+- Improve `RunArray` documentation [\#9019](https://github.com/apache/arrow-rs/pull/9019) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Add BooleanArray tests for null and slice behavior [\#9013](https://github.com/apache/arrow-rs/pull/9013) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([UtkarshSahay123](https://github.com/UtkarshSahay123))
+- feat: support array indices in VariantPath dot notation [\#9012](https://github.com/apache/arrow-rs/pull/9012) ([foskey51](https://github.com/foskey51))
+- arrow-cast: Bring back in-order field casting for `StructArray` [\#9007](https://github.com/apache/arrow-rs/pull/9007) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- arrow-ipc: Add ListView support [\#9006](https://github.com/apache/arrow-rs/pull/9006) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Add quote style to csv writer [\#9004](https://github.com/apache/arrow-rs/pull/9004) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xanderbailey](https://github.com/xanderbailey))
+- Fix row slice bug in Union column decoding with many columns [\#9000](https://github.com/apache/arrow-rs/pull/9000) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- build\(deps\): bump actions/download-artifact from 6 to 7 [\#8995](https://github.com/apache/arrow-rs/pull/8995) ([dependabot[bot]](https://github.com/apps/dependabot))
+- minor: Add comment blocks to PR template [\#8994](https://github.com/apache/arrow-rs/pull/8994) ([Jefffrey](https://github.com/Jefffrey))
+- Implement `BinaryArrayType` for `&FixedSizeBinaryArray`s [\#8993](https://github.com/apache/arrow-rs/pull/8993) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- feat: impl BatchCoalescer::push\_batch\_with\_indices [\#8991](https://github.com/apache/arrow-rs/pull/8991) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ClSlaid](https://github.com/ClSlaid))
+- \[Arrow\]Configure max deduplication length for `StringView` [\#8990](https://github.com/apache/arrow-rs/pull/8990) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lichuang](https://github.com/lichuang))
+- feat: implement append\_array for FixedSizeBinaryBuilder [\#8989](https://github.com/apache/arrow-rs/pull/8989) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ClSlaid](https://github.com/ClSlaid))
+- Add benchmarks for Utf8View scalars for zip [\#8988](https://github.com/apache/arrow-rs/pull/8988) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mkleen](https://github.com/mkleen))
+- build\(deps\): bump actions/cache from 4 to 5 [\#8986](https://github.com/apache/arrow-rs/pull/8986) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Take fsb null indices [\#8981](https://github.com/apache/arrow-rs/pull/8981) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add List to `interleave_kernels` benchmark [\#8980](https://github.com/apache/arrow-rs/pull/8980) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Fix ipc errors for `LargeList` containing sliced `StringViews` [\#8979](https://github.com/apache/arrow-rs/pull/8979) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fabianmurariu](https://github.com/fabianmurariu))
+- arrow-buffer: implement num-traits numeric operations [\#8977](https://github.com/apache/arrow-rs/pull/8977) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([theirix](https://github.com/theirix))
+- Update `xz` crate dependency to use `liblzma` in arrow-avro [\#8975](https://github.com/apache/arrow-rs/pull/8975) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- arrow-data: avoid allocating in get\_last\_run\_end [\#8974](https://github.com/apache/arrow-rs/pull/8974) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto))
+- Support for `Arc<str>` in `ParquetRecordWriter` derive macro [\#8973](https://github.com/apache/arrow-rs/pull/8973) ([heilhead](https://github.com/heilhead))
+- feat: support casting  `Time32` to `Int64` [\#8971](https://github.com/apache/arrow-rs/pull/8971) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tshauck](https://github.com/tshauck))
+- arrow-buffer: add i256::trailing\_zeros [\#8969](https://github.com/apache/arrow-rs/pull/8969) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([theirix](https://github.com/theirix))
+- Perf: Vectorize check\_bounds\(2x speedup\) [\#8966](https://github.com/apache/arrow-rs/pull/8966) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gstvg](https://github.com/gstvg))
+- arrow-buffer: make i256::leading\_zeros public and tested [\#8964](https://github.com/apache/arrow-rs/pull/8964) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([theirix](https://github.com/theirix))
+- Add ignore leading and trailing white space to csv parser [\#8960](https://github.com/apache/arrow-rs/pull/8960) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xanderbailey](https://github.com/xanderbailey))
+- Access `UnionFields` elements by index [\#8959](https://github.com/apache/arrow-rs/pull/8959) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add Parquet roundtrip benchmarks [\#8956](https://github.com/apache/arrow-rs/pull/8956) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- \[Variant\] Add variant to arrow for Date64/Timestamp\(Second/Millisecond\)/Time32/Time64 [\#8950](https://github.com/apache/arrow-rs/pull/8950) ([klion26](https://github.com/klion26))
+- Let `ArrowArrayStreamReader` handle schema with attached metadata + do schema checking [\#8944](https://github.com/apache/arrow-rs/pull/8944) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonded94](https://github.com/jonded94))
+- Adds ExtensionType for Parquet geospatial WKB arrays [\#8943](https://github.com/apache/arrow-rs/pull/8943) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([BlakeOrth](https://github.com/BlakeOrth))
+- Add builder to help create Schemas for shredding \(`ShreddedSchemaBuilder`\) [\#8940](https://github.com/apache/arrow-rs/pull/8940) ([XiangpengHao](https://github.com/XiangpengHao))
+- build\(deps\): update criterion requirement from 0.7.0 to 0.8.0 [\#8939](https://github.com/apache/arrow-rs/pull/8939) ([dependabot[bot]](https://github.com/apps/dependabot))
+- fix: Resolve Avro RecordEncoder bugs related to nullable Struct fields and Union type ids [\#8935](https://github.com/apache/arrow-rs/pull/8935) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Some panic!s could more semantically be unimplemented! [\#8933](https://github.com/apache/arrow-rs/pull/8933) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([abacef](https://github.com/abacef))
+- fix: ipc decode panic with invalid data [\#8931](https://github.com/apache/arrow-rs/pull/8931) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([leiysky](https://github.com/leiysky))
+- Allow creating zero-sized FixedSizeBinary arrays [\#8927](https://github.com/apache/arrow-rs/pull/8927) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- Update `test_variant_get_error_when_cast_failure...` tests to use a valid `VariantArray` [\#8921](https://github.com/apache/arrow-rs/pull/8921) ([alamb](https://github.com/alamb))
+- Make flight sql client generic [\#8915](https://github.com/apache/arrow-rs/pull/8915) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([lewiszlw](https://github.com/lewiszlw))
+- \[minor\] Name Magic Number "8" in `FixedSizeBinaryArray::new_null` [\#8914](https://github.com/apache/arrow-rs/pull/8914) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- fix: cast Binary/String dictionary to view [\#8912](https://github.com/apache/arrow-rs/pull/8912) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- \[8910\]Fixed doc test with feature prettyprint [\#8911](https://github.com/apache/arrow-rs/pull/8911) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr))
+- feat: `ArrayData::new_null` for `ListView` / `LargeListView` [\#8909](https://github.com/apache/arrow-rs/pull/8909) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dqkqd](https://github.com/dqkqd))
+- feat: add `GenericListViewArray::from_iter_primitive` [\#8907](https://github.com/apache/arrow-rs/pull/8907) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dqkqd](https://github.com/dqkqd))
+- fix: `GenericListViewArray::new_null` returns empty array [\#8905](https://github.com/apache/arrow-rs/pull/8905) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dqkqd](https://github.com/dqkqd))
+- Allocate a zeroed buffer for FixedSizeBinaryArray::null [\#8901](https://github.com/apache/arrow-rs/pull/8901) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev))
+- build\(deps\): bump actions/checkout from 5 to 6 [\#8899](https://github.com/apache/arrow-rs/pull/8899) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add getters to `UnionFields` [\#8895](https://github.com/apache/arrow-rs/pull/8895) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add validated constructors for UnionFields [\#8891](https://github.com/apache/arrow-rs/pull/8891) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add bit width check [\#8888](https://github.com/apache/arrow-rs/pull/8888) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([rambleraptor](https://github.com/rambleraptor))
+- \[Variant\] Improve `variant_get` performance on a perfect shredding [\#8887](https://github.com/apache/arrow-rs/pull/8887) ([XiangpengHao](https://github.com/XiangpengHao))
+- Add UnionArray::fields [\#8884](https://github.com/apache/arrow-rs/pull/8884) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Struct casting field order [\#8871](https://github.com/apache/arrow-rs/pull/8871) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Add support for `Union` types in `RowConverter` [\#8839](https://github.com/apache/arrow-rs/pull/8839) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- Add comparison support for Union arrays [\#8838](https://github.com/apache/arrow-rs/pull/8838) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- \[Variant\] Support array shredding into `List/LargeList/ListView/LargeListView` [\#8831](https://github.com/apache/arrow-rs/pull/8831) ([liamzwbao](https://github.com/liamzwbao))
+- Add support for using ListView arrays and types through FFI [\#8822](https://github.com/apache/arrow-rs/pull/8822) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- Add ability to skip or transform page encoding statistics in Parquet metadata [\#8797](https://github.com/apache/arrow-rs/pull/8797) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Implement a `Vec<RecordBatch>` wrapper for `pyarrow.Table` convenience [\#8790](https://github.com/apache/arrow-rs/pull/8790) ([jonded94](https://github.com/jonded94))
+- Make Parquet SBBF serialize/deserialize helpers public for external reuse [\#8762](https://github.com/apache/arrow-rs/pull/8762) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([RoseZhang123](https://github.com/RoseZhang123))
+- Add cast support for \(Large\)ListView \<-\> \(Large\)List [\#8735](https://github.com/apache/arrow-rs/pull/8735) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
 
 
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 07ed5e010c40..a375917e3a3b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -89,8 +89,7 @@ You can also use rust's official docker image:
 docker run --rm -v $(pwd):/arrow-rs -it rust /bin/bash -c "cd /arrow-rs && rustup component add rustfmt && cargo build"
 ```
 
-The command above assumes that are in the root directory of the project, not in the same
-directory as this README.md.
+The command above assumes that you are in the root directory of the project.
 
 You can also compile specific workspaces:
 
diff --git a/Cargo.toml b/Cargo.toml
index a9b00f9537dc..e4f1780d2914 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,7 +39,10 @@ members = [
     "arrow-select",
     "arrow-string",
     "parquet",
+    "parquet-geospatial",
     "parquet-variant",
+    "parquet-variant-compute",
+    "parquet-variant-json",
     "parquet_derive",
     "parquet_derive_test",
 ]
@@ -55,6 +58,9 @@ members = [
 resolver = "2"
 
 exclude = [
+    # arrow-pyarrow-testing is excluded because it requires a Python interpreter with the pyarrow package installed,
+    # which makes running `cargo test --all` fail if the appropriate Python environment is not set up.
+    "arrow-pyarrow-testing",
     # arrow-pyarrow-integration-testing is excluded because it requires different compilation flags, thereby
     # significantly changing how it is compiled within the workspace, causing the whole workspace to be compiled from
     # scratch this way, this is a stand-alone package that compiles independently of the others.
@@ -62,7 +68,7 @@ exclude = [
 ]
 
 [workspace.package]
-version = "55.2.0"
+version = "57.2.0"
 homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
@@ -75,29 +81,37 @@ include = [
     "LICENSE.txt",
     "NOTICE.txt",
 ]
-edition = "2021"
-rust-version = "1.81"
+edition = "2024"
+rust-version = "1.85"
 
 [workspace.dependencies]
-arrow = { version = "55.2.0", path = "./arrow", default-features = false }
-arrow-arith = { version = "55.2.0", path = "./arrow-arith" }
-arrow-array = { version = "55.2.0", path = "./arrow-array" }
-arrow-buffer = { version = "55.2.0", path = "./arrow-buffer" }
-arrow-cast = { version = "55.2.0", path = "./arrow-cast" }
-arrow-csv = { version = "55.2.0", path = "./arrow-csv" }
-arrow-data = { version = "55.2.0", path = "./arrow-data" }
-arrow-ipc = { version = "55.2.0", path = "./arrow-ipc" }
-arrow-json = { version = "55.2.0", path = "./arrow-json" }
-arrow-ord = { version = "55.2.0", path = "./arrow-ord" }
-arrow-pyarrow = { version = "55.2.0", path = "./arrow-pyarrow" }
-arrow-row = { version = "55.2.0", path = "./arrow-row" }
-arrow-schema = { version = "55.2.0", path = "./arrow-schema" }
-arrow-select = { version = "55.2.0", path = "./arrow-select" }
-arrow-string = { version = "55.2.0", path = "./arrow-string" }
-parquet = { version = "55.2.0", path = "./parquet", default-features = false }
+arrow = { version = "57.2.0", path = "./arrow", default-features = false }
+arrow-arith = { version = "57.2.0", path = "./arrow-arith" }
+arrow-array = { version = "57.2.0", path = "./arrow-array" }
+arrow-buffer = { version = "57.2.0", path = "./arrow-buffer" }
+arrow-cast = { version = "57.2.0", path = "./arrow-cast" }
+arrow-csv = { version = "57.2.0", path = "./arrow-csv" }
+arrow-data = { version = "57.2.0", path = "./arrow-data" }
+arrow-ipc = { version = "57.2.0", path = "./arrow-ipc" }
+arrow-json = { version = "57.2.0", path = "./arrow-json" }
+arrow-ord = { version = "57.2.0", path = "./arrow-ord" }
+arrow-pyarrow = { version = "57.2.0", path = "./arrow-pyarrow" }
+arrow-row = { version = "57.2.0", path = "./arrow-row" }
+arrow-schema = { version = "57.2.0", path = "./arrow-schema" }
+arrow-select = { version = "57.2.0", path = "./arrow-select" }
+arrow-string = { version = "57.2.0", path = "./arrow-string" }
+parquet = { version = "57.2.0", path = "./parquet", default-features = false }
+parquet-geospatial = { version = "57.2.0", path = "./parquet-geospatial" }
+parquet-variant = { version = "57.2.0", path = "./parquet-variant" }
+parquet-variant-json = { version = "57.2.0", path = "./parquet-variant-json" }
+parquet-variant-compute = { version = "57.2.0", path = "./parquet-variant-compute" }
 
 chrono = { version = "0.4.40", default-features = false, features = ["clock"] }
 
+simdutf8 = { version = "0.1.5", default-features = false }
+
+criterion = { version = "0.8.0", default-features = false }
+
 # release inherited profile keeping debug information and symbols
 # for mem/cpu profiling
 [profile.profiling]
diff --git a/NOTICE.txt b/NOTICE.txt
index a609791374c2..68538ffbdb4c 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,5 +1,5 @@
 Apache Arrow
-Copyright 2016-2019 The Apache Software Foundation
+Copyright 2016-2026 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
diff --git a/README.md b/README.md
index 6140f9e902ea..901448eb6a92 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,32 @@
 
 # Native Rust implementation of Apache Arrow and Apache Parquet
 
-Welcome to the [Rust][rust] implementation of [Apache Arrow], the popular in-memory columnar format.
+Welcome to the [Rust][rust] implementation of [Apache Arrow], a popular
+in-memory columnar format, and [Apache Parquet], a popular columnar file
+format.
+
+## Community
+
+We welcome participation from everyone and encourage you to join us, ask
+questions, help others, and get involved. All participation in the Apache Arrow
+project is governed by the Apache Software Foundation's [code of
+conduct](https://www.apache.org/foundation/policies/conduct.html).
+
+We use GitHub [issues] and [pull requests] for all technical discussions, reviews,
+new features, bug fixes and release coordination. This ensures that all communication
+is public and archived for future reference.
+
+The `dev@arrow.apache.org` mailing list is the communication channel for the overall Apache Arrow community.
+Instructions for signing up and links to the archives can be found on the [Arrow Community](https://arrow.apache.org/community/) page.
+
+Some community members also use the [Arrow Rust Discord Server](https://discord.gg/YAb2TdazKQ) and the official [ASF Slack](https://s.apache.org/slack-invite) server for informal discussions and coordination.
+This is a great place to meet other contributors and get guidance on where to contribute.
+However, all technical designs should also be recorded and formalized in GitHub issues, so that they are accessible to everyone.
+In Slack, find us in the `#arrow-rust` channel and feel free to ask for an invite via Discord, GitHub issues, or other means.
+
+There is more information in the [contributing] guide.
+
+## Repository Structure
 
 This repository contains the following crates:
 
@@ -27,15 +52,16 @@ This repository contains the following crates:
 | ------------------ | ---------------------------------------------------------------------------- | ------------------------------------------------ | --------------------------------- |
 | [`arrow`]          | Core functionality (memory layout, arrays, low level computations)           | [docs.rs](https://docs.rs/arrow/latest)          | [(README)][arrow-readme]          |
 | [`arrow-flight`]   | Support for Arrow-Flight IPC protocol                                        | [docs.rs](https://docs.rs/arrow-flight/latest)   | [(README)][flight-readme]         |
-| [`parquet`]        | Support for Parquet columnar file format                                     | [docs.rs](https://docs.rs/parquet/latest)        | [(README)][parquet-readme]        |
+| [`parquet`]        | Support for the [Apache Parquet] columnar file format                        | [docs.rs](https://docs.rs/parquet/latest)        | [(README)][parquet-readme]        |
 | [`parquet_derive`] | A crate for deriving RecordWriter/RecordReader for arbitrary, simple structs | [docs.rs](https://docs.rs/parquet-derive/latest) | [(README)][parquet-derive-readme] |
 
-The current development version the API documentation in this repo can be found [here](https://arrow.apache.org/rust).
+The current development version of the API documentation can be found [here](https://arrow.apache.org/rust).
 
 Note: previously the [`object_store`] crate was also part of this repository,
 but it has been moved to the [arrow-rs-object-store repository]
 
 [apache arrow]: https://arrow.apache.org/
+[apache parquet]: https://parquet.apache.org/
 [`arrow`]: https://crates.io/crates/arrow
 [`parquet`]: https://crates.io/crates/parquet
 [`parquet_derive`]: https://crates.io/crates/parquet-derive
@@ -49,7 +75,7 @@ Versioning].
 
 Due to available maintainer and testing bandwidth, [`arrow`] crates ([`arrow`],
 [`arrow-flight`], etc.) are released on the same schedule with the same versions
-as the [`parquet`] and [`parquet-derive`] crates.
+as the [`parquet`] and [`parquet_derive`] crates.
 
 This crate releases every month. We release new major versions (with potentially
 breaking API changes) at most once a quarter, and release incremental minor
@@ -65,28 +91,26 @@ Planned Release Schedule
 
 | Approximate Date | Version    | Notes                                   |
 | ---------------- | ---------- | --------------------------------------- |
-| Apr 2025         | [`55.0.0`] | Major, potentially breaking API changes |
-| May 2025         | [`55.1.0`] | Minor, NO breaking API changes          |
-| June 2025        | [`55.2.0`] | Minor, NO breaking API changes          |
-| July 2025        | [`56.0.0`] | Major, potentially breaking API changes |
-
-[`55.0.0`]: https://github.com/apache/arrow-rs/issues/7084
-[`55.1.0`]: https://github.com/apache/arrow-rs/issues/7393
-[`55.2.0`]: https://github.com/apache/arrow-rs/issues/7394
-[`56.0.0`]: https://github.com/apache/arrow-rs/issues/7395
+| December 2025    | [`57.2.0`] | Minor, NO breaking API changes          |
+| January 2026     | [`58.0.0`] | Major, potentially breaking API changes |
+| February 2026    | [`58.1.0`] | Minor, NO breaking API changes          |
+| March 2026       | [`58.2.0`] | Minor, NO breaking API changes          |
+| April 2026       | [`59.0.0`] | Major, potentially breaking API changes |
+
+[`57.2.0`]: https://github.com/apache/arrow-rs/milestone/5
+[`58.0.0`]: https://github.com/apache/arrow-rs/milestone/6
+[`58.1.0`]: https://github.com/apache/arrow-rs/issues/9108
+[`58.2.0`]: https://github.com/apache/arrow-rs/issues/9109
+[`59.0.0`]: https://github.com/apache/arrow-rs/issues/9110
 [ticket #5368]: https://github.com/apache/arrow-rs/issues/5368
 [semantic versioning]: https://semver.org/
 
 ### Rust Version Compatibility Policy
 
-arrow-rs, parquet and object_store are built and tested with stable Rust, and will keep a rolling MSRV (minimum supported Rust version) that can only be updated in major releases on a need by basis (e.g. project dependencies bump their MSRV or a particular Rust feature is useful for us etc.). The new MSRV if selected will be at least 6 months old. The minor releases are guaranteed to have the same MSRV.
+arrow-rs and parquet are built and tested with stable Rust, and will keep a rolling MSRV (minimum supported Rust version) that can only be updated in major releases on an as-needed basis (e.g. project dependencies bump their MSRV, or a particular Rust feature is useful for us). The new MSRV, if selected, will be at least 6 months old. The minor releases are guaranteed to have the same MSRV.
 
 Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes preceding other policies.
 
-E.g.
-
-in Apr 2025 we will release version 55.0.0 which might have a version bump. But the Rust version selected in this case will be at most version 1.81.
-
 ### Guidelines for `panic` vs `Result`
 
 In general, use panics for bad states that are unreachable, unrecoverable or harmful.
@@ -112,7 +136,7 @@ The deprecated version is the next version which will be released (please
 consult the list above). To mark the API as deprecated, use the
 `#[deprecated(since = "...", note = "...")]` attribute.
 
-Foe example
+For example
 
 ```rust
 #[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
@@ -154,24 +178,6 @@ including `join`s and window functions.
 
 You can find more details about each crate in their respective READMEs.
 
-## Arrow Rust Community
-
-The `dev@arrow.apache.org` mailing list serves as the core communication channel for the Arrow community. Instructions for signing up and links to the archives can be found on the [Arrow Community](https://arrow.apache.org/community/) page. All major announcements and communications happen there.
-
-The Rust Arrow community also uses the official [ASF Slack](https://s.apache.org/slack-invite) for informal discussions and coordination. This is
-a great place to meet other contributors and get guidance on where to contribute. Join us in the `#arrow-rust` channel and feel free to ask for an invite via:
-
-1. the `dev@arrow.apache.org` mailing list
-2. the [GitHub Discussions][discussions]
-3. the [Discord channel](https://discord.gg/YAb2TdazKQ)
-
-The Rust implementation uses [GitHub issues][issues] as the system of record for new features and bug fixes and
-this plays a critical role in the release process.
-
-For design discussions we generally use GitHub issues.
-
-There is more information in the [contributing] guide.
-
 [rust]: https://www.rust-lang.org/
 [`object_store`]: https://crates.io/crates/object-store
 [arrow-readme]: arrow/README.md
@@ -182,4 +188,5 @@ There is more information in the [contributing] guide.
 [ballista-readme]: https://github.com/apache/datafusion-ballista/blob/main/README.md
 [parquet-derive-readme]: parquet_derive/README.md
 [issues]: https://github.com/apache/arrow-rs/issues
+[pull requests]: https://github.com/apache/arrow-rs/pulls
 [discussions]: https://github.com/apache/arrow-rs/discussions
diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml
index a3fdafa823a2..f2a4604c116e 100644
--- a/arrow-arith/Cargo.toml
+++ b/arrow-arith/Cargo.toml
@@ -41,4 +41,4 @@ arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 chrono = { workspace = true }
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 9a19b5d8a1f1..a043259694c1 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -45,11 +45,7 @@ trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
 /// After verifying the generated assembly this can be a simple `if`.
 #[inline(always)]
 fn select<T: Copy>(m: bool, a: T, b: T) -> T {
-    if m {
-        a
-    } else {
-        b
-    }
+    if m { a } else { b }
 }
 
 #[derive(Clone, Copy)]
@@ -336,10 +332,10 @@ fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: Numeric
 
 /// Returns the minimum value in the boolean array.
 ///
+/// # Example
 /// ```
 /// # use arrow_array::BooleanArray;
 /// # use arrow_arith::aggregate::min_boolean;
-///
 /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
 /// assert_eq!(min_boolean(&a), Some(false))
 /// ```
@@ -394,10 +390,10 @@ pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
 
 /// Returns the maximum value in the boolean array
 ///
+/// # Example
 /// ```
 /// # use arrow_array::BooleanArray;
 /// # use arrow_arith::aggregate::max_boolean;
-///
 /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
 /// assert_eq!(max_boolean(&a), Some(true))
 /// ```
@@ -451,11 +447,7 @@ where
             let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
                 let acc = array.value_unchecked(acc_idx);
                 let item = array.value_unchecked(idx);
-                if cmp(&acc, &item) {
-                    idx
-                } else {
-                    acc_idx
-                }
+                if cmp(&acc, &item) { idx } else { acc_idx }
             });
             idx.map(|idx| array.value_unchecked(idx))
         }
@@ -477,11 +469,7 @@ fn min_max_view_helper<T: ByteViewType>(
         let target_idx = (0..array.len()).reduce(|acc, item| {
             // SAFETY:  array's length is correct so item is within bounds
             let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
-            if cmp == swap_cond {
-                item
-            } else {
-                acc
-            }
+            if cmp == swap_cond { item } else { acc }
         });
         // SAFETY: idx came from valid range `0..array.len()`
         unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
@@ -491,11 +479,7 @@ fn min_max_view_helper<T: ByteViewType>(
         let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
             let cmp =
                 unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
-            if cmp == swap_cond {
-                idx
-            } else {
-                acc_idx
-            }
+            if cmp == swap_cond { idx } else { acc_idx }
         });
 
         // SAFETY: idx came from valid range `0..array.len()`
@@ -825,6 +809,15 @@ where
 
 /// Returns the minimum value in the array, according to the natural order.
 /// For floating point arrays any NaN values are considered to be greater than any other non-null value
+///
+/// # Example
+/// ```rust
+/// # use arrow_array::Int32Array;
+/// # use arrow_arith::aggregate::min;
+/// let array = Int32Array::from(vec![8, 2, 4]);
+/// let result = min(&array);
+/// assert_eq!(result, Some(2));
+/// ```
 pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
 where
     T::Native: PartialOrd,
@@ -834,6 +827,15 @@ where
 
 /// Returns the maximum value in the array, according to the natural order.
 /// For floating point arrays any NaN values are considered to be greater than any other non-null value
+///
+/// # Example
+/// ```rust
+/// # use arrow_array::Int32Array;
+/// # use arrow_arith::aggregate::max;
+/// let array = Int32Array::from(vec![4, 8, 2]);
+/// let result = max(&array);
+/// assert_eq!(result, Some(8));
+/// ```
 pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
 where
     T::Native: PartialOrd,
diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs
index febf5ceabdd9..27efed6fcdb4 100644
--- a/arrow-arith/src/arithmetic.rs
+++ b/arrow-arith/src/arithmetic.rs
@@ -25,8 +25,8 @@
 use crate::arity::*;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::i256;
 use arrow_buffer::ArrowNativeType;
+use arrow_buffer::i256;
 use arrow_schema::*;
 use std::cmp::min;
 use std::sync::Arc;
@@ -43,8 +43,7 @@ fn get_fixed_point_info(
 
     if required_scale > product_scale {
         return Err(ArrowError::ComputeError(format!(
-            "Required scale {} is greater than product scale {}",
-            required_scale, product_scale
+            "Required scale {required_scale} is greater than product scale {product_scale}",
         )));
     }
 
@@ -122,7 +121,7 @@ pub fn multiply_fixed_point_checked(
         let mut mul = a.wrapping_mul(b);
         mul = divide_and_round::<Decimal256Type>(mul, divisor);
         mul.to_i128().ok_or_else(|| {
-            ArrowError::ArithmeticOverflow(format!("Overflow happened on: {:?} * {:?}", a, b))
+            ArrowError::ArithmeticOverflow(format!("Overflow happened on: {a:?} * {b:?}"))
         })
     })
     .and_then(|a| a.with_precision_and_scale(precision, required_scale))
@@ -209,9 +208,11 @@ mod tests {
             .unwrap();
 
         let err = mul(&a, &b).unwrap_err();
-        assert!(err
-            .to_string()
-            .contains("Overflow happened on: 123456789000000000000000000 * 10000000000000000000"));
+        assert!(
+            err.to_string().contains(
+                "Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
+            )
+        );
 
         // Allow precision loss.
         let result = multiply_fixed_point_checked(&a, &b, 28).unwrap();
@@ -279,9 +280,11 @@ mod tests {
 
         // Required scale cannot be larger than the product of the input scales.
         let result = multiply_fixed_point_checked(&a, &b, 5).unwrap_err();
-        assert!(result
-            .to_string()
-            .contains("Required scale 5 is greater than product scale 4"));
+        assert!(
+            result
+                .to_string()
+                .contains("Required scale 5 is greater than product scale 4")
+        );
     }
 
     #[test]
@@ -323,7 +326,10 @@ mod tests {
 
         // `multiply` overflows on this case.
         let err = mul(&a, &b).unwrap_err();
-        assert_eq!(err.to_string(), "Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000");
+        assert_eq!(
+            err.to_string(),
+            "Arithmetic overflow: Overflow happened on: 123456789000000000000000000 * 10000000000000000000"
+        );
 
         // Avoid overflow by reducing the scale.
         let result = multiply_fixed_point(&a, &b, 28).unwrap();
diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs
index d1bf1abcb269..b9f7a82963c7 100644
--- a/arrow-arith/src/arity.rs
+++ b/arrow-arith/src/arity.rs
@@ -19,9 +19,9 @@
 
 use arrow_array::builder::BufferBuilder;
 use arrow_array::*;
-use arrow_buffer::buffer::NullBuffer;
 use arrow_buffer::ArrowNativeType;
 use arrow_buffer::MutableBuffer;
+use arrow_buffer::buffer::NullBuffer;
 use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 
diff --git a/arrow-arith/src/bitwise.rs b/arrow-arith/src/bitwise.rs
index a3c18136c5eb..aedeecd5b835 100644
--- a/arrow-arith/src/bitwise.rs
+++ b/arrow-arith/src/bitwise.rs
@@ -21,7 +21,7 @@ use crate::arity::{binary, unary};
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::ArrowError;
-use num::traits::{WrappingShl, WrappingShr};
+use num_traits::{WrappingShl, WrappingShr};
 use std::ops::{BitAnd, BitOr, BitXor, Not};
 
 /// The helper function for bitwise operation with two array
diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index d8c7cc19323e..6bf438e64618 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -23,8 +23,8 @@
 //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
 
 use arrow_array::*;
-use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_quaternary_op_helper};
-use arrow_buffer::{buffer_bin_and_not, BooleanBuffer, NullBuffer};
+use arrow_buffer::buffer::bitwise_quaternary_op_helper;
+use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
 use arrow_schema::ArrowError;
 
 /// Logical 'and' boolean values with Kleene logic
@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is false (because null AND false = false).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_null_buffer.buffer(),
                 left_null_buffer.offset(),
                 right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
         }
         (None, Some(right_null_buffer)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_null_buffer.buffer(),
                 right_null_buffer.offset(),
                 left_values.inner(),
@@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & !d)) & (c | (a & !b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_null_buffer.buffer(),
                     left_values.inner(),
@@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanAr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values & right_values, nulls))
 }
 
@@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is true (because null OR true = true).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_nulls.buffer(),
                 left_nulls.offset(),
                 right_values.inner(),
@@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
         }
         (None, Some(right_nulls)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_nulls.buffer(),
                 right_nulls.offset(),
                 left_values.inner(),
@@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & d)) & (c | (a & b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_nulls.buffer(),
                     left_values.inner(),
@@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & d)) & (c | (a & b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
 
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values | right_values, nulls))
 }
 
diff --git a/arrow-arith/src/lib.rs b/arrow-arith/src/lib.rs
index 63640c51c3ce..035519ed992b 100644
--- a/arrow-arith/src/lib.rs
+++ b/arrow-arith/src/lib.rs
@@ -21,7 +21,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 pub mod aggregate;
 #[doc(hidden)] // Kernels to be removed in a future release
diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs
index a2dc39166931..022a3bb64193 100644
--- a/arrow-arith/src/numeric.rs
+++ b/arrow-arith/src/numeric.rs
@@ -111,6 +111,20 @@ pub fn neg(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
         Float16 => neg_wrapping!(Float16Type, array),
         Float32 => neg_wrapping!(Float32Type, array),
         Float64 => neg_wrapping!(Float64Type, array),
+        Decimal32(p, s) => {
+            let a = array
+                .as_primitive::<Decimal32Type>()
+                .try_unary::<_, Decimal32Type, _>(|x| x.neg_checked())?;
+
+            Ok(Arc::new(a.with_precision_and_scale(*p, *s)?))
+        }
+        Decimal64(p, s) => {
+            let a = array
+                .as_primitive::<Decimal64Type>()
+                .try_unary::<_, Decimal64Type, _>(|x| x.neg_checked())?;
+
+            Ok(Arc::new(a.with_precision_and_scale(*p, *s)?))
+        }
         Decimal128(p, s) => {
             let a = array
                 .as_primitive::<Decimal128Type>()
@@ -234,6 +248,8 @@ fn arithmetic_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<ArrayRef, A
         (Interval(MonthDayNano), Interval(MonthDayNano)) => interval_op::<IntervalMonthDayNanoType>(op, l, l_scalar, r, r_scalar),
         (Date32, _) => date_op::<Date32Type>(op, l, l_scalar, r, r_scalar),
         (Date64, _) => date_op::<Date64Type>(op, l, l_scalar, r, r_scalar),
+        (Decimal32(_, _), Decimal32(_, _)) => decimal_op::<Decimal32Type>(op, l, l_scalar, r, r_scalar),
+        (Decimal64(_, _), Decimal64(_, _)) => decimal_op::<Decimal64Type>(op, l, l_scalar, r, r_scalar),
         (Decimal128(_, _), Decimal128(_, _)) => decimal_op::<Decimal128Type>(op, l, l_scalar, r, r_scalar),
         (Decimal256(_, _), Decimal256(_, _)) => decimal_op::<Decimal256Type>(op, l, l_scalar, r, r_scalar),
         (l_t, r_t) => match (l_t, r_t) {
@@ -503,56 +519,123 @@ fn timestamp_op<T: TimestampOp>(
                 "Invalid timestamp arithmetic operation: {} {op} {}",
                 l.data_type(),
                 r.data_type()
-            )))
+            )));
         }
     };
     Ok(Arc::new(array.with_timezone_opt(l.timezone())))
 }
 
 /// Arithmetic trait for date arrays
-///
-/// Note: these should be fallible (#4456)
 trait DateOp: ArrowTemporalType {
-    fn add_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
-    fn add_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
-    fn add_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
+    fn add_year_month(timestamp: Self::Native, delta: i32) -> Result<Self::Native, ArrowError>;
+    fn add_day_time(
+        timestamp: Self::Native,
+        delta: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError>;
+    fn add_month_day_nano(
+        timestamp: Self::Native,
+        delta: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError>;
+
+    fn sub_year_month(timestamp: Self::Native, delta: i32) -> Result<Self::Native, ArrowError>;
+    fn sub_day_time(
+        timestamp: Self::Native,
+        delta: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError>;
+    fn sub_month_day_nano(
+        timestamp: Self::Native,
+        delta: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError>;
+}
+
+impl DateOp for Date32Type {
+    fn add_year_month(left: Self::Native, right: i32) -> Result<Self::Native, ArrowError> {
+        // Date32Type functions don't have _opt variants and should be safe
+        Ok(Self::add_year_months(left, right))
+    }
+
+    fn add_day_time(
+        left: Self::Native,
+        right: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError> {
+        Ok(Self::add_day_time(left, right))
+    }
+
+    fn add_month_day_nano(
+        left: Self::Native,
+        right: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError> {
+        Ok(Self::add_month_day_nano(left, right))
+    }
+
+    fn sub_year_month(left: Self::Native, right: i32) -> Result<Self::Native, ArrowError> {
+        Ok(Self::subtract_year_months(left, right))
+    }
 
-    fn sub_year_month(timestamp: Self::Native, delta: i32) -> Self::Native;
-    fn sub_day_time(timestamp: Self::Native, delta: IntervalDayTime) -> Self::Native;
-    fn sub_month_day_nano(timestamp: Self::Native, delta: IntervalMonthDayNano) -> Self::Native;
+    fn sub_day_time(
+        left: Self::Native,
+        right: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError> {
+        Ok(Self::subtract_day_time(left, right))
+    }
+
+    fn sub_month_day_nano(
+        left: Self::Native,
+        right: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError> {
+        Ok(Self::subtract_month_day_nano(left, right))
+    }
 }
 
-macro_rules! date {
-    ($t:ty) => {
-        impl DateOp for $t {
-            fn add_year_month(left: Self::Native, right: i32) -> Self::Native {
-                Self::add_year_months(left, right)
-            }
+impl DateOp for Date64Type {
+    fn add_year_month(left: Self::Native, right: i32) -> Result<Self::Native, ArrowError> {
+        Self::add_year_months_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} + {right} months",))
+        })
+    }
 
-            fn add_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
-                Self::add_day_time(left, right)
-            }
+    fn add_day_time(
+        left: Self::Native,
+        right: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError> {
+        Self::add_day_time_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} + {right:?}"))
+        })
+    }
 
-            fn add_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
-                Self::add_month_day_nano(left, right)
-            }
+    fn add_month_day_nano(
+        left: Self::Native,
+        right: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError> {
+        Self::add_month_day_nano_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} + {right:?}"))
+        })
+    }
 
-            fn sub_year_month(left: Self::Native, right: i32) -> Self::Native {
-                Self::subtract_year_months(left, right)
-            }
+    fn sub_year_month(left: Self::Native, right: i32) -> Result<Self::Native, ArrowError> {
+        Self::subtract_year_months_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} - {right} months",))
+        })
+    }
 
-            fn sub_day_time(left: Self::Native, right: IntervalDayTime) -> Self::Native {
-                Self::subtract_day_time(left, right)
-            }
+    fn sub_day_time(
+        left: Self::Native,
+        right: IntervalDayTime,
+    ) -> Result<Self::Native, ArrowError> {
+        Self::subtract_day_time_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} - {right:?}"))
+        })
+    }
 
-            fn sub_month_day_nano(left: Self::Native, right: IntervalMonthDayNano) -> Self::Native {
-                Self::subtract_month_day_nano(left, right)
-            }
-        }
-    };
+    fn sub_month_day_nano(
+        left: Self::Native,
+        right: IntervalMonthDayNano,
+    ) -> Result<Self::Native, ArrowError> {
+        Self::subtract_month_day_nano_opt(left, right).ok_or_else(|| {
+            ArrowError::ComputeError(format!("Date arithmetic overflow: {left} - {right:?}"))
+        })
+    }
 }
-date!(Date32Type);
-date!(Date64Type);
 
 /// Arithmetic trait for interval arrays
 trait IntervalOp: ArrowPrimitiveType {
@@ -689,29 +772,29 @@ fn date_op<T: DateOp>(
     match (op, r_t) {
         (Op::Add | Op::AddWrapping, Interval(YearMonth)) => {
             let r = r.as_primitive::<IntervalYearMonthType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::add_year_month(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::add_year_month(l, r)))
         }
         (Op::Sub | Op::SubWrapping, Interval(YearMonth)) => {
             let r = r.as_primitive::<IntervalYearMonthType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::sub_year_month(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::sub_year_month(l, r)))
         }
 
         (Op::Add | Op::AddWrapping, Interval(DayTime)) => {
             let r = r.as_primitive::<IntervalDayTimeType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::add_day_time(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::add_day_time(l, r)))
         }
         (Op::Sub | Op::SubWrapping, Interval(DayTime)) => {
             let r = r.as_primitive::<IntervalDayTimeType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::sub_day_time(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::sub_day_time(l, r)))
         }
 
         (Op::Add | Op::AddWrapping, Interval(MonthDayNano)) => {
             let r = r.as_primitive::<IntervalMonthDayNanoType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::add_month_day_nano(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::add_month_day_nano(l, r)))
         }
         (Op::Sub | Op::SubWrapping, Interval(MonthDayNano)) => {
             let r = r.as_primitive::<IntervalMonthDayNanoType>();
-            Ok(op_ref!(T, l, l_s, r, r_s, T::sub_month_day_nano(l, r)))
+            Ok(try_op_ref!(T, l, l_s, r, r_s, T::sub_month_day_nano(l, r)))
         }
 
         _ => Err(ArrowError::InvalidArgumentError(format!(
@@ -734,6 +817,8 @@ fn decimal_op<T: DecimalType>(
     let r = r.as_primitive::<T>();
 
     let (p1, s1, p2, s2) = match (l.data_type(), r.data_type()) {
+        (DataType::Decimal32(p1, s1), DataType::Decimal32(p2, s2)) => (p1, s1, p2, s2),
+        (DataType::Decimal64(p1, s1), DataType::Decimal64(p2, s2)) => (p1, s1, p2, s2),
         (DataType::Decimal128(p1, s1), DataType::Decimal128(p2, s2)) => (p1, s1, p2, s2),
         (DataType::Decimal256(p1, s1), DataType::Decimal256(p2, s2)) => (p1, s1, p2, s2),
         _ => unreachable!(),
@@ -856,7 +941,7 @@ fn decimal_op<T: DecimalType>(
 mod tests {
     use super::*;
     use arrow_array::temporal_conversions::{as_date, as_datetime};
-    use arrow_buffer::{i256, ScalarBuffer};
+    use arrow_buffer::{ScalarBuffer, i256};
     use chrono::{DateTime, NaiveDate};
 
     fn test_neg_primitive<T: ArrowPrimitiveType>(
@@ -922,6 +1007,28 @@ mod tests {
             "Arithmetic overflow: Overflow happened on: - -9223372036854775808"
         );
 
+        let a = Decimal32Array::from(vec![1, 3, -44, 2, 4])
+            .with_precision_and_scale(9, 6)
+            .unwrap();
+
+        let r = neg(&a).unwrap();
+        assert_eq!(r.data_type(), a.data_type());
+        assert_eq!(
+            r.as_primitive::<Decimal32Type>().values(),
+            &[-1, -3, 44, -2, -4]
+        );
+
+        let a = Decimal64Array::from(vec![1, 3, -44, 2, 4])
+            .with_precision_and_scale(9, 6)
+            .unwrap();
+
+        let r = neg(&a).unwrap();
+        assert_eq!(r.data_type(), a.data_type());
+        assert_eq!(
+            r.as_primitive::<Decimal64Type>().values(),
+            &[-1, -3, 44, -2, -4]
+        );
+
         let a = Decimal128Array::from(vec![1, 3, -44, 2, 4])
             .with_precision_and_scale(9, 6)
             .unwrap();
@@ -1156,7 +1263,10 @@ mod tests {
             .with_precision_and_scale(37, 37)
             .unwrap();
         let err = mul(&a, &b).unwrap_err().to_string();
-        assert_eq!(err, "Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38");
+        assert_eq!(
+            err,
+            "Invalid argument error: Output scale of Decimal128(3, 3) * Decimal128(37, 37) would exceed max scale of 38"
+        );
 
         let a = Decimal128Array::from(vec![1])
             .with_precision_and_scale(3, -2)
@@ -1533,4 +1643,536 @@ mod tests {
             "Arithmetic overflow: Overflow happened on: 9223372036854775807 - -1"
         );
     }
+
+    #[test]
+    fn test_date64_to_naive_date_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+
+        // Pins the range of millisecond values for which Date64Type::to_naive_date_opt returns Some.
+        // Expected valid dates: January 1, -262143 through December 31, 262142 (Gregorian calendar).
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+        let ms_per_day = 24 * 60 * 60 * 1000i64;
+
+        // Define the boundary dates using NaiveDate::from_ymd_opt
+        let max_valid_date = NaiveDate::from_ymd_opt(262142, 12, 31).unwrap();
+        let min_valid_date = NaiveDate::from_ymd_opt(-262143, 1, 1).unwrap();
+
+        // Convert each boundary to milliseconds relative to the 1970-01-01 epoch
+        let max_valid_millis = (max_valid_date - epoch).num_milliseconds();
+        let min_valid_millis = (min_valid_date - epoch).num_milliseconds();
+
+        // Guard the hard-coded millisecond constants against the values chrono computes
+        assert_eq!(
+            max_valid_millis, 8210266790400000i64,
+            "December 31, 262142 should be 8210266790400000 ms from epoch"
+        );
+        assert_eq!(
+            min_valid_millis, -8334601228800000i64,
+            "January 1, -262143 should be -8334601228800000 ms from epoch"
+        );
+
+        // The boundary dates themselves must convert successfully
+        assert!(
+            Date64Type::to_naive_date_opt(max_valid_millis).is_some(),
+            "December 31, 262142 should return Some"
+        );
+        assert!(
+            Date64Type::to_naive_date_opt(min_valid_millis).is_some(),
+            "January 1, -262143 should return Some"
+        );
+
+        // One day past either boundary must yield None
+        assert!(
+            Date64Type::to_naive_date_opt(max_valid_millis + ms_per_day).is_none(),
+            "January 1, 262143 should return None"
+        );
+        assert!(
+            Date64Type::to_naive_date_opt(min_valid_millis - ms_per_day).is_none(),
+            "December 31, -262144 should return None"
+        );
+
+        // Test some values well within the valid range
+        assert!(
+            Date64Type::to_naive_date_opt(0).is_some(),
+            "Epoch (1970-01-01) should return Some"
+        );
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+        assert!(
+            Date64Type::to_naive_date_opt(year_2000_millis).is_some(),
+            "Year 2000 should return Some"
+        );
+
+        // Extreme i64 inputs lie far outside the range pinned above and must yield None
+        assert!(
+            Date64Type::to_naive_date_opt(i64::MAX).is_none(),
+            "i64::MAX should return None"
+        );
+        assert!(
+            Date64Type::to_naive_date_opt(i64::MIN).is_none(),
+            "i64::MIN should return None"
+        );
+    }
+
+    #[test]
+    fn test_date64_add_year_months_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Normal case: 120 months (10 years) added to 2000-01-01
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+        assert!(
+            Date64Type::add_year_months_opt(year_2000_millis, 120).is_some(),
+            "Adding 10 years to year 2000 should succeed"
+        );
+
+        // Years far from the epoch (5000 and -5000) that chrono can still construct
+        let large_year = NaiveDate::from_ymd_opt(5000, 1, 1).unwrap();
+        let large_year_millis = (large_year - epoch).num_milliseconds();
+        assert!(
+            Date64Type::add_year_months_opt(large_year_millis, 12).is_some(),
+            "Adding 12 months to year 5000 should succeed"
+        );
+
+        let neg_year = NaiveDate::from_ymd_opt(-5000, 12, 31).unwrap();
+        let neg_year_millis = (neg_year - epoch).num_milliseconds();
+        assert!(
+            Date64Type::add_year_months_opt(neg_year_millis, -12).is_some(),
+            "Subtracting 12 months from year -5000 should succeed"
+        );
+
+        // Extreme i64 inputs must yield None
+        assert!(
+            Date64Type::add_year_months_opt(i64::MAX, 1).is_none(),
+            "Adding months to i64::MAX should fail"
+        );
+        assert!(
+            Date64Type::add_year_months_opt(i64::MIN, -1).is_none(),
+            "Subtracting months from i64::MIN should fail"
+        );
+
+        // Edge case: adding zero months to a valid date must still return Some
+        assert!(
+            Date64Type::add_year_months_opt(year_2000_millis, 0).is_some(),
+            "Adding zero months should always succeed for valid dates"
+        );
+    }
+
+    #[test]
+    fn test_date64_add_day_time_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+        use arrow_buffer::IntervalDayTime;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Start from a large positive year (200000) that chrono can still construct
+        let near_max_date = NaiveDate::from_ymd_opt(200000, 12, 1).unwrap();
+        let near_max_millis = (near_max_date - epoch).num_milliseconds();
+
+        // Adding 30 days should succeed
+        let interval_30_days = IntervalDayTime::new(30, 0);
+        assert!(
+            Date64Type::add_day_time_opt(near_max_millis, interval_30_days).is_some(),
+            "Adding 30 days to large year should succeed"
+        );
+
+        // Adding 100M days (more than 270,000 years) must fail
+        let interval_large_days = IntervalDayTime::new(100000000, 0);
+        assert!(
+            Date64Type::add_day_time_opt(near_max_millis, interval_large_days).is_none(),
+            "Adding 100M days to large year should fail"
+        );
+
+        // Mirror case: a large negative year (-200000) that chrono can still construct
+        let near_min_date = NaiveDate::from_ymd_opt(-200000, 2, 1).unwrap();
+        let near_min_millis = (near_min_date - epoch).num_milliseconds();
+
+        // Adding -30 days (i.e. subtracting 30 days) should succeed
+        let interval_minus_30_days = IntervalDayTime::new(-30, 0);
+        assert!(
+            Date64Type::add_day_time_opt(near_min_millis, interval_minus_30_days).is_some(),
+            "Subtracting 30 days from large negative year should succeed"
+        );
+
+        // Adding -100M days must fail
+        let interval_minus_large_days = IntervalDayTime::new(-100000000, 0);
+        assert!(
+            Date64Type::add_day_time_opt(near_min_millis, interval_minus_large_days).is_none(),
+            "Subtracting 100M days from large negative year should fail"
+        );
+
+        // Test normal case within valid range
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+        let interval_1000_days = IntervalDayTime::new(1000, 12345);
+        assert!(
+            Date64Type::add_day_time_opt(year_2000_millis, interval_1000_days).is_some(),
+            "Adding 1000 days and time to year 2000 should succeed"
+        );
+
+        // Extreme i64 inputs must yield None
+        let interval_one_day = IntervalDayTime::new(1, 0);
+        assert!(
+            Date64Type::add_day_time_opt(i64::MAX, interval_one_day).is_none(),
+            "Adding interval to i64::MAX should fail"
+        );
+        assert!(
+            Date64Type::add_day_time_opt(i64::MIN, IntervalDayTime::new(-1, 0)).is_none(),
+            "Subtracting interval from i64::MIN should fail"
+        );
+
+        // Test with extreme interval values
+        let max_interval = IntervalDayTime::new(i32::MAX, i32::MAX);
+        assert!(
+            Date64Type::add_day_time_opt(0, max_interval).is_none(),
+            "Adding extreme interval should fail"
+        );
+
+        let min_interval = IntervalDayTime::new(i32::MIN, i32::MIN);
+        assert!(
+            Date64Type::add_day_time_opt(0, min_interval).is_none(),
+            "Adding extreme negative interval should fail"
+        );
+
+        // Largest millisecond component with zero days: i32::MAX ms is about 24.9 days
+        let large_ms_interval = IntervalDayTime::new(0, i32::MAX);
+        assert!(
+            Date64Type::add_day_time_opt(year_2000_millis, large_ms_interval).is_some(),
+            "Adding large milliseconds within valid range should succeed"
+        );
+    }
+
+    #[test]
+    fn test_date64_add_month_day_nano_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+        use arrow_buffer::IntervalMonthDayNano;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Test with a large year that is still within chrono's safe range
+        let near_max_date = NaiveDate::from_ymd_opt(5000, 11, 1).unwrap();
+        let near_max_millis = (near_max_date - epoch).num_milliseconds();
+
+        // Adding 1 month and 30 days should succeed
+        let interval_safe = IntervalMonthDayNano::new(1, 30, 0);
+        assert!(
+            Date64Type::add_month_day_nano_opt(near_max_millis, interval_safe).is_some(),
+            "Adding 1 month 30 days to large year should succeed"
+        );
+
+        // Baseline date (2000-01-01) reused by the remaining in-range assertions
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+
+        // Test edge case: adding zero should always work for valid dates
+        let zero_interval = IntervalMonthDayNano::new(0, 0, 0);
+        assert!(
+            Date64Type::add_month_day_nano_opt(year_2000_millis, zero_interval).is_some(),
+            "Adding zero interval should always succeed for valid dates"
+        );
+
+        // Test with a negative year that is still within chrono's safe range
+        let near_min_date = NaiveDate::from_ymd_opt(-5000, 2, 28).unwrap();
+        let near_min_millis = (near_min_date - epoch).num_milliseconds();
+
+        // Adding -1 month and -30 days (i.e. subtracting them) should succeed
+        let interval_safe_neg = IntervalMonthDayNano::new(-1, -30, 0);
+        assert!(
+            Date64Type::add_month_day_nano_opt(near_min_millis, interval_safe_neg).is_some(),
+            "Subtracting 1 month 30 days from large negative year should succeed"
+        );
+
+        // Extreme i64 input must yield None
+        assert!(
+            Date64Type::add_month_day_nano_opt(i64::MAX, IntervalMonthDayNano::new(1, 0, 0))
+                .is_none(),
+            "Adding interval to i64::MAX should fail"
+        );
+
+        let interval_normal = IntervalMonthDayNano::new(2, 10, 123_456_789_000);
+        assert!(
+            Date64Type::add_month_day_nano_opt(year_2000_millis, interval_normal).is_some(),
+            "Adding 2 months, 10 days, and nanos to year 2000 should succeed"
+        );
+
+        // NOTE(review): the i64::MAX assertion below repeats the one above; i64::MIN is the new case
+        assert!(
+            Date64Type::add_month_day_nano_opt(i64::MAX, IntervalMonthDayNano::new(1, 0, 0))
+                .is_none(),
+            "Adding interval to i64::MAX should fail"
+        );
+        assert!(
+            Date64Type::add_month_day_nano_opt(i64::MIN, IntervalMonthDayNano::new(-1, 0, 0))
+                .is_none(),
+            "Subtracting interval from i64::MIN should fail"
+        );
+
+        // NOTE(review): no separate invalid-timestamp case follows; only i64::MAX/MIN are tried above
+
+        // Sub-second nanosecond component: 999_999_999 ns is just under 1 s (not under 1 ms)
+        let nano_interval = IntervalMonthDayNano::new(0, 0, 999_999_999);
+        assert!(
+            Date64Type::add_month_day_nano_opt(year_2000_millis, nano_interval).is_some(),
+            "Adding nanoseconds within valid range should succeed"
+        );
+
+        // Test large nanosecond values that convert to milliseconds
+        let large_nano_interval = IntervalMonthDayNano::new(0, 0, 86_400_000_000_000); // 1 day in nanos
+        assert!(
+            Date64Type::add_month_day_nano_opt(year_2000_millis, large_nano_interval).is_some(),
+            "Adding 1 day worth of nanoseconds should succeed"
+        );
+    }
+
+    #[test]
+    fn test_date64_subtract_year_months_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Test with a negative year that is still within chrono's safe range
+        let near_min_date = NaiveDate::from_ymd_opt(-5000, 12, 31).unwrap();
+        let near_min_millis = (near_min_date - epoch).num_milliseconds();
+
+        // Subtracting 12 months should succeed
+        assert!(
+            Date64Type::subtract_year_months_opt(near_min_millis, 12).is_some(),
+            "Subtracting 12 months from year -5000 should succeed"
+        );
+
+        // Baseline date (2000-01-01) reused by later in-range assertions
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+
+        // Test edge case: subtracting zero should always work for valid dates
+        assert!(
+            Date64Type::subtract_year_months_opt(year_2000_millis, 0).is_some(),
+            "Subtracting zero months should always succeed for valid dates"
+        );
+
+        // Test with a large year that is still within chrono's safe range
+        let near_max_date = NaiveDate::from_ymd_opt(5000, 1, 1).unwrap();
+        let near_max_millis = (near_max_date - epoch).num_milliseconds();
+
+        // Adding 12 months (subtracting negative) should succeed
+        assert!(
+            Date64Type::subtract_year_months_opt(near_max_millis, -12).is_some(),
+            "Adding 12 months to year 5000 should succeed"
+        );
+
+        // Extreme i64 input must yield None
+        assert!(
+            Date64Type::subtract_year_months_opt(i64::MAX, -1).is_none(),
+            "Adding months to i64::MAX should fail"
+        );
+
+        assert!(
+            Date64Type::subtract_year_months_opt(year_2000_millis, 12).is_some(),
+            "Subtracting 1 year from year 2000 should succeed"
+        );
+
+        // NOTE(review): the i64::MAX assertion below repeats the one above; i64::MIN is the new case
+        assert!(
+            Date64Type::subtract_year_months_opt(i64::MAX, -1).is_none(),
+            "Adding months to i64::MAX should fail"
+        );
+        assert!(
+            Date64Type::subtract_year_months_opt(i64::MIN, 1).is_none(),
+            "Subtracting months from i64::MIN should fail"
+        );
+
+        // Repeat the zero-month check with a mid-year date (2020-06-15)
+        let valid_date = NaiveDate::from_ymd_opt(2020, 6, 15).unwrap();
+        let valid_millis = (valid_date - epoch).num_milliseconds();
+        assert!(
+            Date64Type::subtract_year_months_opt(valid_millis, 0).is_some(),
+            "Subtracting zero months should always succeed for valid dates"
+        );
+    }
+
+    #[test]
+    fn test_date64_subtract_day_time_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+        use arrow_buffer::IntervalDayTime;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Start from a large negative year (-200000) that chrono can still construct
+        let near_min_date = NaiveDate::from_ymd_opt(-200000, 2, 1).unwrap();
+        let near_min_millis = (near_min_date - epoch).num_milliseconds();
+
+        // Subtracting 30 days should succeed
+        let interval_30_days = IntervalDayTime::new(30, 0);
+        assert!(
+            Date64Type::subtract_day_time_opt(near_min_millis, interval_30_days).is_some(),
+            "Subtracting 30 days from large negative year should succeed"
+        );
+
+        // Subtracting 100M days (more than 270,000 years) must fail
+        let interval_large_days = IntervalDayTime::new(100000000, 0);
+        assert!(
+            Date64Type::subtract_day_time_opt(near_min_millis, interval_large_days).is_none(),
+            "Subtracting 100M days from large negative year should fail"
+        );
+
+        // Mirror case: a large positive year (200000) that chrono can still construct
+        let near_max_date = NaiveDate::from_ymd_opt(200000, 12, 1).unwrap();
+        let near_max_millis = (near_max_date - epoch).num_milliseconds();
+
+        // Adding 30 days (subtracting negative) should succeed
+        let interval_minus_30_days = IntervalDayTime::new(-30, 0);
+        assert!(
+            Date64Type::subtract_day_time_opt(near_max_millis, interval_minus_30_days).is_some(),
+            "Adding 30 days to large year should succeed"
+        );
+
+        // Adding a very large number of days should fail
+        let interval_minus_large_days = IntervalDayTime::new(-100000000, 0);
+        assert!(
+            Date64Type::subtract_day_time_opt(near_max_millis, interval_minus_large_days).is_none(),
+            "Adding 100M days to large year should fail"
+        );
+
+        // Test normal case within valid range
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+        let interval_1000_days = IntervalDayTime::new(1000, 12345);
+        assert!(
+            Date64Type::subtract_day_time_opt(year_2000_millis, interval_1000_days).is_some(),
+            "Subtracting 1000 days and time from year 2000 should succeed"
+        );
+
+        // Extreme i64 inputs must yield None
+        let interval_one_day = IntervalDayTime::new(1, 0);
+        assert!(
+            Date64Type::subtract_day_time_opt(i64::MIN, interval_one_day).is_none(),
+            "Subtracting interval from i64::MIN should fail"
+        );
+        assert!(
+            Date64Type::subtract_day_time_opt(i64::MAX, IntervalDayTime::new(-1, 0)).is_none(),
+            "Adding interval to i64::MAX should fail"
+        );
+
+        // Test with extreme interval values
+        let max_interval = IntervalDayTime::new(i32::MAX, i32::MAX);
+        assert!(
+            Date64Type::subtract_day_time_opt(0, max_interval).is_none(),
+            "Subtracting extreme interval should fail"
+        );
+
+        let min_interval = IntervalDayTime::new(i32::MIN, i32::MIN);
+        assert!(
+            Date64Type::subtract_day_time_opt(0, min_interval).is_none(),
+            "Subtracting extreme negative interval should fail"
+        );
+
+        // Largest millisecond component with zero days: i32::MAX ms is about 24.9 days
+        let large_ms_interval = IntervalDayTime::new(0, i32::MAX);
+        assert!(
+            Date64Type::subtract_day_time_opt(year_2000_millis, large_ms_interval).is_some(),
+            "Subtracting large milliseconds within valid range should succeed"
+        );
+
+        // Test edge case: subtracting zero should always work for valid dates
+        let zero_interval = IntervalDayTime::new(0, 0);
+        let valid_date = NaiveDate::from_ymd_opt(2020, 6, 15).unwrap();
+        let valid_millis = (valid_date - epoch).num_milliseconds();
+        assert!(
+            Date64Type::subtract_day_time_opt(valid_millis, zero_interval).is_some(),
+            "Subtracting zero interval should always succeed for valid dates"
+        );
+    }
+
+    #[test]
+    fn test_date64_subtract_month_day_nano_opt_boundary_values() {
+        use arrow_array::types::Date64Type;
+        use arrow_buffer::IntervalMonthDayNano;
+
+        let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+
+        // Test with a negative year that is still within chrono's safe range
+        let near_min_date = NaiveDate::from_ymd_opt(-5000, 2, 28).unwrap();
+        let near_min_millis = (near_min_date - epoch).num_milliseconds();
+
+        // Subtracting 1 month and 30 days should succeed
+        let interval_safe = IntervalMonthDayNano::new(1, 30, 0);
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(near_min_millis, interval_safe).is_some(),
+            "Subtracting 1 month 30 days from large negative year should succeed"
+        );
+
+        // Baseline date (2000-01-01) reused by the remaining in-range assertions
+        let year_2000 = NaiveDate::from_ymd_opt(2000, 1, 1).unwrap();
+        let year_2000_millis = (year_2000 - epoch).num_milliseconds();
+
+        // Test edge case: subtracting zero should always work for valid dates
+        let zero_interval = IntervalMonthDayNano::new(0, 0, 0);
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(year_2000_millis, zero_interval).is_some(),
+            "Subtracting zero interval should always succeed for valid dates"
+        );
+
+        // Test with a large year that is still within chrono's safe range
+        let near_max_date = NaiveDate::from_ymd_opt(5000, 11, 1).unwrap();
+        let near_max_millis = (near_max_date - epoch).num_milliseconds();
+
+        // Adding 1 month and 30 days (subtracting negative) should succeed
+        let interval_safe_neg = IntervalMonthDayNano::new(-1, -30, 0);
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(near_max_millis, interval_safe_neg).is_some(),
+            "Adding 1 month 30 days to large year should succeed"
+        );
+
+        // Extreme i64 input must yield None
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(i64::MIN, IntervalMonthDayNano::new(1, 0, 0))
+                .is_none(),
+            "Subtracting interval from i64::MIN should fail"
+        );
+
+        let interval_normal = IntervalMonthDayNano::new(2, 10, 123_456_789_000);
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(year_2000_millis, interval_normal).is_some(),
+            "Subtracting 2 months, 10 days, and nanos from year 2000 should succeed"
+        );
+
+        // NOTE(review): the i64::MIN assertion below repeats the one above; i64::MAX is the new case
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(i64::MIN, IntervalMonthDayNano::new(1, 0, 0))
+                .is_none(),
+            "Subtracting interval from i64::MIN should fail"
+        );
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(i64::MAX, IntervalMonthDayNano::new(-1, 0, 0))
+                .is_none(),
+            "Adding interval to i64::MAX should fail"
+        );
+
+        // Sub-second nanosecond component: 999_999_999 ns is just under 1 s (not under 1 ms)
+        let nano_interval = IntervalMonthDayNano::new(0, 0, 999_999_999);
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(year_2000_millis, nano_interval).is_some(),
+            "Subtracting nanoseconds within valid range should succeed"
+        );
+
+        // Test large nanosecond values that convert to milliseconds
+        let large_nano_interval = IntervalMonthDayNano::new(0, 0, 86_400_000_000_000); // 1 day in nanos
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(year_2000_millis, large_nano_interval)
+                .is_some(),
+            "Subtracting 1 day worth of nanoseconds should succeed"
+        );
+
+        // Repeat the zero-interval check with a mid-year date (2020-06-15)
+        let zero_interval = IntervalMonthDayNano::new(0, 0, 0);
+        let valid_date = NaiveDate::from_ymd_opt(2020, 6, 15).unwrap();
+        let valid_millis = (valid_date - epoch).num_milliseconds();
+        assert!(
+            Date64Type::subtract_month_day_nano_opt(valid_millis, zero_interval).is_some(),
+            "Subtracting zero interval should always succeed for valid dates"
+        );
+    }
 }
diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs
index 0b2b98b67b93..faff59bc307d 100644
--- a/arrow-arith/src/temporal.rs
+++ b/arrow-arith/src/temporal.rs
@@ -24,14 +24,14 @@ use cast::as_primitive_array;
 use chrono::{Datelike, TimeZone, Timelike, Utc};
 
 use arrow_array::temporal_conversions::{
-    date32_to_datetime, date64_to_datetime, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
-    timestamp_s_to_datetime, timestamp_us_to_datetime, MICROSECONDS, MICROSECONDS_IN_DAY,
-    MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS, NANOSECONDS_IN_DAY, SECONDS_IN_DAY,
+    MICROSECONDS, MICROSECONDS_IN_DAY, MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS,
+    NANOSECONDS_IN_DAY, SECONDS_IN_DAY, date32_to_datetime, date64_to_datetime,
+    timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_s_to_datetime,
+    timestamp_us_to_datetime,
 };
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit};
 
 /// Valid parts to extract from date/time/timestamp arrays.
@@ -79,7 +79,7 @@ pub enum DatePart {
 
 impl std::fmt::Display for DatePart {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self)
+        write!(f, "{self:?}")
     }
 }
 
@@ -197,16 +197,6 @@ pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, ArrowErr
     )
 }
 
-/// Used to integrate new [`date_part()`] method with deprecated shims such as
-/// [`hour()`] and [`week()`].
-fn date_part_primitive<T: ArrowTemporalType>(
-    array: &PrimitiveArray<T>,
-    part: DatePart,
-) -> Result<Int32Array, ArrowError> {
-    let array = date_part(array, part)?;
-    Ok(array.as_primitive::<Int32Type>().to_owned())
-}
-
 /// Extract optional [`Tz`] from timestamp data types, returning error
 /// if called with a non-timestamp type.
 fn get_tz(dt: &DataType) -> Result<Option<Tz>, ArrowError> {
@@ -660,7 +650,7 @@ impl ExtractDatePartExt for PrimitiveArray<DurationNanosecondType> {
 
 macro_rules! return_compute_error_with {
     ($msg:expr, $param:expr) => {
-        return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, $param))) }
+        return { Err(ArrowError::ComputeError(format!("{}: {}", $msg, $param))) }
     };
 }
 
@@ -685,300 +675,26 @@ impl<T: Datelike> ChronoDateExt for T {
     }
 }
 
-/// Extracts the hours of a given array as an array of integers within
-/// the range of [0, 23]. If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn hour_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Hour)
-}
-
-/// Extracts the hours of a given temporal primitive array as an array of integers within
-/// the range of [0, 23].
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Hour)
-}
-
-/// Extracts the years of a given temporal array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn year_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Year)
-}
-
-/// Extracts the years of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn year<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Year)
-}
-
-/// Extracts the quarter of a given temporal array as an array of integersa within
-/// the range of [1, 4]. If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn quarter_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Quarter)
-}
-
-/// Extracts the quarter of a given temporal primitive array as an array of integers within
-/// the range of [1, 4].
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn quarter<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Quarter)
-}
-
-/// Extracts the month of a given temporal array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn month_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Month)
-}
-
-/// Extracts the month of a given temporal primitive array as an array of integers within
-/// the range of [1, 12].
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn month<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Month)
-}
-
-/// Extracts the day of week of a given temporal array as an array of
-/// integers.
-///
-/// Monday is encoded as `0`, Tuesday as `1`, etc.
-///
-/// See also [`num_days_from_sunday`] which starts at Sunday.
-///
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::DayOfWeekMonday0)
-}
-
-/// Extracts the day of week of a given temporal primitive array as an array of
-/// integers.
-///
-/// Monday is encoded as `0`, Tuesday as `1`, etc.
-///
-/// See also [`num_days_from_sunday`] which starts at Sunday.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn num_days_from_monday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::DayOfWeekMonday0)
-}
-
-/// Extracts the day of week of a given temporal array as an array of
-/// integers, starting at Sunday.
-///
-/// Sunday is encoded as `0`, Monday as `1`, etc.
-///
-/// See also [`num_days_from_monday`] which starts at Monday.
-///
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::DayOfWeekSunday0)
-}
-
-/// Extracts the day of week of a given temporal primitive array as an array of
-/// integers, starting at Sunday.
-///
-/// Sunday is encoded as `0`, Monday as `1`, etc.
-///
-/// See also [`num_days_from_monday`] which starts at Monday.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn num_days_from_sunday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::DayOfWeekSunday0)
-}
-
-/// Extracts the day of a given temporal array as an array of integers.
-///
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn day_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Day)
-}
-
-/// Extracts the day of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn day<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Day)
-}
-
-/// Extracts the day of year of a given temporal array as an array of integers.
-///
-/// The day of year that ranges from 1 to 366.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn doy_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::DayOfYear)
-}
-
-/// Extracts the day of year of a given temporal primitive array as an array of integers.
-///
-/// The day of year that ranges from 1 to 366
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn doy<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    T::Native: ArrowNativeType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::DayOfYear)
-}
-
-/// Extracts the minutes of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn minute<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Minute)
-}
-
-/// Extracts the week of a given temporal array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn week_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Week)
-}
-
-/// Extracts the week of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn week<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Week)
-}
-
-/// Extracts the seconds of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn second<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Second)
-}
-
-/// Extracts the nanoseconds of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn nanosecond<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Nanosecond)
-}
-
-/// Extracts the nanoseconds of a given temporal primitive array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn nanosecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Nanosecond)
-}
-
-/// Extracts the microseconds of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn microsecond<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Microsecond)
-}
-
-/// Extracts the microseconds of a given temporal primitive array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn microsecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Microsecond)
-}
-
-/// Extracts the milliseconds of a given temporal primitive array as an array of integers
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn millisecond<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError>
-where
-    T: ArrowTemporalType + ArrowNumericType,
-    i64: From<T::Native>,
-{
-    date_part_primitive(array, DatePart::Millisecond)
-}
-
-/// Extracts the milliseconds of a given temporal primitive array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn millisecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Millisecond)
-}
-
-/// Extracts the minutes of a given temporal array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn minute_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Minute)
-}
-
-/// Extracts the seconds of a given temporal array as an array of integers.
-/// If the given array isn't temporal primitive or dictionary array,
-/// an `Err` will be returned.
-#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
-pub fn second_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
-    date_part(array, DatePart::Second)
-}
-
 #[cfg(test)]
-#[allow(deprecated)]
 mod tests {
     use super::*;
 
+    /// Test helper: runs [`date_part()`] on a primitive temporal array and
+    /// returns the requested part as an owned [`Int32Array`].
+    fn date_part_primitive<T: ArrowTemporalType>(
+        array: &PrimitiveArray<T>,
+        part: DatePart,
+    ) -> Result<Int32Array, ArrowError> {
+        let array = date_part(array, part)?;
+        Ok(array.as_primitive::<Int32Type>().to_owned())
+    }
+
     #[test]
     fn test_temporal_array_date64_hour() {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(4, b.value(2));
@@ -988,7 +704,7 @@ mod tests {
     fn test_temporal_array_date32_hour() {
         let a: PrimitiveArray<Date32Type> = vec![Some(15147), None, Some(15148)].into();
 
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(0, b.value(2));
@@ -998,7 +714,7 @@ mod tests {
     fn test_temporal_array_time32_second_hour() {
         let a: PrimitiveArray<Time32SecondType> = vec![37800, 86339].into();
 
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(10, b.value(0));
         assert_eq!(23, b.value(1));
     }
@@ -1007,7 +723,7 @@ mod tests {
     fn test_temporal_array_time64_micro_hour() {
         let a: PrimitiveArray<Time64MicrosecondType> = vec![37800000000, 86339000000].into();
 
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(10, b.value(0));
         assert_eq!(23, b.value(1));
     }
@@ -1016,7 +732,7 @@ mod tests {
     fn test_temporal_array_timestamp_micro_hour() {
         let a: TimestampMicrosecondArray = vec![37800000000, 86339000000].into();
 
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(10, b.value(0));
         assert_eq!(23, b.value(1));
     }
@@ -1026,7 +742,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = year(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Year).unwrap();
         assert_eq!(2018, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2019, b.value(2));
@@ -1036,7 +752,7 @@ mod tests {
     fn test_temporal_array_date32_year() {
         let a: PrimitiveArray<Date32Type> = vec![Some(15147), None, Some(15448)].into();
 
-        let b = year(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Year).unwrap();
         assert_eq!(2011, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2012, b.value(2));
@@ -1049,7 +765,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1566275025000)].into();
 
-        let b = quarter(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Quarter).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(3, b.value(2));
@@ -1059,7 +775,7 @@ mod tests {
     fn test_temporal_array_date32_quarter() {
         let a: PrimitiveArray<Date32Type> = vec![Some(1), None, Some(300)].into();
 
-        let b = quarter(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Quarter).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(4, b.value(2));
@@ -1069,10 +785,10 @@ mod tests {
     fn test_temporal_array_timestamp_quarter_with_timezone() {
         // 24 * 60 * 60 = 86400
         let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("+00:00".to_string());
-        let b = quarter(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Quarter).unwrap();
         assert_eq!(2, b.value(0));
         let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("-10:00".to_string());
-        let b = quarter(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Quarter).unwrap();
         assert_eq!(1, b.value(0));
     }
 
@@ -1083,7 +799,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = month(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Month).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2, b.value(2));
@@ -1093,7 +809,7 @@ mod tests {
     fn test_temporal_array_date32_month() {
         let a: PrimitiveArray<Date32Type> = vec![Some(1), None, Some(31)].into();
 
-        let b = month(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Month).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2, b.value(2));
@@ -1103,10 +819,10 @@ mod tests {
     fn test_temporal_array_timestamp_month_with_timezone() {
         // 24 * 60 * 60 = 86400
         let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("+00:00".to_string());
-        let b = month(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Month).unwrap();
         assert_eq!(2, b.value(0));
         let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("-10:00".to_string());
-        let b = month(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Month).unwrap();
         assert_eq!(1, b.value(0));
     }
 
@@ -1114,10 +830,10 @@ mod tests {
     fn test_temporal_array_timestamp_day_with_timezone() {
         // 24 * 60 * 60 = 86400
         let a = TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string());
-        let b = day(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Day).unwrap();
         assert_eq!(2, b.value(0));
         let a = TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string());
-        let b = day(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Day).unwrap();
         assert_eq!(1, b.value(0));
     }
 
@@ -1128,7 +844,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = num_days_from_monday(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::DayOfWeekMonday0).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2, b.value(2));
@@ -1147,7 +863,7 @@ mod tests {
         ]
         .into();
 
-        let b = num_days_from_sunday(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::DayOfWeekSunday0).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(1, b.value(2));
@@ -1161,7 +877,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = day(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Day).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(20, b.value(2));
@@ -1171,7 +887,7 @@ mod tests {
     fn test_temporal_array_date32_day() {
         let a: PrimitiveArray<Date32Type> = vec![Some(0), None, Some(31)].into();
 
-        let b = day(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Day).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(1, b.value(2));
@@ -1190,7 +906,7 @@ mod tests {
         ]
         .into();
 
-        let b = doy(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::DayOfYear).unwrap();
         assert_eq!(1, b.value(0));
         assert_eq!(1, b.value(1));
         assert!(!b.is_valid(2));
@@ -1202,7 +918,7 @@ mod tests {
         let a: TimestampMicrosecondArray =
             vec![Some(1612025847000000), None, Some(1722015847000000)].into();
 
-        let b = year(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Year).unwrap();
         assert_eq!(2021, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2024, b.value(2));
@@ -1213,7 +929,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = minute(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Minute).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(23, b.value(2));
@@ -1224,7 +940,7 @@ mod tests {
         let a: TimestampMicrosecondArray =
             vec![Some(1612025847000000), None, Some(1722015847000000)].into();
 
-        let b = minute(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Minute).unwrap();
         assert_eq!(57, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(44, b.value(2));
@@ -1234,7 +950,7 @@ mod tests {
     fn test_temporal_array_date32_week() {
         let a: PrimitiveArray<Date32Type> = vec![Some(0), None, Some(7)].into();
 
-        let b = week(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Week).unwrap();
         assert_eq!(1, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(2, b.value(2));
@@ -1252,7 +968,7 @@ mod tests {
         ]
         .into();
 
-        let b = week(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Week).unwrap();
         assert_eq!(9, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(1, b.value(2));
@@ -1266,7 +982,7 @@ mod tests {
         let a: TimestampMicrosecondArray =
             vec![Some(1612025847000000), None, Some(1722015847000000)].into();
 
-        let b = week(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Week).unwrap();
         assert_eq!(4, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(30, b.value(2));
@@ -1277,7 +993,7 @@ mod tests {
         let a: PrimitiveArray<Date64Type> =
             vec![Some(1514764800000), None, Some(1550636625000)].into();
 
-        let b = second(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Second).unwrap();
         assert_eq!(0, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(45, b.value(2));
@@ -1288,7 +1004,7 @@ mod tests {
         let a: TimestampMicrosecondArray =
             vec![Some(1612025847000000), None, Some(1722015847000000)].into();
 
-        let b = second(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Second).unwrap();
         assert_eq!(27, b.value(0));
         assert!(!b.is_valid(1));
         assert_eq!(7, b.value(2));
@@ -1297,7 +1013,7 @@ mod tests {
     #[test]
     fn test_temporal_array_timestamp_second_with_timezone() {
         let a = TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string());
-        let b = second(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Second).unwrap();
         assert_eq!(10, b.value(0));
         assert_eq!(20, b.value(1));
     }
@@ -1305,7 +1021,7 @@ mod tests {
     #[test]
     fn test_temporal_array_timestamp_minute_with_timezone() {
         let a = TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string());
-        let b = minute(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Minute).unwrap();
         assert_eq!(50, b.value(0));
         assert_eq!(51, b.value(1));
     }
@@ -1313,42 +1029,46 @@ mod tests {
     #[test]
     fn test_temporal_array_timestamp_minute_with_negative_timezone() {
         let a = TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string());
-        let b = minute(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Minute).unwrap();
         assert_eq!(5, b.value(0));
     }
 
     #[test]
     fn test_temporal_array_timestamp_hour_with_timezone() {
         let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01:00".to_string());
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(11, b.value(0));
     }
 
     #[test]
     fn test_temporal_array_timestamp_hour_with_timezone_without_colon() {
         let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+0100".to_string());
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(11, b.value(0));
     }
 
     #[test]
     fn test_temporal_array_timestamp_hour_with_timezone_without_minutes() {
         let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01".to_string());
-        let b = hour(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Hour).unwrap();
         assert_eq!(11, b.value(0));
     }
 
     #[test]
     fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign() {
         let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("0100".to_string());
-        let err = hour(&a).unwrap_err().to_string();
+        let err = date_part_primitive(&a, DatePart::Hour)
+            .unwrap_err()
+            .to_string();
         assert!(err.contains("Invalid timezone"), "{}", err);
     }
 
     #[test]
     fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() {
         let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("01:00".to_string());
-        let err = hour(&a).unwrap_err().to_string();
+        let err = date_part_primitive(&a, DatePart::Hour)
+            .unwrap_err()
+            .to_string();
         assert!(err.contains("Invalid timezone"), "{}", err);
     }
 
@@ -1358,7 +1078,7 @@ mod tests {
         // 1970-01-01T00:00:00 + 4 days            -> 1970-01-05T00:00:00 Monday   (week 2)
         // 1970-01-01T00:00:00 + 4 days - 1 second -> 1970-01-04T23:59:59 Sunday   (week 1)
         let a = TimestampSecondArray::from(vec![0, 86400 * 4, 86400 * 4 - 1]);
-        let b = week(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Week).unwrap();
         assert_eq!(1, b.value(0));
         assert_eq!(2, b.value(1));
         assert_eq!(1, b.value(2));
@@ -1371,7 +1091,7 @@ mod tests {
         // 1970-01-01T01:00:00+01:00 + 4 days - 1 second -> 1970-01-05T00:59:59+01:00 Monday   (week 2)
         let a = TimestampSecondArray::from(vec![0, 86400 * 4, 86400 * 4 - 1])
             .with_timezone("+01:00".to_string());
-        let b = week(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Week).unwrap();
         assert_eq!(1, b.value(0));
         assert_eq!(2, b.value(1));
         assert_eq!(2, b.value(2));
@@ -1389,7 +1109,7 @@ mod tests {
         let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 1]);
         let dict = DictionaryArray::try_new(keys.clone(), Arc::new(a)).unwrap();
 
-        let b = hour_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Hour).unwrap();
 
         let expected_dict =
             DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![11, 21, 7])));
@@ -1398,7 +1118,7 @@ mod tests {
 
         let b = date_part(&dict, DatePart::Minute).unwrap();
 
-        let b_old = minute_dyn(&dict).unwrap();
+        let b_old = date_part(&dict, DatePart::Minute).unwrap();
 
         let expected_dict =
             DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 2, 3])));
@@ -1408,7 +1128,7 @@ mod tests {
 
         let b = date_part(&dict, DatePart::Second).unwrap();
 
-        let b_old = second_dyn(&dict).unwrap();
+        let b_old = date_part(&dict, DatePart::Second).unwrap();
 
         let expected_dict =
             DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 2, 3])));
@@ -1431,7 +1151,7 @@ mod tests {
         let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
 
-        let b = year_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Year).unwrap();
 
         let expected_dict = DictionaryArray::new(
             keys,
@@ -1450,13 +1170,13 @@ mod tests {
         let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
 
-        let b = quarter_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Quarter).unwrap();
 
         let expected =
             DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 3, 3, 1])));
         assert_eq!(b.as_ref(), &expected);
 
-        let b = month_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Month).unwrap();
 
         let expected = DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1])));
         assert_eq!(b.as_ref(), &expected);
@@ -1471,31 +1191,31 @@ mod tests {
         let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), Some(0), None]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
 
-        let b = num_days_from_monday_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::DayOfWeekMonday0).unwrap();
 
         let a = Int32Array::from(vec![Some(0), Some(2), Some(2), Some(0), None]);
         let expected = DictionaryArray::new(keys.clone(), Arc::new(a));
         assert_eq!(b.as_ref(), &expected);
 
-        let b = num_days_from_sunday_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::DayOfWeekSunday0).unwrap();
 
         let a = Int32Array::from(vec![Some(1), Some(3), Some(3), Some(1), None]);
         let expected = DictionaryArray::new(keys.clone(), Arc::new(a));
         assert_eq!(b.as_ref(), &expected);
 
-        let b = day_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Day).unwrap();
 
         let a = Int32Array::from(vec![Some(1), Some(20), Some(20), Some(1), None]);
         let expected = DictionaryArray::new(keys.clone(), Arc::new(a));
         assert_eq!(b.as_ref(), &expected);
 
-        let b = doy_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::DayOfYear).unwrap();
 
         let a = Int32Array::from(vec![Some(1), Some(51), Some(51), Some(1), None]);
         let expected = DictionaryArray::new(keys.clone(), Arc::new(a));
         assert_eq!(b.as_ref(), &expected);
 
-        let b = week_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Week).unwrap();
 
         let a = Int32Array::from(vec![Some(1), Some(8), Some(8), Some(1), None]);
         let expected = DictionaryArray::new(keys, Arc::new(a));
@@ -1512,13 +1232,13 @@ mod tests {
 
         let a: PrimitiveArray<Date64Type> = vec![None, Some(1667328721453)].into();
 
-        let b = nanosecond(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Nanosecond).unwrap();
         assert!(!b.is_valid(0));
         assert_eq!(453_000_000, b.value(1));
 
         let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1)]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
-        let b = nanosecond_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Nanosecond).unwrap();
 
         let a = Int32Array::from(vec![None, Some(453_000_000)]);
         let expected_dict = DictionaryArray::new(keys, Arc::new(a));
@@ -1530,13 +1250,13 @@ mod tests {
     fn test_temporal_array_date64_microsecond() {
         let a: PrimitiveArray<Date64Type> = vec![None, Some(1667328721453)].into();
 
-        let b = microsecond(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Microsecond).unwrap();
         assert!(!b.is_valid(0));
         assert_eq!(453_000, b.value(1));
 
         let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1)]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
-        let b = microsecond_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Microsecond).unwrap();
 
         let a = Int32Array::from(vec![None, Some(453_000)]);
         let expected_dict = DictionaryArray::new(keys, Arc::new(a));
@@ -1548,13 +1268,13 @@ mod tests {
     fn test_temporal_array_date64_millisecond() {
         let a: PrimitiveArray<Date64Type> = vec![None, Some(1667328721453)].into();
 
-        let b = millisecond(&a).unwrap();
+        let b = date_part_primitive(&a, DatePart::Millisecond).unwrap();
         assert!(!b.is_valid(0));
         assert_eq!(453, b.value(1));
 
         let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1)]);
         let dict = DictionaryArray::new(keys.clone(), Arc::new(a));
-        let b = millisecond_dyn(&dict).unwrap();
+        let b = date_part(&dict, DatePart::Millisecond).unwrap();
 
         let a = Int32Array::from(vec![None, Some(453)]);
         let expected_dict = DictionaryArray::new(keys, Arc::new(a));
diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index a65c0c9ca8e6..8ab0bb290e96 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -44,9 +44,11 @@ arrow-schema = { workspace = true }
 arrow-data = { workspace = true }
 chrono = { workspace = true }
 chrono-tz = { version = "0.10", optional = true }
-num = { version = "0.4.1", default-features = false, features = ["std"] }
+num-complex = { version = "0.4.6", default-features = false, features = ["std"] }
+num-integer = { version = "0.1.46", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 half = { version = "2.1", default-features = false, features = ["num-traits"] }
-hashbrown = { version = "0.15.1", default-features = false }
+hashbrown = { version = "0.16.0", default-features = false }
 
 [package.metadata.docs.rs]
 all-features = true
@@ -57,14 +59,14 @@ force_validate = []
 
 [dev-dependencies]
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
-criterion = { version = "0.5", default-features = false }
+criterion = { workspace = true, default-features = false }
 
 [[bench]]
 name = "occupancy"
 harness = false
 
 [[bench]]
-name = "gc_view_types"
+name = "view_types"
 harness = false
 
 [[bench]]
@@ -78,3 +80,7 @@ harness = false
 [[bench]]
 name = "union_array"
 harness = false
+
+[[bench]]
+name = "record_batch"
+harness = false
\ No newline at end of file
diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs
index 2bdb0c252b8a..72319cdb9b3c 100644
--- a/arrow-array/benches/fixed_size_list_array.rs
+++ b/arrow-array/benches/fixed_size_list_array.rs
@@ -18,7 +18,7 @@
 use arrow_array::{Array, FixedSizeListArray, Int32Array};
 use arrow_schema::Field;
 use criterion::*;
-use rand::{rng, Rng};
+use rand::{Rng, rng};
 use std::{hint, sync::Arc};
 
 fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray {
diff --git a/arrow-array/benches/occupancy.rs b/arrow-array/benches/occupancy.rs
index 283020364199..c088577bc37b 100644
--- a/arrow-array/benches/occupancy.rs
+++ b/arrow-array/benches/occupancy.rs
@@ -19,7 +19,7 @@ use arrow_array::types::Int32Type;
 use arrow_array::{DictionaryArray, Int32Array};
 use arrow_buffer::NullBuffer;
 use criterion::*;
-use rand::{rng, Rng};
+use rand::{Rng, rng};
 use std::{hint, sync::Arc};
 
 fn gen_dict(
diff --git a/arrow-array/benches/record_batch.rs b/arrow-array/benches/record_batch.rs
new file mode 100644
index 000000000000..5f2ba5d3d7b5
--- /dev/null
+++ b/arrow-array/benches/record_batch.rs
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchOptions};
+use arrow_schema::{DataType, Field, Schema, SchemaRef};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use num_integer::Integer;
+use std::hint::black_box;
+use std::sync::Arc;
+
+fn make_record_batch(column_count: usize, row_count: usize) -> RecordBatch {
+    let fields = (0..column_count)
+        .map(|i| Field::new(format!("col_{}", i), DataType::Int64, i.is_even()))
+        .collect::<Vec<_>>();
+
+    let columns = fields
+        .iter()
+        .map(|_| {
+            let array_ref: ArrayRef = Arc::new(Int64Array::from_value(0, row_count));
+            array_ref
+        })
+        .collect::<Vec<_>>();
+
+    let schema = Schema::new(fields);
+
+    let mut options = RecordBatchOptions::new();
+    options.row_count = Some(row_count);
+
+    RecordBatch::try_new_with_options(SchemaRef::new(schema), columns, &options).unwrap()
+}
+
+fn project_benchmark(
+    c: &mut Criterion,
+    column_count: usize,
+    row_count: usize,
+    projection_size: usize,
+) {
+    let input = make_input(column_count, row_count, projection_size);
+
+    c.bench_with_input(
+        BenchmarkId::new(
+            "project",
+            format!(
+                "{:?}x{:?} -> {:?}x{:?}",
+                input.0.num_columns(),
+                input.0.num_rows(),
+                input.1.len(),
+                input.0.num_rows()
+            ),
+        ),
+        &input,
+        |b, (rb, projection)| {
+            b.iter(|| black_box(rb.project(projection).unwrap()));
+        },
+    );
+}
+
+fn make_input(
+    column_count: usize,
+    row_count: usize,
+    projection_size: usize,
+) -> (RecordBatch, Vec<usize>) {
+    let rb = make_record_batch(column_count, row_count);
+    let projection = (0..projection_size).collect::<Vec<_>>();
+    (rb, projection)
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    [10, 100, 1000].iter().for_each(|&column_count| {
+        [1, column_count / 2, column_count - 1]
+            .iter()
+            .for_each(|&projection_size| {
+                project_benchmark(c, column_count, 8192, projection_size);
+            })
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/arrow-array/benches/union_array.rs b/arrow-array/benches/union_array.rs
index f3894e249f4c..414529882a29 100644
--- a/arrow-array/benches/union_array.rs
+++ b/arrow-array/benches/union_array.rs
@@ -15,17 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{
-    hint,
-    iter::{repeat, repeat_with},
-    sync::Arc,
-};
+use std::{hint, iter::repeat_with, sync::Arc};
 
 use arrow_array::{Array, ArrayRef, Int32Array, UnionArray};
 use arrow_buffer::{NullBuffer, ScalarBuffer};
 use arrow_schema::{DataType, Field, UnionFields};
 use criterion::*;
-use rand::{rng, Rng};
+use rand::{Rng, rng};
 
 fn array_with_nulls() -> ArrayRef {
     let mut rng = rng();
@@ -58,18 +54,17 @@ fn criterion_benchmark(c: &mut Criterion) {
                 |b| {
                     let type_ids = 0..with_nulls+without_nulls;
 
-                    let fields = UnionFields::new(
+                    let fields = UnionFields::try_new(
                         type_ids.clone(),
                         type_ids.clone().map(|i| Field::new(format!("f{i}"), DataType::Int32, true)),
-                    );
+                    ).unwrap();
 
                     let array = UnionArray::try_new(
                         fields,
                         type_ids.cycle().take(4096).collect(),
                         None,
-                        repeat(array_with_nulls())
-                            .take(with_nulls as usize)
-                            .chain(repeat(array_without_nulls()).take(without_nulls as usize))
+                        std::iter::repeat_n(array_with_nulls(), with_nulls as usize)
+                            .chain(std::iter::repeat_n(array_without_nulls(), without_nulls as usize))
                             .collect(),
                     )
                     .unwrap();
diff --git a/arrow-array/benches/view_types.rs b/arrow-array/benches/view_types.rs
new file mode 100644
index 000000000000..e194c268c19d
--- /dev/null
+++ b/arrow-array/benches/view_types.rs
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::StringViewArray;
+use criterion::{Criterion, criterion_group, criterion_main};
+use std::hint::black_box;
+
+fn gen_view_array(size: usize) -> StringViewArray {
+    StringViewArray::from_iter((0..size).map(|v| match v % 3 {
+        0 => Some("small"),
+        1 => Some("larger than 12 bytes array"),
+        2 => None,
+        _ => unreachable!("unreachable"),
+    }))
+}
+
+fn gen_view_array_without_nulls(size: usize) -> StringViewArray {
+    StringViewArray::from_iter((0..size).map(|v| {
+        let s = match v % 3 {
+            0 => "small".to_string(),                      // < 12 bytes
+            1 => "larger than 12 bytes array".to_string(), // >12 bytes
+            2 => "x".repeat(300),                          // 300 bytes (>256)
+            _ => unreachable!(),
+        };
+        Some(s)
+    }))
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let array = gen_view_array(100_000);
+
+    c.bench_function("view types slice", |b| {
+        b.iter(|| {
+            black_box(array.slice(0, 100_000 / 2));
+        });
+    });
+
+    c.bench_function("gc view types all[100000]", |b| {
+        b.iter(|| {
+            black_box(array.gc());
+        });
+    });
+
+    let sliced = array.slice(0, 100_000 / 2);
+    c.bench_function("gc view types slice half[100000]", |b| {
+        b.iter(|| {
+            black_box(sliced.gc());
+        });
+    });
+
+    let array = gen_view_array_without_nulls(100_000);
+
+    c.bench_function("gc view types all without nulls[100000]", |b| {
+        b.iter(|| {
+            black_box(array.gc());
+        });
+    });
+
+    let sliced = array.slice(0, 100_000 / 2);
+    c.bench_function("gc view types slice half without nulls[100000]", |b| {
+        b.iter(|| {
+            black_box(sliced.gc());
+        });
+    });
+
+    let array = gen_view_array(8000);
+
+    c.bench_function("gc view types all[8000]", |b| {
+        b.iter(|| {
+            black_box(array.gc());
+        });
+    });
+
+    let sliced = array.slice(0, 8000 / 2);
+    c.bench_function("gc view types slice half[8000]", |b| {
+        b.iter(|| {
+            black_box(sliced.gc());
+        });
+    });
+
+    let array = gen_view_array_without_nulls(8000);
+
+    c.bench_function("gc view types all without nulls[8000]", |b| {
+        b.iter(|| {
+            black_box(array.gc());
+        });
+    });
+
+    let sliced = array.slice(0, 8000 / 2);
+    c.bench_function("gc view types slice half without nulls[8000]", |b| {
+        b.iter(|| {
+            black_box(sliced.gc());
+        });
+    });
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs
index b5f4a106f5ad..52708da7810f 100644
--- a/arrow-array/src/arithmetic.rs
+++ b/arrow-array/src/arithmetic.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::{i256, ArrowNativeType, IntervalDayTime, IntervalMonthDayNano};
+use arrow_buffer::{ArrowNativeType, IntervalDayTime, IntervalMonthDayNano, i256};
 use arrow_schema::ArrowError;
 use half::f16;
-use num::complex::ComplexFloat;
+use num_complex::ComplexFloat;
 use std::cmp::Ordering;
 
 /// Trait for [`ArrowNativeType`] that adds checked and unchecked arithmetic operations,
@@ -288,7 +288,7 @@ native_type_op!(u8);
 native_type_op!(u16);
 native_type_op!(u32);
 native_type_op!(u64);
-native_type_op!(i256, i256::ZERO, i256::ONE, i256::MIN, i256::MAX);
+native_type_op!(i256, i256::ZERO, i256::ONE);
 
 native_type_op!(IntervalDayTime, IntervalDayTime::ZERO, IntervalDayTime::ONE);
 native_type_op!(
@@ -418,15 +418,35 @@ native_type_float_op!(
     f32,
     0.,
     1.,
-    unsafe { std::mem::transmute(-1_i32) },
-    unsafe { std::mem::transmute(i32::MAX) }
+    unsafe {
+        // Need to allow in clippy because
+        // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
+        #[allow(unnecessary_transmutes)]
+        std::mem::transmute(-1_i32)
+    },
+    unsafe {
+        // Need to allow in clippy because
+        // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
+        #[allow(unnecessary_transmutes)]
+        std::mem::transmute(i32::MAX)
+    }
 );
 native_type_float_op!(
     f64,
     0.,
     1.,
-    unsafe { std::mem::transmute(-1_i64) },
-    unsafe { std::mem::transmute(i64::MAX) }
+    unsafe {
+        // Need to allow in clippy because
+        // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
+        #[allow(unnecessary_transmutes)]
+        std::mem::transmute(-1_i64)
+    },
+    unsafe {
+        // Need to allow in clippy because
+        // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0`
+        #[allow(unnecessary_transmutes)]
+        std::mem::transmute(i64::MAX)
+    }
 );
 
 #[cfg(test)]
@@ -434,9 +454,7 @@ mod tests {
     use super::*;
 
     macro_rules! assert_approx_eq {
-        ( $x: expr, $y: expr ) => {{
-            assert_approx_eq!($x, $y, 1.0e-4)
-        }};
+        ( $x: expr, $y: expr ) => {{ assert_approx_eq!($x, $y, 1.0e-4) }};
         ( $x: expr, $y: expr, $tol: expr ) => {{
             let x_val = $x;
             let y_val = $y;
diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs
index 8e2158416f49..7cfa1b52728e 100644
--- a/arrow-array/src/array/binary_array.rs
+++ b/arrow-array/src/array/binary_array.rs
@@ -90,7 +90,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
         &'a self,
         indexes: impl Iterator<Item = Option<usize>> + 'a,
     ) -> impl Iterator<Item = Option<&'a [u8]>> {
-        indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
+        unsafe { indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index))) }
     }
 }
 
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index fcebf5a0f718..acea680ae374 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -19,7 +19,7 @@ use crate::array::print_long_array;
 use crate::builder::BooleanBuilder;
 use crate::iterator::BooleanIter;
 use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
-use arrow_buffer::{bit_util, BooleanBuffer, Buffer, MutableBuffer, NullBuffer};
+use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::DataType;
 use std::any::Any;
@@ -178,13 +178,20 @@ impl BooleanArray {
 
     /// Returns the boolean value at index `i`.
     ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     /// This doesn't check bounds, the caller must ensure that index < self.len()
     pub unsafe fn value_unchecked(&self, i: usize) -> bool {
-        self.values.value_unchecked(i)
+        unsafe { self.values.value_unchecked(i) }
     }
 
     /// Returns the boolean value at index `i`.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds
     pub fn value(&self, i: usize) -> bool {
@@ -215,7 +222,7 @@ impl BooleanArray {
         &'a self,
         indexes: impl Iterator<Item = Option<usize>> + 'a,
     ) -> impl Iterator<Item = Option<bool>> + 'a {
-        indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
+        indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
     }
 
     /// Create a [`BooleanArray`] by evaluating the operation for
@@ -279,6 +286,8 @@ impl BooleanArray {
     }
 }
 
+impl super::private::Sealed for BooleanArray {}
+
 impl Array for BooleanArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -348,7 +357,7 @@ impl ArrayAccessor for &BooleanArray {
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        BooleanArray::value_unchecked(self, index)
+        unsafe { BooleanArray::value_unchecked(self, index) }
     }
 }
 
@@ -429,11 +438,84 @@ impl<'a> BooleanArray {
     }
 }
 
-impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
+/// An optional boolean value
+///
+/// This struct is used as an adapter when creating a `BooleanArray` from an iterator.
+/// `FromIterator` for `BooleanArray` accepts any iterator whose items convert `Into`
+/// this struct, so once `From` or `Into` is implemented for a type, an iterator of
+/// that type can be collected into a `BooleanArray`.
+///
+/// See also [NativeAdapter](crate::array::NativeAdapter).
+#[derive(Debug)]
+struct BooleanAdapter {
+    /// Corresponding Rust native type if available
+    pub native: Option<bool>,
+}
+
+impl From<bool> for BooleanAdapter {
+    fn from(value: bool) -> Self {
+        BooleanAdapter {
+            native: Some(value),
+        }
+    }
+}
+
+impl From<&bool> for BooleanAdapter {
+    fn from(value: &bool) -> Self {
+        BooleanAdapter {
+            native: Some(*value),
+        }
+    }
+}
+
+impl From<Option<bool>> for BooleanAdapter {
+    fn from(value: Option<bool>) -> Self {
+        BooleanAdapter { native: value }
+    }
+}
+
+impl From<&Option<bool>> for BooleanAdapter {
+    fn from(value: &Option<bool>) -> Self {
+        BooleanAdapter { native: *value }
+    }
+}
+
+impl<Ptr: Into<BooleanAdapter>> FromIterator<Ptr> for BooleanArray {
     fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
         let iter = iter.into_iter();
-        let (_, data_len) = iter.size_hint();
-        let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
+        let capacity = match iter.size_hint() {
+            (lower, Some(upper)) if lower == upper => lower,
+            _ => 0,
+        };
+        let mut builder = BooleanBuilder::with_capacity(capacity);
+        builder.extend(iter.map(|item| item.into().native));
+        builder.finish()
+    }
+}
+
+impl BooleanArray {
+    /// Creates a [`BooleanArray`] from an iterator of trusted length.
+    ///
+    /// # Safety
+    ///
+    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
+    /// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger
+    /// guarantee than `ExactSizeIterator` provides, which could still report a wrong length.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the iterator does not report an upper bound on `size_hint()`.
+    #[inline]
+    #[allow(
+        private_bounds,
+        reason = "We will expose BooleanAdapter if there is a need"
+    )]
+    pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
+    where
+        P: Into<BooleanAdapter>,
+        I: ExactSizeIterator<Item = P>,
+    {
+        let data_len = iter.len();
 
         let num_bytes = bit_util::ceil(data_len, 8);
         let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes);
@@ -443,10 +525,14 @@ impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray
 
         let null_slice = null_builder.as_slice_mut();
         iter.enumerate().for_each(|(i, item)| {
-            if let Some(a) = item.borrow() {
-                bit_util::set_bit(null_slice, i);
-                if *a {
-                    bit_util::set_bit(data, i);
+            if let Some(a) = item.into().native {
+                unsafe {
+                    // SAFETY: There will be enough space in the buffers due to the trusted len size
+                    // hint
+                    bit_util::set_bit_raw(null_slice.as_mut_ptr(), i);
+                    if a {
+                        bit_util::set_bit_raw(data.as_mut_ptr(), i);
+                    }
                 }
             }
         });
@@ -479,7 +565,7 @@ impl From<BooleanBuffer> for BooleanArray {
 mod tests {
     use super::*;
     use arrow_buffer::Buffer;
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
 
     #[test]
     fn test_boolean_fmt_debug() {
@@ -592,6 +678,20 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_boolean_array_from_non_nullable_iter() {
+        let v = vec![true, false, true];
+        let arr = v.into_iter().collect::<BooleanArray>();
+        assert_eq!(3, arr.len());
+        assert_eq!(0, arr.offset());
+        assert_eq!(0, arr.null_count());
+        assert!(arr.nulls().is_none());
+
+        assert!(arr.value(0));
+        assert!(!arr.value(1));
+        assert!(arr.value(2));
+    }
+
     #[test]
     fn test_boolean_array_from_nullable_iter() {
         let v = vec![Some(true), None, Some(false), None];
@@ -610,6 +710,29 @@ mod tests {
         assert!(!arr.value(2));
     }
 
+    #[test]
+    fn test_boolean_array_from_nullable_trusted_len_iter() {
+        // Should exhibit the same behavior as `from_iter`, which is tested above.
+        let v = vec![Some(true), None, Some(false), None];
+        let expected = v.clone().into_iter().collect::<BooleanArray>();
+        let actual = unsafe {
+            // SAFETY: `v` has trusted length
+            BooleanArray::from_trusted_len_iter(v.into_iter())
+        };
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn test_boolean_array_from_iter_with_larger_upper_bound() {
+        // See https://github.com/apache/arrow-rs/issues/8505
+        // This returns an upper size hint of 4
+        let iterator = vec![Some(true), None, Some(false), None]
+            .into_iter()
+            .filter(Option::is_some);
+        let arr = iterator.collect::<BooleanArray>();
+        assert_eq!(2, arr.len());
+    }
+
     #[test]
     fn test_boolean_array_builder() {
         // Test building a boolean array with ArrayData builder and offset
@@ -708,4 +831,32 @@ mod tests {
         assert_eq!(values.values(), &[0b1000_0000]);
         assert!(nulls.is_none());
     }
+
+    #[test]
+    fn test_new_null_array() {
+        let arr = BooleanArray::new_null(5);
+
+        assert_eq!(arr.len(), 5);
+        assert_eq!(arr.null_count(), 5);
+        assert_eq!(arr.true_count(), 0);
+        assert_eq!(arr.false_count(), 0);
+
+        for i in 0..5 {
+            assert!(arr.is_null(i));
+            assert!(!arr.is_valid(i));
+        }
+    }
+
+    #[test]
+    fn test_slice_with_nulls() {
+        let arr = BooleanArray::from(vec![Some(true), None, Some(false)]);
+        let sliced = arr.slice(1, 2);
+
+        assert_eq!(sliced.len(), 2);
+        assert_eq!(sliced.null_count(), 1);
+
+        assert!(sliced.is_null(0));
+        assert!(sliced.is_valid(1));
+        assert!(!sliced.value(1));
+    }
 }
diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs
index 192c9654b055..bd85bffcfe44 100644
--- a/arrow-array/src/array/byte_array.rs
+++ b/arrow-array/src/array/byte_array.rs
@@ -18,8 +18,8 @@
 use crate::array::{get_offsets, print_long_array};
 use crate::builder::GenericByteBuilder;
 use crate::iterator::ArrayIter;
-use crate::types::bytes::ByteArrayNativeType;
 use crate::types::ByteArrayType;
+use crate::types::bytes::ByteArrayNativeType;
 use crate::{Array, ArrayAccessor, ArrayRef, OffsetSizeTrait, Scalar};
 use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
 use arrow_buffer::{NullBuffer, OffsetBuffer};
@@ -190,6 +190,29 @@ impl<T: ByteArrayType> GenericByteArray<T> {
         Scalar::new(Self::from_iter_values(std::iter::once(value)))
     }
 
+    /// Create a new [`GenericByteArray`] where `value` is repeated `repeat_count` times.
+    ///
+    /// # Panics
+    /// Panics if `value`'s length multiplied by `repeat_count` overflows `usize`, or if
+    /// the resulting offsets do not fit in the array's offset type (e.g. `i32`).
+    pub fn new_repeated(value: impl AsRef<T::Native>, repeat_count: usize) -> Self {
+        let s: &[u8] = value.as_ref().as_ref();
+        let value_offsets = OffsetBuffer::from_repeated_length(s.len(), repeat_count);
+        let bytes: Buffer = {
+            let mut mutable_buffer = MutableBuffer::with_capacity(0);
+            mutable_buffer.repeat_slice_n_times(s, repeat_count);
+
+            mutable_buffer.into()
+        };
+
+        Self {
+            data_type: T::DATA_TYPE,
+            value_data: bytes,
+            value_offsets,
+            nulls: None,
+        }
+    }
+
     /// Creates a [`GenericByteArray`] based on an iterator of values without nulls
     pub fn from_iter_values<Ptr, I>(iter: I) -> Self
     where
@@ -276,11 +299,15 @@ impl<T: ByteArrayType> GenericByteArray<T> {
     }
 
     /// Returns the element at index `i`
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     /// Caller is responsible for ensuring that the index is within the bounds of the array
     pub unsafe fn value_unchecked(&self, i: usize) -> &T::Native {
-        let end = *self.value_offsets().get_unchecked(i + 1);
-        let start = *self.value_offsets().get_unchecked(i);
+        let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
+        let start = *unsafe { self.value_offsets().get_unchecked(i) };
 
         // Soundness
         // pointer alignment & location is ensured by RawPtrBox
@@ -291,19 +318,25 @@ impl<T: ByteArrayType> GenericByteArray<T> {
         // OffsetSizeTrait. Currently, only i32 and i64 implement OffsetSizeTrait,
         // both of which should cleanly cast to isize on an architecture that supports
         // 32/64-bit offsets
-        let b = std::slice::from_raw_parts(
-            self.value_data
-                .as_ptr()
-                .offset(start.to_isize().unwrap_unchecked()),
-            (end - start).to_usize().unwrap_unchecked(),
-        );
+        let b = unsafe {
+            std::slice::from_raw_parts(
+                self.value_data
+                    .as_ptr()
+                    .offset(start.to_isize().unwrap_unchecked()),
+                (end - start).to_usize().unwrap_unchecked(),
+            )
+        };
 
         // SAFETY:
         // ArrayData is valid
-        T::Native::from_bytes_unchecked(b)
+        unsafe { T::Native::from_bytes_unchecked(b) }
     }
 
     /// Returns the element at index `i`
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds.
     pub fn value(&self, i: usize) -> &T::Native {
@@ -429,6 +462,8 @@ impl<T: ByteArrayType> std::fmt::Debug for GenericByteArray<T> {
     }
 }
 
+impl<T: ByteArrayType> super::private::Sealed for GenericByteArray<T> {}
+
 impl<T: ByteArrayType> Array for GenericByteArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -501,7 +536,7 @@ impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray<T> {
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        GenericByteArray::value_unchecked(self, index)
+        unsafe { GenericByteArray::value_unchecked(self, index) }
     }
 }
 
@@ -583,7 +618,7 @@ where
 
 #[cfg(test)]
 mod tests {
-    use crate::{BinaryArray, StringArray};
+    use crate::{Array, BinaryArray, StringArray};
     use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer};
 
     #[test]
@@ -595,14 +630,23 @@ mod tests {
         let nulls = NullBuffer::new_null(3);
         let err =
             StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"
+        );
 
         let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"
+        );
 
         let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld");
         let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"
+        );
 
         BinaryArray::new(offsets, non_utf8_data, None);
 
@@ -632,4 +676,42 @@ mod tests {
 
         BinaryArray::new(offsets, non_ascii_data, None);
     }
+
+    #[test]
+    fn create_repeated() {
+        let arr = BinaryArray::new_repeated(b"hello", 3);
+        assert_eq!(arr.len(), 3);
+        assert_eq!(arr.value(0), b"hello");
+        assert_eq!(arr.value(1), b"hello");
+        assert_eq!(arr.value(2), b"hello");
+
+        let arr = StringArray::new_repeated("world", 2);
+        assert_eq!(arr.len(), 2);
+        assert_eq!(arr.value(0), "world");
+        assert_eq!(arr.value(1), "world");
+    }
+
+    #[test]
+    #[should_panic(expected = "usize overflow")]
+    fn create_repeated_usize_overflow_1() {
+        let _arr = BinaryArray::new_repeated(b"hello", (usize::MAX / "hello".len()) + 1);
+    }
+
+    #[test]
+    #[should_panic(expected = "usize overflow")]
+    fn create_repeated_usize_overflow_2() {
+        let _arr = BinaryArray::new_repeated(b"hello", usize::MAX);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn create_repeated_i32_offset_overflow_1() {
+        let _arr = BinaryArray::new_repeated(b"hello", usize::MAX / "hello".len());
+    }
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn create_repeated_i32_offset_overflow_2() {
+        let _arr = BinaryArray::new_repeated(b"hello", ((i32::MAX as usize) / "hello".len()) + 1);
+    }
 }
diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs
index 713e275d186c..ca8ddfbe2ad5 100644
--- a/arrow-array/src/array/byte_view_array.rs
+++ b/arrow-array/src/array/byte_view_array.rs
@@ -22,11 +22,12 @@ use crate::types::bytes::ByteArrayNativeType;
 use crate::types::{BinaryViewType, ByteViewType, StringViewType};
 use crate::{Array, ArrayAccessor, ArrayRef, GenericByteArray, OffsetSizeTrait, Scalar};
 use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
-use arrow_data::{ArrayData, ArrayDataBuilder, ByteView};
+use arrow_data::{ArrayData, ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
 use arrow_schema::{ArrowError, DataType};
 use core::str;
-use num::ToPrimitive;
+use num_traits::ToPrimitive;
 use std::any::Any;
+use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::sync::Arc;
@@ -77,8 +78,9 @@ use super::ByteArrayType;
 ///                          0    31       63      95    127
 /// ```
 ///
-/// * Strings with length <= 12 are stored directly in the view. See
-///   [`Self::inline_value`] to access the inlined prefix from a short view.
+/// * Strings with length <= 12 ([`MAX_INLINE_VIEW_LEN`]) are stored directly in
+///   the view. See [`Self::inline_value`] to access the inlined prefix from a
+///   short view.
 ///
 /// * Strings with length > 12: The first four bytes are stored inline in the
 ///   view and the entire string is stored in one of the buffers. See [`ByteView`]
@@ -128,6 +130,7 @@ use super::ByteArrayType;
 /// assert_eq!(value, "this string is also longer than 12 bytes");
 /// ```
 ///
+/// [`MAX_INLINE_VIEW_LEN`]: arrow_data::MAX_INLINE_VIEW_LEN
 /// [`arrow_compute`]: https://docs.rs/arrow/latest/arrow/compute/index.html
 ///
 /// Unlike [`GenericByteArray`], there are no constraints on the offsets other
@@ -162,7 +165,7 @@ use super::ByteArrayType;
 pub struct GenericByteViewArray<T: ByteViewType + ?Sized> {
     data_type: DataType,
     views: ScalarBuffer<u128>,
-    buffers: Vec<Buffer>,
+    buffers: Arc<[Buffer]>,
     phantom: PhantomData<T>,
     nulls: Option<NullBuffer>,
 }
@@ -185,7 +188,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     /// # Panics
     ///
     /// Panics if [`GenericByteViewArray::try_new`] returns an error
-    pub fn new(views: ScalarBuffer<u128>, buffers: Vec<Buffer>, nulls: Option<NullBuffer>) -> Self {
+    pub fn new<U>(views: ScalarBuffer<u128>, buffers: U, nulls: Option<NullBuffer>) -> Self
+    where
+        U: Into<Arc<[Buffer]>>,
+    {
         Self::try_new(views, buffers, nulls).unwrap()
     }
 
@@ -195,11 +201,16 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     ///
     /// * `views.len() != nulls.len()`
     /// * [ByteViewType::validate] fails
-    pub fn try_new(
+    pub fn try_new<U>(
         views: ScalarBuffer<u128>,
-        buffers: Vec<Buffer>,
+        buffers: U,
         nulls: Option<NullBuffer>,
-    ) -> Result<Self, ArrowError> {
+    ) -> Result<Self, ArrowError>
+    where
+        U: Into<Arc<[Buffer]>>,
+    {
+        let buffers: Arc<[Buffer]> = buffers.into();
+
         T::validate(&views, &buffers)?;
 
         if let Some(n) = nulls.as_ref() {
@@ -227,11 +238,14 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     /// # Safety
     ///
     /// Safe if [`Self::try_new`] would not error
-    pub unsafe fn new_unchecked(
+    pub unsafe fn new_unchecked<U>(
         views: ScalarBuffer<u128>,
-        buffers: Vec<Buffer>,
+        buffers: U,
         nulls: Option<NullBuffer>,
-    ) -> Self {
+    ) -> Self
+    where
+        U: Into<Arc<[Buffer]>>,
+    {
         if cfg!(feature = "force_validate") {
             return Self::new(views, buffers, nulls);
         }
@@ -240,7 +254,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
             data_type: T::DATA_TYPE,
             phantom: Default::default(),
             views,
-            buffers,
+            buffers: buffers.into(),
             nulls,
         }
     }
@@ -250,7 +264,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         Self {
             data_type: T::DATA_TYPE,
             views: vec![0; len].into(),
-            buffers: vec![],
+            buffers: vec![].into(),
             nulls: Some(NullBuffer::new_null(len)),
             phantom: Default::default(),
         }
@@ -276,7 +290,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     }
 
     /// Deconstruct this array into its constituent parts
-    pub fn into_parts(self) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+    pub fn into_parts(self) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
         (self.views, self.buffers, self.nulls)
     }
 
@@ -293,6 +307,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     }
 
     /// Returns the element at index `i`
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds.
     pub fn value(&self, i: usize) -> &T::Native {
@@ -309,33 +327,38 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
 
     /// Returns the element at index `i` without bounds checking
     ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     ///
     /// Caller is responsible for ensuring that the index is within the bounds
     /// of the array
     pub unsafe fn value_unchecked(&self, idx: usize) -> &T::Native {
-        let v = self.views.get_unchecked(idx);
+        let v = unsafe { self.views.get_unchecked(idx) };
         let len = *v as u32;
-        let b = if len <= 12 {
-            Self::inline_value(v, len as usize)
+        let b = if len <= MAX_INLINE_VIEW_LEN {
+            unsafe { Self::inline_value(v, len as usize) }
         } else {
             let view = ByteView::from(*v);
-            let data = self.buffers.get_unchecked(view.buffer_index as usize);
+            let data = unsafe { self.buffers.get_unchecked(view.buffer_index as usize) };
             let offset = view.offset as usize;
-            data.get_unchecked(offset..offset + len as usize)
+            unsafe { data.get_unchecked(offset..offset + len as usize) }
         };
-        T::Native::from_bytes_unchecked(b)
+        unsafe { T::Native::from_bytes_unchecked(b) }
     }
 
     /// Returns the first `len` bytes the inline value of the view.
     ///
     /// # Safety
     /// - The `view` must be a valid element from `Self::views()` that adheres to the view layout.
-    /// - The `len` must be the length of the inlined value. It should never be larger than 12.
+    /// - The `len` must be the length of the inlined value. It should never be larger than [`MAX_INLINE_VIEW_LEN`].
     #[inline(always)]
     pub unsafe fn inline_value(view: &u128, len: usize) -> &[u8] {
-        debug_assert!(len <= 12);
-        std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
+        debug_assert!(len <= MAX_INLINE_VIEW_LEN as usize);
+        unsafe {
+            std::slice::from_raw_parts((view as *const u128 as *const u8).wrapping_add(4), len)
+        }
     }
 
     /// Constructs a new iterator for iterating over the values of this array
@@ -347,7 +370,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     pub fn bytes_iter(&self) -> impl Iterator<Item = &[u8]> {
         self.views.iter().map(move |v| {
             let len = *v as u32;
-            if len <= 12 {
+            if len <= MAX_INLINE_VIEW_LEN {
                 unsafe { Self::inline_value(v, len as usize) }
             } else {
                 let view = ByteView::from(*v);
@@ -371,7 +394,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
                 return &[] as &[u8];
             }
 
-            if prefix_len <= 4 || len <= 12 {
+            if prefix_len <= 4 || len as u32 <= MAX_INLINE_VIEW_LEN {
                 unsafe { StringViewArray::inline_value(v, prefix_len) }
             } else {
                 let view = ByteView::from(*v);
@@ -401,7 +424,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
                 return &[] as &[u8];
             }
 
-            if len <= 12 {
+            if len as u32 <= MAX_INLINE_VIEW_LEN {
                 unsafe { &StringViewArray::inline_value(v, len)[len - suffix_len..] }
             } else {
                 let view = ByteView::from(*v);
@@ -415,6 +438,26 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         })
     }
 
+    /// Returns an iterator over the byte length of every element, including null slots.
+    ///
+    /// Nulls are not consulted: a null slot yields the length stored in its view, NOT necessarily 0.
+    ///
+    /// Example of reporting 0 for null values:
+    /// ```rust
+    /// # use arrow_array::StringViewArray;
+    /// # use arrow_array::Array;
+    /// use arrow_data::ByteView;
+    ///
+    /// fn lengths_with_zero_for_nulls(view: &StringViewArray) -> impl Iterator<Item = u32> {
+    ///     view.lengths()
+    ///         .enumerate()
+    ///         .map(|(index, length)| if view.is_null(index) { 0 } else { length })
+    /// }
+    /// ```
+    pub fn lengths(&self) -> impl ExactSizeIterator<Item = u32> + Clone {
+        self.views().iter().map(|v| *v as u32)
+    }
+
     /// Returns a zero-copy slice of this array with the indicated offset and length.
     pub fn slice(&self, offset: usize, length: usize) -> Self {
         Self {
@@ -470,13 +513,161 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     /// Note: this function does not attempt to canonicalize / deduplicate values. For this
     /// feature see  [`GenericByteViewBuilder::with_deduplicate_strings`].
     pub fn gc(&self) -> Self {
-        let mut builder = GenericByteViewBuilder::<T>::with_capacity(self.len());
+        // 1) Read basic properties once
+        let len = self.len(); // number of elements
+        let nulls = self.nulls().cloned(); // reuse & clone existing null bitmap
+
+        // 1.5) Fast path: if there are no buffers, just reuse original views and no data blocks
+        if self.data_buffers().is_empty() {
+            return unsafe {
+                GenericByteViewArray::new_unchecked(
+                    self.views().clone(),
+                    vec![], // empty data blocks
+                    nulls,
+                )
+            };
+        }
 
-        for v in self.iter() {
-            builder.append_option(v);
+        // 2) Calculate total size of all non-inline data and detect if any exists
+        let total_large = self.total_buffer_bytes_used();
+
+        // 2.5) Fast path: if there is no non-inline data, avoid buffer allocation & processing
+        if total_large == 0 {
+            // Views are inline-only or all null; just reuse original views and no data blocks
+            return unsafe {
+                GenericByteViewArray::new_unchecked(
+                    self.views().clone(),
+                    vec![], // empty data blocks
+                    nulls,
+                )
+            };
         }
 
-        builder.finish()
+        let (views_buf, data_blocks) = if total_large < i32::MAX as usize {
+            // fast path, the entire data fits in a single buffer
+            // 3) Allocate exactly capacity for all non-inline data
+            let mut data_buf = Vec::with_capacity(total_large);
+
+            // 4) Iterate over views and process each inline/non-inline view
+            let views_buf: Vec<u128> = (0..len)
+                .map(|i| unsafe { self.copy_view_to_buffer(i, 0, &mut data_buf) })
+                .collect();
+            let data_block = Buffer::from_vec(data_buf);
+            let data_blocks = vec![data_block];
+            (views_buf, data_blocks)
+        } else {
+            // slow path: split consecutive views into groups, one output buffer per group
+
+            struct GcCopyGroup {
+                total_buffer_bytes: usize, // out-of-line bytes copied for this group
+                total_len: usize, // number of views (inline and out-of-line) in this group
+            }
+
+            impl GcCopyGroup {
+                fn new(total_buffer_bytes: u32, total_len: usize) -> Self {
+                    Self {
+                        total_buffer_bytes: total_buffer_bytes as usize,
+                        total_len,
+                    }
+                }
+            }
+
+            let mut groups = Vec::new();
+            let mut current_length = 0;
+            let mut current_elements = 0;
+
+            for view in self.views() {
+                let len = *view as u32;
+                if len > MAX_INLINE_VIEW_LEN {
+                    if current_length + len > i32::MAX as u32 {
+                        // This view would push the buffer past i32::MAX bytes: start a new group
+                        groups.push(GcCopyGroup::new(current_length, current_elements));
+                        current_length = 0;
+                        current_elements = 0;
+                    }
+                    current_length += len;
+                }
+                current_elements += 1; // count inline views too: groups span view indices
+            }
+            if current_elements != 0 {
+                groups.push(GcCopyGroup::new(current_length, current_elements));
+            }
+            debug_assert!(groups.len() <= i32::MAX as usize);
+
+            // 3) Copy the buffers group by group
+            let mut views_buf = Vec::with_capacity(len);
+            let mut data_blocks = Vec::with_capacity(groups.len());
+
+            let mut current_view_idx = 0;
+
+            for (group_idx, gc_copy_group) in groups.iter().enumerate() {
+                let mut data_buf = Vec::with_capacity(gc_copy_group.total_buffer_bytes);
+
+                // Directly push views to avoid intermediate Vec allocation
+                let new_views = (current_view_idx..current_view_idx + gc_copy_group.total_len).map(
+                    |view_idx| {
+                        // SAFETY: group sizes sum to `len`, so `view_idx < self.len()`
+                        unsafe {
+                            self.copy_view_to_buffer(view_idx, group_idx as i32, &mut data_buf)
+                        }
+                    },
+                );
+                views_buf.extend(new_views);
+
+                data_blocks.push(Buffer::from_vec(data_buf));
+                current_view_idx += gc_copy_group.total_len;
+            }
+            (views_buf, data_blocks)
+        };
+
+        // 5) Wrap up views buffer
+        let views_scalar = ScalarBuffer::from(views_buf);
+
+        // SAFETY: one view was emitted per element and every non-inline view points into `data_blocks`
+        unsafe { GenericByteViewArray::new_unchecked(views_scalar, data_blocks, nulls) }
+    }
+
+    /// Returns the `i`-th view rewritten for a compacted array, copying any out-of-line bytes.
+    ///
+    /// # Safety
+    ///
+    /// - `i < self.len()`.
+    /// - Every non-inline element in `self.views()` must refer to a valid slice
+    ///   inside one of `self.buffers`.
+    /// - `data_buf.len()` is cast to `u32` for the new offset without a check; keep it in range.
+    /// - Inline views are returned unchanged; for any other view the result has its
+    ///   `buffer_index` set to `buffer_idx` and its `offset` pointing at the bytes
+    ///   just appended to the end of `data_buf`.
+    #[inline(always)]
+    unsafe fn copy_view_to_buffer(
+        &self,
+        i: usize,
+        buffer_idx: i32,
+        data_buf: &mut Vec<u8>,
+    ) -> u128 {
+        // SAFETY: `i < self.len()` ensures this is in-bounds.
+        let raw_view = unsafe { *self.views().get_unchecked(i) };
+        let mut bv = ByteView::from(raw_view);
+
+        // Inline views carry their own data, so they are returned untouched.
+        if bv.length <= MAX_INLINE_VIEW_LEN {
+            raw_view
+        } else {
+            // SAFETY: `bv.buffer_index` and `bv.offset..bv.offset+bv.length`
+            // must both lie within valid ranges for `self.buffers`.
+            let buffer = unsafe { self.buffers.get_unchecked(bv.buffer_index as usize) };
+            let start = bv.offset as usize;
+            let end = start + bv.length as usize;
+            let slice = unsafe { buffer.get_unchecked(start..end) };
+
+            // Append the out-of-line bytes to `data_buf`, remembering where they start.
+            let new_offset = data_buf.len() as u32;
+            data_buf.extend_from_slice(slice);
+
+            bv.buffer_index = buffer_idx as u32;
+            bv.offset = new_offset;
+            bv.into()
+        }
     }
 
     /// Returns the total number of bytes used by all non inlined views in all
@@ -495,9 +686,9 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         self.views()
             .iter()
             .map(|v| {
-                let len = (*v as u32) as usize;
-                if len > 12 {
-                    len
+                let len = *v as u32;
+                if len > MAX_INLINE_VIEW_LEN {
+                    len as usize
                 } else {
                     0
                 }
@@ -511,11 +702,11 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     /// It takes a bit of patience to understand why we don't just compare two &[u8] directly.
     ///
     /// ByteView types give us the following two advantages, and we need to be careful not to lose them:
-    /// (1) For string/byte smaller than 12 bytes, the entire data is inlined in the view.
+    /// (1) For string/byte smaller than [`MAX_INLINE_VIEW_LEN`] bytes, the entire data is inlined in the view.
     ///     Meaning that reading one array element requires only one memory access
     ///     (two memory access required for StringArray, one for offset buffer, the other for value buffer).
     ///
-    /// (2) For string/byte larger than 12 bytes, we can still be faster than (for certain operations) StringArray/ByteArray,
+    /// (2) For string/byte larger than [`MAX_INLINE_VIEW_LEN`] bytes, we can still be faster than (for certain operations) StringArray/ByteArray,
     ///     thanks to the inlined 4 bytes.
     ///     Consider equality check:
     ///     If the first four bytes of the two strings are different, we can return false immediately (with just one memory access).
@@ -525,8 +716,8 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
     ///   e.g., if the inlined 4 bytes are different, we can directly return unequal without looking at the full string.
     ///
     /// # Order check flow
-    /// (1) if both string are smaller than 12 bytes, we can directly compare the data inlined to the view.
-    /// (2) if any of the string is larger than 12 bytes, we need to compare the full string.
+    /// (1) if both string are smaller than [`MAX_INLINE_VIEW_LEN`] bytes, we can directly compare the data inlined to the view.
+    /// (2) if any of the string is larger than [`MAX_INLINE_VIEW_LEN`] bytes, we need to compare the full string.
     ///     (2.1) if the inlined 4 bytes are different, we can return the result immediately.
     ///     (2.2) o.w., we need to compare the full string.
     ///
@@ -537,25 +728,30 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
         left_idx: usize,
         right: &GenericByteViewArray<T>,
         right_idx: usize,
-    ) -> std::cmp::Ordering {
-        let l_view = left.views().get_unchecked(left_idx);
-        let l_len = *l_view as u32;
+    ) -> Ordering {
+        let l_view = unsafe { left.views().get_unchecked(left_idx) };
+        let l_byte_view = ByteView::from(*l_view);
+
+        let r_view = unsafe { right.views().get_unchecked(right_idx) };
+        let r_byte_view = ByteView::from(*r_view);
 
-        let r_view = right.views().get_unchecked(right_idx);
-        let r_len = *r_view as u32;
+        let l_len = l_byte_view.length;
+        let r_len = r_byte_view.length;
 
         if l_len <= 12 && r_len <= 12 {
-            let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-            let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-            return l_data.cmp(r_data);
+            return Self::inline_key_fast(*l_view).cmp(&Self::inline_key_fast(*r_view));
         }
 
         // one of the string is larger than 12 bytes,
         // we then try to compare the inlined data first
-        let l_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 4) };
-        let r_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 4) };
-        if r_inlined_data != l_inlined_data {
-            return l_inlined_data.cmp(r_inlined_data);
+
+        // Note: `ByteView` nominally describes only strings longer than 12 bytes,
+        // but it can still be used to read the inlined prefix of shorter strings:
+        // the prefix always occupies the 4 bytes right after the length, for short and long strings.
+        let l_inlined_be = l_byte_view.prefix.swap_bytes();
+        let r_inlined_be = r_byte_view.prefix.swap_bytes();
+        if l_inlined_be != r_inlined_be {
+            return l_inlined_be.cmp(&r_inlined_be);
         }
 
         // unfortunately, we need to compare the full data
@@ -564,6 +760,119 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
 
         l_full_data.cmp(r_full_data)
     }
+
+    /// Builds a 128-bit composite key for an inline value:
+    ///
+    /// - High 96 bits: the inline data in big-endian byte order (for correct lexicographical sorting).
+    /// - Low  32 bits: the length in big-endian byte order, acting as a tiebreaker so shorter strings
+    ///   (or those with fewer meaningful bytes) always numerically sort before longer ones.
+    ///
+    /// This function extracts the length and the 12-byte inline string data from the raw
+    /// little-endian `u128` representation, converts them to big-endian ordering, and packs them
+    /// into a single `u128` value suitable for fast, branchless comparisons.
+    ///
+    /// # Why include length?
+    ///
+    /// A pure 96-bit content comparison can't distinguish between two values whose inline bytes
+    /// compare equal, either because one is a true prefix of the other or because zero-padding
+    /// hides extra bytes. By tucking the 32-bit length into the lower bits, a single `u128` compare
+    /// handles both content and length in one go.
+    ///
+    /// Example: comparing "bar" (3 bytes) vs "bar\0" (4 bytes)
+    ///
+    /// | String     | Bytes 0–4 (length LE) | Bytes 4–16 (data + padding)    |
+    /// |------------|-----------------------|---------------------------------|
+    /// | `"bar"`   | `03 00 00 00`         | `62 61 72` + 9 × `00`           |
+    /// | `"bar\0"`| `04 00 00 00`         | `62 61 72 00` + 8 × `00`        |
+    ///
+    /// Both inline parts become `62 61 72 00…00`, so they tie on content. The length field
+    /// then differentiates:
+    ///
+    /// ```text
+    /// key("bar")   = 0x6261_7200_0000_0000_0000_0000_0000_0003
+    /// key("bar\0") = 0x6261_7200_0000_0000_0000_0000_0000_0004
+    /// ⇒ key("bar") < key("bar\0")
+    /// ```
+    /// # Inlining and Endianness
+    ///
+    /// - We start by calling `.to_le_bytes()` on the `raw` `u128`: it lists the bytes least-significant
+    ///   first on every platform, so bytes `0..4` hold the length and bytes `4..16` the inline data.
+    /// - We extract the low 32 bits numerically (`raw as u32`); this step is endianness-free.
+    /// - We copy the 12 bytes of inline data (original order) into `buf[0..12]`.
+    /// - We serialize `length` as big-endian into `buf[12..16]`.
+    /// - Finally, `u128::from_be_bytes(buf)` treats `buf[0]` as the most significant byte
+    ///   and `buf[15]` as the least significant, producing a `u128` whose integer value
+    ///   directly encodes "inline data then length" in big-endian form.
+    ///
+    /// This ensures that a simple `u128` comparison is equivalent to the desired
+    /// lexicographical comparison of the inline bytes followed by length.
+    #[inline(always)]
+    pub fn inline_key_fast(raw: u128) -> u128 {
+        // 1. Decompose `raw` into little-endian bytes:
+        //    - raw_bytes[0..4]  = length in LE
+        //    - raw_bytes[4..16] = inline string data
+        let raw_bytes = raw.to_le_bytes();
+
+        // 2. Numerically truncate to get the low 32-bit length (endianness-free).
+        let length = raw as u32;
+
+        // 3. Build a 16-byte buffer in big-endian order:
+        //    - buf[0..12]  = inline string bytes (in original order)
+        //    - buf[12..16] = length.to_be_bytes() (BE)
+        let mut buf = [0u8; 16];
+        buf[0..12].copy_from_slice(&raw_bytes[4..16]); // inline data
+
+        // Why convert length to big-endian for comparison?
+        //
+        // Rust (on most platforms) stores integers in little-endian format,
+        // meaning the least significant byte is at the lowest memory address.
+        // For example, a u32 value like 0x22345677 is stored in memory as:
+        //
+        //   [0x77, 0x56, 0x34, 0x22]  // little-endian layout
+        //    ^     ^     ^     ^
+        //  LSB   ↑↑↑           MSB
+        //
+        // This layout is efficient for arithmetic but *not* suitable for
+        // lexicographic (dictionary-style) comparison of byte arrays.
+        //
+        // To compare values by byte order (e.g., for sorted keys or binary trees)
+        // we must convert them to **big-endian**, where:
+        //
+        //   - The most significant byte (MSB) comes first (index 0)
+        //   - The least significant byte (LSB) comes last (index N-1)
+        //
+        // In big-endian, the same u32 = 0x22345677 would be represented as:
+        //
+        //   [0x22, 0x34, 0x56, 0x77]
+        //
+        // This ordering aligns with natural string/byte sorting, so calling
+        // `.to_be_bytes()` allows us to construct
+        // keys where standard numeric comparison (e.g., `<`, `>`) behaves
+        // like lexicographic byte comparison.
+        buf[12..16].copy_from_slice(&length.to_be_bytes()); // length in BE
+
+        // 4. Deserialize the buffer as a big-endian u128:
+        //    buf[0] is MSB, buf[15] is LSB.
+        // Details:
+        // Note on endianness and layout:
+        //
+        // Although `buf[0]` is stored at the lowest memory address,
+        // calling `u128::from_be_bytes(buf)` interprets it as the **most significant byte (MSB)**,
+        // and `buf[15]` as the **least significant byte (LSB)**.
+        //
+        // This is the core principle of **big-endian decoding**:
+        //   - Byte at index 0 maps to bits 127..120 (highest)
+        //   - Byte at index 1 maps to bits 119..112
+        //   - ...
+        //   - Byte at index 15 maps to bits 7..0 (lowest)
+        //
+        // So even though memory layout goes from low to high (left to right),
+        // big-endian treats the **first byte** as highest in value.
+        //
+        // This guarantees that comparing two `u128` keys is equivalent to lexicographically
+        // comparing the original inline bytes, followed by length.
+        u128::from_be_bytes(buf)
+    }
 }
 
 impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
@@ -576,6 +885,8 @@ impl<T: ByteViewType + ?Sized> Debug for GenericByteViewArray<T> {
     }
 }
 
+impl<T: ByteViewType + ?Sized> super::private::Sealed for GenericByteViewArray<T> {}
+
 impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -607,8 +918,21 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
 
     fn shrink_to_fit(&mut self) {
         self.views.shrink_to_fit();
-        self.buffers.iter_mut().for_each(|b| b.shrink_to_fit());
-        self.buffers.shrink_to_fit();
+
+        // The goal of `shrink_to_fit` is to minimize the space used by any of
+        // its allocations. The use of `Arc::get_mut` over `Arc::make_mut` is
+        // because if the reference count is greater than 1, `Arc::make_mut`
+        // will first clone its contents. So, any large allocations will first
+        // be cloned before being shrunk, leaving the pre-cloned allocations
+        // intact, before adding the extra (used) space of the new clones.
+        if let Some(buffers) = Arc::get_mut(&mut self.buffers) {
+            buffers.iter_mut().for_each(|b| b.shrink_to_fit());
+        }
+
+        // Because this is a best-effort function, no attempt is made to
+        // shrink `self.buffers` itself: its type (`Arc<[Buffer]>`) does not
+        // expose a `shrink_to_fit` method.
+
         if let Some(nulls) = &mut self.nulls {
             nulls.shrink_to_fit();
         }
@@ -649,7 +973,7 @@ impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray<T>
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        GenericByteViewArray::value_unchecked(self, index)
+        unsafe { GenericByteViewArray::value_unchecked(self, index) }
     }
 }
 
@@ -663,15 +987,16 @@ impl<'a, T: ByteViewType + ?Sized> IntoIterator for &'a GenericByteViewArray<T>
 }
 
 impl<T: ByteViewType + ?Sized> From<ArrayData> for GenericByteViewArray<T> {
-    fn from(value: ArrayData) -> Self {
-        let views = value.buffers()[0].clone();
-        let views = ScalarBuffer::new(views, value.offset(), value.len());
-        let buffers = value.buffers()[1..].to_vec();
+    fn from(data: ArrayData) -> Self {
+        let (_data_type, len, nulls, offset, mut buffers, _child_data) = data.into_parts();
+        let views = buffers.remove(0); // need to maintain order of remaining buffers
+        let buffers = Arc::from(buffers);
+        let views = ScalarBuffer::new(views, offset, len);
         Self {
             data_type: T::DATA_TYPE,
             views,
             buffers,
-            nulls: value.nulls().cloned(),
+            nulls,
             phantom: Default::default(),
         }
     }
@@ -734,12 +1059,15 @@ where
 }
 
 impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
-    fn from(mut array: GenericByteViewArray<T>) -> Self {
+    fn from(array: GenericByteViewArray<T>) -> Self {
         let len = array.len();
-        array.buffers.insert(0, array.views.into_inner());
+
+        let mut buffers = array.buffers.to_vec();
+        buffers.insert(0, array.views.into_inner());
+
         let builder = ArrayDataBuilder::new(T::DATA_TYPE)
             .len(len)
-            .buffers(array.buffers)
+            .buffers(buffers)
             .nulls(array.nulls);
 
         unsafe { builder.build_unchecked() }
@@ -795,7 +1123,7 @@ impl BinaryViewArray {
     /// # Safety
     /// Caller is responsible for ensuring that items in array are utf8 data.
     pub unsafe fn to_string_view_unchecked(self) -> StringViewArray {
-        StringViewArray::new_unchecked(self.views, self.buffers, self.nulls)
+        unsafe { StringViewArray::new_unchecked(self.views, self.buffers, self.nulls) }
     }
 }
 
@@ -872,9 +1200,16 @@ impl From<Vec<Option<String>>> for StringViewArray {
 #[cfg(test)]
 mod tests {
     use crate::builder::{BinaryViewBuilder, StringViewBuilder};
-    use crate::{Array, BinaryViewArray, StringViewArray};
-    use arrow_buffer::{Buffer, ScalarBuffer};
-    use arrow_data::ByteView;
+    use crate::types::BinaryViewType;
+    use crate::{
+        Array, BinaryViewArray, GenericBinaryArray, GenericByteViewArray, StringViewArray,
+    };
+    use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
+    use arrow_data::{ByteView, MAX_INLINE_VIEW_LEN};
+    use rand::prelude::StdRng;
+    use rand::{Rng, SeedableRng};
+
+    const BLOCK_SIZE: u32 = 8;
 
     #[test]
     fn try_new_string() {
@@ -960,7 +1295,10 @@ mod tests {
             builder.finish()
         };
         assert_eq!(array.value(0), "large payload over 12 bytes");
-        assert_eq!(array.value(1), "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created");
+        assert_eq!(
+            array.value(1),
+            "another large payload over 12 bytes that double than the first one, so that we can trigger the in_progress in builder re-created"
+        );
         assert_eq!(2, array.buffers.len());
     }
 
@@ -1064,6 +1402,180 @@ mod tests {
         check_gc(&array.slice(3, 1));
     }
 
+    /// 1) Empty array: `gc` of a zero-length array must stay empty and hold no data buffers
+    #[test]
+    fn test_gc_empty_array() {
+        let array = StringViewBuilder::new()
+            .with_fixed_block_size(BLOCK_SIZE)
+            .finish();
+        let gced = array.gc();
+        // the gc'ed array must still report zero length and zero nulls
+        assert_eq!(gced.len(), 0);
+        assert_eq!(gced.null_count(), 0);
+        // nothing was appended, so no data buffer may show up after gc
+        assert!(
+            gced.data_buffers().is_empty(),
+            "Expected no data buffers for empty array"
+        );
+    }
+
+    /// 2) All inline values (<= MAX_INLINE_VIEW_LEN): gc must keep no data buffers, same values
+    #[test]
+    fn test_gc_all_inline() {
+        let mut builder = StringViewBuilder::new().with_fixed_block_size(BLOCK_SIZE);
+        // append 100 short strings, each exactly MAX_INLINE_VIEW_LEN bytes long
+        for _ in 0..100 {
+            let s = "A".repeat(MAX_INLINE_VIEW_LEN as usize);
+            builder.append_option(Some(&s));
+        }
+        let array = builder.finish();
+        let gced = array.gc();
+        // Since all values fit inline in their views, gc must return no data buffers
+        assert_eq!(
+            gced.data_buffers().len(),
+            0,
+            "Should have no data buffers for inline values"
+        );
+        assert_eq!(gced.len(), 100);
+        // verify element-wise equality between the original and the gc'ed array
+        array.iter().zip(gced.iter()).for_each(|(orig, got)| {
+            assert_eq!(orig, got, "Inline value mismatch after gc");
+        });
+    }
+
+    /// 3) All large values (> MAX_INLINE_VIEW_LEN): gc must keep them in its new data buffers
+    #[test]
+    fn test_gc_all_large() {
+        let mut builder = StringViewBuilder::new().with_fixed_block_size(BLOCK_SIZE);
+        let large_str = "X".repeat(MAX_INLINE_VIEW_LEN as usize + 5);
+        // append the same large string 50 times
+        for _ in 0..50 {
+            builder.append_option(Some(&large_str));
+        }
+        let array = builder.finish();
+        let gced = array.gc();
+        // The gc'ed array must own at least one data buffer (one or more blocks)
+        assert!(
+            !gced.data_buffers().is_empty(),
+            "Expected data buffers for large values"
+        );
+        assert_eq!(gced.len(), 50);
+        // verify that every large string reads back unchanged after gc
+        array.iter().zip(gced.iter()).for_each(|(orig, got)| {
+            assert_eq!(orig, got, "Large view mismatch after gc");
+        });
+    }
+
+    /// 4) All null elements: gc must keep length and null count and produce no data buffers
+    #[test]
+    fn test_gc_all_nulls() {
+        let mut builder = StringViewBuilder::new().with_fixed_block_size(BLOCK_SIZE);
+        for _ in 0..20 {
+            builder.append_null();
+        }
+        let array = builder.finish();
+        let gced = array.gc();
+        // length and null count must both still be 20 after gc
+        assert_eq!(gced.len(), 20);
+        assert_eq!(gced.null_count(), 20);
+        // a null-only array carries no values, so gc must leave the data buffers empty
+        assert!(
+            gced.data_buffers().is_empty(),
+            "No data should be stored for nulls"
+        );
+    }
+
+    /// 5) Random (fixed-seed) mix of inline, large, and null values, checked through slices
+    #[test]
+    fn test_gc_random_mixed_and_slices() {
+        let mut rng = StdRng::seed_from_u64(42);
+        let mut builder = StringViewBuilder::new().with_fixed_block_size(BLOCK_SIZE);
+        // Mirror every appended element as Option<String> for later comparison
+        let mut original: Vec<Option<String>> = Vec::new();
+
+        for _ in 0..200 {
+            if rng.random_bool(0.1) {
+                // roughly 10% nulls
+                builder.append_null();
+                original.push(None);
+            } else {
+                // random length in [0, 2 * MAX_INLINE_VIEW_LEN): both inline and large values
+                let len = rng.random_range(0..(MAX_INLINE_VIEW_LEN * 2));
+                let s: String = "A".repeat(len as usize);
+                builder.append_option(Some(&s));
+                original.push(Some(s));
+            }
+        }
+
+        let array = builder.finish();
+        // Run gc on several (offset, length) slices to ensure offset logic is correct
+        for (offset, slice_len) in &[(0, 50), (10, 100), (150, 30)] {
+            let sliced = array.slice(*offset, *slice_len);
+            let gced = sliced.gc();
+            // Build the expected Option<&str> values for the same range of `original`
+            let expected: Vec<Option<&str>> = original[*offset..(*offset + *slice_len)]
+                .iter()
+                .map(|opt| opt.as_deref())
+                .collect();
+
+            assert_eq!(gced.len(), *slice_len, "Slice length mismatch");
+            // Compare the gc'ed slice against the expectation element by element
+            gced.iter().zip(expected.iter()).for_each(|(got, expect)| {
+                assert_eq!(got, *expect, "Value mismatch in mixed slice after gc");
+            });
+        }
+    }
+
+    #[test]
+    #[cfg_attr(miri, ignore)] // Takes too long
+    fn test_gc_huge_array() {
+        // Construct 36 views of 128 MiB each (4.5 GiB referenced in total), i.e. > 4 GiB
+        let block_len: usize = 128 * 1024 * 1024; // 128 MiB per view
+        let num_views: usize = 36;
+
+        // Create two 128 MiB data blocks, each filled with its own constant byte
+        let buffer = Buffer::from_vec(vec![0xAB; block_len]);
+        let buffer2 = Buffer::from_vec(vec![0xFF; block_len]);
+
+        // Append each block in turn, followed by `num_views / 2` views spanning the whole block
+        let mut builder = BinaryViewBuilder::new();
+        let block_id = builder.append_block(buffer);
+        for _ in 0..num_views / 2 {
+            builder
+                .try_append_view(block_id, 0, block_len as u32)
+                .expect("append view into 128MiB block");
+        }
+        let block_id2 = builder.append_block(buffer2);
+        for _ in 0..num_views / 2 {
+            builder
+                .try_append_view(block_id2, 0, block_len as u32)
+                .expect("append view into 128MiB block");
+        }
+
+        let array = builder.finish();
+        let total = array.total_buffer_bytes_used();
+        assert!(
+            total > u32::MAX as usize,
+            "Expected total non-inline bytes to exceed 4 GiB, got {}",
+            total
+        );
+
+        // Run gc and verify length, null count and the number of resulting data buffers
+        let gced = array.gc();
+        assert_eq!(gced.len(), num_views, "Length mismatch after gc");
+        assert_eq!(gced.null_count(), 0, "Null count mismatch after gc");
+        assert_ne!(
+            gced.data_buffers().len(),
+            1,
+            "gc with huge buffer should not consolidate data into a single buffer"
+        );
+
+        // Element-wise equality check across the entire array
+        array.iter().zip(gced.iter()).for_each(|(orig, got)| {
+            assert_eq!(orig, got, "Value mismatch after gc on huge array");
+        });
+    }
+
     #[test]
     fn test_eq() {
         let test_data = [
@@ -1088,4 +1600,218 @@ mod tests {
         assert_eq!(array2, array2.clone());
         assert_eq!(array1, array2);
     }
+
+    /// Integration tests for `inline_key_fast` covering:
+    ///
+    /// 1. Monotonic ordering across increasing lengths and lexical variations.
+    /// 2. Cross-check against `GenericBinaryArray` comparison to ensure semantic equivalence.
+    ///
+    /// This also includes a specific test for the “bar” vs. “bar\0” case, demonstrating why
+    /// the length field is required even when all inline bytes fit in 12 bytes.
+    ///
+    /// The test includes strings that verify correct byte order (prevent reversal bugs),
+    /// and length-based tie-breaking in the composite key.
+    ///
+    /// The test confirms that `inline_key_fast` produces keys which sort consistently
+    /// with the expected lexicographical order of the raw byte arrays.
+    #[test]
+    fn test_inline_key_fast_various_lengths_and_lexical() {
+        /// Helper to create a raw u128 value representing an inline ByteView:
+        /// - `length`: number of meaningful bytes (must be ≤ 12)
+        /// - `data`: the actual inline data bytes (must be exactly `length` bytes long)
+        ///
+        /// The first 4 bytes encode length in little-endian,
+        /// the following 12 bytes hold the inline data followed by zero padding.
+        fn make_raw_inline(length: u32, data: &[u8]) -> u128 {
+            assert!(length as usize <= 12, "Inline length must be ≤ 12");
+            assert!(
+                data.len() == length as usize,
+                "Data length must match `length`"
+            );
+
+            let mut raw_bytes = [0u8; 16];
+            raw_bytes[0..4].copy_from_slice(&length.to_le_bytes()); // length stored little-endian
+            raw_bytes[4..(4 + data.len())].copy_from_slice(data); // inline data
+            u128::from_le_bytes(raw_bytes)
+        }
+
+        // Test inputs, listed in strictly increasing lexicographic order:
+        // various lengths, plus special cases for byte order and length tie-breaking
+        let test_inputs: Vec<&[u8]> = vec![
+            b"a",
+            b"aa",
+            b"aaa",
+            b"aab",
+            b"abcd",
+            b"abcde",
+            b"abcdef",
+            b"abcdefg",
+            b"abcdefgh",
+            b"abcdefghi",
+            b"abcdefghij",
+            b"abcdefghijk",
+            b"abcdefghijkl",
+            // Tests for byte-order reversal bug:
+            // Without the fix, "backend one" would compare as "eno dnekcab",
+            // causing incorrect sort order relative to "backend two".
+            b"backend one",
+            b"backend two",
+            // Tests length-tiebreaker logic:
+            // "bar" (3 bytes) and "bar\0" (4 bytes) have identical inline data,
+            // so only the length differentiates their ordering.
+            b"bar",
+            b"bar\0",
+            // Additional lexical and length tie-breaking cases with same prefix, in correct lex order:
+            b"than12Byt",
+            b"than12Bytes",
+            b"than12Bytes\0",
+            b"than12Bytesx",
+            b"than12Bytex",
+            b"than12Bytez",
+            // Additional lexical tests
+            b"xyy",
+            b"xyz",
+            b"xza",
+        ];
+
+        // Create a GenericBinaryArray for cross-comparison of lex order
+        let array: GenericBinaryArray<i32> =
+            GenericBinaryArray::from(test_inputs.iter().map(|s| Some(*s)).collect::<Vec<_>>());
+
+        for i in 0..array.len() - 1 {
+            let v1 = array.value(i);
+            let v2 = array.value(i + 1);
+
+            // Assert that each adjacent pair of inputs is in strictly increasing byte order
+            assert!(v1 < v2, "Array compare failed: {v1:?} !< {v2:?}");
+
+            // Assert the keys produced by inline_key_fast reflect the same ordering
+            let key1 = GenericByteViewArray::<BinaryViewType>::inline_key_fast(make_raw_inline(
+                v1.len() as u32,
+                v1,
+            ));
+            let key2 = GenericByteViewArray::<BinaryViewType>::inline_key_fast(make_raw_inline(
+                v2.len() as u32,
+                v2,
+            ));
+
+            assert!(
+                key1 < key2,
+                "Key compare failed: key({v1:?})=0x{key1:032x} !< key({v2:?})=0x{key2:032x}",
+            );
+        }
+    }
+
+    #[test]
+    fn empty_array_should_return_empty_lengths_iterator() {
+        let empty = GenericByteViewArray::<BinaryViewType>::from(Vec::<&[u8]>::new());
+
+        let mut lengths_iter = empty.lengths();
+        assert_eq!(lengths_iter.len(), 0); // the iterator reports a length of zero
+        assert_eq!(lengths_iter.next(), None); // and it yields no item at all
+    }
+
+    #[test]
+    fn array_lengths_should_return_correct_length_for_both_inlined_and_non_inlined() {
+        let cases = GenericByteViewArray::<BinaryViewType>::from(vec![
+            // Longer than 12 bytes, so not inlined
+            b"Supercalifragilisticexpialidocious" as &[u8],
+            // Shorter than 12 bytes, so inlined
+            b"Hello",
+            // Empty value (length 0)
+            b"",
+            // Exactly 12 bytes
+            b"abcdefghijkl",
+        ]);
+
+        let mut lengths_iter = cases.lengths();
+
+        assert_eq!(lengths_iter.len(), cases.len());
+
+        let cases_iter = cases.iter();
+
+        for case in cases_iter {
+            let case_value = case.unwrap(); // no nulls were inserted above
+            let length = lengths_iter.next().expect("Should have a length");
+
+            assert_eq!(case_value.len(), length as usize);
+        }
+
+        assert_eq!(lengths_iter.next(), None, "Should not have more lengths");
+    }
+
+    #[test]
+    fn array_lengths_should_return_the_underlying_length_for_null_values() {
+        let cases = GenericByteViewArray::<BinaryViewType>::from(vec![
+            // Longer than 12 bytes, so not inlined
+            b"Supercalifragilisticexpialidocious" as &[u8],
+            // Shorter than 12 bytes, so inlined
+            b"Hello",
+            // Empty value (length 0)
+            b"",
+            // Exactly 12 bytes
+            b"abcdefghijkl",
+        ]);
+
+        let (views, buffer, _) = cases.clone().into_parts();
+
+        // Reuse the same views and buffers, but mark every slot as null
+        let cases_with_all_nulls = GenericByteViewArray::<BinaryViewType>::new(
+            views,
+            buffer,
+            Some(NullBuffer::new_null(cases.len())),
+        );
+
+        let lengths_iter = cases.lengths();
+        let mut all_nulls_lengths_iter = cases_with_all_nulls.lengths();
+
+        assert_eq!(lengths_iter.len(), all_nulls_lengths_iter.len());
+
+        for expected_length in lengths_iter {
+            let actual_length = all_nulls_lengths_iter.next().expect("Should have a length");
+
+            assert_eq!(expected_length, actual_length); // the null mask must not alter lengths
+        }
+
+        assert_eq!(
+            all_nulls_lengths_iter.next(),
+            None,
+            "Should not have more lengths"
+        );
+    }
+
+    #[test]
+    fn array_lengths_on_sliced_should_only_return_lengths_for_sliced_data() {
+        let array = GenericByteViewArray::<BinaryViewType>::from(vec![
+            b"aaaaaaaaaaaaaaaaaaaaaaaaaaa" as &[u8],
+            b"Hello",
+            b"something great",
+            b"is",
+            b"coming soon!",
+            b"when you find what it is",
+            b"let me know",
+            b"cause",
+            b"I",
+            b"have no idea",
+            b"what it",
+            b"is",
+        ]);
+
+        let sliced_array = array.slice(2, array.len() - 3); // keeps indices 2..=10 of 12
+
+        let mut lengths_iter = sliced_array.lengths();
+
+        assert_eq!(lengths_iter.len(), sliced_array.len());
+
+        let values_iter = sliced_array.iter();
+
+        for value in values_iter {
+            let value = value.unwrap(); // no nulls were inserted above
+            let length = lengths_iter.next().expect("Should have a length");
+
+            assert_eq!(value.len(), length as usize);
+        }
+
+        assert_eq!(lengths_iter.next(), None, "Should not have more lengths");
+    }
 }
diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs
index acbdcb8b60fa..be7703b13c5c 100644
--- a/arrow-array/src/array/dictionary_array.rs
+++ b/arrow-array/src/array/dictionary_array.rs
@@ -20,8 +20,8 @@ use crate::cast::AsArray;
 use crate::iterator::ArrayIter;
 use crate::types::*;
 use crate::{
-    make_array, Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar,
-    StringArray,
+    Array, ArrayAccessor, ArrayRef, ArrowNativeTypeOp, PrimitiveArray, Scalar, StringArray,
+    make_array,
 };
 use arrow_buffer::bit_util::set_bit;
 use arrow_buffer::buffer::NullBuffer;
@@ -697,6 +697,8 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str> for DictionaryArray<T>
     }
 }
 
+impl<T: ArrowDictionaryKeyType> super::private::Sealed for DictionaryArray<T> {}
+
 impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -856,6 +858,8 @@ impl<'a, K: ArrowDictionaryKeyType, V> TypedDictionaryArray<'a, K, V> {
     }
 }
 
+impl<K: ArrowDictionaryKeyType, V: Sync> super::private::Sealed for TypedDictionaryArray<'_, K, V> {}
+
 impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V> {
     fn as_any(&self) -> &dyn Any {
         self.dictionary
@@ -947,13 +951,13 @@ where
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        let val = self.dictionary.keys.value_unchecked(index);
+        let val = unsafe { self.dictionary.keys.value_unchecked(index) };
         let value_idx = val.as_usize();
 
         // As dictionary keys are only verified for non-null indexes
         // we must check the value is within bounds
         match value_idx < self.values.len() {
-            true => self.values.value_unchecked(value_idx),
+            true => unsafe { self.values.value_unchecked(value_idx) },
             false => Default::default(),
         }
     }
@@ -1051,7 +1055,7 @@ impl<K: ArrowDictionaryKeyType> AnyDictionaryArray for DictionaryArray<K> {
 mod tests {
     use super::*;
     use crate::cast::as_dictionary_array;
-    use crate::{Int16Array, Int32Array, Int8Array, RunArray};
+    use crate::{Int8Array, Int16Array, Int32Array, RunArray};
     use arrow_buffer::{Buffer, ToByteSlice};
 
     #[test]
diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs
index 576b8012491b..b94e168cfe7c 100644
--- a/arrow-array/src/array/fixed_size_binary_array.rs
+++ b/arrow-array/src/array/fixed_size_binary_array.rs
@@ -19,7 +19,7 @@ use crate::array::print_long_array;
 use crate::iterator::FixedSizeBinaryIter;
 use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar};
 use arrow_buffer::buffer::NullBuffer;
-use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, bit_util};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -76,10 +76,14 @@ impl FixedSizeBinaryArray {
 
     /// Create a new [`FixedSizeBinaryArray`] from the provided parts, returning an error on failure
     ///
+    /// Creating an array with `size == 0` will try to get the length from the null buffer. If
+    /// no null buffer is provided, the resulting array will have length zero.
+    ///
     /// # Errors
     ///
     /// * `size < 0`
     /// * `values.len() / size != nulls.len()`
+    /// * `size == 0 && values.len() != 0`
     pub fn try_new(
         size: i32,
         values: Buffer,
@@ -87,10 +91,21 @@ impl FixedSizeBinaryArray {
     ) -> Result<Self, ArrowError> {
         let data_type = DataType::FixedSizeBinary(size);
         let s = size.to_usize().ok_or_else(|| {
-            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
+            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}"))
         })?;
 
-        let len = values.len() / s;
+        let len = if s == 0 {
+            if !values.is_empty() {
+                return Err(ArrowError::InvalidArgumentError(
+                    "Buffer cannot have non-zero length if the item size is zero".to_owned(),
+                ));
+            }
+
+            // If the item size is zero, try to determine the length from the null buffer
+            nulls.as_ref().map(|n| n.len()).unwrap_or(0)
+        } else {
+            values.len() / s
+        };
         if let Some(n) = nulls.as_ref() {
             if n.len() != len {
                 return Err(ArrowError::InvalidArgumentError(format!(
@@ -119,10 +134,11 @@ impl FixedSizeBinaryArray {
     /// * `size < 0`
     /// * `size * len` would overflow `usize`
     pub fn new_null(size: i32, len: usize) -> Self {
-        let capacity = size.to_usize().unwrap().checked_mul(len).unwrap();
+        const BITS_IN_A_BYTE: usize = 8;
+        let capacity_in_bytes = size.to_usize().unwrap().checked_mul(len).unwrap();
         Self {
             data_type: DataType::FixedSizeBinary(size),
-            value_data: MutableBuffer::new(capacity).into(),
+            value_data: MutableBuffer::new_null(capacity_in_bytes * BITS_IN_A_BYTE).into(),
             nulls: Some(NullBuffer::new_null(len)),
             value_length: size,
             len,
@@ -135,6 +151,10 @@ impl FixedSizeBinaryArray {
     }
 
     /// Returns the element at index `i` as a byte slice.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds.
     pub fn value(&self, i: usize) -> &[u8] {
@@ -155,15 +175,23 @@ impl FixedSizeBinaryArray {
     }
 
     /// Returns the element at index `i` as a byte slice.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
-    /// Caller is responsible for ensuring that the index is within the bounds of the array
+    ///
+    /// Caller is responsible for ensuring that the index is within the bounds
+    /// of the array
     pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
         let offset = i + self.offset();
         let pos = self.value_offset_at(offset);
-        std::slice::from_raw_parts(
-            self.value_data.as_ptr().offset(pos as isize),
-            (self.value_offset_at(offset + 1) - pos) as usize,
-        )
+        unsafe {
+            std::slice::from_raw_parts(
+                self.value_data.as_ptr().offset(pos as isize),
+                (self.value_offset_at(offset + 1) - pos) as usize,
+            )
+        }
     }
 
     /// Returns the offset for the element at index `i`.
@@ -574,6 +602,8 @@ impl std::fmt::Debug for FixedSizeBinaryArray {
     }
 }
 
+impl super::private::Sealed for FixedSizeBinaryArray {}
+
 impl Array for FixedSizeBinaryArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -644,7 +674,7 @@ impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray {
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        FixedSizeBinaryArray::value_unchecked(self, index)
+        unsafe { FixedSizeBinaryArray::value_unchecked(self, index) }
     }
 }
 
@@ -659,11 +689,10 @@ impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::RecordBatch;
     use arrow_schema::{Field, Schema};
 
-    use super::*;
-
     #[test]
     fn test_fixed_size_binary_array() {
         let values: [u8; 15] = *b"hellotherearrow";
@@ -971,6 +1000,10 @@ mod tests {
         let nulls = NullBuffer::new_null(5);
         FixedSizeBinaryArray::new(2, buffer.clone(), Some(nulls));
 
+        let null_array = FixedSizeBinaryArray::new_null(4, 3);
+        assert_eq!(null_array.len(), 3);
+        assert_eq!(null_array.values().len(), 12);
+
         let a = FixedSizeBinaryArray::new(3, buffer.clone(), None);
         assert_eq!(a.len(), 3);
 
@@ -985,7 +1018,24 @@ mod tests {
         );
 
         let nulls = NullBuffer::new_null(3);
-        let err = FixedSizeBinaryArray::try_new(2, buffer, Some(nulls)).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3");
+        let err = FixedSizeBinaryArray::try_new(2, buffer.clone(), Some(nulls)).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of null buffer for FixedSizeBinaryArray, expected 5 got 3"
+        );
+
+        let zero_sized = FixedSizeBinaryArray::new(0, Buffer::default(), None);
+        assert_eq!(zero_sized.len(), 0);
+
+        let nulls = NullBuffer::new_null(3);
+        let zero_sized_with_nulls = FixedSizeBinaryArray::new(0, Buffer::default(), Some(nulls));
+        assert_eq!(zero_sized_with_nulls.len(), 3);
+
+        let zero_sized_with_non_empty_buffer_err =
+            FixedSizeBinaryArray::try_new(0, buffer, None).unwrap_err();
+        assert_eq!(
+            zero_sized_with_non_empty_buffer_err.to_string(),
+            "Invalid argument error: Buffer cannot have non-zero length if the item size is zero"
+        );
     }
 }
diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs
index 44be442c9f85..f53b042f873b 100644
--- a/arrow-array/src/array/fixed_size_list_array.rs
+++ b/arrow-array/src/array/fixed_size_list_array.rs
@@ -18,9 +18,9 @@
 use crate::array::print_long_array;
 use crate::builder::{FixedSizeListBuilder, PrimitiveBuilder};
 use crate::iterator::FixedSizeListIter;
-use crate::{make_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType};
-use arrow_buffer::buffer::NullBuffer;
+use crate::{Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, make_array};
 use arrow_buffer::ArrowNativeType;
+use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, FieldRef};
 use std::any::Any;
@@ -114,7 +114,7 @@ use std::sync::Arc;
 /// ```
 ///
 /// [`StringArray`]: crate::array::StringArray
-/// [fixed size arrays](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout)
+/// [fixed length lists]: https://arrow.apache.org/docs/format/Columnar.html#fixed-size-list-layout
 #[derive(Clone)]
 pub struct FixedSizeListArray {
     data_type: DataType, // Must be DataType::FixedSizeList(value_length)
@@ -125,7 +125,15 @@ pub struct FixedSizeListArray {
 }
 
 impl FixedSizeListArray {
-    /// Create a new [`FixedSizeListArray`] with `size` element size, panicking on failure
+    /// Create a new [`FixedSizeListArray`] with `size` element size, panicking on failure.
+    ///
+    /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable
+    /// `FixedSizeListArray`), this function will set the length of the array to 0.
+    ///
+    /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary
+    /// length, use the [`try_new_with_length()`] constructor.
+    ///
+    /// [`try_new_with_length()`]: Self::try_new_with_length
     ///
     /// # Panics
     ///
@@ -134,12 +142,20 @@ impl FixedSizeListArray {
         Self::try_new(field, size, values, nulls).unwrap()
     }
 
-    /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure
+    /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure.
+    ///
+    /// Note that if `size == 0` and `nulls` is `None` (a degenerate, non-nullable
+    /// `FixedSizeListArray`), this function will set the length of the array to 0.
+    ///
+    /// If you would like to have a degenerate, non-nullable `FixedSizeListArray` with arbitrary
+    /// length, use the [`try_new_with_length()`] constructor.
+    ///
+    /// [`try_new_with_length()`]: Self::try_new_with_length
     ///
     /// # Errors
     ///
     /// * `size < 0`
-    /// * `values.len() / size != nulls.len()`
+    /// * `values.len() != nulls.len() * size` if `nulls` is `Some`
     /// * `values.data_type() != field.data_type()`
     /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())`
     pub fn try_new(
@@ -149,25 +165,91 @@ impl FixedSizeListArray {
         nulls: Option<NullBuffer>,
     ) -> Result<Self, ArrowError> {
         let s = size.to_usize().ok_or_else(|| {
-            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
+            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}"))
         })?;
 
-        let len = match s {
-            0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
-            _ => {
-                let len = values.len() / s.max(1);
-                if let Some(n) = nulls.as_ref() {
-                    if n.len() != len {
-                        return Err(ArrowError::InvalidArgumentError(format!(
-                            "Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
-                            len,
-                            n.len(),
-                        )));
-                    }
+        if s == 0 {
+            // Note that for degenerate (`size == 0`) and non-nullable `FixedSizeList`s, we will set
+            // the length to 0 (`_or_default`).
+            let len = nulls.as_ref().map(|x| x.len()).unwrap_or_default();
+
+            Self::try_new_with_length(field, size, values, nulls, len)
+        } else {
+            if values.len() % s != 0 {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Incorrect length of values buffer for FixedSizeListArray, \
+                     expected a multiple of {s} got {}",
+                    values.len(),
+                )));
+            }
+
+            let len = values.len() / s;
+
+            // Check that the null buffer length is correct (if it exists).
+            if let Some(null_buffer) = &nulls {
+                if s * null_buffer.len() != values.len() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Incorrect length of values buffer for FixedSizeListArray, \
+                            expected {} got {}",
+                        s * null_buffer.len(),
+                        values.len(),
+                    )));
                 }
-                len
             }
-        };
+
+            Self::try_new_with_length(field, size, values, nulls, len)
+        }
+    }
+
+    /// Create a new [`FixedSizeListArray`] from the provided parts, returning an error on failure.
+    ///
+    /// This method exists to allow the construction of arbitrary length degenerate (`size == 0`)
+    /// and non-nullable `FixedSizeListArray`s. If you want a nullable `FixedSizeListArray`, then
+    /// you can use [`try_new()`] instead.
+    ///
+    /// [`try_new()`]: Self::try_new
+    ///
+    /// # Errors
+    ///
+    /// * `size < 0`
+    /// * `nulls.len() != len` if `nulls` is `Some`
+    /// * `values.len() != len * size`
+    /// * `values.data_type() != field.data_type()`
+    /// * `!field.is_nullable() && !nulls.expand(size).contains(values.logical_nulls())`
+    pub fn try_new_with_length(
+        field: FieldRef,
+        size: i32,
+        values: ArrayRef,
+        nulls: Option<NullBuffer>,
+        len: usize,
+    ) -> Result<Self, ArrowError> {
+        let s = size.to_usize().ok_or_else(|| {
+            ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}"))
+        })?;
+
+        if let Some(null_buffer) = &nulls {
+            if null_buffer.len() != len {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Invalid null buffer for FixedSizeListArray, expected {len} found {}",
+                    null_buffer.len()
+                )));
+            }
+        }
+
+        if s == 0 && !values.is_empty() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "An degenerate FixedSizeListArray should have no underlying values, found {} values",
+                values.len()
+            )));
+        }
+
+        if values.len() != len * s {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Incorrect length of values buffer for FixedSizeListArray, expected {} got {}",
+                len * s,
+                values.len(),
+            )));
+        }
 
         if field.data_type() != values.data_type() {
             return Err(ArrowError::InvalidArgumentError(format!(
@@ -243,6 +325,12 @@ impl FixedSizeListArray {
     }
 
     /// Returns ith value of this list array.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
+    /// # Panics
+    /// Panics if index `i` is out of bounds
     pub fn value(&self, i: usize) -> ArrayRef {
         self.values
             .slice(self.value_offset_at(i), self.value_length() as usize)
@@ -343,8 +431,10 @@ impl From<ArrayData> for FixedSizeListArray {
     fn from(data: ArrayData) -> Self {
         let value_length = match data.data_type() {
             DataType::FixedSizeList(_, len) => *len,
-            _ => {
-                panic!("FixedSizeListArray data should contain a FixedSizeList data type")
+            data_type => {
+                panic!(
+                    "FixedSizeListArray data should contain a FixedSizeList data type, got {data_type}"
+                )
             }
         };
 
@@ -372,6 +462,8 @@ impl From<FixedSizeListArray> for ArrayData {
     }
 }
 
+impl super::private::Sealed for FixedSizeListArray {}
+
 impl Array for FixedSizeListArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -474,12 +566,12 @@ impl ArrayAccessor for &FixedSizeListArray {
 
 #[cfg(test)]
 mod tests {
-    use arrow_buffer::{bit_util, BooleanBuffer, Buffer};
+    use arrow_buffer::{BooleanBuffer, Buffer, bit_util};
     use arrow_schema::Field;
 
     use crate::cast::AsArray;
     use crate::types::Int32Type;
-    use crate::{new_empty_array, Int32Array};
+    use crate::{Int32Array, new_empty_array};
 
     use super::*;
 
@@ -665,8 +757,23 @@ mod tests {
         let list = FixedSizeListArray::new(field.clone(), 2, values.clone(), Some(nulls));
         assert_eq!(list.len(), 3);
 
-        let list = FixedSizeListArray::new(field.clone(), 4, values.clone(), None);
-        assert_eq!(list.len(), 1);
+        let list = FixedSizeListArray::new(field.clone(), 3, values.clone(), None);
+        assert_eq!(list.len(), 2);
+
+        let err = FixedSizeListArray::try_new(field.clone(), 4, values.clone(), None).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, \
+             expected a multiple of 4 got 6",
+        );
+
+        let err =
+            FixedSizeListArray::try_new_with_length(field.clone(), 4, values.clone(), None, 1)
+                .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6"
+        );
 
         let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err();
         assert_eq!(
@@ -674,16 +781,19 @@ mod tests {
             "Invalid argument error: Size cannot be negative, got -1"
         );
 
-        let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None);
-        assert_eq!(list.len(), 0);
-
         let nulls = NullBuffer::new_null(2);
         let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Incorrect length of values buffer for FixedSizeListArray, expected 4 got 6"
+        );
 
         let field = Arc::new(Field::new_list_field(DataType::Int32, false));
         let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""
+        );
 
         // Valid as nulls in child masked by parent
         let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3));
@@ -691,15 +801,49 @@ mod tests {
 
         let field = Arc::new(Field::new_list_field(DataType::Int64, true));
         let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""
+        );
     }
 
     #[test]
-    fn empty_fixed_size_list() {
+    fn degenerate_fixed_size_list() {
         let field = Arc::new(Field::new_list_field(DataType::Int32, true));
         let nulls = NullBuffer::new_null(2);
         let values = new_empty_array(&DataType::Int32);
-        let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls));
+        let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), Some(nulls.clone()));
         assert_eq!(list.len(), 2);
+
+        // Test invalid null buffer length.
+        let err = FixedSizeListArray::try_new_with_length(
+            field.clone(),
+            0,
+            values.clone(),
+            Some(nulls),
+            5,
+        )
+        .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Invalid null buffer for FixedSizeListArray, expected 5 found 2"
+        );
+
+        // Test non-empty values for degenerate list.
+        let non_empty_values = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let err =
+            FixedSizeListArray::try_new_with_length(field.clone(), 0, non_empty_values, None, 3)
+                .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: An degenerate FixedSizeListArray should have no underlying values, found 3 values"
+        );
+    }
+
+    #[test]
+    fn test_fixed_size_list_new_null_len() {
+        // The array is expected to have 5 slots (the last argument), not 2.
+        let item = Field::new_list_field(DataType::Int32, true);
+        assert_eq!(FixedSizeListArray::new_null(Arc::new(item), 2, 5).len(), 5);
     }
 }
diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs
index 79627776569b..225be14ae365 100644
--- a/arrow-array/src/array/list_array.rs
+++ b/arrow-array/src/array/list_array.rs
@@ -18,13 +18,13 @@
 use crate::array::{get_offsets, make_array, print_long_array};
 use crate::builder::{GenericListBuilder, PrimitiveBuilder};
 use crate::{
-    iterator::GenericListArrayIter, new_empty_array, Array, ArrayAccessor, ArrayRef,
-    ArrowPrimitiveType, FixedSizeListArray,
+    Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
+    iterator::GenericListArrayIter, new_empty_array,
 };
 use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, FieldRef};
-use num::Integer;
+use num_integer::Integer;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -37,7 +37,9 @@ use std::sync::Arc;
 /// [`LargeBinaryArray`]: crate::array::LargeBinaryArray
 /// [`StringArray`]: crate::array::StringArray
 /// [`LargeStringArray`]: crate::array::LargeStringArray
-pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
+pub trait OffsetSizeTrait:
+    ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd
+{
     /// True for 64 bit offset size and false for 32 bit offset size
     const IS_LARGE: bool;
     /// Prefix for the offset size
@@ -108,21 +110,21 @@ impl OffsetSizeTrait for i64 {
 ///  ┌─────────────┐  ┌───────┐             │     ┌───┐   ┌───┐       ┌───┐ ┌───┐
 ///  │   [A,B,C]   │  │ (0,3) │                   │ 1 │   │ 0 │     │ │ 1 │ │ A │ │ 0  │
 ///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
-///  │      []     │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
+///  │ [] (empty)  │  │ (3,3) │                   │ 1 │   │ 3 │     │ │ 1 │ │ B │ │ 1  │
 ///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
-///  │    NULL     │  │ (3,4) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
+///  │    NULL     │  │ (3,3) │                   │ 0 │   │ 3 │     │ │ 1 │ │ C │ │ 2  │
 ///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
-///  │     [D]     │  │ (4,5) │                   │ 1 │   │ 4 │     │ │ ? │ │ ? │ │ 3  │
+///  │     [D]     │  │ (3,4) │                   │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
 ///  ├─────────────┤  ├───────┤             │     ├───┤   ├───┤       ├───┤ ├───┤
-///  │  [NULL, F]  │  │ (5,7) │                   │ 1 │   │ 5 │     │ │ 1 │ │ D │ │ 4  │
+///  │  [NULL, F]  │  │ (4,6) │                   │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 4  │
 ///  └─────────────┘  └───────┘             │     └───┘   ├───┤       ├───┤ ├───┤
-///                                                       │ 7 │     │ │ 0 │ │ ? │ │ 5  │
-///                                         │  Validity   └───┘       ├───┤ ├───┤
-///     Logical       Logical                  (nulls)   Offsets    │ │ 1 │ │ F │ │ 6  │
-///      Values       Offsets               │                         └───┘ └───┘
-///                                                                 │    Values   │    │
-///                 (offsets[i],            │   ListArray               (Array)
-///                offsets[i+1])                                    └ ─ ─ ─ ─ ─ ─ ┘    │
+///                                                       │ 6 │     │ │ 1 │ │ F │ │ 5  │
+///                                         │  Validity   └───┘       └───┘ └───┘
+///     Logical       Logical                  (nulls)   Offsets    │    Values   │    │
+///      Values       Offsets               │                           (Array)
+///                                                                 └ ─ ─ ─ ─ ─ ─ ┘    │
+///                 (offsets[i],            │   ListArray
+///                offsets[i+1])                                                       │
 ///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
 /// ```
 ///
@@ -145,19 +147,19 @@ impl OffsetSizeTrait for i64 {
 ///  ┌─────────────┐  ┌───────┐     │     ┌───┐   ┌───┐       ╠═══╣ ╠═══╣
 ///  │ [] (empty)  │  │ (3,3) │           │ 1 │   │ 3 │     │ ║ 1 ║ ║ B ║ │ 1  │
 ///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠═══╣ ╠═══╣
-///  │    NULL     │  │ (3,4) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
-///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╠───╣ ╠───╣
-///  │     [D]     │  │ (4,5) │           │ 1 │   │ 4 │     │ │ 0 │ │ ? │ │ 3  │
-///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ├───┤ ├───┤
-///                                               │ 5 │     │ │ 1 │ │ D │ │ 4  │
-///                                 │             └───┘       ├───┤ ├───┤
-///                                                         │ │ 0 │ │ ? │ │ 5  │
-///                                 │  Validity               ╠═══╣ ╠═══╣
-///     Logical       Logical          (nulls)   Offsets    │ ║ 1 ║ ║ F ║ │ 6  │
-///      Values       Offsets       │                         ╚═══╝ ╚═══╝
-///                                                         │    Values   │    │
-///                 (offsets[i],    │   ListArray               (Array)
-///                offsets[i+1])                            └ ─ ─ ─ ─ ─ ─ ┘    │
+///  │    NULL     │  │ (3,3) │           │ 0 │   │ 3 │     │ ║ 1 ║ ║ C ║ │ 2  │
+///  ├─────────────┤  ├───────┤     │     ├───┤   ├───┤       ╚═══╝ ╚═══╝
+///  │     [D]     │  │ (3,4) │           │ 1 │   │ 3 │     │ │ 1 │ │ D │ │ 3  │
+///  └─────────────┘  └───────┘     │     └───┘   ├───┤       ╔═══╗ ╔═══╗
+///                                               │ 4 │     │ ║ 0 ║ ║ ? ║ │ 4  │
+///                                 │             └───┘       ╠═══╣ ╠═══╣
+///                                                         │ ║ 1 ║ ║ F ║ │ 5  │
+///                                 │  Validity               ╚═══╝ ╚═══╝
+///     Logical       Logical          (nulls)   Offsets    │    Values   │    │
+///      Values       Offsets       │                           (Array)
+///                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
+///                 (offsets[i],    │   ListArray
+///                offsets[i+1])                                               │
 ///                                 └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
 /// ```
 ///
@@ -327,15 +329,25 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
     }
 
     /// Returns ith value of this list array.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     /// Caller must ensure that the index is within the array bounds
     pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
-        let end = self.value_offsets().get_unchecked(i + 1).as_usize();
-        let start = self.value_offsets().get_unchecked(i).as_usize();
+        let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() };
+        let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
         self.values.slice(start, end - start)
     }
 
     /// Returns ith value of this list array.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
+    /// # Panics
+    /// Panics if index `i` is out of bounds
     pub fn value(&self, i: usize) -> ArrayRef {
         let end = self.value_offsets()[i + 1].as_usize();
         let start = self.value_offsets()[i].as_usize();
@@ -454,7 +466,7 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<
             _ => unreachable!(),
         };
 
-        let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
+        let offsets = OffsetBuffer::from_repeated_length(size, value.len());
 
         Self {
             data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
@@ -513,6 +525,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
     }
 }
 
+impl<OffsetSize: OffsetSizeTrait> super::private::Sealed for GenericListArray<OffsetSize> {}
+
 impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -623,7 +637,7 @@ mod tests {
     use crate::cast::AsArray;
     use crate::types::Int32Type;
     use crate::{Int32Array, Int64Array};
-    use arrow_buffer::{bit_util, Buffer, ScalarBuffer};
+    use arrow_buffer::{Buffer, ScalarBuffer, bit_util};
     use arrow_schema::Field;
 
     fn create_from_buffers() -> ListArray {
@@ -1272,4 +1286,11 @@ mod tests {
         let field = Arc::new(Field::new("element", values.data_type().clone(), false));
         ListArray::new(field.clone(), offsets, Arc::new(values), None);
     }
+
+    #[test]
+    fn test_list_new_null_len() {
+        // The array is expected to have as many slots as the length passed to `new_null`.
+        let item = Field::new_list_field(DataType::Int32, true);
+        assert_eq!(ListArray::new_null(Arc::new(item), 5).len(), 5);
+    }
 }
diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs
index 6118607bcbbf..52c88d581d20 100644
--- a/arrow-array/src/array/list_view_array.rs
+++ b/arrow-array/src/array/list_view_array.rs
@@ -23,8 +23,12 @@ use std::ops::Add;
 use std::sync::Arc;
 
 use crate::array::{make_array, print_long_array};
+use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
 use crate::iterator::GenericListViewArrayIter;
-use crate::{new_empty_array, Array, ArrayAccessor, ArrayRef, FixedSizeListArray, OffsetSizeTrait};
+use crate::{
+    Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, GenericListArray,
+    OffsetSizeTrait, new_empty_array,
+};
 
 /// A [`GenericListViewArray`] of variable size lists, storing offsets as `i32`.
 pub type ListViewArray = GenericListViewArray<i32>;
@@ -89,9 +93,9 @@ pub type LargeListViewArray = GenericListViewArray<i64>;
 ///                                                                         │ │ 1 │ │ D │ │ 5  │
 ///     Logical       Logical               │  Validity  Offsets  Sizes       └───┘ └───┘
 ///      Values       Offset                   (nulls)                      │    Values   │    │
-///                   & Size                │                                   (Array)  
+///                   & Size                │                                   (Array)
 ///                                                                         └ ─ ─ ─ ─ ─ ─ ┘    │
-///                 (offsets[i],            │   ListViewArray                          
+///                 (offsets[i],            │   ListViewArray
 ///                  sizes[i])                                                                 │
 ///                                         └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
 /// ```
@@ -154,7 +158,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
         if len != sizes.len() {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "Length of offsets buffer and sizes buffer must be equal for {}ListViewArray, got {len} and {}",
-                OffsetSize::PREFIX, sizes.len()
+                OffsetSize::PREFIX,
+                sizes.len()
             )));
         }
 
@@ -224,8 +229,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
         Self {
             data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
             nulls: Some(NullBuffer::new_null(len)),
-            value_offsets: ScalarBuffer::from(vec![]),
-            value_sizes: ScalarBuffer::from(vec![]),
+            value_offsets: ScalarBuffer::from(vec![OffsetSize::usize_as(0); len]),
+            value_sizes: ScalarBuffer::from(vec![OffsetSize::usize_as(0); len]),
             values,
         }
     }
@@ -283,15 +288,23 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
     }
 
     /// Returns ith value of this list view array.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     /// Caller must ensure that the index is within the array bounds
     pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef {
-        let offset = self.value_offsets().get_unchecked(i).as_usize();
-        let length = self.value_sizes().get_unchecked(i).as_usize();
+        let offset = unsafe { self.value_offsets().get_unchecked(i).as_usize() };
+        let length = unsafe { self.value_sizes().get_unchecked(i).as_usize() };
         self.values.slice(offset, length)
     }
 
     /// Returns ith value of this list view array.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if the index is out of bounds
     pub fn value(&self, i: usize) -> ArrayRef {
@@ -348,6 +361,46 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
             value_sizes: self.value_sizes.slice(offset, length),
         }
     }
+
+    /// Builds a [`GenericListViewArray`] of primitives from an iterator of optional lists.
+    ///
+    /// A `None` item is appended as a null list; a `Some` item is appended as a
+    /// valid list whose elements (themselves optional) go into the child values.
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::ListViewArray;
+    /// # use arrow_array::types::Int32Type;
+    /// let data = vec![
+    ///    Some(vec![Some(0), Some(1), Some(2)]),
+    ///    None,
+    ///    Some(vec![Some(3), None, Some(5)]),
+    ///    Some(vec![Some(6), Some(7)]),
+    /// ];
+    /// let list_array = ListViewArray::from_iter_primitive::<Int32Type, _, _>(data);
+    /// println!("{:?}", list_array);
+    /// ```
+    pub fn from_iter_primitive<T, P, I>(iter: I) -> Self
+    where
+        T: ArrowPrimitiveType,
+        P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>,
+        I: IntoIterator<Item = Option<P>>,
+    {
+        let lists = iter.into_iter();
+        let (expected_lists, _) = lists.size_hint();
+        let child = PrimitiveBuilder::<T>::new();
+        let mut builder = GenericListViewBuilder::with_capacity(child, expected_lists);
+
+        for list in lists {
+            let is_valid = list.is_some();
+            // `Option<P>` iterates zero or one `P`, so a `None` list adds no child values.
+            for element in list.into_iter().flatten() {
+                builder.values().append_option(element);
+            }
+            builder.append(is_valid);
+        }
+        builder.finish()
+    }
 }
 
 impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<OffsetSize> {
@@ -358,10 +411,12 @@ impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListViewArray<Offset
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        GenericListViewArray::value_unchecked(self, index)
+        unsafe { GenericListViewArray::value_unchecked(self, index) }
     }
 }
 
+impl<OffsetSize: OffsetSizeTrait> super::private::Sealed for GenericListViewArray<OffsetSize> {}
+
 impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -445,6 +500,29 @@ impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListViewArray<Offse
     }
 }
 
+impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>>
+    for GenericListViewArray<OffsetSize>
+{
+    /// Converts a list array into a list view array over the same parts.
+    ///
+    /// For every list `i`, the view offset is `offsets[i]` and the view size is
+    /// `offsets[i + 1] - offsets[i]`; field, values and nulls are passed through.
+    fn from(value: GenericListArray<OffsetSize>) -> Self {
+        let (field, list_offsets, values, nulls) = value.into_parts();
+        let list_count = list_offsets.len() - 1;
+
+        // The first `list_count` offsets are the starts of the lists.
+        let starts = list_offsets[..list_count].to_vec();
+        let sizes: Vec<OffsetSize> = (0..list_count)
+            .map(|i| list_offsets[i + 1] - list_offsets[i])
+            .collect();
+
+        let view_offsets = ScalarBuffer::from(starts);
+        let view_sizes = ScalarBuffer::from(sizes);
+        Self::new(field, view_offsets, view_sizes, values, nulls)
+    }
+}
+
 impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for ArrayData {
     fn from(array: GenericListViewArray<OffsetSize>) -> Self {
         let len = array.len();
@@ -475,7 +553,7 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewAr
             _ => unreachable!(),
         };
         let mut acc = 0_usize;
-        let iter = std::iter::repeat(size).take(value.len());
+        let iter = std::iter::repeat_n(size, value.len());
         let mut sizes = Vec::with_capacity(iter.size_hint().0);
         let mut offsets = Vec::with_capacity(iter.size_hint().0);
 
@@ -550,7 +628,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
 
 #[cfg(test)]
 mod tests {
-    use arrow_buffer::{bit_util, BooleanBuffer, Buffer, ScalarBuffer};
+    use arrow_buffer::{BooleanBuffer, Buffer, NullBufferBuilder, ScalarBuffer, bit_util};
     use arrow_schema::Field;
 
     use crate::builder::{FixedSizeListBuilder, Int32Builder};
@@ -1111,4 +1189,36 @@ mod tests {
             .collect();
         assert_eq!(values, vec![Some(vec![]), Some(vec![]), Some(vec![])]);
     }
+
+    #[test]
+    fn test_list_view_new_null_len() {
+        // The array is expected to have as many slots as the length passed to `new_null`.
+        let item = Field::new_list_field(DataType::Int32, true);
+        assert_eq!(ListViewArray::new_null(Arc::new(item), 5).len(), 5);
+    }
+
+    #[test]
+    fn test_from_iter_primitive() {
+        // Logical contents: [[0, 1, 2], NULL, [3, 4, 5], [6, 7]]
+        let input = vec![
+            Some(vec![Some(0), Some(1), Some(2)]),
+            None,
+            Some(vec![Some(3), Some(4), Some(5)]),
+            Some(vec![Some(6), Some(7)]),
+        ];
+        let built = ListViewArray::from_iter_primitive::<Int32Type, _, _>(input);
+
+        // Only the second list is null; it occupies no child values (offset 3, size 0).
+        let mut validity = NullBufferBuilder::new(4);
+        validity.append(true);
+        validity.append(false);
+        validity.append_n_non_nulls(2);
+        let offsets = ScalarBuffer::from(vec![0, 3, 3, 6]);
+        let sizes = ScalarBuffer::from(vec![3, 0, 3, 2]);
+        let child = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]));
+        let item = Arc::new(Field::new_list_field(DataType::Int32, true));
+
+        let expected = ListViewArray::new(item, offsets, sizes, child, validity.finish());
+        assert_eq!(built, expected);
+    }
 }
diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs
index 18a7c491aa16..86608d586f34 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -17,7 +17,7 @@
 
 use crate::array::{get_offsets, print_long_array};
 use crate::iterator::MapArrayIter;
-use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray};
+use crate::{Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray, make_array};
 use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, Field, FieldRef};
@@ -173,6 +173,15 @@ impl MapArray {
         &self.entries
     }
 
+    /// Returns the two fields of the [`StructArray`] backing this map, in the order stored.
+    pub fn entries_fields(&self) -> (&Field, &Field) {
+        let collected: Vec<&FieldRef> = self.entries.fields().iter().collect();
+        let [first, second] = <[&FieldRef; 2]>::try_from(collected)
+            .expect("Every map has a key and value field");
+
+        (first.as_ref(), second.as_ref())
+    }
+
     /// Returns the data type of the map's keys.
     pub fn key_type(&self) -> &DataType {
         self.keys().data_type()
@@ -185,11 +194,14 @@ impl MapArray {
 
     /// Returns ith value of this map array.
     ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     /// Caller must ensure that the index is within the array bounds
     pub unsafe fn value_unchecked(&self, i: usize) -> StructArray {
-        let end = *self.value_offsets().get_unchecked(i + 1);
-        let start = *self.value_offsets().get_unchecked(i);
+        let end = *unsafe { self.value_offsets().get_unchecked(i + 1) };
+        let start = *unsafe { self.value_offsets().get_unchecked(i) };
         self.entries
             .slice(start.to_usize().unwrap(), (end - start).to_usize().unwrap())
     }
@@ -197,6 +209,12 @@ impl MapArray {
     /// Returns ith value of this map array.
     ///
     /// This is a [`StructArray`] containing two fields
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
+    /// # Panics
+    /// Panics if index `i` is out of bounds
     pub fn value(&self, i: usize) -> StructArray {
         let end = self.value_offsets()[i + 1] as usize;
         let start = self.value_offsets()[i] as usize;
@@ -343,6 +361,8 @@ impl MapArray {
     }
 }
 
+impl super::private::Sealed for MapArray {}
+
 impl Array for MapArray {
     fn as_any(&self) -> &dyn Any {
         self
diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs
index 29d284e3c5c4..aae382ace7b4 100644
--- a/arrow-array/src/array/mod.rs
+++ b/arrow-array/src/array/mod.rs
@@ -78,8 +78,18 @@ pub use list_view_array::*;
 
 use crate::iterator::ArrayIter;
 
+mod private {
+    /// Private marker trait to ensure [`super::Array`] can not be implemented outside this crate
+    pub trait Sealed {}
+    // A shared reference to a sealed type is itself sealed.
+    impl<T: Sealed> Sealed for &T {}
+}
+
 /// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html)
-pub trait Array: std::fmt::Debug + Send + Sync {
+///
+/// This trait is sealed: it is intended only for the array types defined in this
+/// crate and cannot be implemented for custom array types elsewhere.
+pub trait Array: std::fmt::Debug + Send + Sync + private::Sealed {
     /// Returns the array as [`Any`] so that it can be
     /// downcasted to a specific implementation.
     ///
@@ -341,6 +351,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {
 /// A reference-counted reference to a generic `Array`
 pub type ArrayRef = Arc<dyn Array>;
 
+impl private::Sealed for ArrayRef {}
+
 /// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
 impl Array for ArrayRef {
     fn as_any(&self) -> &dyn Any {
@@ -620,10 +632,11 @@ impl<'a> StringArrayType<'a> for &'a StringViewArray {
     }
 }
 
-/// A trait for Arrow String Arrays, currently three types are supported:
+/// A trait for Arrow Binary Arrays, currently four types are supported:
 /// - `BinaryArray`
 /// - `LargeBinaryArray`
 /// - `BinaryViewArray`
+/// - `FixedSizeBinaryArray`
 ///
 /// This trait helps to abstract over the different types of binary arrays
 /// so that we don't need to duplicate the implementation for each type.
@@ -642,6 +655,11 @@ impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
         BinaryViewArray::iter(self)
     }
 }
+impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
+    fn iter(&self) -> ArrayIter<Self> {
+        FixedSizeBinaryArray::iter(self)
+    }
+}
 
 impl PartialEq for dyn Array + '_ {
     fn eq(&self, other: &Self) -> bool {
@@ -739,8 +757,36 @@ impl<R: RunEndIndexType> PartialEq for RunArray<R> {
     }
 }
 
-/// Constructs an array using the input `data`.
-/// Returns a reference-counted `Array` instance.
+/// Constructs an [`ArrayRef`] from an [`ArrayData`].
+///
+/// # Notes:
+///
+/// It is more efficient to directly construct the concrete array type rather
+/// than using this function as creating an `ArrayData` requires at least one
+/// additional allocation (the Vec of buffers).
+///
+/// # Example:
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_data::ArrayData;
+/// # use arrow_array::{make_array, ArrayRef, Int32Array};
+/// # use arrow_buffer::{Buffer, ScalarBuffer};
+/// # use arrow_schema::DataType;
+/// // Create an Int32Array with values [1, 2, 3]
+/// let values_buffer = Buffer::from_slice_ref(&[1, 2, 3]);
+/// // ArrayData can be constructed using ArrayDataBuilder
+/// let builder = ArrayData::builder(DataType::Int32)
+///     .len(3)
+///     .add_buffer(values_buffer.clone());
+/// let array_data = builder.build().unwrap();
+/// // Create the ArrayRef from the ArrayData
+/// let array = make_array(array_data);
+///
+/// // It is equivalent to directly constructing the Int32Array
+/// let scalar_buffer = ScalarBuffer::from(values_buffer);
+/// let int32_array: ArrayRef = Arc::new(Int32Array::new(scalar_buffer, None));
+/// assert_eq!(&array, &int32_array);
+/// ```
 pub fn make_array(data: ArrayData) -> ArrayRef {
     match data.data_type() {
         DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
@@ -815,7 +861,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
         DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
         DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
         DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
-        DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
+        DataType::Dictionary(key_type, _) => match key_type.as_ref() {
             DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
             DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
             DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
@@ -824,18 +870,20 @@ pub fn make_array(data: ArrayData) -> ArrayRef {
             DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
             DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
             DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
-            dt => panic!("Unexpected dictionary key type {dt:?}"),
+            dt => unimplemented!("Unexpected dictionary key type {dt}"),
         },
-        DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
+        DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
             DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
             DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
             DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
-            dt => panic!("Unexpected data type for run_ends array {dt:?}"),
+            dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
         },
         DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
+        DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
+        DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
         DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
         DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
-        dt => panic!("Unexpected data type {dt:?}"),
+        dt => unimplemented!("Unexpected data type {dt}"),
     }
 }
 
@@ -1065,13 +1113,14 @@ mod tests {
     fn test_null_union() {
         for mode in [UnionMode::Sparse, UnionMode::Dense] {
             let data_type = DataType::Union(
-                UnionFields::new(
+                UnionFields::try_new(
                     vec![2, 1],
                     vec![
                         Field::new("foo", DataType::Int32, true),
                         Field::new("bar", DataType::Int64, true),
                     ],
-                ),
+                )
+                .unwrap(),
                 mode,
             );
             let array = new_null_array(&data_type, 4);
diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs
index 2dd9570a0e94..b682466b6738 100644
--- a/arrow-array/src/array/null_array.rs
+++ b/arrow-array/src/array/null_array.rs
@@ -76,6 +76,8 @@ impl NullArray {
     }
 }
 
+impl super::private::Sealed for NullArray {}
+
 impl Array for NullArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -170,7 +172,7 @@ impl std::fmt::Debug for NullArray {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::{make_array, Int64Array, StructArray};
+    use crate::{Int64Array, StructArray, make_array};
     use arrow_data::transform::MutableArrayData;
     use arrow_schema::Field;
 
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 073ad9774459..87de5f61605f 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -25,7 +25,7 @@ use crate::timezone::Tz;
 use crate::trusted_len::trusted_len_unzip;
 use crate::types::*;
 use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
-use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, ScalarBuffer, i256};
 use arrow_data::bit_iterator::try_for_each_valid_idx;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
@@ -410,6 +410,44 @@ pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
 /// A [`PrimitiveArray`] of elapsed durations in nanoseconds
 pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;
 
+/// A [`PrimitiveArray`] of 32-bit fixed point decimals
+///
+/// # Examples
+///
+/// Construction
+///
+/// ```
+/// # use arrow_array::Decimal32Array;
+/// // Create from Vec<Option<i32>>
+/// let arr = Decimal32Array::from(vec![Some(1), None, Some(2)]);
+/// // Create from Vec<i32>
+/// let arr = Decimal32Array::from(vec![1, 2, 3]);
+/// // Create iter/collect
+/// let arr: Decimal32Array = std::iter::repeat(42).take(10).collect();
+/// ```
+///
+/// See [`PrimitiveArray`] for more information and examples
+pub type Decimal32Array = PrimitiveArray<Decimal32Type>;
+
+/// A [`PrimitiveArray`] of 64-bit fixed point decimals
+///
+/// # Examples
+///
+/// Construction
+///
+/// ```
+/// # use arrow_array::Decimal64Array;
+/// // Create from Vec<Option<i64>>
+/// let arr = Decimal64Array::from(vec![Some(1), None, Some(2)]);
+/// // Create from Vec<i64>
+/// let arr = Decimal64Array::from(vec![1, 2, 3]);
+/// // Create iter/collect
+/// let arr: Decimal64Array = std::iter::repeat(42).take(10).collect();
+/// ```
+///
+/// See [`PrimitiveArray`] for more information and examples
+pub type Decimal64Array = PrimitiveArray<Decimal64Type>;
+
 /// A [`PrimitiveArray`] of 128-bit fixed point decimals
 ///
 /// # Examples
@@ -455,6 +493,9 @@ pub use crate::types::ArrowPrimitiveType;
 ///
 /// # Example: From a Vec
 ///
+/// *Note*: Converting a `Vec` to a `PrimitiveArray` does not copy the data.
+/// The new `PrimitiveArray` uses the same underlying allocation from the `Vec`.
+///
 /// ```
 /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type};
 /// let arr: PrimitiveArray<Int32Type> = vec![1, 2, 3, 4].into();
@@ -463,6 +504,33 @@ pub use crate::types::ArrowPrimitiveType;
 /// assert_eq!(arr.values(), &[1, 2, 3, 4])
 /// ```
 ///
+/// # Example: To a `Vec<T>`
+///
+/// *Note*: In some cases, converting a `PrimitiveArray` to a `Vec` is zero-copy
+/// (see [`Buffer::into_vec`] for the conditions). In such cases, the `Vec`
+/// reuses the underlying memory allocation of the `PrimitiveArray` instead of
+/// copying the data.
+///
+/// The Rust compiler generates highly optimized code for operations on
+/// Vec, so using a Vec can often be faster than using a PrimitiveArray directly.
+///
+/// ```
+/// # use arrow_array::{Array, PrimitiveArray, types::Int32Type};
+/// let arr = PrimitiveArray::<Int32Type>::from(vec![1, 2, 3, 4]);
+/// let starting_ptr = arr.values().as_ptr();
+/// // split into its parts
+/// let (datatype, buffer, nulls) = arr.into_parts();
+/// // Convert the buffer to a Vec<i32> (zero copy)
+/// // (note this requires that there are no other references)
+/// let mut vec: Vec<i32> = buffer.into();
+/// vec[2] = 300;
+/// // put the parts back together
+/// let arr = PrimitiveArray::<Int32Type>::try_new(vec.into(), nulls).unwrap();
+/// assert_eq!(arr.values(), &[1, 2, 300, 4]);
+/// // The same allocation was used
+/// assert_eq!(starting_ptr, arr.values().as_ptr());
+/// ```
+///
 /// # Example: From an optional Vec
 ///
 /// ```
@@ -672,6 +740,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
             DataType::Timestamp(t1, _) => {
                 matches!(data_type, DataType::Timestamp(t2, _) if &t1 == t2)
             }
+            DataType::Decimal32(_, _) => matches!(data_type, DataType::Decimal32(_, _)),
+            DataType::Decimal64(_, _) => matches!(data_type, DataType::Decimal64(_, _)),
             DataType::Decimal128(_, _) => matches!(data_type, DataType::Decimal128(_, _)),
             DataType::Decimal256(_, _) => matches!(data_type, DataType::Decimal256(_, _)),
             _ => T::DATA_TYPE.eq(data_type),
@@ -680,15 +750,22 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
 
     /// Returns the primitive value at index `i`.
     ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Safety
     ///
     /// caller must ensure that the passed in offset is less than the array len()
     #[inline]
     pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
-        *self.values.get_unchecked(i)
+        unsafe { *self.values.get_unchecked(i) }
     }
 
     /// Returns the primitive value at index `i`.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds
     #[inline]
@@ -749,7 +826,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         &'a self,
         indexes: impl Iterator<Item = Option<usize>> + 'a,
     ) -> impl Iterator<Item = Option<T::Native>> + 'a {
-        indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
+        indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
     }
 
     /// Returns a zero-copy slice of this array with the indicated offset and length.
@@ -782,11 +859,7 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
     where
         K: ArrowPrimitiveType<Native = T::Native>,
     {
-        let d = self.to_data().into_builder().data_type(K::DATA_TYPE);
-
-        // SAFETY:
-        // Native type is the same
-        PrimitiveArray::from(unsafe { d.build_unchecked() })
+        PrimitiveArray::new(self.values.clone(), self.nulls.clone())
     }
 
     /// Applies a unary infallible function to a primitive array, producing a
@@ -1113,6 +1186,8 @@ impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData {
     }
 }
 
+impl<T: ArrowPrimitiveType> super::private::Sealed for PrimitiveArray<T> {}
+
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -1183,7 +1258,7 @@ impl<T: ArrowPrimitiveType> ArrayAccessor for &PrimitiveArray<T> {
 
     #[inline]
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        PrimitiveArray::value_unchecked(self, index)
+        unsafe { PrimitiveArray::value_unchecked(self, index) }
     }
 }
 
@@ -1195,6 +1270,8 @@ where
     ///
     /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
     /// A valid value is expected, thus the user should first check for validity.
+    ///
+    /// See notes on [`PrimitiveArray::value`] regarding nulls and panics
     pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
         as_datetime::<T>(i64::from(self.value(i)))
     }
@@ -1203,6 +1280,8 @@ where
     ///
     /// functionally it is same as `value_as_datetime`, however it adds
     /// the passed tz to the to-be-returned NaiveDateTime
+    ///
+    /// See notes on [`PrimitiveArray::value`] regarding nulls and panics
     pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> Option<DateTime<Tz>> {
         as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz)
     }
@@ -1210,6 +1289,8 @@ where
     /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
     ///
     /// If a data type cannot be converted to `NaiveDate`, a `None` is returned
+    ///
+    /// See notes on [`PrimitiveArray::value`] regarding nulls and panics
     pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> {
         self.value_as_datetime(i).map(|datetime| datetime.date())
     }
@@ -1217,6 +1298,8 @@ where
     /// Returns a value as a chrono `NaiveTime`
     ///
     /// `Date32` and `Date64` return UTC midnight as they do not have time resolution
+    ///
+    /// See notes on [`PrimitiveArray::value`] regarding nulls and panics
     pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
         as_time::<T>(i64::from(self.value(i)))
     }
@@ -1224,6 +1307,8 @@ where
     /// Returns a value as a chrono `Duration`
     ///
     /// If a data type cannot be converted to `Duration`, a `None` is returned
+    ///
+    /// See notes on [`PrimitiveArray::value`] regarding nulls and panics
     pub fn value_as_duration(&self, i: usize) -> Option<Duration> {
         as_duration::<T>(i64::from(self.value(i)))
     }
@@ -1233,7 +1318,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         let data_type = self.data_type();
 
-        write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?;
+        write!(f, "PrimitiveArray<{data_type}>\n[\n")?;
         print_long_array(self, f, |array, index, f| match data_type {
             DataType::Date32 | DataType::Date64 => {
                 let v = self.value(index).to_i64().unwrap();
@@ -1242,7 +1327,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
                     None => {
                         write!(
                             f,
-                            "Cast error: Failed to convert {v} to temporal for {data_type:?}"
+                            "Cast error: Failed to convert {v} to temporal for {data_type}"
                         )
                     }
                 }
@@ -1254,7 +1339,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
                     None => {
                         write!(
                             f,
-                            "Cast error: Failed to convert {v} to temporal for {data_type:?}"
+                            "Cast error: Failed to convert {v} to temporal for {data_type}"
                         )
                     }
                 }
@@ -1343,6 +1428,8 @@ def_from_for_primitive!(UInt64Type, u64);
 def_from_for_primitive!(Float16Type, f16);
 def_from_for_primitive!(Float32Type, f32);
 def_from_for_primitive!(Float64Type, f64);
+def_from_for_primitive!(Decimal32Type, i32);
+def_from_for_primitive!(Decimal64Type, i64);
 def_from_for_primitive!(Decimal128Type, i128);
 def_from_for_primitive!(Decimal256Type, i256);
 
@@ -1412,10 +1499,11 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         let (_, upper) = iterator.size_hint();
         let len = upper.expect("trusted_len_unzip requires an upper limit");
 
-        let (null, buffer) = trusted_len_unzip(iterator);
+        let (null, buffer) = unsafe { trusted_len_unzip(iterator) };
 
-        let data =
-            ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]);
+        let data = unsafe {
+            ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![])
+        };
         PrimitiveArray::from(data)
     }
 }
@@ -1455,6 +1543,8 @@ def_numeric_from_vec!(UInt64Type);
 def_numeric_from_vec!(Float16Type);
 def_numeric_from_vec!(Float32Type);
 def_numeric_from_vec!(Float64Type);
+def_numeric_from_vec!(Decimal32Type);
+def_numeric_from_vec!(Decimal64Type);
 def_numeric_from_vec!(Decimal128Type);
 def_numeric_from_vec!(Decimal256Type);
 
@@ -1539,10 +1629,16 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
     /// Validates values in this array can be properly interpreted
     /// with the specified precision.
     pub fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> {
+        // Compare as i16: `scale as u8` would wrap a (valid) negative scale to >= 128.
+        let scale = self.scale();
+        if i16::from(precision) < i16::from(scale) {
+            let msg = format!("Decimal precision {precision} is less than scale {scale}");
+            return Err(ArrowError::InvalidArgumentError(msg));
+        }
         (0..self.len()).try_for_each(|idx| {
             if self.is_valid(idx) {
                 let decimal = unsafe { self.value_unchecked(idx) };
-                T::validate_decimal_precision(decimal, precision)
+                T::validate_decimal_precision(decimal, precision, self.scale())
             } else {
                 Ok(())
             }
@@ -1563,6 +1659,26 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
     /// Returns the decimal precision of this array
     pub fn precision(&self) -> u8 {
         match T::BYTE_LENGTH {
+            4 => {
+                if let DataType::Decimal32(p, _) = self.data_type() {
+                    *p
+                } else {
+                    unreachable!(
+                        "Decimal32Array datatype is not DataType::Decimal32 but {}",
+                        self.data_type()
+                    )
+                }
+            }
+            8 => {
+                if let DataType::Decimal64(p, _) = self.data_type() {
+                    *p
+                } else {
+                    unreachable!(
+                        "Decimal64Array datatype is not DataType::Decimal64 but {}",
+                        self.data_type()
+                    )
+                }
+            }
             16 => {
                 if let DataType::Decimal128(p, _) = self.data_type() {
                     *p
@@ -1590,6 +1706,26 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
     /// Returns the decimal scale of this array
     pub fn scale(&self) -> i8 {
         match T::BYTE_LENGTH {
+            4 => {
+                if let DataType::Decimal32(_, s) = self.data_type() {
+                    *s
+                } else {
+                    unreachable!(
+                        "Decimal32Array datatype is not DataType::Decimal32 but {}",
+                        self.data_type()
+                    )
+                }
+            }
+            8 => {
+                if let DataType::Decimal64(_, s) = self.data_type() {
+                    *s
+                } else {
+                    unreachable!(
+                        "Decimal64Array datatype is not DataType::Decimal64 but {}",
+                        self.data_type()
+                    )
+                }
+            }
             16 => {
                 if let DataType::Decimal128(_, s) = self.data_type() {
                     *s
@@ -1618,9 +1754,11 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::builder::{Decimal128Builder, Decimal256Builder};
-    use crate::cast::downcast_array;
     use crate::BooleanArray;
+    use crate::builder::{
+        Decimal32Builder, Decimal64Builder, Decimal128Builder, Decimal256Builder,
+    };
+    use crate::cast::downcast_array;
     use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
     use arrow_schema::TimeUnit;
 
@@ -1990,7 +2128,7 @@ mod tests {
         let arr: PrimitiveArray<TimestampMillisecondType> =
             TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]);
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n  2018-12-31T00:00:00,\n  2018-12-31T00:00:00,\n  1921-01-02T00:00:00,\n]",
+            "PrimitiveArray<Timestamp(ms)>\n[\n  2018-12-31T00:00:00,\n  2018-12-31T00:00:00,\n  1921-01-02T00:00:00,\n]",
             format!("{arr:?}")
         );
     }
@@ -2001,7 +2139,7 @@ mod tests {
             TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
                 .with_timezone_utc();
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n  2018-12-31T00:00:00+00:00,\n  2018-12-31T00:00:00+00:00,\n  1921-01-02T00:00:00+00:00,\n]",
+            "PrimitiveArray<Timestamp(ms, \"+00:00\")>\n[\n  2018-12-31T00:00:00+00:00,\n  2018-12-31T00:00:00+00:00,\n  1921-01-02T00:00:00+00:00,\n]",
             format!("{arr:?}")
         );
     }
@@ -2013,8 +2151,8 @@ mod tests {
             TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
                 .with_timezone("Asia/Taipei".to_string());
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n  2018-12-31T08:00:00+08:00,\n  2018-12-31T08:00:00+08:00,\n  1921-01-02T08:00:00+08:00,\n]",
-            format!("{:?}", arr)
+            "PrimitiveArray<Timestamp(ms, \"Asia/Taipei\")>\n[\n  2018-12-31T08:00:00+08:00,\n  2018-12-31T08:00:00+08:00,\n  1921-01-02T08:00:00+08:00,\n]",
+            format!("{arr:?}")
         );
     }
 
@@ -2028,7 +2166,7 @@ mod tests {
         println!("{arr:?}");
 
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n  2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n  2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n  1921-01-02T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n]",
+            "PrimitiveArray<Timestamp(ms, \"Asia/Taipei\")>\n[\n  2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n  2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n  1921-01-02T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n]",
             format!("{arr:?}")
         );
     }
@@ -2039,7 +2177,7 @@ mod tests {
             TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
                 .with_timezone("+08:00".to_string());
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n  2018-12-31T08:00:00+08:00,\n  2018-12-31T08:00:00+08:00,\n  1921-01-02T08:00:00+08:00,\n]",
+            "PrimitiveArray<Timestamp(ms, \"+08:00\")>\n[\n  2018-12-31T08:00:00+08:00,\n  2018-12-31T08:00:00+08:00,\n  1921-01-02T08:00:00+08:00,\n]",
             format!("{arr:?}")
         );
     }
@@ -2050,7 +2188,7 @@ mod tests {
             TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
                 .with_timezone("xxx".to_string());
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n  2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n  2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n  1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
+            "PrimitiveArray<Timestamp(ms, \"xxx\")>\n[\n  2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n  2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n  1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]",
             format!("{arr:?}")
         );
     }
@@ -2066,8 +2204,8 @@ mod tests {
         ])
         .with_timezone("America/Denver".to_string());
         assert_eq!(
-            "PrimitiveArray<Timestamp(Millisecond, Some(\"America/Denver\"))>\n[\n  2022-03-13T01:59:59-07:00,\n  2022-03-13T03:00:00-06:00,\n  2022-11-06T00:59:59-06:00,\n  2022-11-06T01:00:00-06:00,\n]",
-            format!("{:?}", arr)
+            "PrimitiveArray<Timestamp(ms, \"America/Denver\")>\n[\n  2022-03-13T01:59:59-07:00,\n  2022-03-13T03:00:00-06:00,\n  2022-11-06T00:59:59-06:00,\n  2022-11-06T01:00:00-06:00,\n]",
+            format!("{arr:?}")
         );
     }
 
@@ -2084,7 +2222,7 @@ mod tests {
     fn test_time32second_fmt_debug() {
         let arr: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
         assert_eq!(
-            "PrimitiveArray<Time32(Second)>\n[\n  02:00:01,\n  16:40:54,\n]",
+            "PrimitiveArray<Time32(s)>\n[\n  02:00:01,\n  16:40:54,\n]",
             format!("{arr:?}")
         );
     }
@@ -2094,8 +2232,8 @@ mod tests {
         // chrono::NaiveDatetime::from_timestamp_opt returns None while input is invalid
         let arr: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
         assert_eq!(
-        "PrimitiveArray<Time32(Second)>\n[\n  Cast error: Failed to convert -7201 to temporal for Time32(Second),\n  Cast error: Failed to convert -60054 to temporal for Time32(Second),\n]",
-            // "PrimitiveArray<Time32(Second)>\n[\n  null,\n  null,\n]",
+            "PrimitiveArray<Time32(s)>\n[\n  Cast error: Failed to convert -7201 to temporal for Time32(s),\n  Cast error: Failed to convert -60054 to temporal for Time32(s),\n]",
+            // "PrimitiveArray<Time32(s)>\n[\n  null,\n  null,\n]",
             format!("{arr:?}")
         )
     }
@@ -2105,7 +2243,7 @@ mod tests {
         // replicate the issue from https://github.com/apache/arrow-datafusion/issues/3832
         let arr: PrimitiveArray<TimestampMicrosecondType> = vec![9065525203050843594].into();
         assert_eq!(
-            "PrimitiveArray<Timestamp(Microsecond, None)>\n[\n  null,\n]",
+            "PrimitiveArray<Timestamp(µs)>\n[\n  null,\n]",
             format!("{arr:?}")
         )
     }
@@ -2228,6 +2366,42 @@ mod tests {
         let _ = PrimitiveArray::<Int64Type>::from(foo.into_data());
     }
 
+    #[test]
+    fn test_decimal32() {
+        let values: Vec<_> = vec![0, 1, -1, i32::MIN, i32::MAX];
+        let array: PrimitiveArray<Decimal32Type> =
+            PrimitiveArray::from_iter(values.iter().copied());
+        assert_eq!(array.values(), &values);
+
+        let array: PrimitiveArray<Decimal32Type> =
+            PrimitiveArray::from_iter_values(values.iter().copied());
+        assert_eq!(array.values(), &values);
+
+        let array = PrimitiveArray::<Decimal32Type>::from(values.clone());
+        assert_eq!(array.values(), &values);
+
+        let array = PrimitiveArray::<Decimal32Type>::from(array.to_data());
+        assert_eq!(array.values(), &values);
+    }
+
+    #[test]
+    fn test_decimal64() {
+        let values: Vec<_> = vec![0, 1, -1, i64::MIN, i64::MAX];
+        let array: PrimitiveArray<Decimal64Type> =
+            PrimitiveArray::from_iter(values.iter().copied());
+        assert_eq!(array.values(), &values);
+
+        let array: PrimitiveArray<Decimal64Type> =
+            PrimitiveArray::from_iter_values(values.iter().copied());
+        assert_eq!(array.values(), &values);
+
+        let array = PrimitiveArray::<Decimal64Type>::from(values.clone());
+        assert_eq!(array.values(), &values);
+
+        let array = PrimitiveArray::<Decimal64Type>::from(array.to_data());
+        assert_eq!(array.values(), &values);
+    }
+
     #[test]
     fn test_decimal128() {
         let values: Vec<_> = vec![0, 1, -1, i128::MIN, i128::MAX];
@@ -2297,7 +2471,7 @@ mod tests {
         let result = arr.validate_decimal_precision(5);
         let error = result.unwrap_err();
         assert_eq!(
-            "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999",
+            "Invalid argument error: 123.456 is too large to store in a Decimal128 of precision 5. Max is 99.999",
             error.to_string()
         );
 
@@ -2316,7 +2490,7 @@ mod tests {
         let result = arr.validate_decimal_precision(2);
         let error = result.unwrap_err();
         assert_eq!(
-            "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99",
+            "Invalid argument error: 10.0 is too large to store in a Decimal128 of precision 2. Max is 9.9",
             error.to_string()
         );
     }
@@ -2402,7 +2576,7 @@ mod tests {
 
     #[test]
     #[should_panic(
-        expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. Min is -99999"
+        expected = "-1232234234324.32 is too small to store in a Decimal128 of precision 5. Min is -999.99"
     )]
     fn test_decimal_array_with_precision_and_scale_out_of_range() {
         let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
@@ -2499,6 +2673,74 @@ mod tests {
         assert!(!array.is_null(2));
     }
 
+    #[test]
+    fn test_decimal64_iter() {
+        let mut builder = Decimal64Builder::with_capacity(30);
+        let decimal1 = 12345;
+        builder.append_value(decimal1);
+
+        builder.append_null();
+
+        let decimal2 = 56789;
+        builder.append_value(decimal2);
+
+        let array: Decimal64Array = builder.finish().with_precision_and_scale(18, 4).unwrap();
+
+        let collected: Vec<_> = array.iter().collect();
+        assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
+    }
+
+    #[test]
+    fn test_from_iter_decimal64array() {
+        let value1 = 12345;
+        let value2 = 56789;
+
+        let mut array: Decimal64Array =
+            vec![Some(value1), None, Some(value2)].into_iter().collect();
+        array = array.with_precision_and_scale(18, 4).unwrap();
+        assert_eq!(array.len(), 3);
+        assert_eq!(array.data_type(), &DataType::Decimal64(18, 4));
+        assert_eq!(value1, array.value(0));
+        assert!(!array.is_null(0));
+        assert!(array.is_null(1));
+        assert_eq!(value2, array.value(2));
+        assert!(!array.is_null(2));
+    }
+
+    #[test]
+    fn test_decimal32_iter() {
+        let mut builder = Decimal32Builder::with_capacity(30);
+        let decimal1 = 12345;
+        builder.append_value(decimal1);
+
+        builder.append_null();
+
+        let decimal2 = 56789;
+        builder.append_value(decimal2);
+
+        let array: Decimal32Array = builder.finish().with_precision_and_scale(9, 2).unwrap();
+
+        let collected: Vec<_> = array.iter().collect();
+        assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
+    }
+
+    #[test]
+    fn test_from_iter_decimal32array() {
+        let value1 = 12345;
+        let value2 = 56789;
+
+        let mut array: Decimal32Array =
+            vec![Some(value1), None, Some(value2)].into_iter().collect();
+        array = array.with_precision_and_scale(9, 2).unwrap();
+        assert_eq!(array.len(), 3);
+        assert_eq!(array.data_type(), &DataType::Decimal32(9, 2));
+        assert_eq!(value1, array.value(0));
+        assert!(!array.is_null(0));
+        assert!(array.is_null(1));
+        assert_eq!(value2, array.value(2));
+        assert!(!array.is_null(2));
+    }
+
     #[test]
     fn test_unary_opt() {
         let array = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]);
@@ -2641,10 +2883,11 @@ mod tests {
             None,
         ]
         .into();
-        let debug_str = format!("{:?}", array);
-        assert_eq!("PrimitiveArray<Time32(Second)>\n[\n  Cast error: Failed to convert -1 to temporal for Time32(Second),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400 to temporal for Time32(Second),\n  Cast error: Failed to convert 86401 to temporal for Time32(Second),\n  null,\n]",
-    debug_str
-    );
+        let debug_str = format!("{array:?}");
+        assert_eq!(
+            "PrimitiveArray<Time32(s)>\n[\n  Cast error: Failed to convert -1 to temporal for Time32(s),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400 to temporal for Time32(s),\n  Cast error: Failed to convert 86401 to temporal for Time32(s),\n  null,\n]",
+            debug_str
+        );
     }
 
     #[test]
@@ -2658,8 +2901,9 @@ mod tests {
             None,
         ]
         .into();
-        let debug_str = format!("{:?}", array);
-        assert_eq!("PrimitiveArray<Time32(Millisecond)>\n[\n  Cast error: Failed to convert -1 to temporal for Time32(Millisecond),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000 to temporal for Time32(Millisecond),\n  Cast error: Failed to convert 86401000 to temporal for Time32(Millisecond),\n  null,\n]",
+        let debug_str = format!("{array:?}");
+        assert_eq!(
+            "PrimitiveArray<Time32(ms)>\n[\n  Cast error: Failed to convert -1 to temporal for Time32(ms),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000 to temporal for Time32(ms),\n  Cast error: Failed to convert 86401000 to temporal for Time32(ms),\n  null,\n]",
             debug_str
         );
     }
@@ -2675,9 +2919,9 @@ mod tests {
             None,
         ]
         .into();
-        let debug_str = format!("{:?}", array);
+        let debug_str = format!("{array:?}");
         assert_eq!(
-        "PrimitiveArray<Time64(Nanosecond)>\n[\n  Cast error: Failed to convert -1 to temporal for Time64(Nanosecond),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000000000 to temporal for Time64(Nanosecond),\n  Cast error: Failed to convert 86401000000000 to temporal for Time64(Nanosecond),\n  null,\n]",
+            "PrimitiveArray<Time64(ns)>\n[\n  Cast error: Failed to convert -1 to temporal for Time64(ns),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000000000 to temporal for Time64(ns),\n  Cast error: Failed to convert 86401000000000 to temporal for Time64(ns),\n  null,\n]",
             debug_str
         );
     }
@@ -2693,8 +2937,11 @@ mod tests {
             None,
         ]
         .into();
-        let debug_str = format!("{:?}", array);
-        assert_eq!("PrimitiveArray<Time64(Microsecond)>\n[\n  Cast error: Failed to convert -1 to temporal for Time64(Microsecond),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000000 to temporal for Time64(Microsecond),\n  Cast error: Failed to convert 86401000000 to temporal for Time64(Microsecond),\n  null,\n]", debug_str);
+        let debug_str = format!("{array:?}");
+        assert_eq!(
+            "PrimitiveArray<Time64(µs)>\n[\n  Cast error: Failed to convert -1 to temporal for Time64(µs),\n  00:00:00,\n  23:59:59,\n  Cast error: Failed to convert 86400000000 to temporal for Time64(µs),\n  Cast error: Failed to convert 86401000000 to temporal for Time64(µs),\n  null,\n]",
+            debug_str
+        );
     }
 
     #[test]
diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs
index 05cfa2d17135..9ca1af943d27 100644
--- a/arrow-array/src/array/run_array.rs
+++ b/arrow-array/src/array/run_array.rs
@@ -23,23 +23,22 @@ use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, Field};
 
 use crate::{
+    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
     builder::StringRunBuilder,
     make_array,
     run_iterator::RunArrayIter,
     types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
-    Array, ArrayAccessor, ArrayRef, PrimitiveArray,
 };
 
-/// An array of [run-end encoded values](https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout)
-///
-/// This encoding is variation on [run-length encoding (RLE)](https://en.wikipedia.org/wiki/Run-length_encoding)
-/// and is good for representing data containing same values repeated consecutively.
+/// An array of [run-end encoded values].
 ///
-/// [`RunArray`] contains `run_ends` array and `values` array of same length.
-/// The `run_ends` array stores the indexes at which the run ends. The `values` array
-/// stores the value of each run. Below example illustrates how a logical array is represented in
-/// [`RunArray`]
+/// This encoding is a variation on [run-length encoding (RLE)] and is well suited to
+/// representing data containing the same values repeated consecutively.
 ///
+/// A [`RunArray`] consists of a `run_ends` buffer and a `values` array of equal
+/// length. The `run_ends` buffer stores the indexes at which each run ends. The
+/// `values` array stores the corresponding value of each run. The example below
+/// illustrates how a logical array is represented by a [`RunArray`]:
 ///
 /// ```text
 /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
@@ -60,6 +59,9 @@ use crate::{
 ///                                             Logical array
 ///                                                Contents
 /// ```
+///
+/// [run-end encoded values]: https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
+/// [run-length encoding (RLE)]: https://en.wikipedia.org/wiki/Run-length_encoding
 pub struct RunArray<R: RunEndIndexType> {
     data_type: DataType,
     run_ends: RunEndBuffer<R::Native>,
@@ -77,8 +79,8 @@ impl<R: RunEndIndexType> Clone for RunArray<R> {
 }
 
 impl<R: RunEndIndexType> RunArray<R> {
-    /// Calculates the logical length of the array encoded
-    /// by the given run_ends array.
+    /// Calculates the logical length of the array encoded by treating the `run_ends`
+    /// array as if it were a [`RunEndBuffer`].
     pub fn logical_len(run_ends: &PrimitiveArray<R>) -> usize {
         let len = run_ends.len();
         if len == 0 {
@@ -87,9 +89,13 @@ impl<R: RunEndIndexType> RunArray<R> {
         run_ends.value(len - 1).as_usize()
     }
 
-    /// Attempts to create RunArray using given run_ends (index where a run ends)
-    /// and the values (value of the run). Returns an error if the given data is not compatible
-    /// with RunEndEncoded specification.
+    /// Attempts to create a [`RunArray`] using the given `run_ends` and `values`.
+    ///
+    /// # Errors
+    ///
+    /// - If `run_ends` and `values` have different lengths
+    /// - If `run_ends` has any null values
+    /// - If `run_ends` doesn't consist of strictly increasing positive integers
     pub fn try_new(run_ends: &PrimitiveArray<R>, values: &dyn Array) -> Result<Self, ArrowError> {
         let run_ends_type = run_ends.data_type().clone();
         let values_type = values.data_type().clone();
@@ -117,25 +123,39 @@ impl<R: RunEndIndexType> RunArray<R> {
         Ok(array_data.into())
     }
 
-    /// Returns a reference to [`RunEndBuffer`]
+    /// Returns a reference to the [`RunEndBuffer`].
     pub fn run_ends(&self) -> &RunEndBuffer<R::Native> {
         &self.run_ends
     }
 
-    /// Returns a reference to values array
+    /// Returns a reference to the values array.
     ///
-    /// Note: any slicing of this [`RunArray`] array is not applied to the returned array
-    /// and must be handled separately
+    /// Any slicing of this [`RunArray`] array is **not** applied to the returned
+    /// values here and must be handled separately.
     pub fn values(&self) -> &ArrayRef {
         &self.values
     }
 
+    /// Similar to [`values`] but accounts for logical slicing: returns only the values of
+    /// the runs between the start and end physical indices of this array (both inclusive).
+    ///
+    /// [`values`]: Self::values
+    pub fn values_slice(&self) -> ArrayRef {
+        let start = self.get_start_physical_index();
+        let end = self.get_end_physical_index(); // NOTE(review): confirm empty-array behavior
+        self.values.slice(start, end - start + 1) // `end` is inclusive, hence the `+ 1`
+    }
+
     /// Returns the physical index at which the array slice starts.
+    ///
+    /// See [`RunEndBuffer::get_start_physical_index`].
     pub fn get_start_physical_index(&self) -> usize {
         self.run_ends.get_start_physical_index()
     }
 
     /// Returns the physical index at which the array slice ends.
+    ///
+    /// See [`RunEndBuffer::get_end_physical_index`].
     pub fn get_end_physical_index(&self) -> usize {
         self.run_ends.get_end_physical_index()
     }
@@ -152,7 +172,6 @@ impl<R: RunEndIndexType> RunArray<R> {
     /// assert_eq!(typed.value(1), "b");
     /// assert!(typed.values().is_null(2));
     /// ```
-    ///
     pub fn downcast<V: 'static>(&self) -> Option<TypedRunArray<'_, R, V>> {
         let values = self.values.as_any().downcast_ref()?;
         Some(TypedRunArray {
@@ -161,89 +180,37 @@ impl<R: RunEndIndexType> RunArray<R> {
         })
     }
 
-    /// Returns index to the physical array for the given index to the logical array.
-    /// This function adjusts the input logical index based on `ArrayData::offset`
-    /// Performs a binary search on the run_ends array for the input index.
+    /// Calls [`RunEndBuffer::get_physical_index`].
     ///
     /// The result is arbitrary if `logical_index >= self.len()`
     pub fn get_physical_index(&self, logical_index: usize) -> usize {
         self.run_ends.get_physical_index(logical_index)
     }
 
-    /// Returns the physical indices of the input logical indices. Returns error if any of the logical
-    /// index cannot be converted to physical index. The logical indices are sorted and iterated along
-    /// with run_ends array to find matching physical index. The approach used here was chosen over
-    /// finding physical index for each logical index using binary search using the function
-    /// `get_physical_index`. Running benchmarks on both approaches showed that the approach used here
-    /// scaled well for larger inputs.
-    /// See <https://github.com/apache/arrow-rs/pull/3622#issuecomment-1407753727> for more details.
+    /// Returns the physical indices corresponding to the provided logical indices.
+    ///
+    /// See [`RunEndBuffer::get_physical_indices`] for more details.
     #[inline]
     pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, ArrowError>
     where
         I: ArrowNativeType,
     {
-        let len = self.run_ends().len();
-        let offset = self.run_ends().offset();
-
-        let indices_len = logical_indices.len();
-
-        if indices_len == 0 {
-            return Ok(vec![]);
-        }
-
-        // `ordered_indices` store index into `logical_indices` and can be used
-        // to iterate `logical_indices` in sorted order.
-        let mut ordered_indices: Vec<usize> = (0..indices_len).collect();
-
-        // Instead of sorting `logical_indices` directly, sort the `ordered_indices`
-        // whose values are index of `logical_indices`
-        ordered_indices.sort_unstable_by(|lhs, rhs| {
-            logical_indices[*lhs]
-                .partial_cmp(&logical_indices[*rhs])
-                .unwrap()
-        });
-
-        // Return early if all the logical indices cannot be converted to physical indices.
-        let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
-        if largest_logical_index >= len {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.",
-            )));
-        }
-
-        // Skip some physical indices based on offset.
-        let skip_value = self.get_start_physical_index();
-
-        let mut physical_indices = vec![0; indices_len];
-
-        let mut ordered_index = 0_usize;
-        for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value)
-        {
-            // Get the run end index (relative to offset) of current physical index
-            let run_end_value = run_end.as_usize() - offset;
-
-            // All the `logical_indices` that are less than current run end index
-            // belongs to current physical index.
-            while ordered_index < indices_len
-                && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
-            {
-                physical_indices[ordered_indices[ordered_index]] = physical_index;
-                ordered_index += 1;
-            }
-        }
-
-        // If there are input values >= run_ends.last_value then we'll not be able to convert
-        // all logical indices to physical indices.
-        if ordered_index < logical_indices.len() {
-            let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize();
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.",
-            )));
-        }
-        Ok(physical_indices)
+        self.run_ends()
+            .get_physical_indices(logical_indices)
+            .map_err(|index| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Logical index {} is out of bounds for RunArray of length {}",
+                    index.as_usize(),
+                    self.len()
+                ))
+            })
     }
 
     /// Returns a zero-copy slice of this array with the indicated offset and length.
+    ///
+    /// # Panics
+    ///
+    /// - Specified slice (`offset` + `length`) exceeds existing length
     pub fn slice(&self, offset: usize, length: usize) -> Self {
         Self {
             data_type: self.data_type.clone(),
@@ -259,7 +226,9 @@ impl<R: RunEndIndexType> From<ArrayData> for RunArray<R> {
         match data.data_type() {
             DataType::RunEndEncoded(_, _) => {}
             _ => {
-                panic!("Invalid data type for RunArray. The data type should be DataType::RunEndEncoded");
+                panic!(
+                    "Invalid data type for RunArray. The data type should be DataType::RunEndEncoded"
+                );
             }
         }
 
@@ -301,6 +270,8 @@ impl<R: RunEndIndexType> From<RunArray<R>> for ArrayData {
     }
 }
 
+impl<T: RunEndIndexType> super::private::Sealed for RunArray<T> {}
+
 impl<T: RunEndIndexType> Array for RunArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
@@ -560,6 +531,8 @@ impl<'a, R: RunEndIndexType, V> TypedRunArray<'a, R, V> {
     }
 }
 
+impl<R: RunEndIndexType, V: Sync> super::private::Sealed for TypedRunArray<'_, R, V> {}
+
 impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
     fn as_any(&self) -> &dyn Any {
         self.run_array
@@ -641,7 +614,7 @@ where
 
     unsafe fn value_unchecked(&self, logical_index: usize) -> Self::Item {
         let physical_index = self.run_array.get_physical_index(logical_index);
-        self.values().value_unchecked(physical_index)
+        unsafe { self.values().value_unchecked(physical_index) }
     }
 }
 
@@ -662,9 +635,9 @@ where
 
 #[cfg(test)]
 mod tests {
+    use rand::Rng;
     use rand::rng;
     use rand::seq::SliceRandom;
-    use rand::Rng;
 
     use super::*;
     use crate::builder::PrimitiveRunBuilder;
@@ -1169,4 +1142,35 @@ mod tests {
 
         assert_eq!(array_i16_1, array_i16_2);
     }
+
+    #[test]
+    fn test_run_array_values_slice() {
+        // Logical contents: 0, 0, 1, 1, 1, followed by fifteen 2s (run ends 2, 5, 20)
+        let run_ends: PrimitiveArray<Int32Type> = vec![2, 5, 20].into();
+        let values: PrimitiveArray<Int32Type> = vec![0, 1, 2].into();
+        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
+
+        let slice = array.slice(1, 4); // logical contents: 0, 1, 1, 1
+        // logical indices covered: 1, 2, 3, 4
+        // physical (run) indices:  0, 1, 1, 1
+        // values[0] is 0
+        // values[1] is 1
+        // so the values slice should be [0, 1]
+        assert_eq!(slice.get_start_physical_index(), 0);
+        assert_eq!(slice.get_end_physical_index(), 1);
+
+        let values_slice = slice.values_slice();
+        let values_slice = values_slice.as_primitive::<Int32Type>();
+        assert_eq!(values_slice.values(), &[0, 1]);
+
+        let slice2 = array.slice(2, 3); // logical contents: 1, 1, 1
+        // logical indices covered: 2, 3, 4
+        // physical (run) indices:  1, 1, 1 (a single run, so one value is expected)
+        assert_eq!(slice2.get_start_physical_index(), 1);
+        assert_eq!(slice2.get_end_physical_index(), 1);
+
+        let values_slice2 = slice2.values_slice();
+        let values_slice2 = values_slice2.as_primitive::<Int32Type>();
+        assert_eq!(values_slice2.values(), &[1]);
+    }
 }
diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs
index ed70e5744fff..80f3153eceed 100644
--- a/arrow-array/src/array/string_array.rs
+++ b/arrow-array/src/array/string_array.rs
@@ -48,7 +48,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
         &'a self,
         indexes: impl Iterator<Item = Option<usize>> + 'a,
     ) -> impl Iterator<Item = Option<&'a str>> {
-        indexes.map(|opt_index| opt_index.map(|index| self.value_unchecked(index)))
+        indexes.map(|opt_index| opt_index.map(|index| unsafe { self.value_unchecked(index) }))
     }
 
     /// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning
@@ -156,9 +156,9 @@ pub type LargeStringArray = GenericStringArray<i64>;
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::Array;
     use crate::builder::{ListBuilder, PrimitiveBuilder, StringBuilder};
     use crate::types::UInt8Type;
-    use crate::Array;
     use arrow_buffer::Buffer;
     use arrow_data::ArrayData;
     use arrow_schema::{DataType, Field};
diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs
index fbc34ef0c85b..a738a733218a 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::array::print_long_array;
-use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
+use crate::{Array, ArrayRef, RecordBatch, make_array, new_null_array};
 use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
@@ -347,25 +347,26 @@ impl StructArray {
 
 impl From<ArrayData> for StructArray {
     fn from(data: ArrayData) -> Self {
-        let parent_offset = data.offset();
-        let parent_len = data.len();
+        let (data_type, len, nulls, offset, _buffers, child_data) = data.into_parts();
 
-        let fields = data
-            .child_data()
-            .iter()
+        let parent_offset = offset;
+        let parent_len = len;
+
+        let fields = child_data
+            .into_iter()
             .map(|cd| {
                 if parent_offset != 0 || parent_len != cd.len() {
                     make_array(cd.slice(parent_offset, parent_len))
                 } else {
-                    make_array(cd.clone())
+                    make_array(cd)
                 }
             })
             .collect();
 
         Self {
-            len: data.len(),
-            data_type: data.data_type().clone(),
-            nulls: data.nulls().cloned(),
+            len,
+            data_type,
+            nulls,
             fields,
         }
     }
@@ -401,6 +402,8 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
     }
 }
 
+impl super::private::Sealed for StructArray {}
+
 impl Array for StructArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -922,7 +925,10 @@ mod tests {
                 (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
             ))),
         );
-        assert_eq!(format!("{arr:?}"), "StructArray\n-- validity:\n[\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  ...10 elements...,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,\n  ...10 elements...,\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,\n]\n]")
+        assert_eq!(
+            format!("{arr:?}"),
+            "StructArray\n-- validity:\n[\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  ...10 elements...,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n  valid,\n  null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n  0,\n  1,\n  2,\n  3,\n  4,\n  5,\n  6,\n  7,\n  8,\n  9,\n  ...10 elements...,\n  20,\n  21,\n  22,\n  23,\n  24,\n  25,\n  26,\n  27,\n  28,\n  29,\n]\n]"
+        )
     }
 
     #[test]
diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs
index 2afe9af47327..e08542bc8638 100644
--- a/arrow-array/src/array/union_array.rs
+++ b/arrow-array/src/array/union_array.rs
@@ -16,7 +16,7 @@
 // under the License.
 #![allow(clippy::enum_clike_unportable_variant)]
 
-use crate::{make_array, Array, ArrayRef};
+use crate::{Array, ArrayRef, make_array};
 use arrow_buffer::bit_chunk_iterator::{BitChunkIterator, BitChunks};
 use arrow_buffer::buffer::NullBuffer;
 use arrow_buffer::{BooleanBuffer, MutableBuffer, ScalarBuffer};
@@ -137,11 +137,11 @@ impl UnionArray {
     ///
     /// # Safety
     ///
-    /// The `type_ids` values should be positive and must match one of the type ids of the fields provided in `fields`.
+    /// The `type_ids` values should be non-negative and must match one of the type ids of the fields provided in `fields`.
     /// These values are used to index into the `children` arrays.
     ///
     /// The `offsets` is provided in the case of a dense union, sparse unions should use `None`.
-    /// If provided the `offsets` values should be positive and must be less than the length of the
+    /// If provided the `offsets` values should be non-negative and must be less than the length of the
     /// corresponding array.
     ///
     /// In both cases above we use signed integer types to maintain compatibility with other
@@ -165,8 +165,8 @@ impl UnionArray {
             .len(len);
 
         let data = match offsets {
-            Some(offsets) => builder.add_buffer(offsets.into_inner()).build_unchecked(),
-            None => builder.build_unchecked(),
+            Some(offsets) => unsafe { builder.add_buffer(offsets.into_inner()).build_unchecked() },
+            None => unsafe { builder.build_unchecked() },
         };
         Self::from(data)
     }
@@ -219,7 +219,7 @@ impl UnionArray {
                 _ => {
                     return Err(ArrowError::InvalidArgumentError(
                         "Type Ids values must match one of the field type ids".to_owned(),
-                    ))
+                    ));
                 }
             }
         }
@@ -230,7 +230,7 @@ impl UnionArray {
             if iter.any(|(type_id, &offset)| offset < 0 || offset >= array_lens[*type_id as usize])
             {
                 return Err(ArrowError::InvalidArgumentError(
-                    "Offsets must be positive and within the length of the Array".to_owned(),
+                    "Offsets must be non-negative and within the length of the Array".to_owned(),
                 ));
             }
         }
@@ -287,6 +287,10 @@ impl UnionArray {
     }
 
     /// Returns the array's value at index `i`.
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
     /// # Panics
     /// Panics if index `i` is out of bounds
     pub fn value(&self, i: usize) -> ArrayRef {
@@ -307,8 +311,16 @@ impl UnionArray {
         }
     }
 
+    /// Returns the [`UnionFields`] stored in this array's [`DataType::Union`] data type.
+    pub fn fields(&self) -> &UnionFields {
+        match self.data_type() {
+            DataType::Union(fields, _) => fields,
+            _ => unreachable!("Union array's data type is not a union!"),
+        }
+    }
+
     /// Returns whether the `UnionArray` is dense (or sparse if `false`).
-    fn is_dense(&self) -> bool {
+    pub fn is_dense(&self) -> bool {
         match self.data_type() {
             DataType::Union(_, mode) => mode == &UnionMode::Dense,
             _ => unreachable!("Union array's data type is not a union!"),
@@ -726,6 +738,8 @@ impl From<UnionArray> for ArrayData {
     }
 }
 
+impl super::private::Sealed for UnionArray {}
+
 impl Array for UnionArray {
     fn as_any(&self) -> &dyn Any {
         self
@@ -781,13 +795,18 @@ impl Array for UnionArray {
         };
 
         if fields.len() <= 1 {
-            return self
-                .fields
-                .iter()
-                .flatten()
-                .map(Array::logical_nulls)
-                .next()
-                .flatten();
+            return self.fields.iter().find_map(|field_opt| {
+                field_opt
+                    .as_ref()
+                    .and_then(|field| field.logical_nulls())
+                    .map(|logical_nulls| {
+                        if self.is_dense() {
+                            self.gather_nulls(vec![(0, logical_nulls)]).into()
+                        } else {
+                            logical_nulls
+                        }
+                    })
+            });
         }
 
         let logical_nulls = self.fields_logical_nulls();
@@ -940,7 +959,7 @@ impl std::fmt::Debug for UnionArray {
 
         if let Some(offsets) = &self.offsets {
             writeln!(f, "-- offsets buffer:")?;
-            writeln!(f, "{:?}", offsets)?;
+            writeln!(f, "{offsets:?}")?;
         }
 
         let fields = match self.data_type() {
@@ -1074,6 +1093,30 @@ mod tests {
         }
     }
 
+    #[test]
+    fn slice_union_array_single_field() {
+        // Dense union with a single field "a"
+        // Logical contents: [1, null, 3, null, 4]
+        let union_array = {
+            let mut builder = UnionBuilder::new_dense();
+            builder.append::<Int32Type>("a", 1).unwrap();
+            builder.append_null::<Int32Type>("a").unwrap();
+            builder.append::<Int32Type>("a", 3).unwrap();
+            builder.append_null::<Int32Type>("a").unwrap();
+            builder.append::<Int32Type>("a", 4).unwrap();
+            builder.build().unwrap()
+        };
+
+        // Slicing 3 elements starting at offset 1 leaves [null, 3, null]
+        let union_slice = union_array.slice(1, 3);
+        let logical_nulls = union_slice.logical_nulls().unwrap();
+
+        assert_eq!(logical_nulls.len(), 3);
+        assert!(logical_nulls.is_null(0));
+        assert!(logical_nulls.is_valid(1));
+        assert!(logical_nulls.is_null(2));
+    }
+
     #[test]
     #[cfg_attr(miri, ignore)]
     fn test_dense_i32_large() {
@@ -1641,14 +1684,15 @@ mod tests {
     #[test]
     fn test_custom_type_ids() {
         let data_type = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![8, 4, 9],
                 vec![
                     Field::new("strings", DataType::Utf8, false),
                     Field::new("integers", DataType::Int32, false),
                     Field::new("floats", DataType::Float64, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         );
 
@@ -1755,14 +1799,15 @@ mod tests {
     fn into_parts_custom_type_ids() {
         let set_field_type_ids: [i8; 3] = [8, 4, 9];
         let data_type = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 set_field_type_ids,
                 [
                     Field::new("strings", DataType::Utf8, false),
                     Field::new("integers", DataType::Int32, false),
                     Field::new("floats", DataType::Float64, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         );
         let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
@@ -1795,13 +1840,14 @@ mod tests {
 
     #[test]
     fn test_invalid() {
-        let fields = UnionFields::new(
+        let fields = UnionFields::try_new(
             [3, 2],
             [
                 Field::new("a", DataType::Utf8, false),
                 Field::new("b", DataType::Utf8, false),
             ],
-        );
+        )
+        .unwrap();
         let children = vec![
             Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
             Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
@@ -1844,7 +1890,7 @@ mod tests {
 
         assert_eq!(
             err.to_string(),
-            "Invalid argument error: Offsets must be positive and within the length of the Array"
+            "Invalid argument error: Offsets must be non-negative and within the length of the Array"
         );
 
         let offsets = Some(vec![0, 1].into());
@@ -1871,13 +1917,14 @@ mod tests {
 
         assert_eq!(array.logical_nulls(), None);
 
-        let fields = UnionFields::new(
+        let fields = UnionFields::try_new(
             [1, 3],
             [
                 Field::new("a", DataType::Int8, false), // non nullable
                 Field::new("b", DataType::Int8, false), // non nullable
             ],
-        );
+        )
+        .unwrap();
         let array = UnionArray::try_new(
             fields,
             vec![1].into(),
@@ -1891,13 +1938,14 @@ mod tests {
 
         assert_eq!(array.logical_nulls(), None);
 
-        let nullable_fields = UnionFields::new(
+        let nullable_fields = UnionFields::try_new(
             [1, 3],
             [
                 Field::new("a", DataType::Int8, true), // nullable but without nulls
                 Field::new("b", DataType::Int8, true), // nullable but without nulls
             ],
-        );
+        )
+        .unwrap();
         let array = UnionArray::try_new(
             nullable_fields.clone(),
             vec![1, 1].into(),
diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs
index a0bd5745d21d..275aa8c9e56a 100644
--- a/arrow-array/src/builder/boolean_builder.rs
+++ b/arrow-array/src/builder/boolean_builder.rs
@@ -234,9 +234,12 @@ impl ArrayBuilder for BooleanBuilder {
 impl Extend<Option<bool>> for BooleanBuilder {
     #[inline]
     fn extend<T: IntoIterator<Item = Option<bool>>>(&mut self, iter: T) {
-        for v in iter {
-            self.append_option(v)
-        }
+        let buffered = iter.into_iter().collect::<Vec<_>>();
+        let array = unsafe {
+            // SAFETY: std::vec::IntoIter implements TrustedLen
+            BooleanArray::from_trusted_len_iter(buffered.into_iter())
+        };
+        self.append_array(&array)
     }
 }
 
diff --git a/arrow-array/src/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs
index c0cabb1f7353..d183aae86551 100644
--- a/arrow-array/src/builder/buffer_builder.rs
+++ b/arrow-array/src/builder/buffer_builder.rs
@@ -45,6 +45,10 @@ pub type Float32BufferBuilder = BufferBuilder<f32>;
 /// Buffer builder for 64-bit floating point type.
 pub type Float64BufferBuilder = BufferBuilder<f64>;
 
+/// Buffer builder for 32-bit decimal type.
+pub type Decimal32BufferBuilder = BufferBuilder<<Decimal32Type as ArrowPrimitiveType>::Native>;
+/// Buffer builder for 64-bit decimal type.
+pub type Decimal64BufferBuilder = BufferBuilder<<Decimal64Type as ArrowPrimitiveType>::Native>;
 /// Buffer builder for 128-bit decimal type.
 pub type Decimal128BufferBuilder = BufferBuilder<<Decimal128Type as ArrowPrimitiveType>::Native>;
 /// Buffer builder for 256-bit decimal type.
@@ -106,8 +110,8 @@ pub type DurationNanosecondBufferBuilder =
 
 #[cfg(test)]
 mod tests {
-    use crate::builder::{ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder};
     use crate::Array;
+    use crate::builder::{ArrayBuilder, Int8Builder, Int32BufferBuilder, UInt8BufferBuilder};
 
     #[test]
     fn test_builder_i32_empty() {
diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs
index b5f268917c92..f6b4c33d9454 100644
--- a/arrow-array/src/builder/fixed_size_binary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_builder.rs
@@ -15,7 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
+use crate::array::Array;
+use crate::builder::ArrayBuilder;
 use crate::{ArrayRef, FixedSizeBinaryArray};
 use arrow_buffer::Buffer;
 use arrow_buffer::NullBufferBuilder;
@@ -42,7 +43,7 @@ use std::sync::Arc;
 /// ```
 #[derive(Debug)]
 pub struct FixedSizeBinaryBuilder {
-    values_builder: UInt8BufferBuilder,
+    values_builder: Vec<u8>,
     null_buffer_builder: NullBufferBuilder,
     value_length: i32,
 }
@@ -61,7 +62,7 @@ impl FixedSizeBinaryBuilder {
             "value length ({byte_width}) of the array must >= 0"
         );
         Self {
-            values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
+            values_builder: Vec::with_capacity(capacity * byte_width as usize),
             null_buffer_builder: NullBufferBuilder::new(capacity),
             value_length: byte_width,
         }
@@ -79,7 +80,7 @@ impl FixedSizeBinaryBuilder {
                     .to_string(),
             ))
         } else {
-            self.values_builder.append_slice(value.as_ref());
+            self.values_builder.extend_from_slice(value.as_ref());
             self.null_buffer_builder.append_non_null();
             Ok(())
         }
@@ -89,7 +90,7 @@ impl FixedSizeBinaryBuilder {
     #[inline]
     pub fn append_null(&mut self) {
         self.values_builder
-            .append_slice(&vec![0u8; self.value_length as usize][..]);
+            .extend(std::iter::repeat_n(0u8, self.value_length as usize));
         self.null_buffer_builder.append_null();
     }
 
@@ -97,10 +98,27 @@ impl FixedSizeBinaryBuilder {
     #[inline]
     pub fn append_nulls(&mut self, n: usize) {
         self.values_builder
-            .append_slice(&vec![0u8; self.value_length as usize * n][..]);
+            .extend(std::iter::repeat_n(0u8, self.value_length as usize * n));
         self.null_buffer_builder.append_n_nulls(n);
     }
 
+    /// Appends all elements in array into the builder.
+    pub fn append_array(&mut self, array: &FixedSizeBinaryArray) -> Result<(), ArrowError> {
+        if self.value_length != array.value_length() {
+            return Err(ArrowError::InvalidArgumentError(
+                "Cannot append FixedSizeBinaryArray with different value length".to_string(),
+            ));
+        }
+        let buffer = array.value_data();
+        self.values_builder.extend_from_slice(buffer);
+        if let Some(validity) = array.nulls() {
+            self.null_buffer_builder.append_buffer(validity);
+        } else {
+            self.null_buffer_builder.append_n_non_nulls(array.len());
+        }
+        Ok(())
+    }
+
     /// Returns the current values buffer as a slice
     pub fn values_slice(&self) -> &[u8] {
         self.values_builder.as_slice()
@@ -110,7 +128,7 @@ impl FixedSizeBinaryBuilder {
     pub fn finish(&mut self) -> FixedSizeBinaryArray {
         let array_length = self.len();
         let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length))
-            .add_buffer(self.values_builder.finish())
+            .add_buffer(std::mem::take(&mut self.values_builder).into())
             .nulls(self.null_buffer_builder.finish())
             .len(array_length);
         let array_data = unsafe { array_data_builder.build_unchecked() };
@@ -270,4 +288,45 @@ mod tests {
     fn test_fixed_size_binary_builder_invalid_value_length() {
         let _ = FixedSizeBinaryBuilder::with_capacity(15, -1);
     }
+
+    #[test]
+    fn test_fixed_size_binary_builder_append_array() {
+        let mut other_builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
+        other_builder.append_value(b"hello").unwrap();
+        other_builder.append_null();
+        other_builder.append_value(b"arrow").unwrap();
+        let other_array = other_builder.finish();
+
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(6, 5);
+        builder.append_array(&other_array).unwrap();
+        // Append again to test if breaks when appending multiple times
+        builder.append_array(&other_array).unwrap();
+        let array = builder.finish();
+
+        assert_eq!(array.value_length(), other_array.value_length());
+        assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
+        assert_eq!(6, array.len());
+        assert_eq!(2, array.null_count());
+        for i in 0..6 {
+            assert_eq!(i * 5, array.value_offset(i as usize));
+        }
+
+        assert_eq!(b"hello", array.value(0));
+        assert!(array.is_null(1));
+        assert_eq!(b"arrow", array.value(2));
+
+        assert_eq!(b"hello", array.value(3));
+        assert!(array.is_null(4));
+        assert_eq!(b"arrow", array.value(5));
+    }
+
+    #[test]
+    #[should_panic(expected = "Cannot append FixedSizeBinaryArray with different value length")]
+    fn test_fixed_size_binary_builder_append_array_invalid_value_length() {
+        let mut other_builder = FixedSizeBinaryBuilder::with_capacity(3, 4);
+        other_builder.append_value(b"test").unwrap();
+        let other_array = other_builder.finish();
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);
+        builder.append_array(&other_array).unwrap();
+    }
 }
diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
index f3460353b164..fa3066b7e11e 100644
--- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
@@ -17,11 +17,12 @@
 
 use crate::builder::{ArrayBuilder, FixedSizeBinaryBuilder, PrimitiveBuilder};
 use crate::types::ArrowDictionaryKeyType;
-use crate::{Array, ArrayRef, DictionaryArray};
+use crate::{Array, ArrayRef, DictionaryArray, PrimitiveArray};
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::DataType::FixedSizeBinary;
 use arrow_schema::{ArrowError, DataType};
 use hashbrown::HashTable;
+use num_traits::NumCast;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -100,6 +101,71 @@ where
             byte_width,
         }
     }
+
+    /// Creates a new `FixedSizeBinaryDictionaryBuilder` from the existing builder with the same
+    /// keys and values, but with a new data type for the keys.
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::builder::FixedSizeBinaryDictionaryBuilder;
+    /// # use arrow_array::types::{UInt8Type, UInt16Type, UInt64Type};
+    /// # use arrow_array::UInt16Array;
+    /// # use arrow_schema::ArrowError;
+    ///
+    /// let mut u8_keyed_builder = FixedSizeBinaryDictionaryBuilder::<UInt8Type>::new(2);
+    /// // appending too many values causes the dictionary to overflow
+    /// for i in 0..=255 {
+    ///     u8_keyed_builder.append_value(vec![0, i]);
+    /// }
+    /// let result = u8_keyed_builder.append(vec![1, 0]);
+    /// assert!(matches!(result, Err(ArrowError::DictionaryKeyOverflowError{})));
+    ///
+    /// // we need to upgrade to a larger key type
+    /// let mut u16_keyed_builder = FixedSizeBinaryDictionaryBuilder::<UInt16Type>::try_new_from_builder(u8_keyed_builder).unwrap();
+    /// let dictionary_array = u16_keyed_builder.finish();
+    /// let keys = dictionary_array.keys();
+    ///
+    /// assert_eq!(keys, &UInt16Array::from_iter(0..256));
+    /// ```
+    pub fn try_new_from_builder<K2>(
+        mut source: FixedSizeBinaryDictionaryBuilder<K2>,
+    ) -> Result<Self, ArrowError>
+    where
+        K::Native: NumCast,
+        K2: ArrowDictionaryKeyType,
+        K2::Native: NumCast,
+    {
+        let state = source.state;
+        let dedup = source.dedup;
+        let values_builder = source.values_builder;
+        let byte_width = source.byte_width;
+
+        let source_keys = source.keys_builder.finish();
+        let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
+            num_traits::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Can't cast dictionary keys from source type {:?} to type {:?}",
+                    K2::DATA_TYPE,
+                    K::DATA_TYPE
+                ))
+            })
+        })?;
+
+        // Drop source_keys first: it shares the underlying null buffer with new_keys, and
+        // new_keys.into_builder() below requires new_keys to be the only reference holder
+        // to that buffer.
+        drop(source_keys);
+
+        Ok(Self {
+            state,
+            dedup,
+            keys_builder: new_keys
+                .into_builder()
+                .expect("underlying buffer has no references"),
+            values_builder,
+            byte_width,
+        })
+    }
 }
 
 impl<K> ArrayBuilder for FixedSizeBinaryDictionaryBuilder<K>
@@ -186,6 +252,28 @@ where
         }
     }
 
+    /// Append a value multiple times to the array.
+    /// This is the same as [`Self::append`] but appends the same value `count` times with a single key lookup.
+    ///
+    /// Returns an error if `value`'s length differs from the byte width, or if the new index would overflow the key type.
+    pub fn append_n(
+        &mut self,
+        value: impl AsRef<[u8]>,
+        count: usize,
+    ) -> Result<K::Native, ArrowError> {
+        if self.byte_width != value.as_ref().len() as i32 {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "Invalid input length passed to FixedSizeBinaryBuilder. Expected {} got {}",
+                self.byte_width,
+                value.as_ref().len()
+            )))
+        } else {
+            let key = self.get_or_insert_key(value)?;
+            self.keys_builder.append_value_n(key, count);
+            Ok(key)
+        }
+    }
+
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
@@ -245,6 +333,41 @@ where
 
         DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
+
+    /// Builds the `DictionaryArray` without resetting the values builder or
+    /// the internal de-duplication map.
+    ///
+    /// The advantage of doing this is that the values will represent the entire
+    /// set of what has been built so-far by this builder and ensures
+    /// consistency in the assignment of keys to values across multiple calls
+    /// to `finish_preserve_values`. This enables ipc writers to efficiently
+    /// emit delta dictionaries.
+    ///
+    /// The downside to this is that building the record requires creating a
+    /// copy of the values, which can become slowly more expensive if the
+    /// dictionary grows.
+    ///
+    /// Additionally, if record batches from multiple different dictionary
+    /// builders for the same column are fed into a single ipc writer, beware
+    /// that entire dictionaries are likely to be re-sent frequently even when
+    /// the majority of the values are not used by the current record batch.
+    pub fn finish_preserve_values(&mut self) -> DictionaryArray<K> {
+        let values = self.values_builder.finish_cloned();
+        let keys = self.keys_builder.finish();
+
+        let data_type = DataType::Dictionary(
+            Box::new(K::DATA_TYPE),
+            Box::new(FixedSizeBinary(self.byte_width)),
+        );
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
+    }
 }
 
 fn get_bytes(values: &FixedSizeBinaryBuilder, byte_width: i32, idx: usize) -> &[u8] {
@@ -258,8 +381,8 @@ fn get_bytes(values: &FixedSizeBinaryBuilder, byte_width: i32, idx: usize) -> &[
 mod tests {
     use super::*;
 
-    use crate::types::Int8Type;
-    use crate::{FixedSizeBinaryArray, Int8Array};
+    use crate::types::{Int8Type, Int16Type, Int32Type, UInt8Type, UInt16Type};
+    use crate::{ArrowPrimitiveType, FixedSizeBinaryArray, Int8Array};
 
     #[test]
     fn test_fixed_size_dictionary_builder() {
@@ -300,13 +423,57 @@ mod tests {
         assert_eq!(ava.value(1), values[1].as_bytes());
     }
 
+    #[test]
+    fn test_fixed_size_dictionary_builder_append_n() {
+        let values = ["abc", "def"];
+        let mut b = FixedSizeBinaryDictionaryBuilder::<Int8Type>::new(3);
+        assert_eq!(b.append_n(values[0], 2).unwrap(), 0);
+        assert_eq!(b.append_n(values[1], 3).unwrap(), 1);
+        assert_eq!(b.append_n(values[0], 2).unwrap(), 0);
+        let array = b.finish();
+
+        assert_eq!(
+            array.keys(),
+            &Int8Array::from(vec![
+                Some(0),
+                Some(0),
+                Some(1),
+                Some(1),
+                Some(1),
+                Some(0),
+                Some(0),
+            ]),
+        );
+
+        // Values are polymorphic and so require a downcast.
+        let ava = array
+            .values()
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+
+        assert_eq!(ava.value(0), values[0].as_bytes());
+        assert_eq!(ava.value(1), values[1].as_bytes());
+    }
+
     #[test]
     fn test_fixed_size_dictionary_builder_wrong_size() {
         let mut b = FixedSizeBinaryDictionaryBuilder::<Int8Type>::new(3);
         let err = b.append(b"too long").unwrap_err().to_string();
-        assert_eq!(err, "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 8");
+        assert_eq!(
+            err,
+            "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 8"
+        );
         let err = b.append("").unwrap_err().to_string();
-        assert_eq!(err, "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 0");
+        assert_eq!(
+            err,
+            "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 0"
+        );
+        let err = b.append_n("a", 3).unwrap_err().to_string();
+        assert_eq!(
+            err,
+            "Invalid argument error: Invalid input length passed to FixedSizeBinaryBuilder. Expected 3 got 1"
+        );
     }
 
     #[test]
@@ -368,4 +535,136 @@ mod tests {
         assert_eq!(ava2.value(1), values[1].as_bytes());
         assert_eq!(ava2.value(2), values[2].as_bytes());
     }
+
+    fn _test_try_new_from_builder_generic_for_key_types<K1, K2>(values: Vec<[u8; 3]>)
+    where
+        K1: ArrowDictionaryKeyType,
+        K1::Native: NumCast,
+        K2: ArrowDictionaryKeyType,
+        K2::Native: NumCast + From<u8>,
+    {
+        let mut source = FixedSizeBinaryDictionaryBuilder::<K1>::new(3);
+        source.append_value(values[0]);
+        source.append_null();
+        source.append_value(values[1]);
+        source.append_value(values[2]);
+
+        let mut result =
+            FixedSizeBinaryDictionaryBuilder::<K2>::try_new_from_builder(source).unwrap();
+        let array = result.finish();
+
+        let mut expected_keys_builder = PrimitiveBuilder::<K2>::new();
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(0u8));
+        expected_keys_builder.append_null();
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(1u8));
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(2u8));
+        let expected_keys = expected_keys_builder.finish();
+        assert_eq!(array.keys(), &expected_keys);
+
+        let av = array.values();
+        let ava = av.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
+        assert_eq!(ava.value(0), values[0]);
+        assert_eq!(ava.value(1), values[1]);
+        assert_eq!(ava.value(2), values[2]);
+    }
+
+    #[test]
+    fn test_try_new_from_builder() {
+        let values = vec![[1, 2, 3], [5, 6, 7], [6, 7, 8]];
+        // test cast to bigger size unsigned
+        _test_try_new_from_builder_generic_for_key_types::<UInt8Type, UInt16Type>(values.clone());
+        // test cast going to smaller size unsigned
+        _test_try_new_from_builder_generic_for_key_types::<UInt16Type, UInt8Type>(values.clone());
+        // test cast going to bigger size signed
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, Int16Type>(values.clone());
+        // test cast going to smaller size signed
+        _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type>(values.clone());
+        // test casts between signed and unsigned key types for different size changes
+        _test_try_new_from_builder_generic_for_key_types::<UInt8Type, Int16Type>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt8Type>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt16Type>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int32Type, UInt16Type>(values.clone());
+    }
+
+    #[test]
+    fn test_try_new_from_builder_cast_fails() {
+        let mut source_builder = FixedSizeBinaryDictionaryBuilder::<UInt16Type>::new(2);
+        for i in 0u16..257u16 {
+            source_builder.append_value(vec![(i >> 8) as u8, i as u8]);
+        }
+
+        // there should be too many values that we can't downcast to the underlying type
+        // we have keys that wouldn't fit into UInt8Type
+        let result =
+            FixedSizeBinaryDictionaryBuilder::<UInt8Type>::try_new_from_builder(source_builder);
+        assert!(result.is_err());
+        if let Err(e) = result {
+            assert!(matches!(e, ArrowError::CastError(_)));
+            assert_eq!(
+                e.to_string(),
+                "Cast error: Can't cast dictionary keys from source type UInt16 to type UInt8"
+            );
+        }
+    }
+
+    #[test]
+    fn test_finish_preserve_values() {
+        // Create the first dictionary
+        let mut builder = FixedSizeBinaryDictionaryBuilder::<Int32Type>::new(3);
+        builder.append_value("aaa");
+        builder.append_value("bbb");
+        builder.append_value("ccc");
+        let dict = builder.finish_preserve_values();
+        assert_eq!(dict.keys().values(), &[0, 1, 2]);
+        let values = dict
+            .downcast_dict::<FixedSizeBinaryArray>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(
+            values,
+            vec![
+                Some("aaa".as_bytes()),
+                Some("bbb".as_bytes()),
+                Some("ccc".as_bytes())
+            ]
+        );
+
+        // Create a new dictionary
+        builder.append_value("ddd");
+        builder.append_value("eee");
+        let dict2 = builder.finish_preserve_values();
+
+        // Make sure the keys are assigned after the old ones and we have the
+        // right values
+        assert_eq!(dict2.keys().values(), &[3, 4]);
+        let values = dict2
+            .downcast_dict::<FixedSizeBinaryArray>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(values, [Some("ddd".as_bytes()), Some("eee".as_bytes())]);
+
+        // Check that we have all of the expected values
+        let all_values = dict2
+            .values()
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(
+            all_values,
+            [
+                Some("aaa".as_bytes()),
+                Some("bbb".as_bytes()),
+                Some("ccc".as_bytes()),
+                Some("ddd".as_bytes()),
+                Some("eee".as_bytes())
+            ]
+        );
+    }
 }
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs
index 5c142b277d14..6eb48fc0527c 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -172,7 +172,8 @@ where
         let nulls = self.null_buffer_builder.finish();
 
         assert_eq!(
-            values.len(), len * self.list_len as usize,
+            values.len(),
+            len * self.list_len as usize,
             "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
             values.len(),
             self.list_len,
@@ -194,7 +195,8 @@ where
         let nulls = self.null_buffer_builder.finish_cloned();
 
         assert_eq!(
-            values.len(), len * self.list_len as usize,
+            values.len(),
+            len * self.list_len as usize,
             "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
             values.len(),
             self.list_len,
@@ -220,9 +222,9 @@ mod tests {
     use super::*;
     use arrow_schema::DataType;
 
-    use crate::builder::Int32Builder;
     use crate::Array;
     use crate::Int32Array;
+    use crate::builder::Int32Builder;
 
     fn make_list_builder(
         include_null_element: bool,
diff --git a/arrow-array/src/builder/generic_byte_run_builder.rs b/arrow-array/src/builder/generic_byte_run_builder.rs
index 0bf5658b297e..18544f7e75c9 100644
--- a/arrow-array/src/builder/generic_byte_run_builder.rs
+++ b/arrow-array/src/builder/generic_byte_run_builder.rs
@@ -19,8 +19,8 @@ use crate::types::bytes::ByteArrayNativeType;
 use std::{any::Any, sync::Arc};
 
 use crate::{
-    types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
     ArrayRef, ArrowPrimitiveType, RunArray,
+    types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type},
 };
 
 use super::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
@@ -375,11 +375,11 @@ pub type LargeBinaryRunBuilder<K> = GenericByteRunBuilder<K, LargeBinaryType>;
 mod tests {
     use super::*;
 
+    use crate::GenericByteArray;
+    use crate::Int16RunArray;
     use crate::array::Array;
     use crate::cast::AsArray;
     use crate::types::{Int16Type, Int32Type};
-    use crate::GenericByteArray;
-    use crate::Int16RunArray;
 
     fn test_bytes_run_builder<T>(values: Vec<&T::Native>)
     where
diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs
index 91ac2a483ef4..7ed4bc5826c0 100644
--- a/arrow-array/src/builder/generic_bytes_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_builder.rs
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
+use crate::builder::ArrayBuilder;
 use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
 use crate::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait};
-use arrow_buffer::NullBufferBuilder;
-use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
 use arrow_data::ArrayDataBuilder;
+use arrow_schema::ArrowError;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -29,8 +29,8 @@ use std::sync::Arc;
 /// For building strings, see docs on [`GenericStringBuilder`].
 /// For building binary, see docs on [`GenericBinaryBuilder`].
 pub struct GenericByteBuilder<T: ByteArrayType> {
-    value_builder: UInt8BufferBuilder,
-    offsets_builder: BufferBuilder<T::Offset>,
+    value_builder: Vec<u8>,
+    offsets_builder: Vec<T::Offset>,
     null_buffer_builder: NullBufferBuilder,
 }
 
@@ -47,10 +47,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     /// - `data_capacity` is the total number of bytes of data to pre-allocate
     ///   (for all items, not per item).
     pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self {
-        let mut offsets_builder = BufferBuilder::<T::Offset>::new(item_capacity + 1);
-        offsets_builder.append(T::Offset::from_usize(0).unwrap());
+        let mut offsets_builder = Vec::with_capacity(item_capacity + 1);
+        offsets_builder.push(T::Offset::from_usize(0).unwrap());
         Self {
-            value_builder: UInt8BufferBuilder::new(data_capacity),
+            value_builder: Vec::with_capacity(data_capacity),
             offsets_builder,
             null_buffer_builder: NullBufferBuilder::new(item_capacity),
         }
@@ -67,8 +67,9 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         value_buffer: MutableBuffer,
         null_buffer: Option<MutableBuffer>,
     ) -> Self {
-        let offsets_builder = BufferBuilder::<T::Offset>::new_from_buffer(offsets_buffer);
-        let value_builder = BufferBuilder::<u8>::new_from_buffer(value_buffer);
+        let offsets_builder: Vec<T::Offset> =
+            ScalarBuffer::<T::Offset>::from(offsets_buffer).into();
+        let value_builder: Vec<u8> = ScalarBuffer::<u8>::from(value_buffer).into();
 
         let null_buffer_builder = null_buffer
             .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1))
@@ -103,9 +104,10 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     /// [`BinaryArray`]: crate::BinaryArray
     #[inline]
     pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
-        self.value_builder.append_slice(value.as_ref().as_ref());
+        self.value_builder
+            .extend_from_slice(value.as_ref().as_ref());
         self.null_buffer_builder.append(true);
-        self.offsets_builder.append(self.next_offset());
+        self.offsets_builder.push(self.next_offset());
     }
 
     /// Append an `Option` value into the builder.
@@ -126,7 +128,7 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     #[inline]
     pub fn append_null(&mut self) {
         self.null_buffer_builder.append(false);
-        self.offsets_builder.append(self.next_offset());
+        self.offsets_builder.push(self.next_offset());
     }
 
     /// Appends `n` `null`s into the builder.
@@ -134,15 +136,17 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
     pub fn append_nulls(&mut self, n: usize) {
         self.null_buffer_builder.append_n_nulls(n);
         let next_offset = self.next_offset();
-        self.offsets_builder.append_n(n, next_offset);
+        self.offsets_builder
+            .extend(std::iter::repeat_n(next_offset, n));
     }
 
     /// Appends array values and null to this builder as is
     /// (this means that underlying null values are copied as is).
     #[inline]
-    pub fn append_array(&mut self, array: &GenericByteArray<T>) {
+    pub fn append_array(&mut self, array: &GenericByteArray<T>) -> Result<(), ArrowError> {
+        use num_traits::CheckedAdd;
         if array.len() == 0 {
-            return;
+            return Ok(());
         }
 
         let offsets = array.offsets();
@@ -150,25 +154,23 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         // If the offsets are contiguous, we can append them directly avoiding the need to align
         // for example, when the first appended array is not sliced (starts at offset 0)
         if self.next_offset() == offsets[0] {
-            self.offsets_builder.append_slice(&offsets[1..]);
+            self.offsets_builder.extend_from_slice(&offsets[1..]);
         } else {
             // Shifting all the offsets
             let shift: T::Offset = self.next_offset() - offsets[0];
 
-            // Creating intermediate offsets instead of pushing each offset is faster
-            // (even if we make MutableBuffer to avoid updating length on each push
-            //  and reserve the necessary capacity, it's still slower)
-            let mut intermediate = Vec::with_capacity(offsets.len() - 1);
-
-            for &offset in &offsets[1..] {
-                intermediate.push(offset + shift)
+            if shift.checked_add(&offsets[offsets.len() - 1]).is_none() {
+                return Err(ArrowError::OffsetOverflowError(
+                    shift.as_usize() + offsets[offsets.len() - 1].as_usize(),
+                ));
             }
 
-            self.offsets_builder.append_slice(&intermediate);
+            self.offsets_builder
+                .extend(offsets[1..].iter().map(|&offset| offset + shift));
         }
 
         // Append underlying values, starting from the first offset and ending at the last offset
-        self.value_builder.append_slice(
+        self.value_builder.extend_from_slice(
             &array.values().as_slice()[offsets[0].as_usize()..offsets[array.len()].as_usize()],
         );
 
@@ -177,6 +179,7 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         } else {
             self.null_buffer_builder.append_n_non_nulls(array.len());
         }
+        Ok(())
     }
 
     /// Builds the [`GenericByteArray`] and reset this builder.
@@ -184,11 +187,11 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
         let array_type = T::DATA_TYPE;
         let array_builder = ArrayDataBuilder::new(array_type)
             .len(self.len())
-            .add_buffer(self.offsets_builder.finish())
-            .add_buffer(self.value_builder.finish())
+            .add_buffer(std::mem::take(&mut self.offsets_builder).into())
+            .add_buffer(std::mem::take(&mut self.value_builder).into())
             .nulls(self.null_buffer_builder.finish());
 
-        self.offsets_builder.append(self.next_offset());
+        self.offsets_builder.push(self.next_offset());
         let array_data = unsafe { array_builder.build_unchecked() };
         GenericByteArray::from(array_data)
     }
@@ -340,11 +343,99 @@ pub type GenericStringBuilder<O> = GenericByteBuilder<GenericStringType<O>>;
 
 impl<O: OffsetSizeTrait> std::fmt::Write for GenericStringBuilder<O> {
     fn write_str(&mut self, s: &str) -> std::fmt::Result {
-        self.value_builder.append_slice(s.as_bytes());
+        self.value_builder.extend_from_slice(s.as_bytes());
         Ok(())
     }
 }
 
+/// A byte size value representing the number of bytes to allocate per string in [`GenericStringBuilder`]
+///
+/// To create a [`GenericStringBuilder`] using `.with_capacity` we are required to provide:
+/// - `item_capacity` - the row count
+/// - `data_capacity` - total string byte count
+///
+/// We will use the `AVERAGE_STRING_LENGTH` * row_count for `data_capacity`.
+///
+/// These capacities are preallocation hints used to improve performance,
+/// but consequences of passing a hint too large or too small should be negligible.
+const AVERAGE_STRING_LENGTH: usize = 16;
+/// Trait for string-like array builders
+///
+/// This trait provides unified interface for builders that append string-like data
+/// such as [`GenericStringBuilder<O>`] and [`crate::builder::StringViewBuilder`]
+pub trait StringLikeArrayBuilder: ArrayBuilder {
+    /// Returns a human-readable type name for the builder.
+    fn type_name() -> &'static str;
+
+    /// Creates a new builder with the given row capacity.
+    fn with_capacity(capacity: usize) -> Self;
+
+    /// Appends a non-null string value to the builder.
+    fn append_value(&mut self, value: &str);
+
+    /// Appends a null value to the builder.
+    fn append_null(&mut self);
+}
+
+impl<O: OffsetSizeTrait> StringLikeArrayBuilder for GenericStringBuilder<O> {
+    fn type_name() -> &'static str {
+        std::any::type_name::<Self>()
+    }
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity(capacity, capacity * AVERAGE_STRING_LENGTH)
+    }
+    fn append_value(&mut self, value: &str) {
+        Self::append_value(self, value);
+    }
+    fn append_null(&mut self) {
+        Self::append_null(self);
+    }
+}
+
+/// A byte size value representing the number of bytes to allocate per binary in [`GenericBinaryBuilder`]
+///
+/// To create a [`GenericBinaryBuilder`] using `.with_capacity` we are required to provide:
+/// - `item_capacity` - the row count
+/// - `data_capacity` - total binary byte count
+///
+/// We will use the `AVERAGE_BINARY_LENGTH` * row_count for `data_capacity`.
+///
+/// These capacities are preallocation hints used to improve performance,
+/// but consequences of passing a hint too large or too small should be negligible.
+const AVERAGE_BINARY_LENGTH: usize = 128;
+/// Trait for binary-like array builders
+///
+/// This trait provides unified interface for builders that append binary-like data
+/// such as [`GenericBinaryBuilder<O>`] and [`crate::builder::BinaryViewBuilder`]
+pub trait BinaryLikeArrayBuilder: ArrayBuilder {
+    /// Returns a human-readable type name for the builder.
+    fn type_name() -> &'static str;
+
+    /// Creates a new builder with the given row capacity.
+    fn with_capacity(capacity: usize) -> Self;
+
+    /// Appends a non-null binary value to the builder.
+    fn append_value(&mut self, value: &[u8]);
+
+    /// Appends a null value to the builder.
+    fn append_null(&mut self);
+}
+
+impl<O: OffsetSizeTrait> BinaryLikeArrayBuilder for GenericBinaryBuilder<O> {
+    fn type_name() -> &'static str {
+        std::any::type_name::<Self>()
+    }
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity(capacity, capacity * AVERAGE_BINARY_LENGTH)
+    }
+    fn append_value(&mut self, value: &[u8]) {
+        Self::append_value(self, value);
+    }
+    fn append_null(&mut self) {
+        Self::append_null(self);
+    }
+}
+
 ///  Array builder for [`GenericBinaryArray`][crate::GenericBinaryArray]
 ///
 /// Values can be appended using [`GenericByteBuilder::append_value`], and nulls with
@@ -394,7 +485,7 @@ pub type GenericBinaryBuilder<O> = GenericByteBuilder<GenericBinaryType<O>>;
 
 impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
     fn write(&mut self, bs: &[u8]) -> std::io::Result<usize> {
-        self.value_builder.append_slice(bs);
+        self.value_builder.extend_from_slice(bs);
         Ok(bs.len())
     }
 
@@ -406,8 +497,8 @@ impl<O: OffsetSizeTrait> std::io::Write for GenericBinaryBuilder<O> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::array::Array;
     use crate::GenericStringArray;
+    use crate::array::Array;
     use arrow_buffer::NullBuffer;
     use std::fmt::Write as _;
     use std::io::Write as _;
@@ -671,9 +762,9 @@ mod tests {
         let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
 
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&arr1);
-        builder.append_array(&arr2);
-        builder.append_array(&arr3);
+        builder.append_array(&arr1).unwrap();
+        builder.append_array(&arr2).unwrap();
+        builder.append_array(&arr3).unwrap();
 
         let actual = builder.finish();
         let expected = GenericStringArray::<i32>::from(input);
@@ -701,9 +792,9 @@ mod tests {
         let arr3 = GenericStringArray::<i32>::from(input[7..].to_vec());
 
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&arr1);
-        builder.append_array(&arr2);
-        builder.append_array(&arr3);
+        builder.append_array(&arr1).unwrap();
+        builder.append_array(&arr2).unwrap();
+        builder.append_array(&arr3).unwrap();
 
         let actual = builder.finish();
         let expected = GenericStringArray::<i32>::from(input);
@@ -715,7 +806,7 @@ mod tests {
     fn test_append_empty_array() {
         let arr = GenericStringArray::<i32>::from(Vec::<&str>::new());
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&arr);
+        builder.append_array(&arr).unwrap();
         let result = builder.finish();
         assert_eq!(result.len(), 0);
     }
@@ -742,7 +833,7 @@ mod tests {
         assert_ne!(sliced.offsets().last(), full_array.offsets().last());
 
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&sliced);
+        builder.append_array(&sliced).unwrap();
         let actual = builder.finish();
 
         let expected = GenericStringArray::<i32>::from(vec![None, Some("how"), None, None]);
@@ -778,8 +869,8 @@ mod tests {
         };
 
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&input_1_array_with_nulls);
-        builder.append_array(&input_2_array_with_nulls);
+        builder.append_array(&input_1_array_with_nulls).unwrap();
+        builder.append_array(&input_2_array_with_nulls).unwrap();
 
         let actual = builder.finish();
         let expected = GenericStringArray::<i32>::from(vec![
@@ -825,12 +916,27 @@ mod tests {
         let slice3 = full_array.slice(7, full_array.len() - 7);
 
         let mut builder = GenericStringBuilder::<i32>::new();
-        builder.append_array(&slice1);
-        builder.append_array(&slice2);
-        builder.append_array(&slice3);
+        builder.append_array(&slice1).unwrap();
+        builder.append_array(&slice2).unwrap();
+        builder.append_array(&slice3).unwrap();
 
         let actual = builder.finish();
 
         assert_eq!(actual, full_array);
     }
+
+    #[test]
+    fn test_append_array_offset_overflow_precise() {
+        let mut builder = GenericStringBuilder::<i32>::new();
+
+        let initial_string = "x".repeat(i32::MAX as usize - 100);
+        builder.append_value(&initial_string);
+
+        let overflow_string = "y".repeat(200);
+        let overflow_array = GenericStringArray::<i32>::from(vec![overflow_string.as_str()]);
+
+        let result = builder.append_array(&overflow_array);
+
+        assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_))));
+    }
 }
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index 3713a411232f..35c7bfced1fd 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -23,7 +23,7 @@ use crate::{
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType};
 use hashbrown::HashTable;
-use num::NumCast;
+use num_traits::NumCast;
 use std::any::Any;
 use std::sync::Arc;
 
@@ -197,7 +197,7 @@ where
 
         let source_keys = source.keys_builder.finish();
         let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
-            num::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
+            num_traits::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
                 ArrowError::CastError(format!(
                     "Can't cast dictionary keys from source type {:?} to type {:?}",
                     K2::DATA_TYPE,
@@ -463,6 +463,38 @@ where
         DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
 
+    /// Builds the `DictionaryArray` without resetting the values builder or
+    /// the internal de-duplication map.
+    ///
+    /// The advantage of doing this is that the values represent the entire
+    /// set of what has been built so far by this builder, which ensures
+    /// consistency in the assignment of keys to values across multiple calls
+    /// to `finish_preserve_values`. This enables IPC writers to efficiently
+    /// emit delta dictionaries.
+    ///
+    /// The downside is that every call copies the accumulated values
+    /// (`finish_cloned`), which becomes progressively more expensive as the
+    /// dictionary grows.
+    ///
+    /// Additionally, if record batches from multiple different dictionary
+    /// builders for the same column are fed into a single ipc writer, beware
+    /// that entire dictionaries are likely to be re-sent frequently even when
+    /// the majority of the values are not used by the current record batch.
+    pub fn finish_preserve_values(&mut self) -> DictionaryArray<K> {
+        let values = self.values_builder.finish_cloned();
+        let keys = self.keys_builder.finish();
+
+        let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
+    }
+
     /// Returns the current null buffer as a slice
     pub fn validity_slice(&self) -> Option<&[u8]> {
         self.keys_builder.validity_slice()
@@ -571,7 +603,7 @@ mod tests {
 
     use crate::array::Int8Array;
     use crate::cast::AsArray;
-    use crate::types::{Int16Type, Int32Type, Int8Type, UInt16Type, UInt8Type, Utf8Type};
+    use crate::types::{Int8Type, Int16Type, Int32Type, UInt8Type, UInt16Type, Utf8Type};
     use crate::{ArrowPrimitiveType, BinaryArray, StringArray};
 
     fn test_bytes_dictionary_builder<T>(values: Vec<&T::Native>)
@@ -757,7 +789,7 @@ mod tests {
     fn test_try_new_from_builder_cast_fails() {
         let mut source_builder = StringDictionaryBuilder::<UInt16Type>::new();
         for i in 0..257 {
-            source_builder.append_value(format!("val{}", i));
+            source_builder.append_value(format!("val{i}"));
         }
 
         // there should be too many values that we can't downcast to the underlying type
@@ -1006,4 +1038,51 @@ mod tests {
 
         assert_eq!(values, [None, None]);
     }
+
+    #[test]
+    fn test_finish_preserve_values() {
+        // Create the first dictionary
+        let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
+        builder.append("a").unwrap();
+        builder.append("b").unwrap();
+        builder.append("c").unwrap();
+        let dict = builder.finish_preserve_values();
+        assert_eq!(dict.keys().values(), &[0, 1, 2]);
+        assert_eq!(dict.values().len(), 3);
+        let values = dict
+            .downcast_dict::<GenericByteArray<Utf8Type>>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(values, [Some("a"), Some("b"), Some("c")]);
+
+        // Create a new dictionary
+        builder.append("d").unwrap();
+        builder.append("e").unwrap();
+        let dict2 = builder.finish_preserve_values();
+
+        // Make sure the keys are assigned after the old ones and we have the
+        // right values
+        assert_eq!(dict2.keys().values(), &[3, 4]);
+        let values = dict2
+            .downcast_dict::<GenericByteArray<Utf8Type>>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(values, [Some("d"), Some("e")]);
+
+        // Check that we have all of the expected values
+        assert_eq!(dict2.values().len(), 5);
+        let all_values = dict2
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(
+            all_values,
+            [Some("a"), Some("b"), Some("c"), Some("d"), Some("e"),]
+        );
+    }
 }
diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs
index ae7355433f81..2d60187344cf 100644
--- a/arrow-array/src/builder/generic_bytes_view_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_view_builder.rs
@@ -20,12 +20,12 @@ use std::marker::PhantomData;
 use std::sync::Arc;
 
 use arrow_buffer::{Buffer, NullBufferBuilder, ScalarBuffer};
-use arrow_data::ByteView;
+use arrow_data::{ByteView, MAX_INLINE_VIEW_LEN};
 use arrow_schema::ArrowError;
-use hashbrown::hash_table::Entry;
 use hashbrown::HashTable;
+use hashbrown::hash_table::Entry;
 
-use crate::builder::ArrayBuilder;
+use crate::builder::{ArrayBuilder, BinaryLikeArrayBuilder, StringLikeArrayBuilder};
 use crate::types::bytes::ByteArrayNativeType;
 use crate::types::{BinaryViewType, ByteViewType, StringViewType};
 use crate::{Array, ArrayRef, GenericByteViewArray};
@@ -68,8 +68,8 @@ impl BlockSizeGrowthStrategy {
 ///
 /// To avoid bump allocating, this builder allocates data in fixed size blocks, configurable
 /// using [`GenericByteViewBuilder::with_fixed_block_size`]. [`GenericByteViewBuilder::append_value`]
-/// writes values larger than 12 bytes to the current in-progress block, with values smaller
-/// than 12 bytes inlined into the views. If a value is appended that will not fit in the
+/// writes values larger than [`MAX_INLINE_VIEW_LEN`] bytes to the current in-progress block, with values of
+/// at most [`MAX_INLINE_VIEW_LEN`] bytes inlined into the views. If a value is appended that will not fit in the
 /// in-progress block, it will be closed, and a new block of sufficient size allocated
 ///
 /// # Append Views
@@ -87,6 +87,7 @@ pub struct GenericByteViewBuilder<T: ByteViewType + ?Sized> {
     /// Some if deduplicating strings
     /// map `<string hash> -> <index to the views>`
     string_tracker: Option<(HashTable<usize>, ahash::RandomState)>,
+    max_deduplication_len: Option<u32>,
     phantom: PhantomData<T>,
 }
 
@@ -107,21 +108,39 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
                 current_size: STARTING_BLOCK_SIZE,
             },
             string_tracker: None,
+            max_deduplication_len: None,
             phantom: Default::default(),
         }
     }
 
+    /// Configure the maximum length of strings to deduplicate while building the array.
+    /// Default is `None` (no limit).
+    ///
+    /// When [`Self::with_deduplicate_strings`] is enabled, the builder attempts to deduplicate
+    /// any string longer than [`MAX_INLINE_VIEW_LEN`] bytes. Since deduplication takes time proportional
+    /// to the string length, strings longer than this limit are stored without deduplication.
+    pub fn with_max_deduplication_len(self, max_deduplication_len: u32) -> Self {
+        debug_assert!(
+            max_deduplication_len > 0,
+            "max_deduplication_len must be greater than 0"
+        );
+        Self {
+            max_deduplication_len: Some(max_deduplication_len),
+            ..self
+        }
+    }
+
     /// Set a fixed buffer size for variable length strings
     ///
     /// The block size is the size of the buffer used to store values greater
-    /// than 12 bytes. The builder allocates new buffers when the current
+    /// than [`MAX_INLINE_VIEW_LEN`] bytes. The builder allocates new buffers when the current
     /// buffer is full.
     ///
     /// By default the builder balances buffer size and buffer count by
     /// growing buffer size exponentially from 8KB up to 2MB. The
     /// first buffer allocated is 8KB, then 16KB, then 32KB, etc up to 2MB.
     ///
-    /// If this method is used, any new buffers allocated are  
+    /// If this method is used, any new buffers allocated are
     /// exactly this size. This can be useful for advanced users
     /// that want to control the memory usage and buffer count.
     ///
@@ -134,13 +153,6 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
         }
     }
 
-    /// Override the size of buffers to allocate for holding string data
-    /// Use `with_fixed_block_size` instead.
-    #[deprecated(since = "53.0.0", note = "Use `with_fixed_block_size` instead")]
-    pub fn with_block_size(self, block_size: u32) -> Self {
-        self.with_fixed_block_size(block_size)
-    }
-
     /// Deduplicate strings while building the array
     ///
     /// This will potentially decrease the memory usage if the array have repeated strings
@@ -195,10 +207,10 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
     /// (2) The range `offset..offset+length` must be within the bounds of the block
     /// (3) The data in the block must be valid of type `T`
     pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) {
-        let b = self.completed.get_unchecked(block as usize);
+        let b = unsafe { self.completed.get_unchecked(block as usize) };
         let start = offset as usize;
         let end = start.saturating_add(len as usize);
-        let b = b.get_unchecked(start..end);
+        let b = unsafe { b.get_unchecked(start..end) };
 
         let view = make_view(b, block, offset);
         self.views_buffer.push(view);
@@ -221,7 +233,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
         } else {
             self.views_buffer.extend(array.views().iter().map(|v| {
                 let mut byte_view = ByteView::from(*v);
-                if byte_view.length > 12 {
+                if byte_view.length > MAX_INLINE_VIEW_LEN {
                     // Small views (<=12 bytes) are inlined, so only need to update large views
                     byte_view.buffer_index += starting_buffer;
                 };
@@ -289,7 +301,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
     pub fn get_value(&self, index: usize) -> &[u8] {
         let view = self.views_buffer.as_slice().get(index).unwrap();
         let len = *view as u32;
-        if len <= 12 {
+        if len <= MAX_INLINE_VIEW_LEN {
             // # Safety
             // The view is valid from the builder
             unsafe { GenericByteViewArray::<T>::inline_value(view, len as usize) }
@@ -313,48 +325,70 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
     /// - String length exceeds `u32::MAX`
     #[inline]
     pub fn append_value(&mut self, value: impl AsRef<T::Native>) {
+        self.try_append_value(value).unwrap()
+    }
+
+    /// Appends a value into the builder
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - String buffer count exceeds `u32::MAX`
+    /// - String length exceeds `u32::MAX`
+    #[inline]
+    pub fn try_append_value(&mut self, value: impl AsRef<T::Native>) -> Result<(), ArrowError> {
         let v: &[u8] = value.as_ref().as_ref();
-        let length: u32 = v.len().try_into().unwrap();
-        if length <= 12 {
+        let length: u32 = v.len().try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!("String length {} exceeds u32::MAX", v.len()))
+        })?;
+
+        if length <= MAX_INLINE_VIEW_LEN {
             let mut view_buffer = [0; 16];
             view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
             view_buffer[4..4 + v.len()].copy_from_slice(v);
             self.views_buffer.push(u128::from_le_bytes(view_buffer));
             self.null_buffer_builder.append_non_null();
-            return;
+            return Ok(());
         }
 
         // Deduplication if:
         // (1) deduplication is enabled.
-        // (2) len > 12
-        if let Some((mut ht, hasher)) = self.string_tracker.take() {
-            let hash_val = hasher.hash_one(v);
-            let hasher_fn = |v: &_| hasher.hash_one(v);
-
-            let entry = ht.entry(
-                hash_val,
-                |idx| {
-                    let stored_value = self.get_value(*idx);
-                    v == stored_value
-                },
-                hasher_fn,
-            );
-            match entry {
-                Entry::Occupied(occupied) => {
-                    // If the string already exists, we will directly use the view
-                    let idx = occupied.get();
-                    self.views_buffer.push(self.views_buffer[*idx]);
-                    self.null_buffer_builder.append_non_null();
-                    self.string_tracker = Some((ht, hasher));
-                    return;
-                }
-                Entry::Vacant(vacant) => {
-                    // o.w. we insert the (string hash -> view index)
-                    // the idx is current length of views_builder, as we are inserting a new view
-                    vacant.insert(self.views_buffer.len());
+        // (2) len > `MAX_INLINE_VIEW_LEN` and len <= `max_deduplication_len`
+        let can_deduplicate = self.string_tracker.is_some()
+            && self
+                .max_deduplication_len
+                .map(|max_length| length <= max_length)
+                .unwrap_or(true);
+        if can_deduplicate {
+            if let Some((mut ht, hasher)) = self.string_tracker.take() {
+                let hash_val = hasher.hash_one(v);
+                let hasher_fn = |v: &_| hasher.hash_one(v);
+
+                let entry = ht.entry(
+                    hash_val,
+                    |idx| {
+                        let stored_value = self.get_value(*idx);
+                        v == stored_value
+                    },
+                    hasher_fn,
+                );
+                match entry {
+                    Entry::Occupied(occupied) => {
+                        // If the string already exists, we will directly use the view
+                        let idx = occupied.get();
+                        self.views_buffer.push(self.views_buffer[*idx]);
+                        self.null_buffer_builder.append_non_null();
+                        self.string_tracker = Some((ht, hasher));
+                        return Ok(());
+                    }
+                    Entry::Vacant(vacant) => {
+                        // Otherwise, insert (string hash -> view index); the index is the
+                        // current length of `views_buffer`, as a new view is about to be pushed
+                        vacant.insert(self.views_buffer.len());
+                    }
                 }
+                self.string_tracker = Some((ht, hasher));
             }
-            self.string_tracker = Some((ht, hasher));
         }
 
         let required_cap = self.in_progress.len() + v.len();
@@ -363,17 +397,28 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
             let to_reserve = v.len().max(self.block_size.next_size() as usize);
             self.in_progress.reserve(to_reserve);
         };
+
         let offset = self.in_progress.len() as u32;
         self.in_progress.extend_from_slice(v);
 
+        let buffer_index: u32 = self.completed.len().try_into().map_err(|_| {
+            ArrowError::InvalidArgumentError(format!(
+                "Buffer count {} exceeds u32::MAX",
+                self.completed.len()
+            ))
+        })?;
+
         let view = ByteView {
             length,
+            // Slicing `v[0..4]` cannot panic: values of at most `MAX_INLINE_VIEW_LEN` bytes returned early above.
             prefix: u32::from_le_bytes(v[0..4].try_into().unwrap()),
-            buffer_index: self.completed.len() as u32,
+            buffer_index,
             offset,
         };
         self.views_buffer.push(view.into());
         self.null_buffer_builder.append_non_null();
+
+        Ok(())
     }
 
     /// Append an `Option` value into the builder
@@ -385,6 +430,53 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
         };
     }
 
+    /// Append the same value `n` times into the builder
+    ///
+    /// This is more efficient than calling [`Self::try_append_value`] `n` times,
+    /// especially when deduplication is enabled, as it only hashes the value once.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - String buffer count exceeds `u32::MAX`
+    /// - String length exceeds `u32::MAX`
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::builder::StringViewBuilder;
+    /// # use arrow_array::Array;
+    /// let mut builder = StringViewBuilder::new().with_deduplicate_strings();
+    ///
+    /// // Append "hello" 1000 times efficiently
+    /// builder.try_append_value_n("hello", 1000)?;
+    ///
+    /// let array = builder.finish();
+    /// assert_eq!(array.len(), 1000);
+    ///
+    /// // All values are "hello"
+    /// for value in array.iter() {
+    ///     assert_eq!(value, Some("hello"));
+    /// }
+    /// # Ok::<(), arrow_schema::ArrowError>(())
+    /// ```
+    #[inline]
+    pub fn try_append_value_n(
+        &mut self,
+        value: impl AsRef<T::Native>,
+        n: usize,
+    ) -> Result<(), ArrowError> {
+        if n == 0 {
+            return Ok(());
+        }
+        // Process value once (handles deduplication, buffer management, view creation)
+        self.try_append_value(value)?;
+        // Reuse the view (n-1) times
+        let view = *self.views_buffer.last().unwrap();
+        self.views_buffer.extend(std::iter::repeat_n(view, n - 1));
+        self.null_buffer_builder.append_n_non_nulls(n - 1);
+        Ok(())
+    }
+
     /// Append a null value into the builder
     #[inline]
     pub fn append_null(&mut self) {
@@ -397,7 +489,7 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
         self.flush_in_progress();
         let completed = std::mem::take(&mut self.completed);
         let nulls = self.null_buffer_builder.finish();
-        if let Some((ref mut ht, _)) = self.string_tracker.as_mut() {
+        if let Some((ht, _)) = self.string_tracker.as_mut() {
             ht.clear();
         }
         let views = std::mem::take(&mut self.views_buffer);
@@ -514,6 +606,21 @@ impl<T: ByteViewType + ?Sized, V: AsRef<T::Native>> Extend<Option<V>>
 /// ```
 pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
 
+impl StringLikeArrayBuilder for StringViewBuilder {
+    fn type_name() -> &'static str {
+        std::any::type_name::<StringViewBuilder>()
+    }
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity(capacity)
+    }
+    fn append_value(&mut self, value: &str) {
+        Self::append_value(self, value);
+    }
+    fn append_null(&mut self) {
+        Self::append_null(self);
+    }
+}
+
 ///  Array builder for [`BinaryViewArray`][crate::BinaryViewArray]
 ///
 /// Values can be appended using [`GenericByteViewBuilder::append_value`], and nulls with
@@ -536,6 +643,21 @@ pub type StringViewBuilder = GenericByteViewBuilder<StringViewType>;
 ///
 pub type BinaryViewBuilder = GenericByteViewBuilder<BinaryViewType>;
 
+impl BinaryLikeArrayBuilder for BinaryViewBuilder {
+    fn type_name() -> &'static str {
+        std::any::type_name::<BinaryViewBuilder>()
+    }
+    fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity(capacity)
+    }
+    fn append_value(&mut self, value: &[u8]) {
+        Self::append_value(self, value);
+    }
+    fn append_null(&mut self) {
+        Self::append_null(self);
+    }
+}
+
 /// Creates a view from a fixed length input (the compiler can generate
 /// specialized code for this)
 fn make_inlined_view<const LEN: usize>(data: &[u8]) -> u128 {
@@ -587,8 +709,52 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
 mod tests {
     use core::str;
 
+    use arrow_buffer::ArrowNativeType;
+
     use super::*;
-    use crate::Array;
+
+    #[test]
+    fn test_string_max_deduplication_len() {
+        let value_1 = "short";
+        let value_2 = "not so similar string but long";
+        let value_3 = "1234567890123";
+
+        let max_deduplication_len = MAX_INLINE_VIEW_LEN * 2;
+
+        let mut builder = StringViewBuilder::new()
+            .with_deduplicate_strings()
+            .with_max_deduplication_len(max_deduplication_len);
+
+        assert!(value_1.len() < MAX_INLINE_VIEW_LEN.as_usize());
+        assert!(value_2.len() > max_deduplication_len.as_usize());
+        assert!(
+            value_3.len() > MAX_INLINE_VIEW_LEN.as_usize()
+                && value_3.len() < max_deduplication_len.as_usize()
+        );
+
+        // append value1 (short), expect it is inlined and not deduplicated
+        builder.append_value(value_1); // view 0
+        builder.append_value(value_1); // view 1
+        // append value2, expect second copy is not deduplicated as it exceeds max_deduplication_len
+        builder.append_value(value_2); // view 2
+        builder.append_value(value_2); // view 3
+        // append value3, expect second copy is deduplicated
+        builder.append_value(value_3); // view 4
+        builder.append_value(value_3); // view 5
+
+        let array = builder.finish();
+
+        // verify
+        let v2 = ByteView::from(array.views()[2]);
+        let v3 = ByteView::from(array.views()[3]);
+        assert_eq!(v2.buffer_index, v3.buffer_index); // stored in same buffer
+        assert_ne!(v2.offset, v3.offset); // different offsets --> not deduplicated
+
+        let v4 = ByteView::from(array.views()[4]);
+        let v5 = ByteView::from(array.views()[5]);
+        assert_eq!(v4.buffer_index, v5.buffer_index); // stored in same buffer
+        assert_eq!(v4.offset, v5.offset); // same offsets --> deduplicated
+    }
 
     #[test]
     fn test_string_view_deduplicate() {
@@ -695,7 +861,10 @@ mod tests {
         );
 
         let err = v.try_append_view(0, u32::MAX, 1).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: Range 4294967295..4294967296 out of bounds for block of length 17"
+        );
 
         let err = v.try_append_view(0, 1, u32::MAX).unwrap_err();
         assert_eq!(
@@ -746,10 +915,12 @@ mod tests {
             assert_eq!(fixed_builder.completed.len(), 2_usize.pow(i + 1) - 1);
 
             // Every buffer is fixed size
-            assert!(fixed_builder
-                .completed
-                .iter()
-                .all(|b| b.len() == STARTING_BLOCK_SIZE as usize));
+            assert!(
+                fixed_builder
+                    .completed
+                    .iter()
+                    .all(|b| b.len() == STARTING_BLOCK_SIZE as usize)
+            );
         }
 
         // Add one more value, and the buffer stop growing.
@@ -760,4 +931,76 @@ mod tests {
             MAX_BLOCK_SIZE as usize
         );
     }
+
+    #[test]
+    fn test_append_value_n() {
+        // Test with inline strings (<=12 bytes)
+        let mut builder = StringViewBuilder::new();
+
+        builder.try_append_value_n("hello", 100).unwrap();
+        builder.append_value("world");
+        builder.try_append_value_n("foo", 50).unwrap();
+
+        let array = builder.finish();
+        assert_eq!(array.len(), 151);
+        assert_eq!(array.null_count(), 0);
+
+        // Verify the values
+        for i in 0..100 {
+            assert_eq!(array.value(i), "hello");
+        }
+        assert_eq!(array.value(100), "world");
+        for i in 101..151 {
+            assert_eq!(array.value(i), "foo");
+        }
+
+        // All inline strings should have no data buffers
+        assert_eq!(array.data_buffers().len(), 0);
+    }
+
+    #[test]
+    fn test_append_value_n_with_deduplication() {
+        let long_string = "This is a very long string that exceeds the inline length";
+
+        // Test with deduplication enabled
+        let mut builder = StringViewBuilder::new().with_deduplicate_strings();
+
+        // First append the string once to add it to the hash map
+        builder.append_value(long_string);
+
+        // Then append_n the same string - should deduplicate and reuse the existing value
+        builder.try_append_value_n(long_string, 999).unwrap();
+
+        let array = builder.finish();
+        assert_eq!(array.len(), 1000);
+        assert_eq!(array.null_count(), 0);
+
+        // Verify all values are the same
+        for i in 0..1000 {
+            assert_eq!(array.value(i), long_string);
+        }
+
+        // With deduplication, should only have 1 data buffer containing the string once
+        assert_eq!(array.data_buffers().len(), 1);
+
+        // All views should be identical
+        let first_view = array.views()[0];
+        for view in array.views().iter() {
+            assert_eq!(*view, first_view);
+        }
+    }
+
+    #[test]
+    fn test_append_value_n_zero() {
+        let mut builder = StringViewBuilder::new();
+
+        builder.append_value("first");
+        builder.try_append_value_n("should not appear", 0).unwrap();
+        builder.append_value("second");
+
+        let array = builder.finish();
+        assert_eq!(array.len(), 2);
+        assert_eq!(array.value(0), "first");
+        assert_eq!(array.value(1), "second");
+    }
 }
diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs
index 463b498c55ba..cabf7a514050 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::{ArrayBuilder, BufferBuilder};
+use crate::builder::ArrayBuilder;
 use crate::{Array, ArrayRef, GenericListArray, OffsetSizeTrait};
 use arrow_buffer::NullBufferBuilder;
 use arrow_buffer::{Buffer, OffsetBuffer};
@@ -86,7 +86,7 @@ use std::sync::Arc;
 /// [`LargeListArray`]: crate::array::LargeListArray
 #[derive(Debug)]
 pub struct GenericListBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
-    offsets_builder: BufferBuilder<OffsetSize>,
+    offsets_builder: Vec<OffsetSize>,
     null_buffer_builder: NullBufferBuilder,
     values_builder: T,
     field: Option<FieldRef>,
@@ -108,8 +108,8 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListBuilder<OffsetSize
     /// Creates a new [`GenericListBuilder`] from a given values array builder
     /// `capacity` is the number of items to pre-allocate space for in this builder
     pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
-        let mut offsets_builder = BufferBuilder::<OffsetSize>::new(capacity + 1);
-        offsets_builder.append(OffsetSize::zero());
+        let mut offsets_builder = Vec::with_capacity(capacity + 1);
+        offsets_builder.push(OffsetSize::zero());
         Self {
             offsets_builder,
             null_buffer_builder: NullBufferBuilder::new(capacity),
@@ -192,7 +192,7 @@ where
     /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
     #[inline]
     pub fn append(&mut self, is_valid: bool) {
-        self.offsets_builder.append(self.next_offset());
+        self.offsets_builder.push(self.next_offset());
         self.null_buffer_builder.append(is_valid);
     }
 
@@ -266,7 +266,7 @@ where
     /// See [`Self::append_value`] for an example use.
     #[inline]
     pub fn append_null(&mut self) {
-        self.offsets_builder.append(self.next_offset());
+        self.offsets_builder.push(self.next_offset());
         self.null_buffer_builder.append_null();
     }
 
@@ -274,7 +274,8 @@ where
     #[inline]
     pub fn append_nulls(&mut self, n: usize) {
         let next_offset = self.next_offset();
-        self.offsets_builder.append_n(n, next_offset);
+        self.offsets_builder
+            .extend(std::iter::repeat_n(next_offset, n));
         self.null_buffer_builder.append_n_nulls(n);
     }
 
@@ -298,10 +299,10 @@ where
         let values = self.values_builder.finish();
         let nulls = self.null_buffer_builder.finish();
 
-        let offsets = self.offsets_builder.finish();
+        let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
         // Safety: Safe by construction
         let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
-        self.offsets_builder.append(OffsetSize::zero());
+        self.offsets_builder.push(OffsetSize::zero());
 
         let field = match &self.field {
             Some(f) => f.clone(),
@@ -362,10 +363,10 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::builder::{make_builder, Int32Builder, ListBuilder};
+    use crate::Int32Array;
+    use crate::builder::{Int32Builder, ListBuilder, make_builder};
     use crate::cast::AsArray;
     use crate::types::Int32Type;
-    use crate::Int32Array;
     use arrow_schema::DataType;
 
     fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs
index 5aaf9efefe24..c13c21cb988b 100644
--- a/arrow-array/src/builder/generic_list_view_builder.rs
+++ b/arrow-array/src/builder/generic_list_view_builder.rs
@@ -17,7 +17,7 @@
 
 use crate::builder::ArrayBuilder;
 use crate::{ArrayRef, GenericListViewArray, OffsetSizeTrait};
-use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer};
+use arrow_buffer::{Buffer, NullBufferBuilder, ScalarBuffer};
 use arrow_schema::{Field, FieldRef};
 use std::any::Any;
 use std::sync::Arc;
@@ -25,8 +25,8 @@ use std::sync::Arc;
 /// Builder for [`GenericListViewArray`]
 #[derive(Debug)]
 pub struct GenericListViewBuilder<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> {
-    offsets_builder: BufferBuilder<OffsetSize>,
-    sizes_builder: BufferBuilder<OffsetSize>,
+    offsets_builder: Vec<OffsetSize>,
+    sizes_builder: Vec<OffsetSize>,
     null_buffer_builder: NullBufferBuilder,
     values_builder: T,
     field: Option<FieldRef>,
@@ -83,8 +83,8 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder> GenericListViewBuilder<Offset
     /// Creates a new [`GenericListViewBuilder`] from a given values array builder
     /// `capacity` is the number of items to pre-allocate space for in this builder
     pub fn with_capacity(values_builder: T, capacity: usize) -> Self {
-        let offsets_builder = BufferBuilder::<OffsetSize>::new(capacity);
-        let sizes_builder = BufferBuilder::<OffsetSize>::new(capacity);
+        let offsets_builder = Vec::with_capacity(capacity);
+        let sizes_builder = Vec::with_capacity(capacity);
         Self {
             offsets_builder,
             null_buffer_builder: NullBufferBuilder::new(capacity),
@@ -132,8 +132,8 @@ where
     /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX`
     #[inline]
     pub fn append(&mut self, is_valid: bool) {
-        self.offsets_builder.append(self.current_offset);
-        self.sizes_builder.append(
+        self.offsets_builder.push(self.current_offset);
+        self.sizes_builder.push(
             OffsetSize::from_usize(
                 self.values_builder.len() - self.current_offset.to_usize().unwrap(),
             )
@@ -158,9 +158,8 @@ where
     /// See [`Self::append_value`] for an example use.
     #[inline]
     pub fn append_null(&mut self) {
-        self.offsets_builder.append(self.current_offset);
-        self.sizes_builder
-            .append(OffsetSize::from_usize(0).unwrap());
+        self.offsets_builder.push(self.current_offset);
+        self.sizes_builder.push(OffsetSize::from_usize(0).unwrap());
         self.null_buffer_builder.append_null();
     }
 
@@ -183,12 +182,12 @@ where
     pub fn finish(&mut self) -> GenericListViewArray<OffsetSize> {
         let values = self.values_builder.finish();
         let nulls = self.null_buffer_builder.finish();
-        let offsets = self.offsets_builder.finish();
+        let offsets = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
         self.current_offset = OffsetSize::zero();
 
         // Safety: Safe by construction
         let offsets = ScalarBuffer::from(offsets);
-        let sizes = self.sizes_builder.finish();
+        let sizes = Buffer::from_vec(std::mem::take(&mut self.sizes_builder));
         let sizes = ScalarBuffer::from(sizes);
         let field = match &self.field {
             Some(f) => f.clone(),
@@ -246,7 +245,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::builder::{make_builder, Int32Builder, ListViewBuilder};
+    use crate::builder::{Int32Builder, ListViewBuilder, make_builder};
     use crate::cast::AsArray;
     use crate::types::Int32Type;
     use crate::{Array, Int32Array};
diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs
index 012a454e76c9..b70d4b73880b 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::{ArrayBuilder, BufferBuilder};
+use crate::builder::ArrayBuilder;
 use crate::{Array, ArrayRef, MapArray, StructArray};
 use arrow_buffer::Buffer;
 use arrow_buffer::{NullBuffer, NullBufferBuilder};
@@ -56,7 +56,7 @@ use std::sync::Arc;
 /// ```
 #[derive(Debug)]
 pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
-    offsets_builder: BufferBuilder<i32>,
+    offsets_builder: Vec<i32>,
     null_buffer_builder: NullBufferBuilder,
     field_names: MapFieldNames,
     key_builder: K,
@@ -100,8 +100,8 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
         value_builder: V,
         capacity: usize,
     ) -> Self {
-        let mut offsets_builder = BufferBuilder::<i32>::new(capacity + 1);
-        offsets_builder.append(0);
+        let mut offsets_builder = Vec::with_capacity(capacity + 1);
+        offsets_builder.push(0);
         Self {
             offsets_builder,
             null_buffer_builder: NullBufferBuilder::new(capacity),
@@ -166,7 +166,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
                 self.value_builder.len()
             )));
         }
-        self.offsets_builder.append(self.key_builder.len() as i32);
+        self.offsets_builder.push(self.key_builder.len() as i32);
         self.null_buffer_builder.append(is_valid);
         Ok(())
     }
@@ -177,8 +177,8 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
         // Build the keys
         let keys_arr = self.key_builder.finish();
         let values_arr = self.value_builder.finish();
-        let offset_buffer = self.offsets_builder.finish();
-        self.offsets_builder.append(0);
+        let offset_buffer = Buffer::from_vec(std::mem::take(&mut self.offsets_builder));
+        self.offsets_builder.push(0);
         let null_bit_buffer = self.null_buffer_builder.finish();
 
         self.finish_helper(keys_arr, values_arr, offset_buffer, null_bit_buffer, len)
@@ -284,7 +284,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for MapBuilder<K, V> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::builder::{make_builder, Int32Builder, StringBuilder};
+    use crate::builder::{Int32Builder, StringBuilder, make_builder};
     use crate::{Int32Array, StringArray};
     use std::collections::HashMap;
 
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index 680563c6cfc3..02c6df453b6c 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -273,8 +273,8 @@ mod union_builder;
 
 pub use union_builder::*;
 
-use crate::types::{Int16Type, Int32Type, Int64Type, Int8Type};
 use crate::ArrayRef;
+use crate::types::{Int8Type, Int16Type, Int32Type, Int64Type};
 use arrow_schema::{DataType, IntervalUnit, TimeUnit};
 use std::any::Any;
 
@@ -447,9 +447,16 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
         DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
         DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
         DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
+        DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(capacity)),
         DataType::FixedSizeBinary(len) => {
             Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
         }
+        DataType::Decimal32(p, s) => Box::new(
+            Decimal32Builder::with_capacity(capacity).with_data_type(DataType::Decimal32(*p, *s)),
+        ),
+        DataType::Decimal64(p, s) => Box::new(
+            Decimal64Builder::with_capacity(capacity).with_data_type(DataType::Decimal64(*p, *s)),
+        ),
         DataType::Decimal128(p, s) => Box::new(
             Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
         ),
@@ -458,6 +465,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
         ),
         DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
         DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
+        DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(capacity)),
         DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
         DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
         DataType::Time32(TimeUnit::Second) => {
@@ -559,7 +567,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
                     .with_values_field(fields[1].clone()),
                 )
             }
-            t => panic!("The field of Map data type {t:?} should have a child Struct field"),
+            t => panic!("The field of Map data type {t} should have a child Struct field"),
         },
         DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
         t @ DataType::Dictionary(key_type, value_type) => {
@@ -586,7 +594,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
                                 LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
                             Box::new(dict_builder)
                         }
-                        t => panic!("Dictionary value type {t:?} is not currently supported"),
+                        t => unimplemented!("Dictionary value type {t} is not currently supported"),
                     }
                 };
             }
@@ -596,10 +604,12 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
                 DataType::Int32 => dict_builder!(Int32Type),
                 DataType::Int64 => dict_builder!(Int64Type),
                 _ => {
-                    panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
+                    unimplemented!(
+                        "Data type {t} with key type {key_type} is not currently supported"
+                    )
                 }
             }
         }
-        t => panic!("Data type {t:?} is not currently supported"),
+        t => unimplemented!("Data type {t} is not currently supported"),
     }
 }
diff --git a/arrow-array/src/builder/null_builder.rs b/arrow-array/src/builder/null_builder.rs
index 59086dffa907..489822065b56 100644
--- a/arrow-array/src/builder/null_builder.rs
+++ b/arrow-array/src/builder/null_builder.rs
@@ -59,18 +59,6 @@ impl NullBuilder {
         Self { len: 0 }
     }
 
-    /// Creates a new null builder with space for `capacity` elements without re-allocating
-    #[deprecated = "there is no actual notion of capacity in the NullBuilder, so emulating it makes little sense"]
-    pub fn with_capacity(_capacity: usize) -> Self {
-        Self::new()
-    }
-
-    /// Returns the capacity of this builder measured in slots of type `T`
-    #[deprecated = "there is no actual notion of capacity in the NullBuilder, so emulating it makes little sense"]
-    pub fn capacity(&self) -> usize {
-        self.len
-    }
-
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs
index 41c65fe34e35..049cef241c83 100644
--- a/arrow-array/src/builder/primitive_builder.rs
+++ b/arrow-array/src/builder/primitive_builder.rs
@@ -15,11 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::{ArrayBuilder, BufferBuilder};
+use crate::builder::ArrayBuilder;
 use crate::types::*;
 use crate::{Array, ArrayRef, PrimitiveArray};
-use arrow_buffer::NullBufferBuilder;
-use arrow_buffer::{Buffer, MutableBuffer};
+use arrow_buffer::{Buffer, MutableBuffer, NullBufferBuilder, ScalarBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -87,6 +86,10 @@ pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
 /// An elapsed time in nanoseconds array builder.
 pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;
 
+/// A decimal 32 array builder
+pub type Decimal32Builder = PrimitiveBuilder<Decimal32Type>;
+/// A decimal 64 array builder
+pub type Decimal64Builder = PrimitiveBuilder<Decimal64Type>;
 /// A decimal 128 array builder
 pub type Decimal128Builder = PrimitiveBuilder<Decimal128Type>;
 /// A decimal 256 array builder
@@ -95,7 +98,7 @@ pub type Decimal256Builder = PrimitiveBuilder<Decimal256Type>;
 /// Builder for [`PrimitiveArray`]
 #[derive(Debug)]
 pub struct PrimitiveBuilder<T: ArrowPrimitiveType> {
-    values_builder: BufferBuilder<T::Native>,
+    values_builder: Vec<T::Native>,
     null_buffer_builder: NullBufferBuilder,
     data_type: DataType,
 }
@@ -147,7 +150,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     /// Creates a new primitive array builder with capacity no of items
     pub fn with_capacity(capacity: usize) -> Self {
         Self {
-            values_builder: BufferBuilder::<T::Native>::new(capacity),
+            values_builder: Vec::with_capacity(capacity),
             null_buffer_builder: NullBufferBuilder::new(capacity),
             data_type: T::DATA_TYPE,
         }
@@ -158,7 +161,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         values_buffer: MutableBuffer,
         null_buffer: Option<MutableBuffer>,
     ) -> Self {
-        let values_builder = BufferBuilder::<T::Native>::new_from_buffer(values_buffer);
+        let values_builder: Vec<T::Native> = ScalarBuffer::<T::Native>::from(values_buffer).into();
 
         let null_buffer_builder = null_buffer
             .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len()))
@@ -175,7 +178,8 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     /// data type of the generated array.
     ///
     /// This method allows overriding the data type, to allow specifying timezones
-    /// for [`DataType::Timestamp`] or precision and scale for [`DataType::Decimal128`] and [`DataType::Decimal256`]
+    /// for [`DataType::Timestamp`] or precision and scale for [`DataType::Decimal32`],
+    /// [`DataType::Decimal64`], [`DataType::Decimal128`] and [`DataType::Decimal256`]
     ///
     /// # Panics
     ///
@@ -199,28 +203,29 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     #[inline]
     pub fn append_value(&mut self, v: T::Native) {
         self.null_buffer_builder.append_non_null();
-        self.values_builder.append(v);
+        self.values_builder.push(v);
     }
 
     /// Appends a value of type `T` into the builder `n` times
     #[inline]
     pub fn append_value_n(&mut self, v: T::Native, n: usize) {
         self.null_buffer_builder.append_n_non_nulls(n);
-        self.values_builder.append_n(n, v);
+        self.values_builder.extend(std::iter::repeat_n(v, n));
     }
 
     /// Appends a null slot into the builder
     #[inline]
     pub fn append_null(&mut self) {
         self.null_buffer_builder.append_null();
-        self.values_builder.advance(1);
+        self.values_builder.push(T::Native::default());
     }
 
     /// Appends `n` no. of null's into the builder
     #[inline]
     pub fn append_nulls(&mut self, n: usize) {
         self.null_buffer_builder.append_n_nulls(n);
-        self.values_builder.advance(n);
+        self.values_builder
+            .extend(std::iter::repeat_n(T::Native::default(), n));
     }
 
     /// Appends an `Option<T>` into the builder
@@ -236,7 +241,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     #[inline]
     pub fn append_slice(&mut self, v: &[T::Native]) {
         self.null_buffer_builder.append_n_non_nulls(v.len());
-        self.values_builder.append_slice(v);
+        self.values_builder.extend_from_slice(v);
     }
 
     /// Appends values from a slice of type `T` and a validity boolean slice
@@ -252,7 +257,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
             "Value and validity lengths must be equal"
         );
         self.null_buffer_builder.append_slice(is_valid);
-        self.values_builder.append_slice(values);
+        self.values_builder.extend_from_slice(values);
     }
 
     /// Appends array values and null to this builder as is
@@ -269,7 +274,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
             "array data type mismatch"
         );
 
-        self.values_builder.append_slice(array.values());
+        self.values_builder.extend_from_slice(array.values());
         if let Some(null_buffer) = array.nulls() {
             self.null_buffer_builder.append_buffer(null_buffer);
         } else {
@@ -291,7 +296,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
             .expect("append_trusted_len_iter requires an upper bound");
 
         self.null_buffer_builder.append_n_non_nulls(len);
-        self.values_builder.append_trusted_len_iter(iter);
+        self.values_builder.extend(iter);
     }
 
     /// Builds the [`PrimitiveArray`] and reset this builder.
@@ -300,7 +305,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
         let nulls = self.null_buffer_builder.finish();
         let builder = ArrayData::builder(self.data_type.clone())
             .len(len)
-            .add_buffer(self.values_builder.finish())
+            .add_buffer(std::mem::take(&mut self.values_builder).into())
             .nulls(nulls);
 
         let array_data = unsafe { builder.build_unchecked() };
@@ -328,7 +333,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
 
     /// Returns the current values buffer as a mutable slice
     pub fn values_slice_mut(&mut self) -> &mut [T::Native] {
-        self.values_builder.as_slice_mut()
+        self.values_builder.as_mut_slice()
     }
 
     /// Returns the current null buffer as a slice
@@ -344,7 +349,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
     /// Returns the current values buffer and null buffer as a slice
     pub fn slices_mut(&mut self) -> (&mut [T::Native], Option<&mut [u8]>) {
         (
-            self.values_builder.as_slice_mut(),
+            self.values_builder.as_mut_slice(),
             self.null_buffer_builder.as_slice_mut(),
         )
     }
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs
index f4a6662462e0..d9544aec3b9d 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -22,6 +22,7 @@ use crate::{
 };
 use arrow_buffer::{ArrowNativeType, ToByteSlice};
 use arrow_schema::{ArrowError, DataType};
+use num_traits::NumCast;
 use std::any::Any;
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -169,6 +170,68 @@ where
             map: HashMap::with_capacity(values_capacity),
         }
     }
+
+    /// Creates a new `PrimitiveDictionaryBuilder` from the existing builder with the same
+    /// keys and values, but with a new data type for the keys. Returns an
+    /// [`ArrowError::CastError`] if a source key cannot be cast to the new key type.
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::builder::PrimitiveDictionaryBuilder;
+    /// # use arrow_array::types::{UInt8Type, UInt16Type, UInt64Type};
+    /// # use arrow_array::UInt16Array;
+    /// # use arrow_schema::ArrowError;
+    /// let mut u8_keyed_builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt64Type>::new();
+    ///
+    /// // appending too many values causes the dictionary to overflow
+    /// for i in 0..256 {
+    ///     u8_keyed_builder.append_value(i);
+    /// }
+    /// let result = u8_keyed_builder.append(256);
+    /// assert!(matches!(result, Err(ArrowError::DictionaryKeyOverflowError{})));
+    ///
+    /// // we need to upgrade to a larger key type
+    /// let mut u16_keyed_builder = PrimitiveDictionaryBuilder::<UInt16Type, UInt64Type>::try_new_from_builder(u8_keyed_builder).unwrap();
+    /// let dictionary_array = u16_keyed_builder.finish();
+    /// let keys = dictionary_array.keys();
+    ///
+    /// assert_eq!(keys, &UInt16Array::from_iter(0..256));
+    /// ```
+    pub fn try_new_from_builder<K2>(
+        mut source: PrimitiveDictionaryBuilder<K2, V>,
+    ) -> Result<Self, ArrowError>
+    where
+        K::Native: NumCast,
+        K2: ArrowDictionaryKeyType,
+        K2::Native: NumCast,
+    {
+        let map = source.map;
+        let values_builder = source.values_builder;
+
+        let source_keys = source.keys_builder.finish();
+        let new_keys: PrimitiveArray<K> = source_keys.try_unary(|value| {
+            num_traits::cast::cast::<K2::Native, K::Native>(value).ok_or_else(|| {
+                ArrowError::CastError(format!(
+                    "Can't cast dictionary keys from source type {:?} to type {:?}",
+                    K2::DATA_TYPE,
+                    K::DATA_TYPE
+                ))
+            })
+        })?;
+
+        // Drop `source_keys` here: it and `new_keys` currently hold references to the same
+        // underlying null buffer, and `new_keys.into_builder()` below requires `new_keys` to be
+        // the only reference holder.
+        drop(source_keys);
+
+        Ok(Self {
+            map,
+            keys_builder: new_keys
+                .into_builder()
+                .expect("underlying buffer has no references"),
+            values_builder,
+        })
+    }
 }
 
 impl<K, V> ArrayBuilder for PrimitiveDictionaryBuilder<K, V>
@@ -397,6 +460,38 @@ where
         DictionaryArray::from(unsafe { builder.build_unchecked() })
     }
 
+    /// Builds the `DictionaryArray` without resetting the values builder or
+    /// the internal de-duplication map.
+    ///
+    /// The advantage of doing this is that the values represent the entire
+    /// set of what has been built so far by this builder, which keeps the
+    /// assignment of keys to values consistent across multiple calls to
+    /// `finish_preserve_values`. This enables IPC writers to efficiently
+    /// emit delta dictionaries.
+    ///
+    /// The downside is that building the array requires creating a copy of
+    /// the values, which becomes increasingly expensive as the dictionary
+    /// grows.
+    ///
+    /// Additionally, if record batches from multiple different dictionary
+    /// builders for the same column are fed into a single IPC writer, beware
+    /// that entire dictionaries are likely to be re-sent frequently even when
+    /// the majority of the values are not used by the current record batch.
+    pub fn finish_preserve_values(&mut self) -> DictionaryArray<K> {
+        let values = self.values_builder.finish_cloned();
+        let keys = self.keys_builder.finish();
+
+        let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE));
+
+        let builder = keys
+            .into_data()
+            .into_builder()
+            .data_type(data_type)
+            .child_data(vec![values.into_data()]);
+
+        DictionaryArray::from(unsafe { builder.build_unchecked() })
+    }
+
     /// Returns the current dictionary values buffer as a slice
     pub fn values_slice(&self) -> &[V::Native] {
         self.values_builder.values_slice()
@@ -428,10 +523,14 @@ impl<K: ArrowDictionaryKeyType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
 mod tests {
     use super::*;
 
-    use crate::array::{Int32Array, UInt32Array, UInt8Array};
+    use crate::array::{Int32Array, UInt8Array, UInt32Array};
     use crate::builder::Decimal128Builder;
     use crate::cast::AsArray;
-    use crate::types::{Decimal128Type, Int32Type, UInt32Type, UInt8Type};
+    use crate::types::{
+        Date32Type, Decimal128Type, DurationNanosecondType, Float32Type, Float64Type, Int8Type,
+        Int16Type, Int32Type, Int64Type, TimestampNanosecondType, UInt8Type, UInt16Type,
+        UInt32Type, UInt64Type,
+    };
 
     #[test]
     fn test_primitive_dictionary_builder() {
@@ -649,4 +748,146 @@ mod tests {
             builder.values_builder.capacity()
         )
     }
+
+    fn _test_try_new_from_builder_generic_for_key_types<K1, K2, V>(values: Vec<V::Native>)
+    where
+        K1: ArrowDictionaryKeyType,
+        K1::Native: NumCast,
+        K2: ArrowDictionaryKeyType,
+        K2::Native: NumCast + From<u8>, // `From<u8>` is used to build the expected keys below
+        V: ArrowPrimitiveType,
+    {
+        let mut source = PrimitiveDictionaryBuilder::<K1, V>::new();
+        source.append(values[0]).unwrap(); // `values` must hold at least three entries
+        source.append_null(); // null slot, expected at the same position after conversion
+        source.append(values[1]).unwrap();
+        source.append(values[2]).unwrap();
+
+        let mut result = PrimitiveDictionaryBuilder::<K2, V>::try_new_from_builder(source).unwrap();
+        let array = result.finish();
+
+        let mut expected_keys_builder = PrimitiveBuilder::<K2>::new();
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(0u8));
+        expected_keys_builder.append_null(); // mirrors the null appended to `source`
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(1u8));
+        expected_keys_builder
+            .append_value(<<K2 as ArrowPrimitiveType>::Native as From<u8>>::from(2u8));
+        let expected_keys = expected_keys_builder.finish();
+        assert_eq!(array.keys(), &expected_keys); // same keys, now typed as `K2`
+
+        let av = array.values();
+        let ava = av.as_any().downcast_ref::<PrimitiveArray<V>>().unwrap();
+        assert_eq!(ava.value(0), values[0]); // dictionary values are carried over unchanged
+        assert_eq!(ava.value(1), values[1]);
+        assert_eq!(ava.value(2), values[2]);
+    }
+
+    fn _test_try_new_from_builder_generic_for_value<T>(values: Vec<T::Native>)
+    where
+        T: ArrowPrimitiveType,
+    {
+        // test cast to bigger size unsigned
+        _test_try_new_from_builder_generic_for_key_types::<UInt8Type, UInt16Type, T>(
+            values.clone(),
+        );
+        // test cast going to smaller size unsigned
+        _test_try_new_from_builder_generic_for_key_types::<UInt16Type, UInt8Type, T>(
+            values.clone(),
+        );
+        // test cast going to bigger size signed
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, Int16Type, T>(values.clone());
+        // test cast going to smaller size signed
+        _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
+        // test mixed signed/unsigned casts (the last case repeats Int32 -> Int16)
+        _test_try_new_from_builder_generic_for_key_types::<UInt8Type, Int16Type, T>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt8Type, T>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int8Type, UInt16Type, T>(values.clone());
+        _test_try_new_from_builder_generic_for_key_types::<Int32Type, Int16Type, T>(values.clone());
+    }
+
+    #[test]
+    fn test_try_new_from_builder() {
+        // test unsigned types
+        _test_try_new_from_builder_generic_for_value::<UInt8Type>(vec![1, 2, 3]);
+        _test_try_new_from_builder_generic_for_value::<UInt16Type>(vec![1, 2, 3]);
+        _test_try_new_from_builder_generic_for_value::<UInt32Type>(vec![1, 2, 3]);
+        _test_try_new_from_builder_generic_for_value::<UInt64Type>(vec![1, 2, 3]);
+        // test signed types
+        _test_try_new_from_builder_generic_for_value::<Int8Type>(vec![-1, 0, 1]);
+        _test_try_new_from_builder_generic_for_value::<Int16Type>(vec![-1, 0, 1]);
+        _test_try_new_from_builder_generic_for_value::<Int32Type>(vec![-1, 0, 1]);
+        _test_try_new_from_builder_generic_for_value::<Int64Type>(vec![-1, 0, 1]);
+        // test some temporal types (date, duration, timestamp)
+        _test_try_new_from_builder_generic_for_value::<Date32Type>(vec![5, 6, 7]);
+        _test_try_new_from_builder_generic_for_value::<DurationNanosecondType>(vec![1, 2, 3]);
+        _test_try_new_from_builder_generic_for_value::<TimestampNanosecondType>(vec![1, 2, 3]);
+        // test some floating point types
+        _test_try_new_from_builder_generic_for_value::<Float32Type>(vec![0.1, 0.2, 0.3]);
+        _test_try_new_from_builder_generic_for_value::<Float64Type>(vec![-0.1, 0.2, 0.3]);
+    }
+
+    #[test]
+    fn test_try_new_from_builder_cast_fails() {
+        let mut source_builder = PrimitiveDictionaryBuilder::<UInt16Type, UInt64Type>::new();
+        for i in 0..257 {
+            source_builder.append_value(i);
+        }
+
+        // 257 distinct values were appended, so the source builder holds more keys than
+        // UInt8Type can represent and the key conversion must fail
+        let result = PrimitiveDictionaryBuilder::<UInt8Type, UInt64Type>::try_new_from_builder(
+            source_builder,
+        );
+        assert!(result.is_err());
+        if let Err(e) = result {
+            assert!(matches!(e, ArrowError::CastError(_)));
+            assert_eq!(
+                e.to_string(),
+                "Cast error: Can't cast dictionary keys from source type UInt16 to type UInt8"
+            );
+        }
+    }
+
+    #[test]
+    fn test_finish_preserve_values() {
+        // Create the first dictionary
+        let mut builder = PrimitiveDictionaryBuilder::<UInt8Type, UInt32Type>::new();
+        builder.append(10).unwrap();
+        builder.append(20).unwrap();
+        let array = builder.finish_preserve_values();
+        assert_eq!(array.keys(), &UInt8Array::from(vec![Some(0), Some(1)]));
+        let values: &[u32] = array
+            .values()
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap()
+            .values();
+        assert_eq!(values, &[10, 20]);
+
+        // Append more values and build a second dictionary from the same builder
+        builder.append(30).unwrap();
+        builder.append(40).unwrap();
+        let array2 = builder.finish_preserve_values();
+
+        // Make sure the keys are assigned after the old ones
+        // and that we have the right values
+        assert_eq!(array2.keys(), &UInt8Array::from(vec![Some(2), Some(3)]));
+        let values = array2
+            .downcast_dict::<UInt32Array>()
+            .unwrap()
+            .into_iter()
+            .collect::<Vec<_>>();
+        assert_eq!(values, vec![Some(30), Some(40)]);
+
+        // Check that we have all of the expected values
+        let all_values: &[u32] = array2
+            .values()
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap()
+            .values();
+        assert_eq!(all_values, &[10, 20, 30, 40]);
+    }
 }
diff --git a/arrow-array/src/builder/primitive_run_builder.rs b/arrow-array/src/builder/primitive_run_builder.rs
index 1db9c91e081d..52bdaa6f40e4 100644
--- a/arrow-array/src/builder/primitive_run_builder.rs
+++ b/arrow-array/src/builder/primitive_run_builder.rs
@@ -17,7 +17,7 @@
 
 use std::{any::Any, sync::Arc};
 
-use crate::{types::RunEndIndexType, ArrayRef, ArrowPrimitiveType, RunArray};
+use crate::{ArrayRef, ArrowPrimitiveType, RunArray, types::RunEndIndexType};
 
 use super::{ArrayBuilder, PrimitiveBuilder};
 
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 3afee5863f52..4fb312739cb5 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::*;
 use crate::StructArray;
+use crate::builder::*;
 use arrow_buffer::NullBufferBuilder;
 use arrow_schema::{Fields, SchemaBuilder};
 use std::sync::Arc;
@@ -62,7 +62,7 @@ use std::sync::Arc;
 ///
 ///   // We can't obtain the ListBuilder<StructBuilder> with the expected generic types, because under the hood
 ///   // the StructBuilder was returned as a Box<dyn ArrayBuilder> and passed as such to the ListBuilder constructor
-///   
+///
 ///   // This panics in runtime, even though we know that the builder is a ListBuilder<StructBuilder>.
 ///   // let sb = col_struct_builder
 ///   //     .field_builder::<ListBuilder<StructBuilder>>(0)
@@ -201,6 +201,11 @@ impl StructBuilder {
         self.field_builders.len()
     }
 
+    /// Returns a reference to the [`Fields`] of the struct this builder is building.
+    pub fn fields(&self) -> &Fields {
+        &self.fields
+    }
+
     /// Appends an element (either null or non-null) to the struct. The actual elements
     /// should be appended for each child sub-array in a consistent way.
     #[inline]
@@ -267,7 +272,7 @@ impl StructBuilder {
                 let schema = builder.finish();
 
                 panic!("{}", format!(
-                    "StructBuilder ({:?}) and field_builder with index {} ({:?}) are of unequal lengths: ({} != {}).",
+                    "StructBuilder ({}) and field_builder with index {} ({}) are of unequal lengths: ({} != {}).",
                     schema,
                     idx,
                     self.fields[idx].data_type(),
@@ -440,11 +445,13 @@ mod tests {
         match builder {
             Some(builder) => {
                 assert_eq!(builder.value_length(), LIST_LENGTH);
-                assert!(builder
-                    .values()
-                    .as_any_mut()
-                    .downcast_mut::<Int32Builder>()
-                    .is_some());
+                assert!(
+                    builder
+                        .values()
+                        .as_any_mut()
+                        .downcast_mut::<Int32Builder>()
+                        .is_some()
+                );
             }
             None => panic!("expected FixedSizeListBuilder, got a different builder type"),
         }
@@ -648,7 +655,7 @@ mod tests {
 
     #[test]
     #[should_panic(
-        expected = "StructBuilder (Schema { fields: [Field { name: \"f1\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"f2\", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
+        expected = "StructBuilder (Field { \"f1\": Int32 }, Field { \"f2\": Boolean }) and field_builder with index 1 (Boolean) are of unequal lengths: (2 != 1)."
     )]
     fn test_struct_array_builder_unequal_field_builders_lengths() {
         let mut int_builder = Int32Builder::with_capacity(10);
@@ -690,7 +697,7 @@ mod tests {
 
     #[test]
     #[should_panic(
-        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(Nanosecond, Some(\\\"UTC\\\")) got Timestamp(Nanosecond, None)"
+        expected = "Incorrect datatype for StructArray field \\\"timestamp\\\", expected Timestamp(ns, \\\"UTC\\\") got Timestamp(ns)"
     )]
     fn test_struct_array_mismatch_builder() {
         let fields = vec![Field::new(
diff --git a/arrow-array/src/builder/union_builder.rs b/arrow-array/src/builder/union_builder.rs
index e6184f4ac6d2..3b8934f2ebf4 100644
--- a/arrow-array/src/builder/union_builder.rs
+++ b/arrow-array/src/builder/union_builder.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::builder::buffer_builder::{Int32BufferBuilder, Int8BufferBuilder};
-use crate::builder::BufferBuilder;
-use crate::{make_array, ArrowPrimitiveType, UnionArray};
+use crate::builder::buffer_builder::{Int8BufferBuilder, Int32BufferBuilder};
+use crate::builder::{ArrayBuilder, BufferBuilder};
+use crate::{ArrayRef, ArrowPrimitiveType, UnionArray, make_array};
 use arrow_buffer::NullBufferBuilder;
-use arrow_buffer::{ArrowNativeType, Buffer};
+use arrow_buffer::{ArrowNativeType, Buffer, ScalarBuffer};
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::{ArrowError, DataType, Field};
 use std::any::Any;
@@ -42,12 +42,14 @@ struct FieldData {
 }
 
 /// A type-erased [`BufferBuilder`] used by [`FieldData`]
-trait FieldDataValues: std::fmt::Debug {
+trait FieldDataValues: std::fmt::Debug + Send + Sync {
     fn as_mut_any(&mut self) -> &mut dyn Any;
 
     fn append_null(&mut self);
 
     fn finish(&mut self) -> Buffer;
+
+    fn finish_cloned(&self) -> Buffer;
 }
 
 impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
@@ -62,6 +64,10 @@ impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
     fn finish(&mut self) -> Buffer {
         self.finish()
     }
+
+    fn finish_cloned(&self) -> Buffer {
+        Buffer::from_slice_ref(self.as_slice()) // built from the current contents; `self` is only borrowed
+    }
 }
 
 impl FieldData {
@@ -138,7 +144,7 @@ impl FieldData {
 /// assert_eq!(union.value_offset(1), 1);
 /// assert_eq!(union.value_offset(2), 2);
 /// ```
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct UnionBuilder {
     /// The current number of slots in the array
     len: usize,
@@ -310,4 +316,172 @@ impl UnionBuilder {
             children,
         )
     }
+
+    /// Builds a new `UnionArray` from the current contents without consuming the builder.
+    ///
+    /// Used by `ArrayBuilder::finish_cloned`; any error returned comes from `UnionArray::try_new`.
+    fn build_cloned(&self) -> Result<UnionArray, ArrowError> {
+        let mut children = Vec::with_capacity(self.fields.len());
+        let union_fields: Vec<_> = self
+            .fields
+            .iter()
+            .map(|(name, field_data)| {
+                let FieldData {
+                    type_id,
+                    data_type,
+                    values_buffer,
+                    slots,
+                    null_buffer_builder,
+                } = field_data;
+
+                let array_ref = make_array(unsafe {
+                    ArrayDataBuilder::new(data_type.clone())
+                        .add_buffer(values_buffer.finish_cloned())
+                        .len(*slots)
+                        .nulls(null_buffer_builder.finish_cloned())
+                        .build_unchecked()
+                });
+                children.push(array_ref); // children follow the iteration order of `self.fields`
+                (
+                    *type_id,
+                    Arc::new(Field::new(name.clone(), data_type.clone(), false)),
+                )
+            })
+            .collect();
+        UnionArray::try_new(
+            union_fields.into_iter().collect(),
+            ScalarBuffer::from(self.type_id_builder.as_slice().to_vec()),
+            self.value_offset_builder
+                .as_ref()
+                .map(|builder| ScalarBuffer::from(builder.as_slice().to_vec())),
+            children,
+        )
+    }
+}
+
+impl ArrayBuilder for UnionBuilder {
+    /// Returns the number of array slots in the builder
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Builds the array and resets the builder, preserving its dense/sparse mode
+    fn finish(&mut self) -> ArrayRef {
+        let dense = self.value_offset_builder.is_some();
+        let builder = std::mem::take(self);
+        // `Default` has no offsets builder (a sparse union); restore one if we were dense
+        self.value_offset_builder = dense.then(Default::default);
+        // Since UnionBuilder controls all invariants, this should never fail
+        Arc::new(builder.build().unwrap())
+    }
+
+    /// Builds the array without resetting the underlying builder
+    fn finish_cloned(&self) -> ArrayRef {
+        // UnionBuilder controls every invariant `try_new` checks, so this should never panic.
+        Arc::new(self.build_cloned().unwrap_or_else(|err| {
+            panic!("UnionBuilder::build_cloned failed unexpectedly: {}", err)
+        }))
+    }
+
+    /// Returns the builder as a non-mutable `Any` reference
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the builder as a mutable `Any` reference
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    /// Returns the boxed builder as a box of `Any`
+    fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::array::Array;
+    use crate::cast::AsArray;
+    use crate::types::{Float64Type, Int32Type};
+
+    #[test]
+    fn test_union_builder_array_builder_trait() {
+        // Exercise a dense UnionBuilder through the ArrayBuilder methods len/finish_cloned/finish
+        let mut builder = UnionBuilder::new_dense();
+
+        // Append two Int32 values to field "a" and one Float64 value to field "b"
+        builder.append::<Int32Type>("a", 1).unwrap();
+        builder.append::<Float64Type>("b", 3.0).unwrap();
+        builder.append::<Int32Type>("a", 4).unwrap();
+
+        assert_eq!(builder.len(), 3);
+
+        // finish_cloned must produce an array without resetting the builder
+        let array1 = builder.finish_cloned();
+        assert_eq!(array1.len(), 3);
+
+        // Verify type ids, offsets and child values of the cloned array
+        let union1 = array1.as_any().downcast_ref::<UnionArray>().unwrap();
+        assert_eq!(union1.type_ids(), &[0, 1, 0]);
+        assert_eq!(union1.offsets().unwrap().as_ref(), &[0, 0, 1]);
+        let int_array1 = union1.child(0).as_primitive::<Int32Type>();
+        let float_array1 = union1.child(1).as_primitive::<Float64Type>();
+        assert_eq!(int_array1.value(0), 1);
+        assert_eq!(int_array1.value(1), 4);
+        assert_eq!(float_array1.value(0), 3.0);
+
+        // Builder should still be usable after finish_cloned
+        builder.append::<Float64Type>("b", 5.0).unwrap();
+        assert_eq!(builder.len(), 4);
+
+        // finish must also contain the value appended after finish_cloned
+        let array2 = builder.finish();
+        assert_eq!(array2.len(), 4);
+
+        // Verify type ids, offsets and child values of the final array
+        let union2 = array2.as_any().downcast_ref::<UnionArray>().unwrap();
+        assert_eq!(union2.type_ids(), &[0, 1, 0, 1]);
+        assert_eq!(union2.offsets().unwrap().as_ref(), &[0, 0, 1, 1]);
+        let int_array2 = union2.child(0).as_primitive::<Int32Type>();
+        let float_array2 = union2.child(1).as_primitive::<Float64Type>();
+        assert_eq!(int_array2.value(0), 1);
+        assert_eq!(int_array2.value(1), 4);
+        assert_eq!(float_array2.value(0), 3.0);
+        assert_eq!(float_array2.value(1), 5.0);
+    }
+
+    #[test]
+    fn test_union_builder_type_erased() {
+        // Store a sparse UnionBuilder as a type-erased Box<dyn ArrayBuilder>
+        let mut builders: Vec<Box<dyn ArrayBuilder>> = vec![Box::new(UnionBuilder::new_sparse())];
+
+        // Downcast back to UnionBuilder to append typed values
+        let union_builder = builders[0]
+            .as_any_mut()
+            .downcast_mut::<UnionBuilder>()
+            .unwrap();
+        union_builder.append::<Int32Type>("x", 10).unwrap();
+        union_builder.append::<Float64Type>("y", 20.0).unwrap();
+
+        assert_eq!(builders[0].len(), 2);
+
+        let result = builders
+            .into_iter()
+            .map(|mut b| b.finish())
+            .collect::<Vec<_>>();
+        assert_eq!(result[0].len(), 2);
+
+        // Verify sparse union values
+        let union = result[0].as_any().downcast_ref::<UnionArray>().unwrap();
+        assert_eq!(union.type_ids(), &[0, 1]);
+        assert!(union.offsets().is_none()); // Sparse union has no offsets
+        let int_array = union.child(0).as_primitive::<Int32Type>();
+        let float_array = union.child(1).as_primitive::<Float64Type>();
+        assert_eq!(int_array.value(0), 10);
+        assert!(int_array.is_null(1)); // slot 1 was appended to "y", so "x" is null there
+        assert!(float_array.is_null(0)); // slot 0 was appended to "x", so "y" is null there
+        assert_eq!(float_array.value(1), 20.0);
+    }
 }
diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index c9b92efe6c0e..de590ff87c77 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -365,6 +365,12 @@ macro_rules! downcast_primitive {
             $crate::repeat_pat!($crate::cast::__private::DataType::Float64, $($data_type),+) => {
                 $m!($crate::types::Float64Type $(, $args)*)
             }
+            $crate::repeat_pat!($crate::cast::__private::DataType::Decimal32(_, _), $($data_type),+) => {
+                $m!($crate::types::Decimal32Type $(, $args)*)
+            }
+            $crate::repeat_pat!($crate::cast::__private::DataType::Decimal64(_, _), $($data_type),+) => {
+                $m!($crate::types::Decimal64Type $(, $args)*)
+            }
             $crate::repeat_pat!($crate::cast::__private::DataType::Decimal128(_, _), $($data_type),+) => {
                 $m!($crate::types::Decimal128Type $(, $args)*)
             }
@@ -1126,6 +1132,18 @@ mod tests {
         assert!(!as_string_array(&array).is_empty())
     }
 
+    #[test]
+    fn test_decimal32array() {
+        let a = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+        assert!(!as_primitive_array::<Decimal32Type>(&a).is_empty()); // downcast result keeps data
+    }
+
+    #[test]
+    fn test_decimal64array() {
+        let a = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+        assert!(!as_primitive_array::<Decimal64Type>(&a).is_empty()); // downcast result keeps data
+    }
+
     #[test]
     fn test_decimal128array() {
         let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);
diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs
index ac28289e652b..f50dd3420baa 100644
--- a/arrow-array/src/ffi.rs
+++ b/arrow-array/src/ffi.rs
@@ -103,9 +103,9 @@ To export an array, create an `ArrowArray` using [ArrowArray::try_new].
 
 use std::{mem::size_of, ptr::NonNull, sync::Arc};
 
-use arrow_buffer::{bit_util, Buffer, MutableBuffer};
+use arrow_buffer::{Buffer, MutableBuffer, bit_util};
 pub use arrow_data::ffi::FFI_ArrowArray;
-use arrow_data::{layout, ArrayData};
+use arrow_data::{ArrayData, layout};
 pub use arrow_schema::ffi::FFI_ArrowSchema;
 use arrow_schema::{ArrowError, DataType, UnionMode};
 
@@ -134,23 +134,23 @@ pub unsafe fn export_array_into_raw(
     let array = FFI_ArrowArray::new(&data);
     let schema = FFI_ArrowSchema::try_from(data.data_type())?;
 
-    std::ptr::write_unaligned(out_array, array);
-    std::ptr::write_unaligned(out_schema, schema);
+    unsafe { std::ptr::write_unaligned(out_array, array) };
+    unsafe { std::ptr::write_unaligned(out_schema, schema) };
 
     Ok(())
 }
 
-// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
-// This is set by the Arrow specification
+/// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
+/// This is set by the Arrow specification
 fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
     if let Some(primitive) = data_type.primitive_width() {
         return match i {
             0 => Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
+                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
             ))),
             1 => Ok(primitive * 8),
             i => Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
             ))),
         };
     }
@@ -159,75 +159,84 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
         (DataType::Boolean, 1) => 1,
         (DataType::Boolean, _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
         (DataType::FixedSizeList(f, num_elems), 1) => {
             let child_bit_width = bit_width(f.data_type(), 1)?;
             child_bit_width * (*num_elems as usize)
-        },
+        }
         (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
-        },
+                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
+        }
         // Variable-size list and map have one i32 buffer.
         // Variable-sized binaries: have two buffers.
         // "small": first buffer is i32, second is in bytes
-        (DataType::Utf8, 1) | (DataType::Binary, 1) | (DataType::List(_), 1) | (DataType::Map(_, _), 1) => i32::BITS as _,
+        (DataType::Utf8, 1)
+        | (DataType::Binary, 1)
+        | (DataType::List(_), 1)
+        | (DataType::Map(_, _), 1) => i32::BITS as _,
         (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
+        // List views have two i32 buffers, offsets and sizes
+        (DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
+        // Large list views have two i64 buffers, offsets and sizes
+        (DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
         (DataType::List(_), _) | (DataType::Map(_, _), _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         (DataType::Utf8, _) | (DataType::Binary, _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         // Variable-sized binaries: have two buffers.
         // LargeUtf8: first buffer is i64, second is in bytes
-        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => i64::BITS as _,
-        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _,
-        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
+        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => {
+            i64::BITS as _
+        }
+        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => {
+            u8::BITS as _
+        }
+        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         // Variable-sized views: have 3 or more buffers.
         // Buffer 1 are the u128 views
         // Buffers 2...N-1 are u8 byte buffers
-        (DataType::Utf8View, 1) | (DataType::BinaryView,1) => u128::BITS as _,
-        (DataType::Utf8View, _) | (DataType::BinaryView, _) => {
-            u8::BITS as _
-        }
+        (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _,
+        (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _,
         // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0.
         (DataType::Union(_, _), 0) => i8::BITS as _,
         // Only DenseUnion has 2nd buffer
         (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
         (DataType::Union(_, UnionMode::Sparse), _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         (DataType::Union(_, UnionMode::Dense), _) => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
+            )));
         }
         (_, 0) => {
             // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
             // UnionArray, they should be handled above.
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
-            )))
+                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
+            )));
         }
         _ => {
             return Err(ArrowError::CDataInterface(format!(
-                "The datatype \"{data_type:?}\" is still not supported in Rust implementation"
-            )))
+                "The datatype \"{data_type}\" is still not supported in Rust implementation"
+            )));
         }
     })
 }
@@ -249,7 +258,7 @@ unsafe fn create_buffer(
         return None;
     }
     NonNull::new(array.buffer(index) as _)
-        .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner))
+        .map(|ptr| unsafe { Buffer::from_custom_allocation(ptr, len, owner) })
 }
 
 /// Export to the C Data Interface
@@ -346,6 +355,8 @@ impl ImportedArrowArray<'_> {
             DataType::List(field)
             | DataType::FixedSizeList(field, _)
             | DataType::LargeList(field)
+            | DataType::ListView(field)
+            | DataType::LargeListView(field)
             | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
             DataType::Struct(fields) => {
                 assert!(fields.len() == self.array.num_children());
@@ -408,7 +419,17 @@ impl ImportedArrowArray<'_> {
             .map(|index| {
                 let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
                 match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
-                    Some(buf) => Ok(buf),
+                    Some(buf) => {
+                        // External libraries may use a dangling pointer for a buffer with length 0.
+                        // We respect the array length specified in the C Data Interface. Actually,
+                        // if the length is incorrect, we cannot create a correct buffer even if
+                        // the pointer is valid.
+                        if buf.is_empty() {
+                            Ok(MutableBuffer::new(0).into())
+                        } else {
+                            Ok(buf)
+                        }
+                    }
                     None if len == 0 => {
                         // Null data buffer, which Rust doesn't allow. So create
                         // an empty buffer.
@@ -456,6 +477,14 @@ impl ImportedArrowArray<'_> {
                 debug_assert_eq!(bits % 8, 0);
                 (length + 1) * (bits / 8)
             }
+            (DataType::ListView(_), 1)
+            | (DataType::ListView(_), 2)
+            | (DataType::LargeListView(_), 1)
+            | (DataType::LargeListView(_), 2) => {
+                let bits = bit_width(data_type, i)?;
+                debug_assert_eq!(bits % 8, 0);
+                length * (bits / 8)
+            }
             (DataType::Utf8, 2) | (DataType::Binary, 2) => {
                 if self.array.is_empty() {
                     return Ok(0);
@@ -515,7 +544,7 @@ impl ImportedArrowArray<'_> {
         unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
     }
 
-    fn dictionary(&self) -> Result<Option<ImportedArrowArray>> {
+    fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
         match (self.array.dictionary(), &self.data_type) {
             (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
                 array,
@@ -538,12 +567,12 @@ mod tests_to_then_from_ffi {
     use std::collections::HashMap;
     use std::mem::ManuallyDrop;
 
-    use arrow_buffer::NullBuffer;
+    use arrow_buffer::{ArrowNativeType, NullBuffer};
     use arrow_schema::Field;
 
     use crate::builder::UnionBuilder;
     use crate::cast::AsArray;
-    use crate::types::{Float64Type, Int32Type, Int8Type};
+    use crate::types::{Float64Type, Int8Type, Int32Type};
     use crate::*;
 
     use super::*;
@@ -768,6 +797,71 @@ mod tests_to_then_from_ffi {
         test_generic_list::<i64>()
     }
 
+    fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
+        // Construct the child values array: eight Int16 values, 0 through 7
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct the offsets and sizes buffers for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
+        let value_offsets = [0_usize, 3, 6]
+            .iter()
+            .map(|i| Offset::from_usize(*i).unwrap())
+            .collect::<Buffer>();
+
+        let sizes_buffer = [3_usize, 3, 2]
+            .iter()
+            .map(|i| Offset::from_usize(*i).unwrap())
+            .collect::<Buffer>();
+
+        // Construct the list-view data type for Int16 items; the array data is assembled below
+        let list_view_dt = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
+            Field::new_list_field(DataType::Int16, false),
+        ));
+
+        let list_data = ArrayData::builder(list_view_dt)
+            .len(3)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+
+        let original = GenericListViewArray::<Offset>::from(list_data.clone());
+
+        // export it through the FFI structs
+        let (array, schema) = to_ffi(&original.to_data())?;
+
+        // (simulate consumer) import it back
+        let data = unsafe { from_ffi(array, &schema) }?;
+        let array = make_array(data);
+
+        // downcast to the concrete list-view array type
+        let array = array
+            .as_any()
+            .downcast_ref::<GenericListViewArray<Offset>>()
+            .unwrap();
+
+        assert_eq!(&array.value(0), &original.value(0));
+        assert_eq!(&array.value(1), &original.value(1));
+        assert_eq!(&array.value(2), &original.value(2));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_list_view() -> Result<()> {
+        test_generic_list_view::<i32>() // round trip with i32 offsets and sizes
+    }
+
+    #[test]
+    fn test_large_list_view() -> Result<()> {
+        test_generic_list_view::<i64>() // round trip with i64 offsets and sizes
+    }
+
     fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
         // create an array natively
         let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
@@ -1296,23 +1390,32 @@ mod tests_to_then_from_ffi {
 
 #[cfg(test)]
 mod tests_from_ffi {
+    #[cfg(not(feature = "force_validate"))]
+    use std::ptr::NonNull;
     use std::sync::Arc;
 
+    use arrow_buffer::NullBuffer;
+    #[cfg(not(feature = "force_validate"))]
+    use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
+    #[cfg(feature = "force_validate")]
     use arrow_buffer::{bit_util, buffer::Buffer};
-    use arrow_data::transform::MutableArrayData;
+
     use arrow_data::ArrayData;
+    use arrow_data::transform::MutableArrayData;
     use arrow_schema::{DataType, Field};
 
     use super::Result;
+
     use crate::builder::GenericByteViewBuilder;
     use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
     use crate::{
+        ArrayRef, GenericByteViewArray, ListArray,
         array::{
             Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
             Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
         },
-        ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
-        make_array, ArrayRef, GenericByteViewArray, ListArray,
+        ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi},
+        make_array,
     };
 
     fn test_round_trip(expected: &ArrayData) -> Result<()> {
@@ -1506,6 +1609,65 @@ mod tests_from_ffi {
         test_round_trip(&data)
     }
 
+    #[test]
+    fn test_list_view() -> Result<()> {
+        // Construct the child values array: eight Int16 values, 0 through 7
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct the offsets and sizes buffers for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
+        let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
+        let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);
+
+        // Construct the list-view data type for Int16 items; the array data is assembled below
+        let list_view_dt =
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, false)));
+
+        let list_view_data = ArrayData::builder(list_view_dt)
+            .len(3)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .build()
+            .unwrap();
+
+        test_round_trip(&list_view_data)
+    }
+
+    #[test]
+    fn test_list_view_with_nulls() -> Result<()> {
+        // Construct the child values array: eight Int16 values, 0 through 7
+        let value_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
+            .build()
+            .unwrap();
+
+        // Construct the offsets and sizes buffers for the nested array:
+        //  [[0, 1, 2], [3, 4, 5], [6, 7], null] (the null slot uses offset 8 and size 0)
+        let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
+        let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);
+
+        // Construct the list-view data type for Int16 items; the array data is assembled below
+        let list_view_dt =
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, true)));
+
+        let list_view_data = ArrayData::builder(list_view_dt)
+            .len(4)
+            .add_buffer(value_offsets)
+            .add_buffer(sizes_buffer)
+            .add_child_data(value_data)
+            .nulls(Some(NullBuffer::from(vec![true, true, true, false])))
+            .build()
+            .unwrap();
+
+        test_round_trip(&list_view_data)
+    }
+
     #[test]
     #[cfg(not(feature = "force_validate"))]
     fn test_empty_string_with_non_zero_offset() -> Result<()> {
@@ -1576,7 +1738,7 @@ mod tests_from_ffi {
         let mut strings = vec![];
 
         for i in 0..1000 {
-            strings.push(format!("string: {}", i));
+            strings.push(format!("string: {i}"));
         }
 
         let string_array = StringArray::from(strings);
@@ -1660,6 +1822,25 @@ mod tests_from_ffi {
         }
     }
 
+    #[test]
+    #[cfg(not(feature = "force_validate"))]
+    fn test_utf8_view_ffi_from_dangling_pointer() {
+        let empty = GenericByteViewBuilder::<StringViewType>::new().finish();
+        let buffers = empty.data_buffers().to_vec();
+        let nulls = empty.nulls().cloned();
+
+        // Create a zero-length views buffer whose data pointer is dangling (non-null).
+        let alloc = Arc::new(1);
+        let buffer = unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, alloc) };
+        let views = unsafe { ScalarBuffer::new_unchecked(buffer) };
+        // Reuse the data buffers and nulls of `empty`, so only the views buffer is different.
+        let str_view: GenericByteViewArray<StringViewType> =
+            unsafe { GenericByteViewArray::new_unchecked(views, buffers, nulls) };
+        let imported = roundtrip_byte_view_array(str_view);
+        assert_eq!(imported.len(), 0);
+        assert_eq!(&imported, &empty);
+    }
+
     #[test]
     fn test_round_trip_byte_view() {
         fn test_case<T>()
diff --git a/arrow-array/src/ffi_stream.rs b/arrow-array/src/ffi_stream.rs
index 3d4e89e80b89..c46943682914 100644
--- a/arrow-array/src/ffi_stream.rs
+++ b/arrow-array/src/ffi_stream.rs
@@ -64,7 +64,7 @@ use std::{
 };
 
 use arrow_data::ffi::FFI_ArrowArray;
-use arrow_schema::{ffi::FFI_ArrowSchema, ArrowError, Schema, SchemaRef};
+use arrow_schema::{ArrowError, Schema, SchemaRef, ffi::FFI_ArrowSchema};
 
 use crate::array::Array;
 use crate::array::StructArray;
@@ -105,13 +105,13 @@ unsafe extern "C" fn release_stream(stream: *mut FFI_ArrowArrayStream) {
     if stream.is_null() {
         return;
     }
-    let stream = &mut *stream;
+    let stream = unsafe { &mut *stream };
 
     stream.get_schema = None;
     stream.get_next = None;
     stream.get_last_error = None;
 
-    let private_data = Box::from_raw(stream.private_data as *mut StreamPrivateData);
+    let private_data = unsafe { Box::from_raw(stream.private_data as *mut StreamPrivateData) };
     drop(private_data);
 
     stream.release = None;
@@ -188,7 +188,7 @@ impl FFI_ArrowArrayStream {
     /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
     /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
     pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Self {
-        std::ptr::replace(raw_stream, Self::empty())
+        unsafe { std::ptr::replace(raw_stream, Self::empty()) }
     }
 
     /// Creates a new empty [FFI_ArrowArrayStream]. Used to import from the C Stream Interface.
@@ -330,7 +330,7 @@ impl ArrowArrayStreamReader {
     ///
     /// See [`FFI_ArrowArrayStream::from_raw`]
     pub unsafe fn from_raw(raw_stream: *mut FFI_ArrowArrayStream) -> Result<Self> {
-        Self::try_new(FFI_ArrowArrayStream::from_raw(raw_stream))
+        Self::try_new(unsafe { FFI_ArrowArrayStream::from_raw(raw_stream) })
     }
 
     /// Get the last error from `ArrowArrayStreamReader`
@@ -364,7 +364,9 @@ impl Iterator for ArrowArrayStreamReader {
             let result = unsafe {
                 from_ffi_and_data_type(array, DataType::Struct(self.schema().fields().clone()))
             };
-            Some(result.map(|data| RecordBatch::from(StructArray::from(data))))
+            Some(result.and_then(|data| {
+                RecordBatch::try_new(self.schema.clone(), StructArray::from(data).into_parts().1)
+            }))
         } else {
             let last_error = self.get_stream_last_error();
             let err = ArrowError::CDataInterface(last_error.unwrap());
@@ -382,6 +384,7 @@ impl RecordBatchReader for ArrowArrayStreamReader {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     use arrow_schema::Field;
 
@@ -417,11 +420,18 @@ mod tests {
     }
 
     fn _test_round_trip_export(arrays: Vec<Arc<dyn Array>>) -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", arrays[0].data_type().clone(), true),
-            Field::new("b", arrays[1].data_type().clone(), true),
-            Field::new("c", arrays[2].data_type().clone(), true),
-        ]));
+        let metadata = HashMap::from([("foo".to_owned(), "bar".to_owned())]);
+        let schema = Arc::new(Schema::new_with_metadata(
+            vec![
+                Field::new("a", arrays[0].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+                Field::new("b", arrays[1].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+                Field::new("c", arrays[2].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+            ],
+            metadata,
+        ));
         let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap();
         let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _;
 
@@ -452,7 +462,11 @@ mod tests {
 
             let array = unsafe { from_ffi(ffi_array, &ffi_schema) }.unwrap();
 
-            let record_batch = RecordBatch::from(StructArray::from(array));
+            let record_batch = RecordBatch::try_new(
+                SchemaRef::from(exported_schema.clone()),
+                StructArray::from(array).into_parts().1,
+            )
+            .unwrap();
             produced_batches.push(record_batch);
         }
 
@@ -462,11 +476,18 @@ mod tests {
     }
 
     fn _test_round_trip_import(arrays: Vec<Arc<dyn Array>>) -> Result<()> {
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("a", arrays[0].data_type().clone(), true),
-            Field::new("b", arrays[1].data_type().clone(), true),
-            Field::new("c", arrays[2].data_type().clone(), true),
-        ]));
+        let metadata = HashMap::from([("foo".to_owned(), "bar".to_owned())]);
+        let schema = Arc::new(Schema::new_with_metadata(
+            vec![
+                Field::new("a", arrays[0].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+                Field::new("b", arrays[1].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+                Field::new("c", arrays[2].data_type().clone(), true)
+                    .with_metadata(metadata.clone()),
+            ],
+            metadata,
+        ));
         let batch = RecordBatch::try_new(schema.clone(), arrays).unwrap();
         let iter = Box::new(vec![batch.clone(), batch.clone()].into_iter().map(Ok)) as _;
 
diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs
index 6708da3d5dd6..c281231a2e79 100644
--- a/arrow-array/src/iterator.rs
+++ b/arrow-array/src/iterator.rs
@@ -44,7 +44,7 @@ use arrow_buffer::NullBuffer;
 /// [`PrimitiveArray`]: crate::PrimitiveArray
 /// [`compute::unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.unary.html
 /// [`compute::try_unary`]: https://docs.rs/arrow/latest/arrow/compute/fn.try_unary.html
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ArrayIter<T: ArrayAccessor> {
     array: T,
     logical_nulls: Option<NullBuffer>,
@@ -56,7 +56,7 @@ impl<T: ArrayAccessor> ArrayIter<T> {
     /// create a new iterator
     pub fn new(array: T) -> Self {
         let len = array.len();
-        let logical_nulls = array.logical_nulls();
+        let logical_nulls = array.logical_nulls().filter(|x| x.null_count() > 0);
         ArrayIter {
             array,
             logical_nulls,
@@ -98,10 +98,42 @@ impl<T: ArrayAccessor> Iterator for ArrayIter<T> {
 
     fn size_hint(&self) -> (usize, Option<usize>) {
         (
-            self.array.len() - self.current,
-            Some(self.array.len() - self.current),
+            self.current_end - self.current,
+            Some(self.current_end - self.current),
         )
     }
+
+    #[inline]
+    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        // Index of the item to return after skipping `n` items. It is only
+        // usable when it lies before `current_end`; `checked_add` makes an
+        // overflowing `n` fall into the same "nothing left" path.
+        let target = self.current.checked_add(n).filter(|&idx| idx < self.current_end);
+
+        let Some(target) = target else {
+            // Skipped past the end (or overflowed): leave the iterator empty.
+            self.current = self.current_end;
+            return None;
+        };
+
+        // Jump straight to the requested item without visiting the skipped
+        // ones, then let `next` yield it.
+        self.current = target;
+        self.next()
+    }
+
+    #[inline]
+    fn last(mut self) -> Option<Self::Item> {
+        self.next_back() // the last item can be taken straight from the back; no draining needed
+    }
+
+    #[inline]
+    fn count(self) -> usize
+    where
+        Self: Sized,
+    {
+        self.len() // the remaining length is already known, so no item has to be visited
+    }
 }
 
 impl<T: ArrayAccessor> DoubleEndedIterator for ArrayIter<T> {
@@ -122,6 +154,25 @@ impl<T: ArrayAccessor> DoubleEndedIterator for ArrayIter<T> {
             })
         }
     }
+
+    #[inline]
+    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+        // End bound once `n` items are dropped from the back. It is only
+        // usable when at least one item remains after `current`; `checked_sub`
+        // makes an oversized `n` fall into the same "nothing left" path.
+        let new_end = self.current_end.checked_sub(n).filter(|&end| self.current < end);
+
+        let Some(new_end) = new_end else {
+            // Skipped past the start (or underflowed): leave the iterator empty.
+            self.current = self.current_end;
+            return None;
+        };
+
+        // Drop the skipped items without visiting them, then let `next_back`
+        // yield the item that is now last.
+        self.current_end = new_end;
+        self.next_back()
+    }
 }
 
 /// all arrays have known size.
@@ -147,9 +198,12 @@ pub type MapArrayIter<'a> = ArrayIter<&'a MapArray>;
 pub type GenericListViewArrayIter<'a, O> = ArrayIter<&'a GenericListViewArray<O>>;
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-
     use crate::array::{ArrayRef, BinaryArray, BooleanArray, Int32Array, StringArray};
+    use crate::iterator::ArrayIter;
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
+    use std::fmt::Debug;
+    use std::sync::Arc;
 
     #[test]
     fn test_primitive_array_iter_round_trip() {
@@ -264,4 +318,875 @@ mod tests {
         // check if ExactSizeIterator is implemented
         let _ = array.iter().rposition(|opt_b| opt_b == Some(true));
     }
+
+    trait SharedBetweenArrayIterAndSliceIter:
+        ExactSizeIterator<Item = Option<i32>> + DoubleEndedIterator<Item = Option<i32>> + Clone
+    {
+    }
+    impl<T: Clone + ExactSizeIterator<Item = Option<i32>> + DoubleEndedIterator<Item = Option<i32>>>
+        SharedBetweenArrayIterAndSliceIter for T
+    {
+    }
+
+    fn get_int32_iterator_cases() -> impl Iterator<Item = (Int32Array, Vec<Option<i32>>)> {
+        let mut rng = StdRng::seed_from_u64(42); // fixed seed: same cases on every run
+        // Cases without nulls: distinct ascending values, then random values
+        let no_nulls_and_no_duplicates = (0..10).map(Some).collect::<Vec<Option<i32>>>();
+        let no_nulls_random_values = (0..10)
+            .map(|_| rng.random::<i32>())
+            .map(Some)
+            .collect::<Vec<Option<i32>>>();
+        // Cases varying where the nulls sit: all, leading, trailing, middle, random
+        let all_nulls = (0..10).map(|_| None).collect::<Vec<Option<i32>>>();
+        let only_start_nulls = (0..10)
+            .map(|item| if item < 4 { None } else { Some(item) })
+            .collect::<Vec<Option<i32>>>();
+        let only_end_nulls = (0..10)
+            .map(|item| if item > 8 { None } else { Some(item) })
+            .collect::<Vec<Option<i32>>>();
+        let only_middle_nulls = (0..10)
+            .map(|item| {
+                if (4..=8).contains(&item) && rng.random_bool(0.9) {
+                    None
+                } else {
+                    Some(item)
+                }
+            })
+            .collect::<Vec<Option<i32>>>();
+        let random_values_with_random_nulls = (0..10)
+            .map(|_| {
+                if rng.random_bool(0.3) {
+                    None
+                } else {
+                    Some(rng.random::<i32>())
+                }
+            })
+            .collect::<Vec<Option<i32>>>();
+        // Cases without nulls but with repeated values
+        let no_nulls_and_some_duplicates = (0..10)
+            .map(|item| item % 3)
+            .map(Some)
+            .collect::<Vec<Option<i32>>>();
+        let no_nulls_and_all_same_value =
+            (0..10).map(|_| 9).map(Some).collect::<Vec<Option<i32>>>();
+        let no_nulls_and_continues_duplicates = [0, 0, 0, 1, 1, 2, 2, 2, 2, 3]
+            .map(Some)
+            .into_iter()
+            .collect::<Vec<Option<i32>>>();
+        // Cases mixing distinct values with a single, periodic or consecutive nulls
+        let single_null_and_no_duplicates = (0..10)
+            .map(|item| if item == 4 { None } else { Some(item) })
+            .collect::<Vec<Option<i32>>>();
+        let multiple_nulls_and_no_duplicates = (0..10)
+            .map(|item| if item % 3 == 2 { None } else { Some(item) })
+            .collect::<Vec<Option<i32>>>();
+        let continues_nulls_and_no_duplicates = [
+            Some(0),
+            Some(1),
+            None,
+            None,
+            Some(2),
+            Some(3),
+            None,
+            Some(4),
+            Some(5),
+            None,
+        ]
+        .into_iter()
+        .collect::<Vec<Option<i32>>>();
+        // Pair every case with the Int32Array built from it
+        [
+            no_nulls_and_no_duplicates,
+            no_nulls_random_values,
+            no_nulls_and_some_duplicates,
+            no_nulls_and_all_same_value,
+            no_nulls_and_continues_duplicates,
+            all_nulls,
+            only_start_nulls,
+            only_end_nulls,
+            only_middle_nulls,
+            random_values_with_random_nulls,
+            single_null_and_no_duplicates,
+            multiple_nulls_and_no_duplicates,
+            continues_nulls_and_no_duplicates,
+        ]
+        .map(|case| (Int32Array::from(case.clone()), case))
+        .into_iter()
+    }
+
+    trait SetupIter {
+        fn description(&self) -> String;
+        fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I);
+    }
+
+    struct NoSetup;
+    impl SetupIter for NoSetup {
+        fn description(&self) -> String {
+            "no setup".to_string()
+        }
+        fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, _iter: &mut I) {
+            // none
+        }
+    }
+
+    fn setup_and_assert_cases_on_single_operation(
+        o: &impl ConsumingArrayIteratorOp,
+        setup_iterator: impl SetupIter,
+    ) {
+        for (array, source) in get_int32_iterator_cases() {
+            let mut actual = ArrayIter::new(&array);
+            let mut expected = source.iter().copied();
+            // Bring both iterators into the same starting state before running the op
+            setup_iterator.setup(&mut actual);
+            setup_iterator.setup(&mut expected);
+            // Remaining items of the reference iterator, only used in the failure message
+            let current_iterator_values: Vec<Option<i32>> = expected.clone().collect();
+
+            assert_eq!(
+                o.get_value(actual),
+                o.get_value(expected),
+                "Failed on op {} for {} (left actual, right expected) ({current_iterator_values:?})",
+                o.name(),
+                setup_iterator.description(),
+            );
+        }
+    }
+
+    /// Trait representing an operation on an [`ArrayIter`]
+    /// that can be compared against a slice iterator.
+    ///
+    /// This is for consuming operations (e.g. `count`, `last`, etc)
+    trait ConsumingArrayIteratorOp {
+        /// What the operation returns (e.g. `Option<i32>` for `last`, `usize` for `count`, etc)
+        type Output: PartialEq + Debug;
+
+        /// The name of the operation, used for error messages
+        fn name(&self) -> String;
+
+        /// Get the value of the operation for the provided iterator
+        /// (either an [`ArrayIter`] or a slice iterator, so the two results can be compared).
+        ///
+        /// Example implementation:
+        /// 1. for `last` it will be the last value
+        /// 2. for `count` it will be the returned length
+        fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) -> Self::Output;
+    }
+
+    /// Trait representing an operation on an [`ArrayIter`]
+    /// that can be compared against a slice iterator.
+    ///
+    /// This is for mutating operations (e.g. `position`, `any`, `find`, etc)
+    trait MutatingArrayIteratorOp {
+        /// What the operation returns (e.g. `Option<usize>` for `position`, `bool` for `any`)
+        type Output: PartialEq + Debug;
+
+        /// The name of the operation, used for error messages
+        fn name(&self) -> String;
+
+        /// Get the value of the operation for the provided iterator
+        /// (either an [`ArrayIter`] or a slice iterator, so the two results can be compared).
+        ///
+        /// Example implementation:
+        /// 1. for `position` it will be the returned index plus every element the predicate saw
+        /// 2. for `any` it will be the returned bool plus every element the predicate saw
+        fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut T) -> Self::Output;
+    }
+
+    /// Helper function that will assert that the provided operation
+    /// produces the same result for both [`ArrayIter`] and slice iterator
+    /// under various consumption patterns (e.g. some calls to next/next_back/consume_all/etc)
+    fn assert_array_iterator_cases<O: ConsumingArrayIteratorOp>(o: O) {
+        setup_and_assert_cases_on_single_operation(&o, NoSetup);
+
+        struct Next;
+        impl SetupIter for Next {
+            fn description(&self) -> String {
+                "new iter after consuming 1 element from the start".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                iter.next();
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, Next);
+
+        struct NextBack;
+        impl SetupIter for NextBack {
+            fn description(&self) -> String {
+                "new iter after consuming 1 element from the end".to_string()
+            }
+
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                iter.next_back();
+            }
+        }
+
+        setup_and_assert_cases_on_single_operation(&o, NextBack);
+
+        struct NextAndBack;
+        impl SetupIter for NextAndBack {
+            fn description(&self) -> String {
+                "new iter after consuming 1 element from start and end".to_string()
+            }
+
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                iter.next();
+                iter.next_back();
+            }
+        }
+
+        setup_and_assert_cases_on_single_operation(&o, NextAndBack);
+
+        struct NextUntilLast;
+        impl SetupIter for NextUntilLast {
+            fn description(&self) -> String {
+                "new iter after consuming all from the start but 1".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                let len = iter.len();
+                if len > 1 {
+                    iter.nth(len - 2);
+                }
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextUntilLast);
+
+        struct NextBackUntilFirst;
+        impl SetupIter for NextBackUntilFirst {
+            fn description(&self) -> String {
+                "new iter after consuming all from the end but 1".to_string()
+            }
+
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                let len = iter.len();
+                if len > 1 {
+                    iter.nth_back(len - 2);
+                }
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextBackUntilFirst);
+
+        struct NextFinish;
+        impl SetupIter for NextFinish {
+            fn description(&self) -> String {
+                "new iter after consuming all from the start".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                iter.nth(iter.len());
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextFinish);
+
+        struct NextBackFinish;
+        impl SetupIter for NextBackFinish {
+            fn description(&self) -> String {
+                "new iter after consuming all from the end".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                iter.nth_back(iter.len());
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextBackFinish);
+
+        struct NextUntilLastNone;
+        impl SetupIter for NextUntilLastNone {
+            fn description(&self) -> String {
+                "new iter that have no nulls left".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                let last_null_position = iter.clone().rposition(|item| item.is_none());
+
+                // move the iterator to the location where there are no nulls anymore
+                if let Some(last_null_position) = last_null_position {
+                    iter.nth(last_null_position);
+                }
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextUntilLastNone);
+
+        struct NextUntilLastSome;
+        impl SetupIter for NextUntilLastSome {
+            fn description(&self) -> String {
+                "iter that only have nulls left".to_string()
+            }
+            fn setup<I: SharedBetweenArrayIterAndSliceIter>(&self, iter: &mut I) {
+                let last_some_position = iter.clone().rposition(|item| item.is_some());
+
+                // move the iterator to the location where there are only nulls
+                if let Some(last_some_position) = last_some_position {
+                    iter.nth(last_some_position);
+                }
+            }
+        }
+        setup_and_assert_cases_on_single_operation(&o, NextUntilLastSome);
+    }
+
+    /// Helper function that will assert that the provided operation
+    /// produces the same result for both [`ArrayIter`] and slice iterator
+    /// under various consumption patterns (e.g. some calls to next/next_back/consume_all/etc)
+    ///
+    /// This differs from [`assert_array_iterator_cases`] in that it also checks the items left
+    /// after the call, to make sure the operation does not leave the iterator in an incorrect state
+    fn assert_array_iterator_cases_mutate<O: MutatingArrayIteratorOp>(o: O) {
+        struct Adapter<O: MutatingArrayIteratorOp> {
+            o: O,
+        }
+
+        #[derive(Debug, PartialEq)]
+        struct AdapterOutput<Value> {
+            value: Value,
+            /// Everything the iterator still yields after running the operation
+            leftover: Vec<Option<i32>>,
+        }
+
+        impl<O: MutatingArrayIteratorOp> ConsumingArrayIteratorOp for Adapter<O> {
+            type Output = AdapterOutput<O::Output>;
+
+            fn name(&self) -> String {
+                self.o.name()
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                mut iter: T,
+            ) -> Self::Output {
+                let value = self.o.get_value(&mut iter);
+
+                // Drain what is left so the post-operation state is part of the compared output
+                let leftover: Vec<_> = iter.collect();
+
+                AdapterOutput { value, leftover }
+            }
+        }
+        // Reuse the consuming-op harness by wrapping the mutating op in the adapter
+        assert_array_iterator_cases(Adapter { o })
+    }
+
+    #[derive(Debug, PartialEq)]
+    struct CallTrackingAndResult<Result: Debug + PartialEq, CallArgs: Debug + PartialEq> {
+        result: Result,
+        calls: Vec<CallArgs>,
+    }
+    type CallTrackingWithInputType<Result> = CallTrackingAndResult<Result, Option<i32>>;
+    type CallTrackingOnly = CallTrackingWithInputType<()>;
+
+    #[test]
+    fn assert_position() {
+        struct PositionOp {
+            reverse: bool,
+            number_of_false: usize,
+        }
+
+        impl MutatingArrayIteratorOp for PositionOp {
+            type Output = CallTrackingWithInputType<Option<usize>>;
+            fn name(&self) -> String {
+                if self.reverse {
+                    format!("rposition with {} false returned", self.number_of_false)
+                } else {
+                    format!("position with {} false returned", self.number_of_false)
+                }
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                iter: &mut T,
+            ) -> Self::Output {
+                let mut items = vec![];
+
+                let mut count = 0;
+
+                let cb = |item| {
+                    items.push(item);
+
+                    if count < self.number_of_false {
+                        count += 1;
+                        false
+                    } else {
+                        true
+                    }
+                };
+
+                let position_result = if self.reverse {
+                    iter.rposition(cb)
+                } else {
+                    iter.position(cb)
+                };
+
+                CallTrackingAndResult {
+                    result: position_result,
+                    calls: items,
+                }
+            }
+        }
+
+        for reverse in [false, true] {
+            for number_of_false in [0, 1, 2, usize::MAX] {
+                assert_array_iterator_cases_mutate(PositionOp {
+                    reverse,
+                    number_of_false,
+                });
+            }
+        }
+    }
+
+    #[test]
+    fn assert_nth() {
+        for (array, source) in get_int32_iterator_cases() {
+            let actual = ArrayIter::new(&array);
+            let expected = source.iter().copied();
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    #[allow(clippy::iter_nth_zero)]
+                    let actual_val = actual.nth(0);
+                    #[allow(clippy::iter_nth_zero)]
+                    let expected_val = expected.nth(0);
+                    assert_eq!(actual_val, expected_val, "Failed on nth(0)");
+                }
+            }
+
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    let actual_val = actual.nth(1);
+                    let expected_val = expected.nth(1);
+                    assert_eq!(actual_val, expected_val, "Failed on nth(1)");
+                }
+            }
+
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    let actual_val = actual.nth(2);
+                    let expected_val = expected.nth(2);
+                    assert_eq!(actual_val, expected_val, "Failed on nth(2)");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn assert_nth_back() {
+        for (array, source) in get_int32_iterator_cases() {
+            let actual = ArrayIter::new(&array);
+            let expected = source.iter().copied();
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    #[allow(clippy::iter_nth_zero)]
+                    let actual_val = actual.nth_back(0);
+                    #[allow(clippy::iter_nth_zero)]
+                    let expected_val = expected.nth_back(0);
+                    assert_eq!(actual_val, expected_val, "Failed on nth_back(0)");
+                }
+            }
+
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    let actual_val = actual.nth_back(1);
+                    let expected_val = expected.nth_back(1);
+                    assert_eq!(actual_val, expected_val, "Failed on nth_back(1)");
+                }
+            }
+
+            {
+                let mut actual = actual.clone();
+                let mut expected = expected.clone();
+                for _ in 0..expected.len() {
+                    let actual_val = actual.nth_back(2);
+                    let expected_val = expected.nth_back(2);
+                    assert_eq!(actual_val, expected_val, "Failed on nth_back(2)");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn assert_last() {
+        for (array, source) in get_int32_iterator_cases() {
+            let mut actual_forward = ArrayIter::new(&array);
+            let mut expected_forward = source.iter().copied();
+            // `len + 1` rounds: the final round checks `last()` on an exhausted iterator
+            for _ in 0..source.len() + 1 {
+                {
+                    let actual_forward_clone = actual_forward.clone();
+                    let expected_forward_clone = expected_forward.clone();
+
+                    assert_eq!(actual_forward_clone.last(), expected_forward_clone.last());
+                }
+
+                actual_forward.next();
+                expected_forward.next();
+            }
+            // Same check while consuming from the back instead of the front
+            let mut actual_backward = ArrayIter::new(&array);
+            let mut expected_backward = source.iter().copied();
+            for _ in 0..source.len() + 1 {
+                {
+                    assert_eq!(
+                        actual_backward.clone().last(),
+                        expected_backward.clone().last()
+                    );
+                }
+
+                actual_backward.next_back();
+                expected_backward.next_back();
+            }
+        }
+    }
+
+    #[test]
+    fn assert_for_each() {
+        struct ForEachOp;
+
+        impl ConsumingArrayIteratorOp for ForEachOp {
+            type Output = CallTrackingOnly;
+
+            fn name(&self) -> String {
+                "for_each".to_string()
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) -> Self::Output {
+                let mut items = Vec::with_capacity(iter.len());
+
+                iter.for_each(|item| {
+                    items.push(item);
+                });
+
+                CallTrackingAndResult {
+                    calls: items,
+                    result: (),
+                }
+            }
+        }
+
+        assert_array_iterator_cases(ForEachOp)
+    }
+
+    #[test]
+    fn assert_fold() {
+        // Drives either `fold` or `rfold`, depending on `reverse`.
+        struct FoldOp {
+            reverse: bool,
+        }
+
+        // Arguments the fold callback received on a single invocation.
+        #[derive(Debug, PartialEq)]
+        struct CallArgs {
+            acc: Option<i32>,
+            item: Option<i32>,
+        }
+
+        impl ConsumingArrayIteratorOp for FoldOp {
+            type Output = CallTrackingAndResult<Option<i32>, CallArgs>;
+
+            fn name(&self) -> String {
+                let label = if self.reverse { "rfold" } else { "fold" };
+                String::from(label)
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) -> Self::Output {
+                let mut calls = Vec::with_capacity(iter.len());
+
+                // Record every invocation; the next accumulator is derived
+                // from the current item alone (the incoming `acc` is ignored).
+                let step = |acc, item| {
+                    calls.push(CallArgs { item, acc });
+
+                    item.map(|val| val + 100)
+                };
+
+                let result = if self.reverse {
+                    iter.rfold(Some(1), step)
+                } else {
+                    #[allow(clippy::manual_try_fold)]
+                    iter.fold(Some(1), step)
+                };
+
+                CallTrackingAndResult { calls, result }
+            }
+        }
+
+        // Forward first, then reverse.
+        for reverse in [false, true] {
+            assert_array_iterator_cases(FoldOp { reverse });
+        }
+    }
+
+    #[test]
+    fn assert_count() {
+        struct CountOp;
+
+        impl ConsumingArrayIteratorOp for CountOp {
+            type Output = usize;
+
+            fn name(&self) -> String {
+                String::from("count")
+            }
+            // Consumes the iterator and reports how many items it yielded.
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) -> Self::Output {
+                Iterator::count(iter)
+            }
+        }
+
+        assert_array_iterator_cases(CountOp)
+    }
+
+    #[test]
+    fn assert_any() {
+        // `false_count` is how many leading predicate calls answer `false`.
+        struct AnyOp {
+            false_count: usize,
+        }
+
+        impl MutatingArrayIteratorOp for AnyOp {
+            type Output = CallTrackingWithInputType<bool>;
+
+            fn name(&self) -> String {
+                format!("any with {} false returned", self.false_count)
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                iter: &mut T,
+            ) -> Self::Output {
+                // Items handed to the predicate, in call order.
+                let mut calls = Vec::with_capacity(iter.len());
+                let mut falses_returned = 0;
+
+                let result = iter.any(|item| {
+                    calls.push(item);
+
+                    // Keep answering `false` until the budget is used up.
+                    let budget_left = falses_returned < self.false_count;
+                    if budget_left {
+                        falses_returned += 1;
+                    }
+                    !budget_left
+                });
+
+                CallTrackingWithInputType { calls, result }
+            }
+        }
+
+        // Cover an immediate hit, delayed hits, and a predicate that never hits.
+        for false_count in [0, 1, 2, usize::MAX] {
+            assert_array_iterator_cases_mutate(AnyOp { false_count });
+        }
+    }
+
+    #[test]
+    fn assert_all() {
+        // `true_count` is how many leading predicate calls answer `true`.
+        struct AllOp {
+            true_count: usize,
+        }
+
+        impl MutatingArrayIteratorOp for AllOp {
+            type Output = CallTrackingWithInputType<bool>;
+
+            fn name(&self) -> String {
+                format!("all with {} true returned", self.true_count)
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                iter: &mut T,
+            ) -> Self::Output {
+                // Items handed to the predicate, in call order.
+                let mut calls = Vec::with_capacity(iter.len());
+                let mut count = 0;
+
+                let result = iter.all(|item| {
+                    calls.push(item);
+
+                    // Answer `true` for the first `true_count` calls only.
+                    if count < self.true_count {
+                        count += 1;
+                        true
+                    } else {
+                        false
+                    }
+                });
+
+                CallTrackingWithInputType { calls, result }
+            }
+        }
+
+        for true_count in [0, 1, 2, usize::MAX] {
+            assert_array_iterator_cases_mutate(AllOp { true_count });
+        }
+    }
+
+    #[test]
+    fn assert_find() {
+        // `reverse` selects `rfind`; `false_count` is how many leading
+        // predicate calls reject their item.
+        struct FindOp {
+            reverse: bool,
+            false_count: usize,
+        }
+
+        impl MutatingArrayIteratorOp for FindOp {
+            type Output = CallTrackingWithInputType<Option<Option<i32>>>;
+
+            fn name(&self) -> String {
+                if self.reverse {
+                    format!("rfind with {} false returned", self.false_count)
+                } else {
+                    format!("find with {} false returned", self.false_count)
+                }
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                iter: &mut T,
+            ) -> Self::Output {
+                // Items handed to the predicate, in call order.
+                let mut calls = Vec::new();
+                let mut rejected = 0;
+
+                // Reject the first `false_count` items, then accept the next one.
+                let predicate = |item: &Option<i32>| {
+                    calls.push(*item);
+
+                    let accept = rejected >= self.false_count;
+                    if !accept {
+                        rejected += 1;
+                    }
+                    accept
+                };
+
+                let result = if self.reverse {
+                    iter.rfind(predicate)
+                } else {
+                    iter.find(predicate)
+                };
+
+                CallTrackingWithInputType { calls, result }
+            }
+        }
+
+        // Run every rejection budget in both directions.
+        for reverse in [false, true] {
+            for false_count in [0, 1, 2, usize::MAX] {
+                assert_array_iterator_cases_mutate(FindOp {
+                    reverse,
+                    false_count,
+                });
+            }
+        }
+    }
+
+    #[test]
+    fn assert_find_map() {
+        // `number_of_nones` is how many leading calls map their item to `None`.
+        struct FindMapOp {
+            number_of_nones: usize,
+        }
+
+        impl MutatingArrayIteratorOp for FindMapOp {
+            type Output = CallTrackingWithInputType<Option<&'static str>>;
+
+            fn name(&self) -> String {
+                format!("find_map with {} None returned", self.number_of_nones)
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(
+                &self,
+                iter: &mut T,
+            ) -> Self::Output {
+                // Items handed to the mapper, in call order.
+                let mut calls = Vec::new();
+                let mut nones_returned = 0;
+
+                let result = iter.find_map(|item| {
+                    calls.push(item);
+
+                    // Map to `None` until the budget is spent, then produce a hit.
+                    if nones_returned < self.number_of_nones {
+                        nones_returned += 1;
+                        return None;
+                    }
+
+                    Some("found it")
+                });
+
+                CallTrackingAndResult { result, calls }
+            }
+        }
+
+        // Cover an immediate hit, delayed hits, and a mapper that never hits.
+        for number_of_nones in [0, 1, 2, usize::MAX] {
+            assert_array_iterator_cases_mutate(FindMapOp { number_of_nones });
+        }
+    }
+
+    #[test]
+    fn assert_partition() {
+        // `predicate` receives the zero-based call index and the item.
+        struct PartitionOp<F: Fn(usize, &Option<i32>) -> bool> {
+            description: &'static str,
+            predicate: F,
+        }
+
+        // The two collections produced by `partition`.
+        #[derive(Debug, PartialEq)]
+        struct PartitionResult {
+            left: Vec<Option<i32>>,
+            right: Vec<Option<i32>>,
+        }
+
+        impl<F: Fn(usize, &Option<i32>) -> bool> ConsumingArrayIteratorOp for PartitionOp<F> {
+            type Output = CallTrackingWithInputType<PartitionResult>;
+
+            fn name(&self) -> String {
+                format!("partition by {}", self.description)
+            }
+
+            fn get_value<T: SharedBetweenArrayIterAndSliceIter>(&self, iter: T) -> Self::Output {
+                // Items handed to the predicate, in call order.
+                let mut calls = Vec::new();
+                let mut next_index = 0;
+
+                let (left, right) = iter.partition(|item| {
+                    calls.push(*item);
+
+                    // Hand the predicate the zero-based position of this call.
+                    let call_index = next_index;
+                    next_index += 1;
+                    (self.predicate)(call_index, item)
+                });
+
+                let result = PartitionResult { left, right };
+                CallTrackingAndResult { result, calls }
+            }
+        }
+
+        assert_array_iterator_cases(PartitionOp {
+            description: "None on one side and Some(*) on the other",
+            predicate: |_, item| item.is_none(),
+        });
+
+        assert_array_iterator_cases(PartitionOp {
+            description: "all true",
+            predicate: |_, _| true,
+        });
+
+        assert_array_iterator_cases(PartitionOp {
+            description: "all false",
+            predicate: |_, _| false,
+        });
+
+        let random_values: Vec<bool> = (0..100).map(|_| rand::random_bool(0.5)).collect();
+        assert_array_iterator_cases(PartitionOp {
+            description: "random",
+            predicate: |index, _| random_values[index % random_values.len()],
+        });
+    }
 }
diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs
index 91696540d219..86c1c6550cdb 100644
--- a/arrow-array/src/lib.rs
+++ b/arrow-array/src/lib.rs
@@ -225,7 +225,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![deny(rustdoc::broken_intra_doc_links)]
 #![warn(missing_docs)]
 
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 73464358657c..cfec969165a9 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -19,7 +19,7 @@
 //! [schema](arrow_schema::Schema).
 
 use crate::cast::AsArray;
-use crate::{new_empty_array, Array, ArrayRef, StructArray};
+use crate::{Array, ArrayRef, StructArray, new_empty_array};
 use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema, SchemaBuilder, SchemaRef};
 use std::ops::Index;
 use std::sync::Arc;
@@ -65,7 +65,7 @@ pub trait RecordBatchWriter {
 /// Support for limited data types is available. The macro will return a compile error if an unsupported data type is used.
 /// Presently supported data types are:
 /// - `Boolean`, `Null`
-/// - `Decimal128`, `Decimal256`
+/// - `Decimal32`, `Decimal64`, `Decimal128`, `Decimal256`
 /// - `Float16`, `Float32`, `Float64`
 /// - `Int8`, `Int16`, `Int32`, `Int64`
 /// - `UInt8`, `UInt16`, `UInt32`, `UInt64`
@@ -107,6 +107,8 @@ macro_rules! create_array {
     (@from DurationMillisecond) => { $crate::DurationMillisecondArray };
     (@from DurationMicrosecond) => { $crate::DurationMicrosecondArray };
     (@from DurationNanosecond) => { $crate::DurationNanosecondArray };
+    (@from Decimal32) => { $crate::Decimal32Array };
+    (@from Decimal64) => { $crate::Decimal64Array };
     (@from Decimal128) => { $crate::Decimal128Array };
     (@from Decimal256) => { $crate::Decimal256Array };
     (@from TimestampSecond) => { $crate::TimestampSecondArray };
@@ -358,7 +360,8 @@ impl RecordBatch {
 
         if let Some((i, (col_type, field_type))) = not_match {
             return Err(ArrowError::InvalidArgumentError(format!(
-                "column types must match schema types, expected {field_type:?} but found {col_type:?} at column index {i}")));
+                "column types must match schema types, expected {field_type} but found {col_type} at column index {i}"
+            )));
         }
 
         Ok(RecordBatch {
@@ -420,7 +423,7 @@ impl RecordBatch {
     /// // Insert a key-value pair into the metadata
     /// batch.schema_metadata_mut().insert("key".into(), "value".into());
     /// assert_eq!(batch.schema().metadata().get("key"), Some(&String::from("value")));
-    /// ```    
+    /// ```
     pub fn schema_metadata_mut(&mut self) -> &mut std::collections::HashMap<String, String> {
         let schema = Arc::make_mut(&mut self.schema);
         &mut schema.metadata
@@ -442,14 +445,16 @@ impl RecordBatch {
             })
             .collect::<Result<Vec<_>, _>>()?;
 
-        RecordBatch::try_new_with_options(
-            SchemaRef::new(projected_schema),
-            batch_fields,
-            &RecordBatchOptions {
-                match_field_names: true,
-                row_count: Some(self.row_count),
-            },
-        )
+        unsafe {
+            // SAFETY: we start from a valid RecordBatch and `project` only
+            // selects a subset of the original, so there's no need to
+            // redo the validation checks in `try_new_with_options`.
+            Ok(RecordBatch::new_unchecked(
+                SchemaRef::new(projected_schema),
+                batch_fields,
+                self.row_count,
+            ))
+        }
     }
 
     /// Normalize a semi-structured [`RecordBatch`] into a flat table.
@@ -930,7 +935,7 @@ where
 mod tests {
     use super::*;
     use crate::{
-        BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray, StringViewArray,
+        BooleanArray, Int8Array, Int32Array, Int64Array, ListArray, StringArray, StringViewArray,
     };
     use arrow_buffer::{Buffer, ToByteSlice};
     use arrow_data::{ArrayData, ArrayDataBuilder};
@@ -1098,7 +1103,10 @@ mod tests {
         let a = Int64Array::from(vec![1, 2, 3, 4, 5]);
 
         let err = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)]).unwrap_err();
-        assert_eq!(err.to_string(), "Invalid argument error: column types must match schema types, expected Int32 but found Int64 at column index 0");
+        assert_eq!(
+            err.to_string(),
+            "Invalid argument error: column types must match schema types, expected Int32 but found Int64 at column index 0"
+        );
     }
 
     #[test]
@@ -1572,9 +1580,10 @@ mod tests {
         let schema = Arc::new(Schema::empty());
 
         let err = RecordBatch::try_new(schema.clone(), vec![]).unwrap_err();
-        assert!(err
-            .to_string()
-            .contains("must either specify a row count or at least one column"));
+        assert!(
+            err.to_string()
+                .contains("must either specify a row count or at least one column")
+        );
 
         let options = RecordBatchOptions::new().with_row_count(Some(10));
 
@@ -1598,7 +1607,10 @@ mod tests {
             schema,
             vec![Arc::new(Int32Array::from(vec![Some(1), None]))],
         );
-        assert_eq!("Invalid argument error: Column 'a' is declared as non-nullable but contains null values", format!("{}", maybe_batch.err().unwrap()));
+        assert_eq!(
+            "Invalid argument error: Column 'a' is declared as non-nullable but contains null values",
+            format!("{}", maybe_batch.err().unwrap())
+        );
     }
     #[test]
     fn test_record_batch_options() {
diff --git a/arrow-array/src/run_iterator.rs b/arrow-array/src/run_iterator.rs
index 4fb0eef32eca..f7277a93ff62 100644
--- a/arrow-array/src/run_iterator.rs
+++ b/arrow-array/src/run_iterator.rs
@@ -17,7 +17,7 @@
 
 //! Idiomatic iterator for [`RunArray`](crate::RunArray)
 
-use crate::{array::ArrayAccessor, types::RunEndIndexType, Array, TypedRunArray};
+use crate::{Array, TypedRunArray, array::ArrayAccessor, types::RunEndIndexType};
 use arrow_buffer::ArrowNativeType;
 
 /// The [`RunArrayIter`] provides an idiomatic way to iterate over the run array.
@@ -172,13 +172,13 @@ where
 
 #[cfg(test)]
 mod tests {
-    use rand::{rng, seq::SliceRandom, Rng};
+    use rand::{Rng, rng, seq::SliceRandom};
 
     use crate::{
+        Array, Int64RunArray, PrimitiveArray, RunArray,
         array::{Int32Array, StringArray},
         builder::PrimitiveRunBuilder,
         types::{Int16Type, Int32Type},
-        Array, Int64RunArray, PrimitiveArray, RunArray,
     };
 
     fn build_input_array(size: usize) -> Vec<Option<i32>> {
diff --git a/arrow-array/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs
index 7a4c67602932..a5ec50da1fc6 100644
--- a/arrow-array/src/temporal_conversions.rs
+++ b/arrow-array/src/temporal_conversions.rs
@@ -17,8 +17,8 @@
 
 //! Conversion methods for dates and times.
 
-use crate::timezone::Tz;
 use crate::ArrowPrimitiveType;
+use crate::timezone::Tz;
 use arrow_schema::{DataType, TimeUnit};
 use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc};
 
@@ -322,9 +322,9 @@ pub fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
 #[cfg(test)]
 mod tests {
     use crate::temporal_conversions::{
-        date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
-        timestamp_s_to_date, timestamp_s_to_datetime, timestamp_s_to_time,
-        timestamp_us_to_datetime, NANOSECONDS,
+        NANOSECONDS, date64_to_datetime, split_second, timestamp_ms_to_datetime,
+        timestamp_ns_to_datetime, timestamp_s_to_date, timestamp_s_to_datetime,
+        timestamp_s_to_time, timestamp_us_to_datetime,
     };
     use chrono::DateTime;
 
diff --git a/arrow-array/src/timezone.rs b/arrow-array/src/timezone.rs
index b4df77deb4f5..bcf582152146 100644
--- a/arrow-array/src/timezone.rs
+++ b/arrow-array/src/timezone.rs
@@ -53,6 +53,7 @@ mod private {
     use super::*;
     use chrono::offset::TimeZone;
     use chrono::{LocalResult, NaiveDate, NaiveDateTime, Offset};
+    use std::fmt::Display;
     use std::str::FromStr;
 
     /// An [`Offset`] for [`Tz`]
@@ -97,6 +98,15 @@ mod private {
         }
     }
 
+    impl Display for Tz {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            match self.0 {
+                TzInner::Timezone(zone) => Display::fmt(&zone, f),
+                TzInner::Offset(fixed) => Display::fmt(&fixed, f),
+            }
+        }
+    }
+
     macro_rules! tz {
         ($s:ident, $tz:ident, $b:block) => {
             match $s.0 {
@@ -228,6 +238,15 @@ mod private {
                 sydney_offset_with_dst
             );
         }
+
+        #[test]
+        fn test_timezone_display() {
+            // Parsing and then displaying must reproduce each input exactly.
+            for expected in ["UTC", "America/Los_Angeles", "-08:00", "+05:30"] {
+                let parsed = expected.parse::<Tz>().unwrap();
+                assert_eq!(parsed.to_string(), expected);
+            }
+        }
     }
 }
 
diff --git a/arrow-array/src/trusted_len.rs b/arrow-array/src/trusted_len.rs
index 781cad38f7e9..b2e1948ccc76 100644
--- a/arrow-array/src/trusted_len.rs
+++ b/arrow-array/src/trusted_len.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::{bit_util, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util};
 
 /// Creates two [`Buffer`]s from an iterator of `Option`.
 /// The first buffer corresponds to a bitmap buffer, the second one
@@ -41,19 +41,19 @@ where
     for (i, item) in iterator.enumerate() {
         let item = item.borrow();
         if let Some(item) = item {
-            std::ptr::write(dst, *item);
-            bit_util::set_bit_raw(dst_null, i);
+            unsafe { std::ptr::write(dst, *item) };
+            unsafe { bit_util::set_bit_raw(dst_null, i) };
         } else {
-            std::ptr::write(dst, T::default());
+            unsafe { std::ptr::write(dst, T::default()) };
         }
-        dst = dst.add(1);
+        dst = unsafe { dst.add(1) };
     }
     assert_eq!(
-        dst.offset_from(buffer.as_ptr() as *mut T) as usize,
+        unsafe { dst.offset_from(buffer.as_ptr() as *mut T) as usize },
         upper,
         "Trusted iterator length was not accurately reported"
     );
-    buffer.set_len(len);
+    unsafe { buffer.set_len(len) };
     (null.into(), buffer.into())
 }
 
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 3d8cfcdb112b..fcd2d6958f35 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -23,15 +23,18 @@ use crate::delta::{
 use crate::temporal_conversions::as_datetime_with_timezone;
 use crate::timezone::Tz;
 use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
-use arrow_buffer::{i256, Buffer, OffsetBuffer};
+use arrow_buffer::{Buffer, OffsetBuffer, i256};
 use arrow_data::decimal::{
-    is_validate_decimal256_precision, is_validate_decimal_precision, validate_decimal256_precision,
-    validate_decimal_precision,
+    format_decimal_str, is_validate_decimal_precision, is_validate_decimal32_precision,
+    is_validate_decimal64_precision, is_validate_decimal256_precision, validate_decimal_precision,
+    validate_decimal32_precision, validate_decimal64_precision, validate_decimal256_precision,
 };
 use arrow_data::{validate_binary_view, validate_string_view};
 use arrow_schema::{
-    ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
-    DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE,
+    ArrowError, DECIMAL_DEFAULT_SCALE, DECIMAL32_DEFAULT_SCALE, DECIMAL32_MAX_PRECISION,
+    DECIMAL32_MAX_SCALE, DECIMAL64_DEFAULT_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE,
+    DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
+    DataType, IntervalUnit, TimeUnit,
 };
 use chrono::{Duration, NaiveDate, NaiveDateTime};
 use half::f16;
@@ -68,12 +71,6 @@ pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static {
     /// the corresponding Arrow data type of this primitive type.
     const DATA_TYPE: DataType;
 
-    /// Returns the byte width of this primitive type.
-    #[deprecated(since = "52.0.0", note = "Use ArrowNativeType::get_byte_width")]
-    fn get_byte_width() -> usize {
-        std::mem::size_of::<Self::Native>()
-    }
-
     /// Returns a default value of this primitive type.
     ///
     /// This is useful for aggregate array ops like `sum()`, `mean()`.
@@ -1031,9 +1028,25 @@ impl Date64Type {
     /// # Arguments
     ///
     /// * `i` - The Date64Type to convert
+    #[deprecated(since = "56.0.0", note = "Use to_naive_date_opt instead.")]
     pub fn to_naive_date(i: <Date64Type as ArrowPrimitiveType>::Native) -> NaiveDate {
+        Self::to_naive_date_opt(i)
+            .unwrap_or_else(|| panic!("Date64Type::to_naive_date overflowed for date: {i}",))
+    }
+
+    /// Converts an arrow Date64Type into a chrono::NaiveDate if it fits in the range that chrono::NaiveDate can represent.
+    /// Returns `None` if the calculation would overflow or underflow.
+    ///
+    /// This function is able to handle dates ranging between 1677-09-21 (-9,223,372,800,000) and 2262-04-11 (9,223,286,400,000).
+    ///
+    /// # Arguments
+    ///
+    /// * `i` - The Date64Type to convert
+    ///
+    /// Returns `Some(NaiveDate)` if it fits, `None` otherwise.
+    pub fn to_naive_date_opt(i: <Date64Type as ArrowPrimitiveType>::Native) -> Option<NaiveDate> {
         let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
-        epoch.add(Duration::try_milliseconds(i).unwrap())
+        Duration::try_milliseconds(i).and_then(|d| epoch.checked_add_signed(d))
     }
 
     /// Converts a chrono::NaiveDate into an arrow Date64Type
@@ -1052,14 +1065,35 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to add
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `add_year_months_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn add_year_months(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
-        let prior = Date64Type::to_naive_date(date);
+        Self::add_year_months_opt(date, delta).unwrap_or_else(|| {
+            panic!("Date64Type::add_year_months overflowed for date: {date}, delta: {delta}",)
+        })
+    }
+
+    /// Adds the given IntervalYearMonthType to an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to add
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn add_year_months_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
+        let prior = Date64Type::to_naive_date_opt(date)?;
         let months = IntervalYearMonthType::to_months(delta);
         let posterior = shift_months(prior, months);
-        Date64Type::from_naive_date(posterior)
+        Some(Date64Type::from_naive_date(posterior))
     }
 
     /// Adds the given IntervalDayTimeType to an arrow Date64Type
@@ -1068,15 +1102,36 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to add
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `add_day_time_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn add_day_time(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
+        Self::add_day_time_opt(date, delta).unwrap_or_else(|| {
+            panic!("Date64Type::add_day_time overflowed for date: {date}, delta: {delta:?}",)
+        })
+    }
+
+    /// Adds the given IntervalDayTimeType to an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to add
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn add_day_time_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
         let (days, ms) = IntervalDayTimeType::to_parts(delta);
-        let res = Date64Type::to_naive_date(date);
-        let res = res.add(Duration::try_days(days as i64).unwrap());
-        let res = res.add(Duration::try_milliseconds(ms as i64).unwrap());
-        Date64Type::from_naive_date(res)
+        let res = Date64Type::to_naive_date_opt(date)?;
+        let res = res.checked_add_signed(Duration::try_days(days as i64)?)?;
+        let res = res.checked_add_signed(Duration::try_milliseconds(ms as i64)?)?;
+        Some(Date64Type::from_naive_date(res))
     }
 
     /// Adds the given IntervalMonthDayNanoType to an arrow Date64Type
@@ -1085,16 +1140,37 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to add
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `add_month_day_nano_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn add_month_day_nano(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
+        Self::add_month_day_nano_opt(date, delta).unwrap_or_else(|| {
+            panic!("Date64Type::add_month_day_nano overflowed for date: {date}, delta: {delta:?}",)
+        })
+    }
+
+    /// Adds the given IntervalMonthDayNanoType to an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to add
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn add_month_day_nano_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
         let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(delta);
-        let res = Date64Type::to_naive_date(date);
+        let res = Date64Type::to_naive_date_opt(date)?;
         let res = shift_months(res, months);
-        let res = res.add(Duration::try_days(days as i64).unwrap());
-        let res = res.add(Duration::nanoseconds(nanos));
-        Date64Type::from_naive_date(res)
+        let res = res.checked_add_signed(Duration::try_days(days as i64)?)?;
+        let res = res.checked_add_signed(Duration::nanoseconds(nanos))?;
+        Some(Date64Type::from_naive_date(res))
     }
 
     /// Subtract the given IntervalYearMonthType to an arrow Date64Type
@@ -1103,14 +1179,35 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to subtract
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `subtract_year_months_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn subtract_year_months(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
-        let prior = Date64Type::to_naive_date(date);
+        Self::subtract_year_months_opt(date, delta).unwrap_or_else(|| {
+            panic!("Date64Type::subtract_year_months overflowed for date: {date}, delta: {delta}",)
+        })
+    }
+
+    /// Subtracts the given IntervalYearMonthType from an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to subtract
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn subtract_year_months_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalYearMonthType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
+        let prior = Date64Type::to_naive_date_opt(date)?;
         let months = IntervalYearMonthType::to_months(-delta);
         let posterior = shift_months(prior, months);
-        Date64Type::from_naive_date(posterior)
+        Some(Date64Type::from_naive_date(posterior))
     }
 
     /// Subtract the given IntervalDayTimeType to an arrow Date64Type
@@ -1119,15 +1216,36 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to subtract
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `subtract_day_time_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn subtract_day_time(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
+        Self::subtract_day_time_opt(date, delta).unwrap_or_else(|| {
+            panic!("Date64Type::subtract_day_time overflowed for date: {date}, delta: {delta:?}",)
+        })
+    }
+
+    /// Subtracts the given IntervalDayTimeType from an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to subtract
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn subtract_day_time_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalDayTimeType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
         let (days, ms) = IntervalDayTimeType::to_parts(delta);
-        let res = Date64Type::to_naive_date(date);
-        let res = res.sub(Duration::try_days(days as i64).unwrap());
-        let res = res.sub(Duration::try_milliseconds(ms as i64).unwrap());
-        Date64Type::from_naive_date(res)
+        let res = Date64Type::to_naive_date_opt(date)?;
+        let res = res.checked_sub_signed(Duration::try_days(days as i64)?)?;
+        let res = res.checked_sub_signed(Duration::try_milliseconds(ms as i64)?)?;
+        Some(Date64Type::from_naive_date(res))
     }
 
     /// Subtract the given IntervalMonthDayNanoType to an arrow Date64Type
@@ -1136,16 +1254,39 @@ impl Date64Type {
     ///
     /// * `date` - The date on which to perform the operation
     /// * `delta` - The interval to subtract
+    #[deprecated(
+        since = "56.0.0",
+        note = "Use `subtract_month_day_nano_opt` instead, which returns an Option to handle overflow."
+    )]
     pub fn subtract_month_day_nano(
         date: <Date64Type as ArrowPrimitiveType>::Native,
         delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
     ) -> <Date64Type as ArrowPrimitiveType>::Native {
+        Self::subtract_month_day_nano_opt(date, delta).unwrap_or_else(|| {
+            panic!(
+                "Date64Type::subtract_month_day_nano overflowed for date: {date}, delta: {delta:?}",
+            )
+        })
+    }
+
+    /// Subtract the given IntervalMonthDayNanoType from an arrow Date64Type
+    ///
+    /// # Arguments
+    ///
+    /// * `date` - The date on which to perform the operation
+    /// * `delta` - The interval to subtract
+    ///
+    /// Returns `Some(Date64Type)` if it fits, `None` otherwise.
+    pub fn subtract_month_day_nano_opt(
+        date: <Date64Type as ArrowPrimitiveType>::Native,
+        delta: <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native,
+    ) -> Option<<Date64Type as ArrowPrimitiveType>::Native> {
         let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(delta);
-        let res = Date64Type::to_naive_date(date);
+        let res = Date64Type::to_naive_date_opt(date)?;
         let res = shift_months(res, -months);
-        let res = res.sub(Duration::try_days(days as i64).unwrap());
-        let res = res.sub(Duration::nanoseconds(nanos));
-        Date64Type::from_naive_date(res)
+        let res = res.checked_sub_signed(Duration::try_days(days as i64)?)?;
+        let res = res.checked_sub_signed(Duration::nanoseconds(nanos))?;
+        Some(Date64Type::from_naive_date(res))
     }
 }
 
@@ -1156,6 +1297,8 @@ mod decimal {
     use super::*;
 
     pub trait DecimalTypeSealed {}
+    impl DecimalTypeSealed for Decimal32Type {}
+    impl DecimalTypeSealed for Decimal64Type {}
     impl DecimalTypeSealed for Decimal128Type {}
     impl DecimalTypeSealed for Decimal256Type {}
 }
@@ -1163,10 +1306,12 @@ mod decimal {
 /// A trait over the decimal types, used by [`PrimitiveArray`] to provide a generic
 /// implementation across the various decimal types
 ///
-/// Implemented by [`Decimal128Type`] and [`Decimal256Type`] for [`Decimal128Array`]
-/// and [`Decimal256Array`] respectively
+/// Implemented by [`Decimal32Type`], [`Decimal64Type`], [`Decimal128Type`] and [`Decimal256Type`]
+/// for [`Decimal32Array`], [`Decimal64Array`], [`Decimal128Array`] and [`Decimal256Array`] respectively
 ///
 /// [`PrimitiveArray`]: crate::array::PrimitiveArray
+/// [`Decimal32Array`]: crate::array::Decimal32Array
+/// [`Decimal64Array`]: crate::array::Decimal64Array
 /// [`Decimal128Array`]: crate::array::Decimal128Array
 /// [`Decimal256Array`]: crate::array::Decimal256Array
 pub trait DecimalType:
@@ -1178,19 +1323,25 @@ pub trait DecimalType:
     const MAX_PRECISION: u8;
     /// Maximum no of digits after the decimal point (note the scale can be negative)
     const MAX_SCALE: i8;
+    /// The maximum value for each precision in `0..=MAX_PRECISION`: [0, 9, 99, ...]
+    const MAX_FOR_EACH_PRECISION: &'static [Self::Native];
     /// fn to create its [`DataType`]
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType;
     /// Default values for [`DataType`]
     const DEFAULT_TYPE: DataType;
 
-    /// "Decimal128" or "Decimal256", for use in error messages
+    /// "Decimal32", "Decimal64", "Decimal128" or "Decimal256", for use in error messages
     const PREFIX: &'static str;
 
     /// Formats the decimal value with the provided precision and scale
     fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String;
 
     /// Validates that `value` contains no more than `precision` decimal digits
-    fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>;
+    fn validate_decimal_precision(
+        value: Self::Native,
+        precision: u8,
+        scale: i8,
+    ) -> Result<(), ArrowError>;
 
     /// Determines whether `value` contains no more than `precision` decimal digits
     fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool;
@@ -1236,6 +1387,78 @@ pub fn validate_decimal_precision_and_scale<T: DecimalType>(
     Ok(())
 }
 
+/// The decimal type for a Decimal32Array
+#[derive(Debug)]
+pub struct Decimal32Type {}
+
+impl DecimalType for Decimal32Type {
+    const BYTE_LENGTH: usize = 4;
+    const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
+    const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &'static [i32] =
+        &arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
+    const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
+    const DEFAULT_TYPE: DataType =
+        DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
+    const PREFIX: &'static str = "Decimal32";
+
+    fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
+        format_decimal_str(&value.to_string(), precision as usize, scale)
+    }
+
+    fn validate_decimal_precision(num: i32, precision: u8, scale: i8) -> Result<(), ArrowError> {
+        validate_decimal32_precision(num, precision, scale)
+    }
+
+    fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
+        is_validate_decimal32_precision(value, precision)
+    }
+}
+
+impl ArrowPrimitiveType for Decimal32Type {
+    type Native = i32;
+
+    const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
+}
+
+impl primitive::PrimitiveTypeSealed for Decimal32Type {}
+
+/// The decimal type for a Decimal64Array
+#[derive(Debug)]
+pub struct Decimal64Type {}
+
+impl DecimalType for Decimal64Type {
+    const BYTE_LENGTH: usize = 8;
+    const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
+    const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &'static [i64] =
+        &arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
+    const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
+    const DEFAULT_TYPE: DataType =
+        DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
+    const PREFIX: &'static str = "Decimal64";
+
+    fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
+        format_decimal_str(&value.to_string(), precision as usize, scale)
+    }
+
+    fn validate_decimal_precision(num: i64, precision: u8, scale: i8) -> Result<(), ArrowError> {
+        validate_decimal64_precision(num, precision, scale)
+    }
+
+    fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
+        is_validate_decimal64_precision(value, precision)
+    }
+}
+
+impl ArrowPrimitiveType for Decimal64Type {
+    type Native = i64;
+
+    const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
+}
+
+impl primitive::PrimitiveTypeSealed for Decimal64Type {}
+
 /// The decimal type for a Decimal128Array
 #[derive(Debug)]
 pub struct Decimal128Type {}
@@ -1244,6 +1467,8 @@ impl DecimalType for Decimal128Type {
     const BYTE_LENGTH: usize = 16;
     const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL128_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &'static [i128] =
+        &arrow_data::decimal::MAX_DECIMAL128_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal128;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1253,8 +1478,8 @@ impl DecimalType for Decimal128Type {
         format_decimal_str(&value.to_string(), precision as usize, scale)
     }
 
-    fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> {
-        validate_decimal_precision(num, precision)
+    fn validate_decimal_precision(num: i128, precision: u8, scale: i8) -> Result<(), ArrowError> {
+        validate_decimal_precision(num, precision, scale)
     }
 
     fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
@@ -1278,6 +1503,8 @@ impl DecimalType for Decimal256Type {
     const BYTE_LENGTH: usize = 32;
     const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION;
     const MAX_SCALE: i8 = DECIMAL256_MAX_SCALE;
+    const MAX_FOR_EACH_PRECISION: &'static [i256] =
+        &arrow_data::decimal::MAX_DECIMAL256_FOR_EACH_PRECISION;
     const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal256;
     const DEFAULT_TYPE: DataType =
         DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
@@ -1287,8 +1514,8 @@ impl DecimalType for Decimal256Type {
         format_decimal_str(&value.to_string(), precision as usize, scale)
     }
 
-    fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> {
-        validate_decimal256_precision(num, precision)
+    fn validate_decimal_precision(num: i256, precision: u8, scale: i8) -> Result<(), ArrowError> {
+        validate_decimal256_precision(num, precision, scale)
     }
 
     fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
@@ -1304,29 +1531,6 @@ impl ArrowPrimitiveType for Decimal256Type {
 
 impl primitive::PrimitiveTypeSealed for Decimal256Type {}
 
-fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String {
-    let (sign, rest) = match value_str.strip_prefix('-') {
-        Some(stripped) => ("-", stripped),
-        None => ("", value_str),
-    };
-    let bound = precision.min(rest.len()) + sign.len();
-    let value_str = &value_str[0..bound];
-
-    if scale == 0 {
-        value_str.to_string()
-    } else if scale < 0 {
-        let padding = value_str.len() + scale.unsigned_abs() as usize;
-        format!("{value_str:0<padding$}")
-    } else if rest.len() > scale as usize {
-        // Decimal separator is in the middle of the string
-        let (whole, decimal) = value_str.split_at(value_str.len() - scale as usize);
-        format!("{whole}.{decimal}")
-    } else {
-        // String has to be padded
-        format!("{}0.{:0>width$}", sign, rest, width = scale as usize)
-    }
-}
-
 /// Crate private types for Byte Arrays
 ///
 /// Not intended to be used outside this crate
@@ -1366,7 +1570,7 @@ pub(crate) mod bytes {
 
         #[inline]
         unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self {
-            std::str::from_utf8_unchecked(b)
+            unsafe { std::str::from_utf8_unchecked(b) }
         }
     }
 }
@@ -1541,7 +1745,7 @@ impl ByteViewType for BinaryViewType {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_data::{layout, BufferSpec};
+    use arrow_data::{BufferSpec, layout};
 
     #[test]
     fn month_day_nano_should_roundtrip() {
@@ -1607,6 +1811,8 @@ mod tests {
         test_layout::<Float16Type>();
         test_layout::<Float32Type>();
         test_layout::<Float64Type>();
+        test_layout::<Decimal32Type>();
+        test_layout::<Decimal64Type>();
         test_layout::<Decimal128Type>();
         test_layout::<Decimal256Type>();
         test_layout::<TimestampNanosecondType>();
diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml
index 24297f4a7e5f..48cea8467eb7 100644
--- a/arrow-avro/Cargo.toml
+++ b/arrow-avro/Cargo.toml
@@ -36,27 +36,62 @@ bench = false
 all-features = true
 
 [features]
-default = ["deflate", "snappy", "zstd"]
+default = ["deflate", "snappy", "zstd", "bzip2", "xz"]
 deflate = ["flate2"]
 snappy = ["snap", "crc"]
+canonical_extension_types = ["arrow-schema/canonical_extension_types"]
+md5 = ["dep:md5"]
+sha256 = ["dep:sha2"]
+small_decimals = []
+avro_custom_types = ["dep:arrow-select"]
 
 [dependencies]
 arrow-schema = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-array = { workspace = true }
+arrow-select = { workspace = true, optional = true }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
 serde = { version = "1.0.188", features = ["derive"] }
-flate2 = { version = "1.0", default-features = false, features = ["rust_backend"], optional = true }
+flate2 = { version = "1.0", default-features = false, features = [
+    "rust_backend",
+], optional = true }
 snap = { version = "1.0", default-features = false, optional = true }
 zstd = { version = "0.13", default-features = false, optional = true }
+bzip2 = { version = "0.6.0", optional = true }
+xz = { package = "liblzma", version = "0.4", default-features = false, optional = true }
 crc = { version = "3.0", optional = true }
+strum_macros = "0.27"
+uuid = "1.17"
+indexmap = "2.10"
+rand = "0.9"
+md5 = { version = "0.8", optional = true }
+sha2 = { version = "0.10", optional = true }
 
 [dev-dependencies]
-rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
-criterion = { version = "0.5", default-features = false }
+arrow-data = { workspace = true }
+rand = { version = "0.9.1", default-features = false, features = [
+    "std",
+    "std_rng",
+    "thread_rng",
+] }
+criterion = { workspace = true, default-features = false }
 tempfile = "3.3"
 arrow = { workspace = true }
+futures = "0.3.31"
+bytes = "1.10.1"
+async-stream = "0.3.6"
+apache-avro = "0.21.0"
+num-bigint = "0.4"
+once_cell = "1.21.3"
 
 [[bench]]
 name = "avro_reader"
 harness = false
+
+[[bench]]
+name = "decoder"
+harness = false
+
+[[bench]]
+name = "avro_writer"
+harness = false
diff --git a/arrow-avro/README.md b/arrow-avro/README.md
new file mode 100644
index 000000000000..85fd76094755
--- /dev/null
+++ b/arrow-avro/README.md
@@ -0,0 +1,182 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# `arrow-avro`
+
+[![crates.io](https://img.shields.io/crates/v/arrow-avro.svg)](https://crates.io/crates/arrow-avro)
+[![docs.rs](https://img.shields.io/docsrs/arrow-avro.svg)](https://docs.rs/arrow-avro/latest/arrow_avro/)
+
+Transfer data between the [Apache Arrow] memory format and [Apache Avro].
+
+This crate provides:
+
+- a **reader** that decodes Avro
+  - **Object Container Files (OCF)**,
+  - **Avro Single‑Object Encoding (SOE)**, and
+  - **Confluent Schema Registry wire format**  
+  into Arrow `RecordBatch`es; and
+- a **writer** that encodes Arrow `RecordBatch`es into Avro (**OCF** or **SOE**).
+
+> The latest API docs for `main` (unreleased) are published on the Arrow website: [arrow_avro](https://arrow.apache.org/rust/arrow_avro/).
+
+[Apache Arrow]: https://arrow.apache.org/
+[Apache Avro]: https://avro.apache.org/
+
+---
+
+## Install
+
+```toml
+[dependencies]
+arrow-avro = "57.0.0"
+```
+
+Disable defaults and pick only what you need (see **Feature Flags**):
+
+```toml
+[dependencies]
+arrow-avro = { version = "57.0.0", default-features = false, features = ["deflate", "snappy"] }
+```
+
+---
+
+## Quick start
+
+### Read an Avro OCF file into Arrow
+
+```rust
+use std::fs::File;
+use std::io::BufReader;
+
+use arrow_avro::reader::ReaderBuilder;
+use arrow_array::RecordBatch;
+
+fn main() -> anyhow::Result<()> {
+    let file = BufReader::new(File::open("data/example.avro")?);
+    let mut reader = ReaderBuilder::new().build(file)?;
+    while let Some(batch) = reader.next() {
+        let batch: RecordBatch = batch?;
+        println!("rows: {}", batch.num_rows());
+    }
+    Ok(())
+}
+```
+
+### Write Arrow to Avro OCF (in‑memory)
+
+```rust
+use std::sync::Arc;
+
+use arrow_avro::writer::AvroWriter;
+use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+use arrow_schema::{DataType, Field, Schema};
+
+fn main() -> anyhow::Result<()> {
+    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+    let batch = RecordBatch::try_new(
+        Arc::new(schema.clone()),
+        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
+    )?;
+
+    let sink: Vec<u8> = Vec::new();
+    let mut w = AvroWriter::new(sink, schema)?;
+    w.write(&batch)?;
+    w.finish()?;
+    assert!(!w.into_inner().is_empty());
+    Ok(())
+}
+```
+
+See the crate docs for runnable SOE and Confluent round‑trip examples.
+
+---
+
+## Feature Flags (what they do and when to use them)
+
+### Compression codecs (OCF block compression)
+
+`arrow-avro` supports the Avro‑standard OCF codecs. The **defaults** include all five: `deflate`, `snappy`, `zstd`, `bzip2`, and `xz`.
+
+| Feature   | Default | What it enables                                                     | When to use                                                                                                                            |
+|-----------|--------:|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------|
+| `deflate` |       ✅ | DEFLATE compression via `flate2` (pure‑Rust backend)                | Most compatible; widely supported; good compression, slower than Snappy.                                                               |
+| `snappy`  |       ✅ | Snappy block compression via `snap` with CRC‑32 as required by Avro | Fastest decode/encode; common in streaming/data‑lake pipelines. (Avro requires a 4‑byte big‑endian CRC of the **uncompressed** block.) |
+| `zstd`    |       ✅ | Zstandard block compression via `zstd`                              | Great compression/speed trade‑off on modern systems. May pull in a native library.                                                     |
+| `bzip2`   |       ✅ | BZip2 block compression                                             | For compatibility with older datasets that used BZip2. Slower; larger deps.                                                            |
+| `xz`      |       ✅ | XZ/LZMA block compression                                           | Highest compression for archival data; slowest; larger deps.                                                                           |
+
+> Avro defines these codecs for OCF: `null` (no compression), `deflate`, `snappy`, `bzip2`, `xz`, and `zstandard` (recent spec versions).
+
+**Notes**
+
+* Only **OCF** uses these codecs (they compress per‑block). They do **not** apply to raw Avro frames used by Confluent wire format or SOE. The crate’s `compression` module is specifically for **OCF blocks**.
+* `deflate` uses `flate2` with the `rust_backend` (no system zlib required).
+
+### Schema fingerprints & custom logical type helpers
+
+| Feature                     | Default | What it enables                                                                  | When to use                                                                                                         |
+|-----------------------------|--------:|----------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|
+| `md5`                       |       ⬜ | `md5` dep for optional **MD5** schema fingerprints                               | If you want to compute MD5 fingerprints of writer schemas (e.g. for custom prefixing/validation).                   |
+| `sha256`                    |       ⬜ | `sha2` dep for optional **SHA‑256** schema fingerprints                          | If you prefer longer fingerprints; affects max prefix length (e.g. when framing).                                   |
+| `small_decimals`            |       ⬜ | Extra handling for **small decimal** logical types (`Decimal32` and `Decimal64`) | If your Avro `decimal` values are small and you want more compact Arrow representations.                            |
+| `avro_custom_types`         |       ⬜ | Annotates Avro values using Arrow-specific custom logical types                  | Enable when you need arrow-avro to reinterpret certain Avro fields as Arrow types that Avro doesn’t natively model. |
+| `canonical_extension_types` |       ⬜ | Re‑exports Arrow’s canonical extension types support from `arrow-schema`         | Enable if your workflow uses Arrow [canonical extension types] and you want `arrow-avro` to respect them.           |
+
+[canonical extension types]: https://arrow.apache.org/docs/format/CanonicalExtensions.html
+
+**Lower‑level/internal toggles (rarely used directly)**
+
+* `flate2`, `snap`, `crc`, `zstd`, `bzip2`, `xz` are optional **dependencies** wired to the user‑facing features above. You normally enable `deflate`/`snappy`/`zstd`/`bzip2`/`xz`, not these directly.
+
+### Feature snippets
+
+* Minimal, fast build (common pipelines):
+
+  ```toml
+  arrow-avro = { version = "57.0.0", default-features = false, features = ["deflate", "snappy"] }
+  ```
+* Include Zstandard too (modern data lakes):
+
+  ```toml
+  arrow-avro = { version = "57.0.0", default-features = false, features = ["deflate", "snappy", "zstd"] }
+  ```
+* Fingerprint helpers:
+
+  ```toml
+  arrow-avro = { version = "57.0.0", features = ["md5", "sha256"] }
+  ```
+  
+---
+
+## What formats are supported?
+
+* **OCF (Object Container Files)**: self‑describing Avro files with header, optional compression, sync markers; reader and writer supported.
+* **Confluent Schema Registry wire format**: 1‑byte magic `0x00` + 4‑byte BE schema ID + Avro body; supports decode + encode helpers.
+* **Avro Single‑Object Encoding (SOE)**: 2‑byte magic `0xC3 0x01` + 8‑byte LE CRC‑64‑AVRO fingerprint + Avro body; supports decode + encode helpers.
+
+---
+
+## Examples
+
+* Read/write OCF in memory and from files (see crate docs “OCF round‑trip”).
+* Confluent wire‑format and SOE quickstarts are provided as runnable snippets in docs.
+
+There are additional examples under `arrow-avro/examples/` in the repository.
+
+---
diff --git a/arrow-avro/benches/avro_reader.rs b/arrow-avro/benches/avro_reader.rs
index b525a0c788cd..2f2a3a10dbf3 100644
--- a/arrow-avro/benches/avro_reader.rs
+++ b/arrow-avro/benches/avro_reader.rs
@@ -20,7 +20,7 @@
 //! This benchmark suite compares the performance characteristics of StringArray vs
 //! StringViewArray across three key dimensions:
 //! 1. Array creation performance
-//! 2. String value access operations  
+//! 2. String value access operations
 //! 3. Avro file reading with each array type
 
 use std::fs::File;
@@ -31,14 +31,13 @@ use std::time::Duration;
 use arrow::array::RecordBatch;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow_array::{ArrayRef, Int32Array, StringArray, StringViewArray};
-use arrow_avro::ReadOptions;
 use arrow_schema::ArrowError;
 use criterion::*;
 use tempfile::NamedTempFile;
 
 fn create_test_data(count: usize, str_length: usize) -> Vec<String> {
     (0..count)
-        .map(|i| format!("str_{}", i) + &"a".repeat(str_length))
+        .map(|i| format!("str_{i}") + &"a".repeat(str_length))
         .collect()
 }
 
@@ -79,7 +78,7 @@ fn create_avro_test_file(row_count: usize, str_length: usize) -> Result<NamedTem
 
 fn read_avro_test_file(
     file_path: &std::path::Path,
-    options: &ReadOptions,
+    use_utf8view: bool,
 ) -> Result<RecordBatch, ArrowError> {
     let file = File::open(file_path)?;
     let mut reader = BufReader::new(file);
@@ -101,7 +100,7 @@ fn read_avro_test_file(
         reader.read_exact(&mut buf)?;
 
         let s = String::from_utf8(buf)
-            .map_err(|e| ArrowError::ParseError(format!("Invalid UTF-8: {}", e)))?;
+            .map_err(|e| ArrowError::ParseError(format!("Invalid UTF-8: {e}")))?;
 
         strings.push(s);
 
@@ -110,7 +109,7 @@ fn read_avro_test_file(
         ints.push(i32::from_le_bytes(int_bytes));
     }
 
-    let string_array: ArrayRef = if options.use_utf8view() {
+    let string_array: ArrayRef = if use_utf8view {
         Arc::new(StringViewArray::from_iter(
             strings.iter().map(|s| Some(s.as_str())),
         ))
@@ -123,7 +122,7 @@ fn read_avro_test_file(
     let int_array: ArrayRef = Arc::new(Int32Array::from(ints));
 
     let schema = Arc::new(Schema::new(vec![
-        if options.use_utf8view() {
+        if use_utf8view {
             Field::new("string_field", DataType::Utf8View, false)
         } else {
             Field::new("string_field", DataType::Utf8, false)
@@ -143,7 +142,7 @@ fn bench_array_creation(c: &mut Criterion) {
         let data = create_test_data(10000, str_length);
         let row_count = 1000;
 
-        group.bench_function(format!("string_array_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_array_{str_length}_chars"), |b| {
             b.iter(|| {
                 let string_array =
                     StringArray::from_iter(data[0..row_count].iter().map(|s| Some(s.as_str())));
@@ -163,11 +162,11 @@ fn bench_array_creation(c: &mut Criterion) {
                 )
                 .unwrap();
 
-                criterion::black_box(batch)
+                std::hint::black_box(batch)
             })
         });
 
-        group.bench_function(format!("string_view_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_view_{str_length}_chars"), |b| {
             b.iter(|| {
                 let string_array =
                     StringViewArray::from_iter(data[0..row_count].iter().map(|s| Some(s.as_str())));
@@ -187,7 +186,7 @@ fn bench_array_creation(c: &mut Criterion) {
                 )
                 .unwrap();
 
-                criterion::black_box(batch)
+                std::hint::black_box(batch)
             })
         });
     }
@@ -208,23 +207,23 @@ fn bench_string_operations(c: &mut Criterion) {
         let string_view_array =
             StringViewArray::from_iter(data[0..rows].iter().map(|s| Some(s.as_str())));
 
-        group.bench_function(format!("string_array_value_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_array_value_{str_length}_chars"), |b| {
             b.iter(|| {
                 let mut sum_len = 0;
                 for i in 0..rows {
                     sum_len += string_array.value(i).len();
                 }
-                criterion::black_box(sum_len)
+                std::hint::black_box(sum_len)
             })
         });
 
-        group.bench_function(format!("string_view_value_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_view_value_{str_length}_chars"), |b| {
             b.iter(|| {
                 let mut sum_len = 0;
                 for i in 0..rows {
                     sum_len += string_view_array.value(i).len();
                 }
-                criterion::black_box(sum_len)
+                std::hint::black_box(sum_len)
             })
         });
     }
@@ -242,19 +241,17 @@ fn bench_avro_reader(c: &mut Criterion) {
         let temp_file = create_avro_test_file(row_count, str_length).unwrap();
         let file_path = temp_file.path();
 
-        group.bench_function(format!("string_array_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_array_{str_length}_chars"), |b| {
             b.iter(|| {
-                let options = ReadOptions::default();
-                let batch = read_avro_test_file(file_path, &options).unwrap();
-                criterion::black_box(batch)
+                let batch = read_avro_test_file(file_path, false).unwrap();
+                std::hint::black_box(batch)
             })
         });
 
-        group.bench_function(format!("string_view_{}_chars", str_length), |b| {
+        group.bench_function(format!("string_view_{str_length}_chars"), |b| {
             b.iter(|| {
-                let options = ReadOptions::default().with_utf8view(true);
-                let batch = read_avro_test_file(file_path, &options).unwrap();
-                criterion::black_box(batch)
+                let batch = read_avro_test_file(file_path, true).unwrap();
+                std::hint::black_box(batch)
             })
         });
     }
diff --git a/arrow-avro/benches/avro_writer.rs b/arrow-avro/benches/avro_writer.rs
new file mode 100644
index 000000000000..58b014c5a3fe
--- /dev/null
+++ b/arrow-avro/benches/avro_writer.rs
@@ -0,0 +1,849 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Benchmarks for `arrow-avro` Writer (Avro Object Container File)
+
+extern crate arrow_avro;
+extern crate criterion;
+extern crate once_cell;
+
+use arrow_array::{
+    ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Decimal256Array, FixedSizeBinaryArray,
+    Float32Array, Float64Array, ListArray, PrimitiveArray, RecordBatch, StringArray, StructArray,
+    builder::{ListBuilder, StringBuilder},
+    types::{Int32Type, Int64Type, IntervalMonthDayNanoType, TimestampMicrosecondType},
+};
+#[cfg(feature = "small_decimals")]
+use arrow_array::{Decimal32Array, Decimal64Array};
+use arrow_avro::writer::AvroWriter;
+use arrow_buffer::{Buffer, i256};
+use arrow_schema::{DataType, Field, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode};
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use once_cell::sync::Lazy;
+use rand::{
+    Rng, SeedableRng,
+    distr::uniform::{SampleRange, SampleUniform},
+    rngs::StdRng,
+};
+use std::collections::HashMap;
+use std::io::Cursor;
+use std::sync::Arc;
+use std::time::Duration;
+use tempfile::tempfile;
+
+const SIZES: [usize; 4] = [4_096, 8_192, 100_000, 1_000_000]; // rows per generated batch
+const BASE_SEED: u64 = 0x5EED_1234_ABCD_EF01; // XORed with the mixed tag and row count
+const MIX_CONST_1: u64 = 0x9E37_79B1_85EB_CA87; // multiplies the tag in `rng_for`
+const MIX_CONST_2: u64 = 0xC2B2_AE3D_27D4_EB4F; // multiplies the row count in `rng_for`
+
+#[inline]
+fn rng_for(tag: u64, n: usize) -> StdRng {
+    let mixed = tag.wrapping_mul(MIX_CONST_1) ^ (n as u64).wrapping_mul(MIX_CONST_2);
+    StdRng::seed_from_u64(BASE_SEED ^ mixed) // seed is a pure function of (tag, n)
+}
+
+/// Draws one value from `range` using `rng`.
+///
+/// Thin wrapper over `Rng::random_range` that lets call sites pin the output
+/// type with a turbofish, e.g. `sample_in::<i64, _>(&mut rng, 0..10)`.
+#[inline]
+fn sample_in<T: SampleUniform, Rg: SampleRange<T>>(rng: &mut StdRng, range: Rg) -> T {
+    Rng::random_range(rng, range)
+}
+
+/// Builds `n` non-null booleans, each `true` with probability 0.5, seeded by `(tag, n)`.
+#[inline]
+fn make_bool_array_with_tag(n: usize, tag: u64) -> BooleanArray {
+    let mut rng = rng_for(tag, n);
+    // `bool` is drawn with the RNG's coin-flip helper; items are wrapped in `Some`
+    // because `BooleanArray` is collected from an iterator of `Option<bool>`.
+    (0..n).map(|_| Some(rng.random_bool(0.5))).collect()
+}
+
+/// Builds `n` non-null `i32` values from `rng.random::<i32>()`, seeded by `(tag, n)`.
+#[inline]
+fn make_i32_array_with_tag(n: usize, tag: u64) -> PrimitiveArray<Int32Type> {
+    let mut rng = rng_for(tag, n);
+    PrimitiveArray::from_iter_values(std::iter::repeat_with(|| rng.random::<i32>()).take(n))
+}
+
+#[inline]
+fn make_i64_array_with_tag(n: usize, tag: u64) -> PrimitiveArray<Int64Type> {
+    let mut rng = rng_for(tag, n);
+    let draws: Vec<i64> = (0..n).map(|_| rng.random()).collect(); // one draw per row
+    PrimitiveArray::from_iter_values(draws)
+}
+
+/// Random lowercase ASCII string whose length is drawn from `min_len..=max_len`.
+#[inline]
+fn rand_ascii_string(rng: &mut StdRng, min_len: usize, max_len: usize) -> String {
+    let target_len = rng.random_range(min_len..=max_len);
+    let mut out = String::with_capacity(target_len);
+    out.extend((0..target_len).map(|_| rng.random_range(b'a'..=b'z') as char));
+    out
+}
+
+/// Builds `n` non-null lowercase ASCII strings of 3 to 16 bytes, seeded by `(tag, n)`.
+#[inline]
+fn make_utf8_array_with_tag(n: usize, tag: u64) -> StringArray {
+    let mut rng = rng_for(tag, n);
+    StringArray::from_iter_values((0..n).map(|_| rand_ascii_string(&mut rng, 3, 16)))
+}
+
+#[inline]
+fn make_f32_array_with_tag(n: usize, tag: u64) -> Float32Array {
+    let mut rng = rng_for(tag, n);
+    let draws: Vec<f32> = (0..n).map(|_| rng.random()).collect(); // one draw per row
+    Float32Array::from_iter_values(draws)
+}
+
+/// Builds `n` non-null `f64` values from `rng.random::<f64>()`, seeded by `(tag, n)`.
+#[inline]
+fn make_f64_array_with_tag(n: usize, tag: u64) -> Float64Array {
+    let mut rng = rng_for(tag, n);
+    Float64Array::from_iter_values(std::iter::repeat_with(|| rng.random::<f64>()).take(n))
+}
+
+/// Builds `n` non-null binary values of 1 to 16 random bytes each, seeded by `(tag, n)`.
+#[inline]
+fn make_binary_array_with_tag(n: usize, tag: u64) -> BinaryArray {
+    let mut rng = rng_for(tag, n);
+    let payloads: Vec<Vec<u8>> = (0..n)
+        .map(|_| {
+            // For every row the length is drawn first, then the bytes.
+            let mut bytes = vec![0u8; rng.random_range(1..=16)];
+            rng.fill(bytes.as_mut_slice());
+            bytes
+        })
+        .collect();
+    BinaryArray::from_vec(payloads.iter().map(Vec::as_slice).collect())
+}
+
+/// Builds `n` non-null 16-byte values filled with random bytes, seeded by `(tag, n)`.
+#[inline]
+fn make_fixed16_array_with_tag(n: usize, tag: u64) -> FixedSizeBinaryArray {
+    let mut rng = rng_for(tag, n);
+    let mut payloads: Vec<[u8; 16]> = Vec::with_capacity(n);
+    for _ in 0..n {
+        let mut block = [0u8; 16];
+        rng.fill(&mut block);
+        payloads.push(block);
+    }
+    // `try_from_iter` is fallible; any error aborts the benchmark through `expect`.
+    FixedSizeBinaryArray::try_from_iter(payloads.into_iter()).expect("build FixedSizeBinaryArray")
+}
+
+/// Builds `n` `Interval(IntervalUnit::MonthDayNano)` values seeded by `(tag, n)`.
+/// Months, days and nanos are all **non-negative** and nanos is always a whole number
+/// of milliseconds, per Avro `duration` constraints used by the writer.
+#[inline]
+fn make_interval_mdn_array_with_tag(
+    n: usize,
+    tag: u64,
+) -> PrimitiveArray<IntervalMonthDayNanoType> {
+    const NANOS_PER_MILLI: i64 = 1_000_000;
+    let mut rng = rng_for(tag, n);
+    let mut one_interval = || {
+        let months: i32 = rng.random_range(0..=120);
+        let days: i32 = rng.random_range(0..=31);
+        // At most one day of milliseconds (inclusive upper bound).
+        let millis: u32 = rng.random_range(0..=86_400_000);
+        IntervalMonthDayNanoType::make_value(months, days, i64::from(millis) * NANOS_PER_MILLI)
+    };
+    PrimitiveArray::from_iter_values((0..n).map(|_| one_interval()))
+}
+
+#[inline]
+fn make_ts_micros_array_with_tag(n: usize, tag: u64) -> PrimitiveArray<TimestampMicrosecondType> {
+    const WINDOW_START_US: i64 = 1_600_000_000_000_000; // first possible timestamp value
+    const WINDOW_LEN_US: i64 = 31_536_000_000_000; // 365 days expressed in microseconds
+    let mut rng = rng_for(tag, n);
+    let offsets = (0..n).map(|_| sample_in::<i64, _>(&mut rng, 0..WINDOW_LEN_US));
+    PrimitiveArray::from_iter_values(offsets.map(|offset| WINDOW_START_US + offset))
+}
+
+// === Decimal helpers & generators ===
+
+#[inline]
+#[cfg(feature = "small_decimals")]
+fn pow10_i32(p: u8) -> i32 {
+    10i32.saturating_pow(u32::from(p)) // 10^p, clamped at `i32::MAX` on overflow
+}
+
+#[inline]
+#[cfg(feature = "small_decimals")]
+fn pow10_i64(p: u8) -> i64 {
+    10i64.saturating_pow(u32::from(p)) // 10^p, clamped at `i64::MAX` on overflow
+}
+
+#[inline]
+fn pow10_i128(p: u8) -> i128 {
+    10i128.saturating_pow(u32::from(p)) // 10^p, clamped at `i128::MAX` on overflow
+}
+
+#[inline]
+#[cfg(feature = "small_decimals")]
+fn make_decimal32_array_with_tag(n: usize, tag: u64, precision: u8, scale: i8) -> Decimal32Array {
+    let bound = pow10_i32(precision).saturating_sub(1); // 10^precision - 1 (saturating)
+    let mut rng = rng_for(tag, n);
+    let draw = |_: usize| rng.random_range(-bound..=bound);
+    Decimal32Array::from_iter_values((0..n).map(draw))
+        .with_precision_and_scale(precision, scale)
+        .expect("set precision/scale on Decimal32Array")
+}
+
+#[inline]
+#[cfg(feature = "small_decimals")]
+fn make_decimal64_array_with_tag(n: usize, tag: u64, precision: u8, scale: i8) -> Decimal64Array {
+    let mut rng = rng_for(tag, n);
+    let limit = pow10_i64(precision).saturating_sub(1);
+    // Unscaled values are drawn symmetrically around zero: -limit..=limit.
+    let raw = Decimal64Array::from_iter_values((0..n).map(|_| rng.random_range(-limit..=limit)));
+    let typed = raw.with_precision_and_scale(precision, scale);
+    typed.expect("set precision/scale on Decimal64Array")
+}
+
+#[inline]
+fn make_decimal128_array_with_tag(n: usize, tag: u64, precision: u8, scale: i8) -> Decimal128Array {
+    let bound = pow10_i128(precision).saturating_sub(1); // 10^precision - 1 (saturating)
+    let mut rng = rng_for(tag, n);
+    let draw = |_: usize| rng.random_range(-bound..=bound);
+    Decimal128Array::from_iter_values((0..n).map(draw))
+        .with_precision_and_scale(precision, scale)
+        .expect("set precision/scale on Decimal128Array")
+}
+
+/// Builds `n` non-null `Decimal256` values seeded by `(tag, n)`.
+/// Panics (via `expect`) if `with_precision_and_scale` rejects `precision`/`scale`.
+#[inline]
+fn make_decimal256_array_with_tag(n: usize, tag: u64, precision: u8, scale: i8) -> Decimal256Array {
+    // Draw as i128 and widen to i256 (cheap, portable). Magnitudes are capped at 30
+    // digits, and at `precision` digits when that is smaller, so the generated values
+    // never carry more digits than the declared precision.
+    let mut rng = rng_for(tag, n);
+    let max128 = pow10_i128(precision.min(30)).saturating_sub(1);
+    let values = (0..n).map(|_| i256::from_i128(rng.random_range(-max128..=max128)));
+    Decimal256Array::from_iter_values(values)
+        .with_precision_and_scale(precision, scale)
+        .expect("set precision/scale on Decimal256Array")
+}
+
+#[inline]
+fn make_fixed16_array(n: usize) -> FixedSizeBinaryArray {
+    make_fixed16_array_with_tag(n, 0xF15E_D016) // fixed tag; feeds FIXED16_DATA
+}
+
+#[inline]
+fn make_interval_mdn_array(n: usize) -> PrimitiveArray<IntervalMonthDayNanoType> {
+    make_interval_mdn_array_with_tag(n, 0xD0_1E_AD) // fixed tag; feeds INTERVAL_MDN_DATA
+}
+
+#[inline]
+fn make_bool_array(n: usize) -> BooleanArray {
+    make_bool_array_with_tag(n, 0xB001) // fixed tag; feeds BOOLEAN_DATA
+}
+#[inline]
+fn make_i32_array(n: usize) -> PrimitiveArray<Int32Type> {
+    make_i32_array_with_tag(n, 0x1337_0032) // fixed tag; feeds INT32_DATA
+}
+#[inline]
+fn make_i64_array(n: usize) -> PrimitiveArray<Int64Type> {
+    make_i64_array_with_tag(n, 0x1337_0064) // fixed tag; feeds INT64_DATA
+}
+#[inline]
+fn make_f32_array(n: usize) -> Float32Array {
+    make_f32_array_with_tag(n, 0xF0_0032) // fixed tag; feeds FLOAT32_DATA
+}
+#[inline]
+fn make_f64_array(n: usize) -> Float64Array {
+    make_f64_array_with_tag(n, 0xF0_0064) // fixed tag; feeds FLOAT64_DATA
+}
+#[inline]
+fn make_binary_array(n: usize) -> BinaryArray {
+    make_binary_array_with_tag(n, 0xB1_0001) // fixed tag; feeds BINARY_DATA
+}
+#[inline]
+fn make_ts_micros_array(n: usize) -> PrimitiveArray<TimestampMicrosecondType> {
+    make_ts_micros_array_with_tag(n, 0x7157_0001) // fixed tag; feeds TIMESTAMP_US_DATA
+}
+#[inline]
+fn make_utf8_array(n: usize) -> StringArray {
+    make_utf8_array_with_tag(n, 0x5712_07F8) // fixed tag; feeds UTF8_DATA
+}
+#[inline]
+fn make_list_utf8_array(n: usize) -> ListArray {
+    make_list_utf8_array_with_tag(n, 0x0A11_57ED) // fixed tag; feeds LIST_UTF8_DATA
+}
+#[inline]
+fn make_struct_array(n: usize) -> StructArray {
+    make_struct_array_with_tag(n, 0x57_AB_C7) // fixed tag; feeds STRUCT_DATA
+}
+
+/// Builds `n` non-null lists of 0 to 5 lowercase ASCII strings (1 to 12 bytes each).
+#[inline]
+fn make_list_utf8_array_with_tag(n: usize, tag: u64) -> ListArray {
+    let mut rng = rng_for(tag, n);
+    let mut lists = ListBuilder::new(StringBuilder::new());
+    (0..n).for_each(|_| {
+        let item_count = rng.random_range(0..=5);
+        for _ in 0..item_count {
+            lists.values().append_value(rand_ascii_string(&mut rng, 1, 12));
+        }
+        lists.append(true);
+    });
+    lists.finish()
+}
+
+/// Builds an `n`-row struct array with three non-nullable children:
+/// `s1: Utf8`, `s2: Int32` and `s3: Float64`.
+///
+/// Each child derives its own tag from `tag` (XOR with 5, 6 and 7 respectively),
+/// so the three columns are generated from different seeds.
+#[inline]
+fn make_struct_array_with_tag(n: usize, tag: u64) -> StructArray {
+    let utf8_tag = tag ^ 0x5u64;
+    let i32_tag = tag ^ 0x6u64;
+    let f64_tag = tag ^ 0x7u64;
+    let strings: ArrayRef = Arc::new(make_utf8_array_with_tag(n, utf8_tag));
+    let ints: ArrayRef = Arc::new(make_i32_array_with_tag(n, i32_tag));
+    let floats: ArrayRef = Arc::new(make_f64_array_with_tag(n, f64_tag));
+    // Pair each child array with the field that describes it.
+    let child = |name: &str, data_type: DataType, values: ArrayRef| {
+        (Arc::new(Field::new(name, data_type, false)), values)
+    };
+    StructArray::from(vec![
+        child("s1", DataType::Utf8, strings),
+        child("s2", DataType::Int32, ints),
+        child("s3", DataType::Float64, floats),
+    ])
+}
+
+#[inline]
+fn schema_single(name: &str, dt: DataType) -> Arc<Schema> {
+    Schema::new(vec![Field::new(name, dt, false)]).into() // one non-nullable column
+}
+
+#[inline]
+fn schema_mixed() -> Arc<Schema> {
+    let types = [DataType::Int32, DataType::Int64, DataType::Binary, DataType::Float64];
+    let fields: Vec<Field> = (1..)
+        .zip(types)
+        .map(|(i, dt)| Field::new(format!("f{i}"), dt, false))
+        .collect();
+    Arc::new(Schema::new(fields)) // non-nullable columns f1..f4, in `types` order
+}
+
+#[inline]
+fn schema_fixed16() -> Arc<Schema> {
+    schema_single("field1", DataType::FixedSizeBinary(16)) // one non-nullable 16-byte column
+}
+
+#[inline]
+fn schema_uuid16() -> Arc<Schema> {
+    // Field-level metadata tags the 16-byte fixed column with `logicalType = uuid`.
+    let metadata = HashMap::from([("logicalType".to_string(), "uuid".to_string())]);
+    let uuid_field = Field::new("uuid", DataType::FixedSizeBinary(16), false);
+    Arc::new(Schema::new(vec![uuid_field.with_metadata(metadata)]))
+}
+
+#[inline]
+fn schema_interval_mdn() -> Arc<Schema> {
+    schema_single("duration", DataType::Interval(IntervalUnit::MonthDayNano)) // non-nullable
+}
+
+/// Single non-nullable column schema; when `size_meta` is set, the field carries a
+/// `"size"` metadata entry holding that number as decimal text.
+#[inline]
+fn schema_decimal_with_size(name: &str, dt: DataType, size_meta: Option<usize>) -> Arc<Schema> {
+    let plain = Field::new(name, dt, false);
+    let field = match size_meta {
+        Some(size) => plain.with_metadata(HashMap::from([("size".to_string(), size.to_string())])),
+        None => plain,
+    };
+    Arc::new(Schema::new(vec![field]))
+}
+
+/// One single-column Boolean batch per entry of `SIZES`, built lazily on first use.
+static BOOLEAN_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Boolean);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_bool_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column Int32 batch per entry of `SIZES`, built lazily on first use.
+static INT32_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Int32);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_i32_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column Int64 batch per entry of `SIZES`, built lazily on first use.
+static INT64_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Int64);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_i64_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column Float32 batch per entry of `SIZES`, built lazily on first use.
+static FLOAT32_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Float32);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_f32_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column Float64 batch per entry of `SIZES`, built lazily on first use.
+static FLOAT64_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Float64);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_f64_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column Binary batch per entry of `SIZES`, built lazily on first use.
+static BINARY_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Binary);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_binary_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One single-column FixedSizeBinary(16) batch per entry of `SIZES`, built lazily.
+static FIXED16_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_fixed16();
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_fixed16_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static UUID16_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_uuid16();
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        // Same 16-byte layout as FIXED16_DATA but generated from a different tag, so the
+        // bytes differ; the writer path differs because of the field metadata.
+        let column: ArrayRef = Arc::new(make_fixed16_array_with_tag(rows, 0x7575_6964_7575_6964));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One batch per entry of `SIZES`
+/// with a single `Interval(MonthDayNano)` column named `duration`.
+static INTERVAL_MDN_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_interval_mdn();
+    let build = |rows: usize| {
+        let column: ArrayRef = Arc::new(make_interval_mdn_array(rows));
+        RecordBatch::try_new(schema.clone(), vec![column]).unwrap()
+    };
+    SIZES.iter().copied().map(build).collect()
+});
+
+/// One single-column microsecond-timestamp batch (no time zone) per entry of `SIZES`.
+static TIMESTAMP_US_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Timestamp(TimeUnit::Microsecond, None));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_ts_micros_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static MIXED_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_mixed();
+    let build = |rows: usize| {
+        let columns: Vec<ArrayRef> = vec![
+            Arc::new(make_i32_array_with_tag(rows, 0xA1)),
+            Arc::new(make_i64_array_with_tag(rows, 0xA2)),
+            Arc::new(make_binary_array_with_tag(rows, 0xA3)),
+            Arc::new(make_f64_array_with_tag(rows, 0xA4)),
+        ];
+        RecordBatch::try_new(schema.clone(), columns).unwrap()
+    };
+    SIZES.iter().copied().map(build).collect() // column order matches `schema_mixed`
+});
+
+/// One single-column Utf8 batch per entry of `SIZES`, built lazily on first use.
+static UTF8_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    let schema = schema_single("field1", DataType::Utf8);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_utf8_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One batch per entry of `SIZES` with a single `List<Utf8>` column.
+static LIST_UTF8_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Per the original note, `ListBuilder` names its child field "item" and makes it
+    // nullable, so the schema declares the list item the same way to match the arrays.
+    let list_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
+    let schema = schema_single("field1", list_type);
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_list_utf8_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One batch per entry of `SIZES` with a single struct column.
+///
+/// The struct has three non-nullable children: `s1`, `s2` and `s3`.
+static STRUCT_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Same names, types and nullability as the fields built by the array generator.
+    let children = vec![
+        Field::new("s1", DataType::Utf8, false),
+        Field::new("s2", DataType::Int32, false),
+        Field::new("s3", DataType::Float64, false),
+    ];
+    let schema = schema_single("field1", DataType::Struct(children.into()));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let column: ArrayRef = Arc::new(make_struct_array(rows));
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+#[cfg(feature = "small_decimals")]
+static DECIMAL32_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Decimal(7, 2): a representative precision/scale within Decimal32 limits.
+    const PRECISION: u8 = 7;
+    const SCALE: i8 = 2;
+    let schema = schema_single("amount", DataType::Decimal32(PRECISION, SCALE));
+    // One batch per benchmarked row count.
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let values = make_decimal32_array_with_tag(rows, 0xDEC_0032, PRECISION, SCALE);
+        let column: ArrayRef = Arc::new(values);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+#[cfg(feature = "small_decimals")]
+static DECIMAL64_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Decimal(13, 3), one batch per benchmarked row count.
+    const PRECISION: u8 = 13;
+    const SCALE: i8 = 3;
+    let schema = schema_single("amount", DataType::Decimal64(PRECISION, SCALE));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let values = make_decimal64_array_with_tag(rows, 0xDEC_0064, PRECISION, SCALE);
+        let column: ArrayRef = Arc::new(values);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static DECIMAL128_BYTES_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Decimal(25, 6) with no extra field metadata; one batch per benchmarked row count.
+    const PRECISION: u8 = 25;
+    const SCALE: i8 = 6;
+    let schema = schema_single("amount", DataType::Decimal128(PRECISION, SCALE));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let values = make_decimal128_array_with_tag(rows, 0xDEC_0128, PRECISION, SCALE);
+        let column: ArrayRef = Arc::new(values);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static DECIMAL128_FIXED16_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Same Decimal(25, 6) type as DECIMAL128_BYTES_DATA, but the field carries the
+    // metadata "size": "16", which per the original note forces Avro fixed(16).
+    const PRECISION: u8 = 25;
+    const SCALE: i8 = 6;
+    let decimal_type = DataType::Decimal128(PRECISION, SCALE);
+    let schema = schema_decimal_with_size("amount", decimal_type, Some(16));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let values = make_decimal128_array_with_tag(rows, 0xDEC_F128, PRECISION, SCALE);
+        let column: ArrayRef = Arc::new(values);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static DECIMAL256_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Decimal(50, 10): a higher precision typical of 256-bit decimals. The generated
+    // values themselves stay within 30 digits (see the array generator).
+    const PRECISION: u8 = 50;
+    const SCALE: i8 = 10;
+    let schema = schema_single("amount", DataType::Decimal256(PRECISION, SCALE));
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let values = make_decimal256_array_with_tag(rows, 0xDEC_0256, PRECISION, SCALE);
+        let column: ArrayRef = Arc::new(values);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+/// One batch per entry of `SIZES` with a single `Map<Utf8, Utf8>` column.
+static MAP_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    use arrow_array::builder::{MapBuilder, StringBuilder};
+
+    // Schema: non-nullable Utf8 keys and nullable Utf8 values inside an "entries"
+    // struct. NOTE(review): presumably these names match what
+    // `MapBuilder::new(None, ..)` produces by default; confirm against the builder.
+    let entry_fields = vec![
+        Field::new("keys", DataType::Utf8, false),
+        Field::new("values", DataType::Utf8, true),
+    ];
+    let entries = Field::new("entries", DataType::Struct(entry_fields.into()), false);
+    let schema = schema_single("field1", DataType::Map(Arc::new(entries), false));
+
+    let build_batch = |rows: usize| {
+        // Fresh RNG and builder for every batch size.
+        let mut rng = rng_for(0x00D0_0D1A, rows);
+        let mut maps = MapBuilder::new(None, StringBuilder::new(), StringBuilder::new());
+        for _ in 0..rows {
+            // 0 to 5 entries per row; keys are 3-10 bytes long, values 0-12 bytes.
+            let entry_count = rng.random_range(0..=5);
+            for _ in 0..entry_count {
+                let key = rand_ascii_string(&mut rng, 3, 10);
+                let value = rand_ascii_string(&mut rng, 0, 12);
+                // The value column is nullable, but only non-null values are written.
+                maps.keys().append_value(key);
+                maps.values().append_value(value);
+            }
+            maps.append(true).expect("Error building MapArray");
+        }
+        let column: ArrayRef = Arc::new(maps.finish());
+        RecordBatch::try_new(schema.clone(), vec![column]).unwrap()
+    };
+    SIZES.iter().copied().map(build_batch).collect()
+});
+
+/// One batch per entry of `SIZES` with a single dictionary-encoded column, `color_enum`.
+static ENUM_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    use arrow_array::DictionaryArray;
+
+    // Per the original author's note, the writer represents an Avro enum as a
+    // Dictionary<Int32, Utf8> field whose metadata lists the enum symbols (as JSON).
+    let symbols_json = r#"["RED", "GREEN", "BLUE"]"#;
+    let metadata = HashMap::from([("avro.enum.symbols".to_string(), symbols_json.to_string())]);
+    let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
+    let field = Field::new("color_enum", dict_type, false).with_metadata(metadata);
+    let schema = Arc::new(Schema::new(vec![field]));
+
+    // Dictionary values shared by every batch, in the same order as the symbols above.
+    let dict_values: ArrayRef = Arc::new(StringArray::from(vec!["RED", "GREEN", "BLUE"]));
+
+    // Build one dictionary array per benchmarked row count.
+    let mut batches = Vec::with_capacity(SIZES.len());
+    for rows in SIZES {
+        let mut rng = rng_for(0x3A7A, rows);
+        // Keys index into `dict_values`, hence the inclusive range 0..=2.
+        let key_values: Vec<i32> = (0..rows).map(|_| rng.random_range(0..=2)).collect();
+        let keys = PrimitiveArray::<Int32Type>::from(key_values);
+
+        let dict_array = DictionaryArray::<Int32Type>::try_new(keys, dict_values.clone()).unwrap();
+        let column: ArrayRef = Arc::new(dict_array);
+        batches.push(RecordBatch::try_new(schema.clone(), vec![column]).unwrap());
+    }
+    batches
+});
+
+static UNION_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
+    // Dense union of three children (Utf8, Int32, Float64) with type ids 0, 1 and 2.
+    let union_fields = UnionFields::try_new(
+        vec![0, 1, 2],
+        vec![
+            Field::new("u_str", DataType::Utf8, true),
+            Field::new("u_int", DataType::Int32, true),
+            Field::new("u_f64", DataType::Float64, true),
+        ],
+    )
+    .expect("UnionFields should be valid");
+    let union_dt = DataType::Union(union_fields.clone(), UnionMode::Dense);
+    let schema = schema_single("field1", union_dt);
+
+    SIZES
+        .iter()
+        .map(|&n| {
+            // Row i gets type id i % 3 (0 -> 1 -> 2 -> 0 ...), so the layout is deterministic.
+            let mut type_ids: Vec<i8> = Vec::with_capacity(n);
+            let mut offsets: Vec<i32> = Vec::with_capacity(n);
+            let (mut c0, mut c1, mut c2) = (0i32, 0i32, 0i32); // rows seen so far per child
+            for i in 0..n {
+                let tid = (i % 3) as i8;
+                type_ids.push(tid);
+                match tid {
+                    0 => {
+                        offsets.push(c0); // next unused index in the Utf8 child
+                        c0 += 1;
+                    }
+                    1 => {
+                        offsets.push(c1); // next unused index in the Int32 child
+                        c1 += 1;
+                    }
+                    _ => {
+                        offsets.push(c2); // next unused index in the Float64 child
+                        c2 += 1;
+                    }
+                }
+            }
+
+            // Each child array gets exactly as many values as rows that reference it (c0/c1/c2).
+            let mut rng = rng_for(0xDEAD_0003, n);
+            let strings: Vec<String> = (0..c0)
+                .map(|_| rand_ascii_string(&mut rng, 3, 12))
+                .collect();
+            let ints = 0..c1; // the Int32 child is the sequence 0, 1, 2, ... (not random)
+            let floats = (0..c2).map(|_| rng.random::<f64>()); // lazy: drawn when `f_arr` is built
+
+            let str_arr = StringArray::from_iter_values(strings);
+            let int_arr: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(ints);
+            let f_arr = Float64Array::from_iter_values(floats);
+
+            let type_ids_buf = Buffer::from_slice_ref(type_ids.as_slice());
+            let offsets_buf = Buffer::from_slice_ref(offsets.as_slice());
+
+            let union_array = arrow_array::UnionArray::try_new(
+                union_fields.clone(),
+                type_ids_buf.into(),
+                Some(offsets_buf.into()),
+                vec![
+                    Arc::new(str_arr) as ArrayRef,
+                    Arc::new(int_arr) as ArrayRef,
+                    Arc::new(f_arr) as ArrayRef,
+                ],
+            )
+            .unwrap();
+
+            let col: ArrayRef = Arc::new(union_array);
+            RecordBatch::try_new(schema.clone(), vec![col]).unwrap()
+        })
+        .collect()
+});
+
+/// Writes `batch` through an `AvroWriter` into memory and returns the encoded byte count.
+fn ocf_size_for_batch(batch: &RecordBatch) -> usize {
+    let sink = Cursor::new(Vec::<u8>::with_capacity(1024));
+    let schema = Schema::clone(&batch.schema());
+    let mut writer = AvroWriter::new(sink, schema).expect("create writer");
+    writer.write(batch).expect("write batch");
+    writer.finish().expect("finish writer");
+    writer.into_inner().into_inner().len() // writer -> Cursor -> Vec<u8>
+}
+
+/// Registers one benchmark group called `name` with one benchmark per entry of `SIZES`.
+///
+/// `data_sets[i]` must be the batch generated for `SIZES[i]`; indexing panics if
+/// `data_sets` is shorter than `SIZES`. The first batch's schema is used for all writers.
+fn bench_writer_scenario(c: &mut Criterion, name: &str, data_sets: &[RecordBatch]) {
+    let mut group = c.benchmark_group(name);
+    let schema_owned: Schema = (*data_sets[0].schema()).clone();
+    for (idx, &rows) in SIZES.iter().enumerate() {
+        let batch = &data_sets[idx];
+        // Throughput is reported in bytes of encoded output rather than in rows.
+        let encoded_len = ocf_size_for_batch(batch);
+        group.throughput(Throughput::Bytes(encoded_len as u64));
+        // Fewer samples for larger inputs; unlisted sizes keep the group's current settings.
+        let samples = match rows {
+            4_096 | 8_192 => Some(40),
+            100_000 => Some(20),
+            1_000_000 => Some(10),
+            _ => None,
+        };
+        if let Some(samples) = samples {
+            group
+                .sample_size(samples)
+                .measurement_time(Duration::from_secs(10))
+                .warm_up_time(Duration::from_secs(3));
+        }
+        group.bench_function(BenchmarkId::from_parameter(rows), |b| {
+            // Every iteration gets a fresh temp file and writer from this setup closure.
+            let setup = || {
+                let file = tempfile().expect("create temp file");
+                AvroWriter::new(file, schema_owned.clone()).expect("create writer")
+            };
+            b.iter_batched_ref(
+                setup,
+                |writer| {
+                    writer.write(batch).unwrap();
+                    writer.finish().unwrap();
+                },
+                BatchSize::SmallInput,
+            )
+        });
+    }
+    group.finish();
+}
+
+fn criterion_benches(c: &mut Criterion) {
+    bench_writer_scenario(c, "write-Boolean", &BOOLEAN_DATA); // one group per scenario name
+    bench_writer_scenario(c, "write-Int32", &INT32_DATA);
+    bench_writer_scenario(c, "write-Int64", &INT64_DATA);
+    bench_writer_scenario(c, "write-Float32", &FLOAT32_DATA);
+    bench_writer_scenario(c, "write-Float64", &FLOAT64_DATA);
+    bench_writer_scenario(c, "write-Binary(Bytes)", &BINARY_DATA);
+    bench_writer_scenario(c, "write-TimestampMicros", &TIMESTAMP_US_DATA);
+    bench_writer_scenario(c, "write-Mixed", &MIXED_DATA);
+    bench_writer_scenario(c, "write-Utf8", &UTF8_DATA);
+    bench_writer_scenario(c, "write-List<Utf8>", &LIST_UTF8_DATA);
+    bench_writer_scenario(c, "write-Struct", &STRUCT_DATA);
+    bench_writer_scenario(c, "write-FixedSizeBinary16", &FIXED16_DATA);
+    bench_writer_scenario(c, "write-UUID(logicalType)", &UUID16_DATA);
+    bench_writer_scenario(c, "write-IntervalMonthDayNanoDuration", &INTERVAL_MDN_DATA);
+    #[cfg(feature = "small_decimals")] // DECIMAL32_DATA only exists with this feature
+    bench_writer_scenario(c, "write-Decimal32(bytes)", &DECIMAL32_DATA);
+    #[cfg(feature = "small_decimals")] // DECIMAL64_DATA only exists with this feature
+    bench_writer_scenario(c, "write-Decimal64(bytes)", &DECIMAL64_DATA);
+    bench_writer_scenario(c, "write-Decimal128(bytes)", &DECIMAL128_BYTES_DATA);
+    bench_writer_scenario(c, "write-Decimal128(fixed16)", &DECIMAL128_FIXED16_DATA);
+    bench_writer_scenario(c, "write-Decimal256(bytes)", &DECIMAL256_DATA);
+    bench_writer_scenario(c, "write-Map", &MAP_DATA);
+    bench_writer_scenario(c, "write-Enum", &ENUM_DATA);
+    bench_writer_scenario(c, "write-Union", &UNION_DATA);
+}
+
+criterion_group! {
+    name = avro_writer;
+    config = Criterion::default().configure_from_args(); // defaults, then command-line args
+    targets = criterion_benches
+}
+criterion_main!(avro_writer); // benchmark binary entry point for the `avro_writer` group
diff --git a/arrow-avro/benches/decoder.rs b/arrow-avro/benches/decoder.rs
new file mode 100644
index 000000000000..7180826b7b7d
--- /dev/null
+++ b/arrow-avro/benches/decoder.rs
@@ -0,0 +1,600 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Benchmarks for `arrow-avro` **Decoder**
+//!
+
+extern crate apache_avro;
+extern crate arrow_avro;
+extern crate criterion;
+extern crate num_bigint;
+extern crate once_cell;
+extern crate uuid;
+
+use apache_avro::types::Value;
+use apache_avro::{Decimal, Schema as ApacheSchema, to_avro_datum};
+use arrow_avro::schema::{CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC};
+use arrow_avro::{reader::ReaderBuilder, schema::AvroSchema};
+use criterion::{BatchSize, BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use once_cell::sync::Lazy;
+use std::{hint::black_box, time::Duration};
+use uuid::Uuid;
+
+fn make_prefix(fp: Fingerprint) -> Vec<u8> {
+    // Produces the bytes placed in front of each encoded datum: a framing
+    // marker followed by the serialized schema identifier. Which marker and
+    // which byte order are used depends on the fingerprint variant.
+    fn frame(magic: &[u8], ident: &[u8]) -> Vec<u8> {
+        let mut prefix = Vec::with_capacity(magic.len() + ident.len());
+        prefix.extend_from_slice(magic);
+        prefix.extend_from_slice(ident);
+        prefix
+    }
+
+    match fp {
+        Fingerprint::Rabin(val) => {
+            // C3 01, then the fingerprint in little-endian order
+            frame(&SINGLE_OBJECT_MAGIC, &val.to_le_bytes())
+        }
+        Fingerprint::Id(id) => {
+            // 00, then the schema id in big-endian order
+            frame(&CONFLUENT_MAGIC, &id.to_be_bytes())
+        }
+        Fingerprint::Id64(id) => {
+            // 00, then the wider schema id, also big-endian
+            frame(&CONFLUENT_MAGIC, &id.to_be_bytes())
+        }
+        #[cfg(feature = "md5")]
+        Fingerprint::MD5(val) => {
+            // C3 01, then the digest bytes as stored
+            frame(&SINGLE_OBJECT_MAGIC, &val)
+        }
+        #[cfg(feature = "sha256")]
+        Fingerprint::SHA256(val) => {
+            // C3 01, then the digest bytes as stored
+            frame(&SINGLE_OBJECT_MAGIC, &val)
+        }
+    }
+}
+
+fn encode_records_with_prefix(
+    schema: &ApacheSchema,
+    prefix: &[u8],
+    rows: impl Iterator<Item = Value>,
+) -> Vec<u8> {
+    // Each row contributes `prefix` followed by its Avro datum, back to back.
+    rows.fold(Vec::new(), |mut stream, row| {
+        stream.extend_from_slice(prefix);
+        stream.extend(to_avro_datum(schema, row).expect("encode datum failed"));
+        stream
+    })
+}
+
+fn gen_int(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `n` prefixed single-field records; `field1` carries the row index as
+    // an Avro `int`.
+    let values = (0..n).map(|idx| Value::Int(idx as i32));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_long(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `n` prefixed single-field records; `field1` carries the row index as
+    // an Avro `long`.
+    let values = (0..n).map(|idx| Value::Long(idx as i64));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_float(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `n` prefixed single-field records; `field1` is the row index plus a
+    // fixed fractional offset, as an Avro `float`.
+    let values = (0..n).map(|idx| Value::Float(idx as f32 + 0.5678));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_bool(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `n` prefixed single-field records; `field1` is `true` on even row
+    // indices and `false` on odd ones.
+    let values = (0..n).map(|idx| Value::Boolean(idx % 2 == 0));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_double(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `n` prefixed single-field records; `field1` is the row index plus a
+    // fixed fractional offset, as an Avro `double`.
+    let values = (0..n).map(|idx| Value::Double(idx as f64 + 0.1234));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_bytes(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // Every record holds a 16-byte payload filled with the low byte of its
+    // row index.
+    let records = (0..n).map(|idx| {
+        let fill = (idx & 0xFF) as u8;
+        let field = Value::Bytes(vec![fill; 16]);
+        Value::Record(vec![("field1".into(), field)])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_string(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // Every third row (starting at 0) gets a unique "value-<i>" string; the
+    // remaining rows share one fixed ten-character literal.
+    let text_for = |idx: usize| -> String {
+        match idx % 3 {
+            0 => format!("value-{idx}"),
+            _ => "abcdefghij".into(),
+        }
+    };
+    let records = (0..n).map(|idx| {
+        Value::Record(vec![("field1".into(), Value::String(text_for(idx)))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_date(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `field1` is the row index as an Avro `int`; this file pairs the
+    // generator with DATE_SCHEMA (`date` logical type).
+    let values = (0..n).map(|idx| Value::Int(idx as i32));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_timemillis(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `field1` is 37 times the row index as an Avro `int`; this file pairs
+    // the generator with TMILLIS_SCHEMA (`time-millis`).
+    let values = (0..n).map(|idx| Value::Int((idx * 37) as i32));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_timemicros(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `field1` is 1001 times the row index as an Avro `long`; this file
+    // pairs the generator with TMICROS_SCHEMA (`time-micros`).
+    let values = (0..n).map(|idx| Value::Long((idx * 1_001) as i64));
+    let records = values.map(|v| Value::Record(vec![("field1".into(), v)]));
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_ts_millis(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `field1` starts at a fixed base value and grows by one per row.
+    // (This file pairs the generator with TSMILLIS_SCHEMA, a
+    // `timestamp-millis` long.)
+    const BASE: i64 = 1_600_000_000_000;
+
+    let records = (0..n).map(|idx| {
+        let stamp = Value::Long(BASE + idx as i64);
+        Value::Record(vec![("field1".into(), stamp)])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_ts_micros(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // `field1` starts at a fixed base value and grows by one per row.
+    // (This file pairs the generator with TSMICROS_SCHEMA, a
+    // `timestamp-micros` long.)
+    const BASE: i64 = 1_600_000_000_000_000;
+
+    let records = (0..n).map(|idx| {
+        let stamp = Value::Long(BASE + idx as i64);
+        Value::Record(vec![("field1".into(), stamp)])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_map(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    use std::collections::HashMap;
+    // Map values are unions: branch 0 wraps an int, branch 1 is null.
+    let some_int = |v: i32| Value::Union(0, Box::new(Value::Int(v)));
+    let records = (0..n).map(|idx| {
+        let base = idx as i32;
+        // "key2" is null on every fifth row, otherwise the row index plus one.
+        let second = match idx % 5 {
+            0 => Value::Union(1, Box::new(Value::Null)),
+            _ => some_int(base + 1),
+        };
+        let entries: HashMap<String, Value> = HashMap::from([
+            ("key1".into(), some_int(base)),
+            ("key2".into(), second),
+            ("key3".into(), some_int(42)),
+        ]);
+        Value::Record(vec![("field1".into(), Value::Map(entries))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_array(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // Each record holds an array of five consecutive ints that starts at the
+    // record's row index.
+    let records = (0..n).map(|idx| {
+        let start = idx as i32;
+        let items: Vec<Value> = (0..5).map(|offset| Value::Int(start + offset)).collect();
+        Value::Record(vec![("field1".into(), Value::Array(items))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn trim_i128_be(v: i128) -> Vec<u8> { // big-endian bytes of `v` without redundant sign bytes
+    let full = v.to_be_bytes();
+    let first = full // counts the leading bytes that can be dropped
+        .iter()
+        .enumerate()
+        .take_while(|(i, b)| {
+            *i < 15 // never drop the last byte
+                && ((**b == 0x00 && full[i + 1] & 0x80 == 0) // 0x00 before a byte with a clear top bit
+                    || (**b == 0xFF && full[i + 1] & 0x80 != 0)) // 0xFF before a byte with a set top bit
+        })
+        .count();
+    full[first..].to_vec()
+}
+
+fn gen_decimal(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // Unscaled values alternate in sign (even rows `i`, odd rows `-i`), stored as trimmed bytes.
+    let records = (0..n).map(|idx| {
+        let magnitude = idx as i128;
+        let unscaled = match idx % 2 {
+            0 => magnitude,
+            _ => -magnitude,
+        };
+        let decimal = Decimal::from(trim_i128_be(unscaled));
+        Value::Record(vec![("field1".into(), Value::Decimal(decimal))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_uuid(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // The row index supplies the 16 raw bytes; two of them are then masked
+    // (byte 6 high nibble -> 0x4, byte 8 top two bits -> 0b10).
+    let records = (0..n).map(|idx| {
+        let mut bytes = (idx as u128).to_be_bytes();
+        bytes[6] = 0x40 | (bytes[6] & 0x0F);
+        bytes[8] = 0x80 | (bytes[8] & 0x3F);
+        let id = Uuid::from_bytes(bytes);
+        Value::Record(vec![("field1".into(), Value::Uuid(id))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_fixed(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // 16-byte values: the row index as a big-endian u64, followed by eight
+    // zero bytes.
+    let records = (0..n).map(|idx| {
+        let head = (idx as u64).to_be_bytes();
+        let mut bytes = head.to_vec();
+        bytes.resize(16, 0);
+        Value::Record(vec![("field1".into(), Value::Fixed(16, bytes))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_interval(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // 12-byte fixed values made of three little-endian u32 components, in
+    // this order: months, days, millis. Months and days wrap with the row
+    // index (modulo 24 and 32); millis is ten times the row index, truncated
+    // to u32.
+    let records = (0..n).map(|idx| {
+        let parts: [u32; 3] = [
+            (idx % 24) as u32, // months
+            (idx % 32) as u32, // days
+            (idx * 10) as u32, // millis
+        ];
+        let bytes: Vec<u8> = parts.iter().flat_map(|p| p.to_le_bytes()).collect();
+        Value::Record(vec![("field1".into(), Value::Fixed(12, bytes))])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_enum(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    const SYMBOLS: [&str; 3] = ["A", "B", "C"];
+
+    // Rows cycle through the symbols in declaration order; every value
+    // carries both the symbol's position and its name.
+    let records = (0..n).map(|row| {
+        let position = row % SYMBOLS.len();
+        let name: String = SYMBOLS[position].into();
+        let symbol = Value::Enum(position as u32, name);
+        // The enum is the record's only field.
+        Value::Record(vec![("field1".into(), symbol)])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_mixed(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // Four fields per record, all derived from the row index:
+    // an int, a long, a "name-<i>" string, and a double equal to 1.5 * i.
+    let records = (0..n).map(|idx| {
+        let fields: Vec<(String, Value)> = vec![
+            ("f1".into(), Value::Int(idx as i32)),
+            ("f2".into(), Value::Long(idx as i64)),
+            ("f3".into(), Value::String(format!("name-{idx}"))),
+            ("f4".into(), Value::Double(idx as f64 * 1.5)),
+        ];
+        Value::Record(fields)
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+fn gen_nested(sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec<u8> {
+    // The outer record has one field, `sub`, holding an inner record with
+    // `x` (row index as int) and `y` (the same literal string on every row).
+    let records = (0..n).map(|idx| {
+        let inner_fields: Vec<(String, Value)> = vec![
+            ("x".into(), Value::Int(idx as i32)),
+            ("y".into(), Value::String("constant".into())),
+        ];
+        let inner = Value::Record(inner_fields);
+        Value::Record(vec![("sub".into(), inner)])
+    });
+    encode_records_with_prefix(sc, prefix, records)
+}
+
+const LARGE_BATCH: usize = 65_536; // larger of the two decoder batch sizes benchmarked
+const SMALL_BATCH: usize = 4096; // smaller of the two decoder batch sizes benchmarked
+
+fn new_decoder(
+    schema_json: &'static str,
+    batch_size: usize,
+    utf8view: bool,
+) -> arrow_avro::reader::Decoder {
+    // Registers the schema in a default-constructed store, then builds a decoder on it.
+    let avro_schema = AvroSchema::new(schema_json.parse().unwrap());
+    let mut writer_schemas = arrow_avro::schema::SchemaStore::new();
+    writer_schemas.register(avro_schema.clone()).unwrap();
+    let builder = ReaderBuilder::new()
+        .with_writer_schema_store(writer_schemas)
+        .with_batch_size(batch_size)
+        .with_utf8_view(utf8view);
+    builder.build_decoder().expect("failed to build decoder")
+}
+
+fn new_decoder_id(
+    schema_json: &'static str,
+    batch_size: usize,
+    utf8view: bool,
+    id: u32,
+) -> arrow_avro::reader::Decoder {
+    // The store is created for id-based fingerprints; the schema is filed
+    // under the caller-supplied id, and that same id is passed to the builder
+    // as the active fingerprint.
+    let avro_schema = AvroSchema::new(schema_json.parse().unwrap());
+    let mut registry = arrow_avro::schema::SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+    let stored = registry.set(Fingerprint::Id(id), avro_schema.clone());
+    stored.expect("failed to set schema with id");
+    let builder = ReaderBuilder::new()
+        .with_writer_schema_store(registry)
+        .with_active_fingerprint(Fingerprint::Id(id))
+        .with_batch_size(batch_size)
+        .with_utf8_view(utf8view);
+    builder.build_decoder().expect("failed to build decoder for id")
+}
+
+const SIZES: [usize; 3] = [100, 10_000, 1_000_000]; // row counts; each dataset holds one buffer per entry, in this order
+
+const INT_SCHEMA: &str =
+    r#"{"type":"record","name":"IntRec","fields":[{"name":"field1","type":"int"}]}"#;
+const LONG_SCHEMA: &str =
+    r#"{"type":"record","name":"LongRec","fields":[{"name":"field1","type":"long"}]}"#;
+const FLOAT_SCHEMA: &str =
+    r#"{"type":"record","name":"FloatRec","fields":[{"name":"field1","type":"float"}]}"#;
+const BOOL_SCHEMA: &str =
+    r#"{"type":"record","name":"BoolRec","fields":[{"name":"field1","type":"boolean"}]}"#;
+const DOUBLE_SCHEMA: &str =
+    r#"{"type":"record","name":"DoubleRec","fields":[{"name":"field1","type":"double"}]}"#;
+const BYTES_SCHEMA: &str =
+    r#"{"type":"record","name":"BytesRec","fields":[{"name":"field1","type":"bytes"}]}"#;
+const STRING_SCHEMA: &str =
+    r#"{"type":"record","name":"StrRec","fields":[{"name":"field1","type":"string"}]}"#;
+const DATE_SCHEMA: &str = r#"{"type":"record","name":"DateRec","fields":[{"name":"field1","type":{"type":"int","logicalType":"date"}}]}"#;
+const TMILLIS_SCHEMA: &str = r#"{"type":"record","name":"TimeMsRec","fields":[{"name":"field1","type":{"type":"int","logicalType":"time-millis"}}]}"#;
+const TMICROS_SCHEMA: &str = r#"{"type":"record","name":"TimeUsRec","fields":[{"name":"field1","type":{"type":"long","logicalType":"time-micros"}}]}"#;
+const TSMILLIS_SCHEMA: &str = r#"{"type":"record","name":"TsMsRec","fields":[{"name":"field1","type":{"type":"long","logicalType":"timestamp-millis"}}]}"#;
+const TSMICROS_SCHEMA: &str = r#"{"type":"record","name":"TsUsRec","fields":[{"name":"field1","type":{"type":"long","logicalType":"timestamp-micros"}}]}"#;
+const MAP_SCHEMA: &str = r#"{"type":"record","name":"MapRec","fields":[{"name":"field1","type":{"type":"map","values":["int","null"]}}]}"#;
+const ARRAY_SCHEMA: &str = r#"{"type":"record","name":"ArrRec","fields":[{"name":"field1","type":{"type":"array","items":"int"}}]}"#;
+const DECIMAL_SCHEMA: &str = r#"{"type":"record","name":"DecRec","fields":[{"name":"field1","type":{"type":"bytes","logicalType":"decimal","precision":10,"scale":3}}]}"#;
+const UUID_SCHEMA: &str = r#"{"type":"record","name":"UuidRec","fields":[{"name":"field1","type":{"type":"string","logicalType":"uuid"}}]}"#;
+const FIXED_SCHEMA: &str = r#"{"type":"record","name":"FixRec","fields":[{"name":"field1","type":{"type":"fixed","name":"Fixed16","size":16}}]}"#;
+const INTERVAL_SCHEMA: &str = r#"{"type":"record","name":"DurRec","fields":[{"name":"field1","type":{"type":"fixed","name":"Duration12","size":12,"logicalType":"duration"}}]}"#; // used to build the decoder in the "Interval" benchmark
+const INTERVAL_SCHEMA_ENCODE: &str = r#"{"type":"record","name":"DurRec","fields":[{"name":"field1","type":{"type":"fixed","name":"Duration12","size":12}}]}"#; // same record without the logical type; used only to generate INTERVAL_DATA
+const ENUM_SCHEMA: &str = r#"{"type":"record","name":"EnumRec","fields":[{"name":"field1","type":{"type":"enum","name":"MyEnum","symbols":["A","B","C"]}}]}"#;
+const MIX_SCHEMA: &str = r#"{"type":"record","name":"MixRec","fields":[{"name":"f1","type":"int"},{"name":"f2","type":"long"},{"name":"f3","type":"string"},{"name":"f4","type":"double"}]}"#;
+const NEST_SCHEMA: &str = r#"{"type":"record","name":"NestRec","fields":[{"name":"sub","type":{"type":"record","name":"Sub","fields":[{"name":"x","type":"int"},{"name":"y","type":"string"}]}}]}"#;
+
+macro_rules! dataset {
+    ($name:ident, $schema_json:expr, $gen_fn:ident) => {
+        static $name: Lazy<Vec<Vec<u8>>> = Lazy::new(|| { // built lazily on first access
+            let schema =
+                ApacheSchema::parse_str($schema_json).expect("invalid schema for generator");
+            let arrow_schema = AvroSchema::new($schema_json.parse().unwrap());
+            let fingerprint = arrow_schema // Rabin fingerprint of the same schema JSON
+                .fingerprint(FingerprintAlgorithm::Rabin)
+                .expect("fingerprint failed");
+            let prefix = make_prefix(fingerprint); // framing bytes handed to the generator
+            SIZES
+                .iter()
+                .map(|&n| $gen_fn(&schema, n, &prefix)) // one encoded buffer per row count
+                .collect()
+        });
+    };
+}
+
+/// Like `dataset!`, but frames records with Confluent's ID-based wire format (00 + BE u32 `$id`).
+macro_rules! dataset_id {
+    ($name:ident, $schema_json:expr, $gen_fn:ident, $id:expr) => {
+        static $name: Lazy<Vec<Vec<u8>>> = Lazy::new(|| { // built lazily on first access
+            let schema =
+                ApacheSchema::parse_str($schema_json).expect("invalid schema for generator");
+            let prefix = make_prefix(Fingerprint::Id($id)); // no fingerprint is computed here
+            SIZES
+                .iter()
+                .map(|&n| $gen_fn(&schema, n, &prefix)) // one encoded buffer per row count
+                .collect()
+        });
+    };
+}
+
+const ID_BENCH_ID: u32 = 7; // schema id shared by INT_DATA_ID and the "Int32_Id" decoder
+
+dataset_id!(INT_DATA_ID, INT_SCHEMA, gen_int, ID_BENCH_ID);
+dataset!(INT_DATA, INT_SCHEMA, gen_int);
+dataset!(LONG_DATA, LONG_SCHEMA, gen_long);
+dataset!(FLOAT_DATA, FLOAT_SCHEMA, gen_float);
+dataset!(BOOL_DATA, BOOL_SCHEMA, gen_bool);
+dataset!(DOUBLE_DATA, DOUBLE_SCHEMA, gen_double);
+dataset!(BYTES_DATA, BYTES_SCHEMA, gen_bytes);
+dataset!(STRING_DATA, STRING_SCHEMA, gen_string);
+dataset!(DATE_DATA, DATE_SCHEMA, gen_date);
+dataset!(TMILLIS_DATA, TMILLIS_SCHEMA, gen_timemillis);
+dataset!(TMICROS_DATA, TMICROS_SCHEMA, gen_timemicros);
+dataset!(TSMILLIS_DATA, TSMILLIS_SCHEMA, gen_ts_millis);
+dataset!(TSMICROS_DATA, TSMICROS_SCHEMA, gen_ts_micros);
+dataset!(MAP_DATA, MAP_SCHEMA, gen_map);
+dataset!(ARRAY_DATA, ARRAY_SCHEMA, gen_array);
+dataset!(DECIMAL_DATA, DECIMAL_SCHEMA, gen_decimal);
+dataset!(UUID_DATA, UUID_SCHEMA, gen_uuid);
+dataset!(FIXED_DATA, FIXED_SCHEMA, gen_fixed);
+dataset!(INTERVAL_DATA, INTERVAL_SCHEMA_ENCODE, gen_interval); // encoded with the schema variant that has no logical type
+dataset!(ENUM_DATA, ENUM_SCHEMA, gen_enum);
+dataset!(MIX_DATA, MIX_SCHEMA, gen_mixed);
+dataset!(NEST_DATA, NEST_SCHEMA, gen_nested);
+
+fn bench_with_decoder<F>(
+    c: &mut Criterion,
+    name: &str,
+    data_sets: &[Vec<u8>],
+    rows: &[usize],
+    mut new_decoder: F,
+) where
+    F: FnMut() -> arrow_avro::reader::Decoder,
+{
+    // One benchmark per entry of `rows`; entry `k` decodes `data_sets[k]`.
+    let mut group = c.benchmark_group(name);
+    for (slot, row_count) in rows.iter().copied().enumerate() {
+        let payload = &data_sets[slot];
+        group.throughput(Throughput::Bytes(payload.len() as u64));
+        // Bigger inputs: fewer samples, explicit measurement and warm-up windows.
+        let reduced_samples = match row_count {
+            10_000 => Some(25),
+            1_000_000 => Some(10),
+            _ => None,
+        };
+        if let Some(samples) = reduced_samples {
+            group
+                .sample_size(samples)
+                .measurement_time(Duration::from_secs(10))
+                .warm_up_time(Duration::from_secs(3));
+        }
+        // NOTE(review): each iteration issues a single `decode` + `flush`, while throughput
+        // counts the whole payload -- confirm one `decode` call consumes all of it.
+        group.bench_function(BenchmarkId::from_parameter(row_count), |bencher| {
+            bencher.iter_batched_ref(
+                &mut new_decoder,
+                |dec| {
+                    black_box(dec.decode(payload).unwrap());
+                    black_box(dec.flush().unwrap().unwrap());
+                },
+                BatchSize::SmallInput,
+            )
+        });
+    }
+    group.finish();
+}
+
+fn criterion_benches(c: &mut Criterion) { // registers every decoder benchmark once per batch size
+    for &batch_size in &[SMALL_BATCH, LARGE_BATCH] { // group names end in "/<batch_size>" so the two passes get distinct IDs
+        bench_with_decoder(c, &format!("Interval/{batch_size}"), &INTERVAL_DATA, &SIZES, || {
+            new_decoder(INTERVAL_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Int32/{batch_size}"), &INT_DATA, &SIZES, || {
+            new_decoder(INT_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Int32_Id/{batch_size}"), &INT_DATA_ID, &SIZES, || {
+            new_decoder_id(INT_SCHEMA, batch_size, false, ID_BENCH_ID)
+        });
+        bench_with_decoder(c, &format!("Int64/{batch_size}"), &LONG_DATA, &SIZES, || {
+            new_decoder(LONG_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Float32/{batch_size}"), &FLOAT_DATA, &SIZES, || {
+            new_decoder(FLOAT_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Boolean/{batch_size}"), &BOOL_DATA, &SIZES, || {
+            new_decoder(BOOL_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Float64/{batch_size}"), &DOUBLE_DATA, &SIZES, || {
+            new_decoder(DOUBLE_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Binary(Bytes)/{batch_size}"), &BYTES_DATA, &SIZES, || {
+            new_decoder(BYTES_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("String/{batch_size}"), &STRING_DATA, &SIZES, || {
+            new_decoder(STRING_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("StringView/{batch_size}"), &STRING_DATA, &SIZES, || {
+            new_decoder(STRING_SCHEMA, batch_size, true)
+        });
+        bench_with_decoder(c, &format!("Date32/{batch_size}"), &DATE_DATA, &SIZES, || {
+            new_decoder(DATE_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("TimeMillis/{batch_size}"), &TMILLIS_DATA, &SIZES, || {
+            new_decoder(TMILLIS_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("TimeMicros/{batch_size}"), &TMICROS_DATA, &SIZES, || {
+            new_decoder(TMICROS_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("TimestampMillis/{batch_size}"), &TSMILLIS_DATA, &SIZES, || {
+            new_decoder(TSMILLIS_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("TimestampMicros/{batch_size}"), &TSMICROS_DATA, &SIZES, || {
+            new_decoder(TSMICROS_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Map/{batch_size}"), &MAP_DATA, &SIZES, || {
+            new_decoder(MAP_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Array/{batch_size}"), &ARRAY_DATA, &SIZES, || {
+            new_decoder(ARRAY_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Decimal128/{batch_size}"), &DECIMAL_DATA, &SIZES, || {
+            new_decoder(DECIMAL_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("UUID/{batch_size}"), &UUID_DATA, &SIZES, || {
+            new_decoder(UUID_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("FixedSizeBinary/{batch_size}"), &FIXED_DATA, &SIZES, || {
+            new_decoder(FIXED_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Enum(Dictionary)/{batch_size}"), &ENUM_DATA, &SIZES, || {
+            new_decoder(ENUM_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Mixed/{batch_size}"), &MIX_DATA, &SIZES, || {
+            new_decoder(MIX_SCHEMA, batch_size, false)
+        });
+        bench_with_decoder(c, &format!("Nested(Struct)/{batch_size}"), &NEST_DATA, &SIZES, || {
+            new_decoder(NEST_SCHEMA, batch_size, false)
+        });
+    }
+}
+
+criterion_group! {
+    name = avro_decoder; // group identifier handed to `criterion_main!` below
+    config = Criterion::default().configure_from_args(); // defaults, then command-line settings
+    targets = criterion_benches
+}
+criterion_main!(avro_decoder);
diff --git a/arrow-avro/examples/decode_kafka_stream.rs b/arrow-avro/examples/decode_kafka_stream.rs
new file mode 100644
index 000000000000..46309ecd0cb9
--- /dev/null
+++ b/arrow-avro/examples/decode_kafka_stream.rs
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Decode **Confluent Schema Registry - framed** Avro messages into Arrow [`RecordBatch`]es,
+//! resolving **older writer schemas** against a **current reader schema** without adding
+//! any new reader‑only fields.
+//!
+//! What this example shows:
+//! * A **reader schema** for the current topic version with fields: `{ id: long, name: string }`.
+//! * Two older **writer schemas** (Confluent IDs **0** and **1**):
+//!   - v0: `{ id: int, name: string }` (older type for `id`)
+//!   - v1: `{ id: long, name: string, email: ["null","string"] }` (extra writer field `email`)
+//! * Streaming decode with `ReaderBuilder::with_reader_schema(...)` so that:
+//!   - v0's `id:int` is **promoted** to `long` for the reader
+//!   - v1's extra `email` field is **ignored** by the reader (projection)
+//!
+//! Wire format reminder (message value bytes):
+//! `0x00` magic byte + 4‑byte **big‑endian** schema ID + Avro **binary** body.
+//!
+
+use arrow_array::{Int64Array, RecordBatch, StringArray};
+use arrow_avro::reader::ReaderBuilder;
+use arrow_avro::schema::{
+    AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SchemaStore,
+};
+use arrow_schema::ArrowError;
+
+fn encode_long(value: i64, out: &mut Vec<u8>) { // appends `value` to `out` as a zig-zag varint
+    let mut n = ((value << 1) ^ (value >> 63)) as u64; // zig-zag: the sign ends up in bit 0
+    while (n & !0x7F) != 0 { // more than 7 significant bits remain
+        out.push(((n as u8) & 0x7F) | 0x80); // low 7 bits with the continuation bit set
+        n >>= 7;
+    }
+    out.push(n as u8); // final byte, continuation bit clear
+}
+
+fn encode_len(len: usize, out: &mut Vec<u8>) { // writes a length using the `long` encoding
+    encode_long(len as i64, out)
+}
+
+fn encode_string(s: &str, out: &mut Vec<u8>) {
+    encode_len(s.len(), out); // byte length first ...
+    out.extend(s.bytes()); // ... then the raw UTF-8 bytes
+}
+
+fn encode_union_index(index: i64, out: &mut Vec<u8>) { // branch index, written with the `long` encoding
+    encode_long(index, out);
+}
+
+// Writer v0 (ID=0):
+//   {"type":"record","name":"User","fields":[
+//     {"name":"id","type":"int"},
+//     {"name":"name","type":"string"}]}
+fn encode_user_v0_body(id: i32, name: &str) -> Vec<u8> {
+    let mut body = Vec::with_capacity(16 + name.len());
+    encode_long(i64::from(id), &mut body); // id: int, written through the `long` encoder
+    encode_string(name, &mut body); // name: string
+    body
+}
+
+// Writer v1 (ID=1):
+//   {"type":"record","name":"User","fields":[
+//     {"name":"id","type":"long"},
+//     {"name":"name","type":"string"},
+//     {"name":"email","type":["null","string"],"default":null}]}
+fn encode_user_v1_body(id: i64, name: &str, email: Option<&str>) -> Vec<u8> {
+    // Pre-size the buffer: a fixed allowance plus the lengths of the two
+    // variable-length string payloads.
+    let email_len = email.map_or(0, str::len);
+    let mut body = Vec::with_capacity(24 + name.len() + email_len);
+    encode_long(id, &mut body); // id: long
+    encode_string(name, &mut body); // name: string
+    // email: union branch index first, then the payload of that branch
+    if let Some(address) = email {
+        // branch 1 => string, followed by the string payload
+        encode_union_index(1, &mut body);
+        encode_string(address, &mut body);
+    } else {
+        // branch 0 => null; no value bytes follow
+        encode_union_index(0, &mut body);
+    }
+    body
+}
+
+fn frame_confluent(id_be: u32, body: &[u8]) -> Vec<u8> {
+    // Layout: magic (0x00), the schema id as big-endian bytes, then the body.
+    let id_bytes = id_be.to_be_bytes();
+    let mut framed = Vec::with_capacity(5 + body.len());
+    framed.extend(CONFLUENT_MAGIC.iter().chain(&id_bytes).chain(body));
+    framed
+}
+
+fn print_arrow_schema(schema: &arrow_schema::Schema) {
+    println!("Resolved Arrow schema (via reader schema):");
+    // One line per field: right-aligned index, name, data type and
+    // nullability.
+    for (i, field) in schema.fields().iter().enumerate() {
+        let (name, ty, nullable) = (field.name(), field.data_type(), field.is_nullable());
+        println!("  {i:>2}: {}: {:?} (nullable: {})", name, ty, nullable);
+    }
+    // Schema-level metadata gets a line of its own, but only when some
+    // is present.
+    if !schema.metadata().is_empty() {
+        println!("  metadata: {:?}", schema.metadata());
+    }
+}
+
+fn print_rows(batch: &RecordBatch) -> Result<(), ArrowError> {
+    // Column 0 must downcast to `Int64Array` and column 1 to `StringArray`;
+    // a failed downcast is returned as a `ComputeError`.
+    let ids = match batch.column(0).as_any().downcast_ref::<Int64Array>() {
+        Some(array) => array,
+        None => return Err(ArrowError::ComputeError("col 0 not Int64".into())),
+    };
+    let names = match batch.column(1).as_any().downcast_ref::<StringArray>() {
+        Some(array) => array,
+        None => return Err(ArrowError::ComputeError("col 1 not Utf8".into())),
+    };
+    // One output line per row.
+    for row in 0..batch.num_rows() {
+        let (id, name) = (ids.value(row), names.value(row));
+        println!("    row {row}: id={id}, name={name}");
+    }
+    Ok(())
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // The current topic schema as a READER schema
+    let reader_schema = AvroSchema::new(
+        r#"{
+            "type":"record","name":"User","fields":[
+                {"name":"id","type":"long"},
+                {"name":"name","type":"string"}
+            ]}"#
+        .to_string(),
+    );
+
+    // Two prior WRITER schema versions, stored under Confluent IDs 0 and 1
+    let writer_v0 = AvroSchema::new(
+        r#"{
+            "type":"record","name":"User","fields":[
+                {"name":"id","type":"int"},
+                {"name":"name","type":"string"}
+            ]}"#
+        .to_string(),
+    );
+    let writer_v1 = AvroSchema::new(
+        r#"{
+            "type":"record","name":"User","fields":[
+                {"name":"id","type":"long"},
+                {"name":"name","type":"string"},
+                {"name":"email","type":["null","string"],"default":null}
+            ]}"#
+        .to_string(),
+    );
+
+    let id_v0: u32 = 0;
+    let id_v1: u32 = 1;
+
+    // Confluent SchemaStore keyed by integer IDs (FingerprintAlgorithm::Id)
+    let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+    store.set(Fingerprint::Id(id_v0), writer_v0.clone())?;
+    store.set(Fingerprint::Id(id_v1), writer_v1.clone())?;
+
+    // Build a streaming Decoder with the READER schema
+    let mut decoder = ReaderBuilder::new()
+        .with_reader_schema(reader_schema)
+        .with_writer_schema_store(store)
+        .with_batch_size(8) // small batches for demo output
+        .build_decoder()?;
+
+    // Print the Arrow schema reported by the decoder
+    let resolved = decoder.schema();
+    print_arrow_schema(resolved.as_ref());
+    println!();
+
+    // Simulate an interleaved Kafka stream (IDs 0 and 1)
+    //    - v0: {id:int, name:string} --> reader: id promoted to long
+    //    - v1: {id:long, name:string, email: ...} --> reader ignores extra field
+    let mut frames: Vec<(u32, Vec<u8>)> = Vec::new();
+
+    // Some v0 messages
+    for (i, name) in ["v0-alice", "v0-bob", "v0-carol"].iter().enumerate() {
+        let body = encode_user_v0_body(1000 + i as i32, name);
+        frames.push((id_v0, frame_confluent(id_v0, &body)));
+    }
+
+    // Some v1 messages (may include optional email on the writer side)
+    let v1_rows = [
+        (2001_i64, "v1-dave", Some("dave@example.com")),
+        (2002_i64, "v1-erin", None),
+        (2003_i64, "v1-frank", Some("frank@example.com")),
+    ];
+    for (id, name, email) in v1_rows {
+        let body = encode_user_v1_body(id, name, email);
+        frames.push((id_v1, frame_confluent(id_v1, &body)));
+    }
+
+    // Swap one v0 and one v1 frame so the schema ID changes mid-stream (ID order: 0,1,0,0,1,1)
+    frames.swap(1, 3); // crude interleave for demo
+
+    // Decode frames as if they were Kafka record values
+    for (schema_id, frame) in frames {
+        println!("Decoding record framed with Confluent schema id = {schema_id}");
+        let _consumed = decoder.decode(&frame)?; // return value is not needed for this demo
+        while let Some(batch) = decoder.flush()? {
+            println!(
+                "  -> Emitted batch: rows = {}, cols = {}",
+                batch.num_rows(),
+                batch.num_columns()
+            );
+            print_rows(&batch)?;
+        }
+        println!();
+    }
+
+    println!("Done decoding Kafka-style stream with schema resolution (no reader-added fields).");
+    Ok(())
+}
diff --git a/arrow-avro/examples/read_ocf_with_resolution.rs b/arrow-avro/examples/read_ocf_with_resolution.rs
new file mode 100644
index 000000000000..7367ba3cd5b0
--- /dev/null
+++ b/arrow-avro/examples/read_ocf_with_resolution.rs
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Read an Avro **Object Container File (OCF)** using an inline **reader schema**
+//! that differs from the writer schema, demonstrating Avro **schema resolution**
+//! (field projection and legal type promotion) without ever fetching the writer
+//! schema from the file.
+//!
+//! What this example does:
+//! 1. Locates `<crate>/test/data/skippable_types.avro` (portable path).
+//! 2. Defines an inline **reader schema** JSON:
+//!    * Projects a subset of fields from the writer schema, and
+//!    * Promotes `"int"` to `"long"` where applicable.
+//! 3. Builds a `Reader` with `ReaderBuilder::with_reader_schema(...)` and prints batches.
+
+use std::fs::File;
+use std::io::BufReader;
+use std::path::PathBuf;
+
+use arrow_array::RecordBatch;
+use arrow_avro::reader::ReaderBuilder;
+use arrow_avro::schema::AvroSchema;
+
+fn default_ocf_path() -> PathBuf {
+    // Anchor at the crate root (fixed at compile time) rather than the working directory.
+    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    path.extend(["test", "data", "skippable_types.avro"]);
+    path
+}
+
+// A minimal reader schema compatible with the provided writer schema
+const READER_SCHEMA_JSON: &str = r#"
+{
+  "type": "record",
+  "name": "SkippableTypesRecord",
+  "fields": [
+    { "name": "boolean_field", "type": "boolean" },
+    { "name": "int_field", "type": "long" },
+    { "name": "long_field", "type": "long" },
+    { "name": "string_field", "type": "string" },
+    { "name": "nullable_nullfirst_field", "type": ["null", "long"] }
+  ]
+}
+"#;
+
+/// Decodes the bundled OCF through `READER_SCHEMA_JSON`, printing a summary line per batch
+/// followed by the overall batch and row totals.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Open the sample file and wrap the inline JSON as the reader schema.
+    let input = BufReader::new(File::open(default_ocf_path())?);
+    let reader_schema = AvroSchema::new(READER_SCHEMA_JSON.to_string());
+
+    // The reader schema is handed to the builder before `build` is called on the stream.
+    let reader = ReaderBuilder::new()
+        .with_reader_schema(reader_schema)
+        .build(input)?;
+
+    let resolved_schema = reader.schema();
+    println!(
+        "Reader-based decode: resolved Arrow schema with {} fields",
+        resolved_schema.fields().len()
+    );
+
+    // Walk the batches, accumulating totals as each summary line is printed.
+    let mut total_batches = 0usize;
+    let mut total_rows = 0usize;
+    for (index, next) in reader.enumerate() {
+        // `?` returns on the first failed batch, before the totals are updated for it.
+        let batch: RecordBatch = next?;
+        let (rows, cols) = (batch.num_rows(), batch.num_columns());
+        total_batches = index + 1;
+        total_rows += rows;
+        println!("  Batch {:>3}: rows = {:>6}, cols = {:>2}", total_batches, rows, cols);
+    }
+
+    println!();
+    println!("Done (with reader/writer schema resolution).");
+    println!("  Batches : {total_batches}");
+    println!("  Rows    : {total_rows}");
+
+    Ok(())
+}
diff --git a/arrow-avro/examples/read_with_utf8view.rs b/arrow-avro/examples/read_with_utf8view.rs
index 2fa47820346b..85b07c8d033c 100644
--- a/arrow-avro/examples/read_with_utf8view.rs
+++ b/arrow-avro/examples/read_with_utf8view.rs
@@ -23,12 +23,10 @@
 use std::env;
 use std::fs::File;
 use std::io::{BufReader, Seek, SeekFrom};
-use std::sync::Arc;
 use std::time::Instant;
 
-use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray, StringViewArray};
-use arrow_avro::reader::ReadOptions;
-use arrow_schema::{ArrowError, DataType, Field, Schema};
+use arrow_array::{RecordBatch, StringArray, StringViewArray};
+use arrow_avro::reader::ReaderBuilder;
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     let args: Vec<String> = env::args().collect();
@@ -41,22 +39,29 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     };
 
     let file = File::open(file_path)?;
-    let mut reader = BufReader::new(file);
+    let mut file_for_view = file.try_clone()?;
 
     let start = Instant::now();
-    let batch = read_avro_with_options(&mut reader, &ReadOptions::default())?;
+    let reader = BufReader::new(file);
+    let avro_reader = ReaderBuilder::new().build(reader)?;
+    let schema = avro_reader.schema();
+    let batches: Vec<RecordBatch> = avro_reader.collect::<Result<_, _>>()?;
     let regular_duration = start.elapsed();
 
-    reader.seek(SeekFrom::Start(0))?;
-
+    file_for_view.seek(SeekFrom::Start(0))?;
     let start = Instant::now();
-    let options = ReadOptions::default().with_utf8view(true);
-    let batch_view = read_avro_with_options(&mut reader, &options)?;
+    let reader_view = BufReader::new(file_for_view);
+    let avro_reader_view = ReaderBuilder::new()
+        .with_utf8_view(true)
+        .build(reader_view)?;
+    let batches_view: Vec<RecordBatch> = avro_reader_view.collect::<Result<_, _>>()?;
     let view_duration = start.elapsed();
 
-    println!("Read {} rows from {}", batch.num_rows(), file_path);
-    println!("Reading with StringArray: {:?}", regular_duration);
-    println!("Reading with StringViewArray: {:?}", view_duration);
+    let num_rows = batches.iter().map(|b| b.num_rows()).sum::<usize>();
+
+    println!("Read {num_rows} rows from {file_path}");
+    println!("Reading with StringArray: {regular_duration:?}");
+    println!("Reading with StringViewArray: {view_duration:?}");
 
     if regular_duration > view_duration {
         println!(
@@ -70,7 +75,16 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         );
     }
 
-    for (i, field) in batch.schema().fields().iter().enumerate() {
+    if batches.is_empty() {
+        println!("No data read from file.");
+        return Ok(());
+    }
+
+    // Inspect the first batch from each run to show the array types
+    let batch = &batches[0];
+    let batch_view = &batches_view[0];
+
+    for (i, field) in schema.fields().iter().enumerate() {
         let col = batch.column(i);
         let col_view = batch_view.column(i);
 
@@ -93,29 +107,3 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     Ok(())
 }
-
-fn read_avro_with_options(
-    reader: &mut BufReader<File>,
-    options: &ReadOptions,
-) -> Result<RecordBatch, ArrowError> {
-    reader.get_mut().seek(SeekFrom::Start(0))?;
-
-    let mock_schema = Schema::new(vec![
-        Field::new("string_field", DataType::Utf8, false),
-        Field::new("int_field", DataType::Int32, false),
-    ]);
-
-    let string_data = vec!["avro1", "avro2", "avro3", "avro4", "avro5"];
-    let int_data = vec![1, 2, 3, 4, 5];
-
-    let string_array: ArrayRef = if options.use_utf8view() {
-        Arc::new(StringViewArray::from(string_data))
-    } else {
-        Arc::new(StringArray::from(string_data))
-    };
-
-    let int_array: ArrayRef = Arc::new(Int32Array::from(int_data));
-
-    RecordBatch::try_new(Arc::new(mock_schema), vec![string_array, int_array])
-        .map_err(|e| ArrowError::ComputeError(format!("Failed to create record batch: {}", e)))
-}
diff --git a/arrow-avro/examples/write_avro_ocf.rs b/arrow-avro/examples/write_avro_ocf.rs
new file mode 100644
index 000000000000..5bdca0de7a3d
--- /dev/null
+++ b/arrow-avro/examples/write_avro_ocf.rs
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! # Write an Avro Object Container File (OCF) from an Arrow `RecordBatch`
+//!
+//! This example builds a small Arrow `RecordBatch` and persists it to an
+//! **Avro Object Container File (OCF)** using
+//! `arrow_avro::writer::{Writer, WriterBuilder}`.
+//!
+//! ## What this example does
+//! - Defines an Arrow schema with supported types (`Int64`, `Utf8`, `Boolean`,
+//!   `Float64`, `Binary`, and `Timestamp (Microsecond, "UTC")`).
+//! - Constructs arrays and a `RecordBatch`, ensuring each column’s data type
+//!   **exactly** matches the schema (timestamps include the `"UTC"` timezone).
+//! - Writes a single batch to `target/write_avro_ocf_example.avro` as an OCF,
+//!   using Snappy block compression (you can disable or change the codec).
+//! - Prints the file’s 16‑byte sync marker (used by OCF to delimit blocks).
+
+use std::fs::File;
+use std::io::BufWriter;
+use std::sync::Arc;
+
+use arrow_array::{
+    ArrayRef, BinaryArray, BooleanArray, Float64Array, Int64Array, RecordBatch, StringArray,
+    TimestampMicrosecondArray,
+};
+use arrow_avro::compression::CompressionCodec;
+use arrow_avro::writer::format::AvroOcfFormat;
+use arrow_avro::writer::{Writer, WriterBuilder};
+use arrow_schema::{DataType, Field, Schema, TimeUnit};
+
+/// Builds a three-row `RecordBatch` and writes it to an Avro OCF file under `target/`.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Arrow schema: `id`, `active` and `created_at` are non-null; `name`, `score` and
+    // `payload` are nullable. `created_at` is Timestamp(Microsecond, Some("UTC")).
+    let utc_micros = DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into()));
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("name", DataType::Utf8, true),
+        Field::new("active", DataType::Boolean, false),
+        Field::new("score", DataType::Float64, true),
+        Field::new("payload", DataType::Binary, true),
+        Field::new("created_at", utc_micros, false),
+    ]);
+
+    // The batch gets a shared handle; the writer takes the schema by value further down.
+    let schema_ref = Arc::new(schema.clone());
+    let ids = Int64Array::from(vec![1_i64, 2, 3]);
+    let names = StringArray::from(vec![Some("alpha"), None, Some("gamma")]);
+    let active = BooleanArray::from(vec![true, false, true]);
+    let scores = Float64Array::from(vec![Some(1.5_f64), None, Some(3.0)]);
+
+    // BinaryArray: include a null
+    let payload = BinaryArray::from_opt_vec(vec![Some(&b"abc"[..]), None, Some(&[0u8, 1, 2][..])]);
+
+    // Timestamp in microseconds since UNIX epoch
+    let created_at = TimestampMicrosecondArray::from(vec![
+        Some(1_722_000_000_000_000_i64),
+        Some(1_722_000_123_456_000_i64),
+        Some(1_722_000_999_999_000_i64),
+    ])
+    .with_timezone("UTC".to_string());
+
+    let columns: Vec<ArrayRef> = vec![
+        Arc::new(ids),
+        Arc::new(names),
+        Arc::new(active),
+        Arc::new(scores),
+        Arc::new(payload),
+        Arc::new(created_at),
+    ];
+
+    let batch = RecordBatch::try_new(schema_ref, columns)?;
+
+    // `out_path` is relative to the current directory, where `target/` may not exist yet
+    // (e.g. when run from a workspace member directory), so create the parent first.
+    let out_path = "target/write_avro_ocf_example.avro";
+    if let Some(dir) = std::path::Path::new(out_path).parent() {
+        std::fs::create_dir_all(dir)?;
+    }
+
+    // Build an OCF writer with optional compression
+    let file = File::create(out_path)?;
+    let mut writer: Writer<_, AvroOcfFormat> = WriterBuilder::new(schema)
+        .with_compression(Some(CompressionCodec::Snappy))
+        .build(BufWriter::new(file))?;
+
+    // Write a single batch (use `write_batches` for multiple)
+    writer.write(&batch)?;
+    writer.finish()?; // flush and finalize
+
+    if let Some(sync) = writer.sync_marker() {
+        println!("Wrote OCF to {out_path} (sync marker: {:02x?})", &sync[..]);
+    } else {
+        println!("Wrote OCF to {out_path}");
+    }
+
+    Ok(())
+}
diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs
index 70f162f1471d..04ef87d7ef20 100644
--- a/arrow-avro/src/codec.rs
+++ b/arrow-avro/src/codec.rs
@@ -15,38 +15,171 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::schema::{Attributes, ComplexType, PrimitiveType, Record, Schema, TypeName};
+//! Codec for Mapping Avro and Arrow types.
+
+use crate::schema::{
+    AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_FIELD_DEFAULT_METADATA_KEY, AVRO_NAME_METADATA_KEY,
+    AVRO_NAMESPACE_METADATA_KEY, Array, Attributes, ComplexType, Enum, Fixed, Map, Nullability,
+    PrimitiveType, Record, Schema, Type, TypeName, make_full_name,
+};
 use arrow_schema::{
-    ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, SchemaBuilder, SchemaRef, TimeUnit,
+    ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field, Fields,
+    IntervalUnit, TimeUnit, UnionFields, UnionMode,
 };
-use std::borrow::Cow;
-use std::collections::HashMap;
+#[cfg(feature = "small_decimals")]
+use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
+use indexmap::IndexMap;
+use serde_json::Value;
+use std::collections::hash_map::Entry;
+use std::collections::{HashMap, HashSet};
+use std::fmt;
+use std::fmt::Display;
 use std::sync::Arc;
+use strum_macros::AsRefStr;
+
+/// Contains information about how to resolve differences between a writer's and a reader's schema.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum ResolutionInfo {
+    /// The writer's type should be promoted to the reader's type using the given [`Promotion`].
+    Promotion(Promotion),
+    /// A default value, held as an [`AvroLiteral`], should be used for a field.
+    DefaultValue(AvroLiteral),
+    /// Writer-to-reader symbol mapping for resolving enums (see [`EnumMapping`]).
+    EnumMapping(EnumMapping),
+    /// Writer-to-reader field mapping for resolving records (see [`ResolvedRecord`]).
+    Record(ResolvedRecord),
+    /// Writer-to-reader branch mapping and shape info for resolving unions (see [`ResolvedUnion`]).
+    Union(ResolvedUnion),
+}
+
+/// Represents a literal Avro value.
+///
+/// This is used to represent default values in an Avro schema.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum AvroLiteral {
+    /// Represents a null value.
+    Null,
+    /// Represents a boolean value.
+    Boolean(bool),
+    /// Represents an `int` value; defaults for `Date32` and `TimeMillis` codecs also use it.
+    Int(i32),
+    /// Represents a `long` value; defaults for `TimeMicros` and timestamp codecs also use it.
+    Long(i64),
+    /// Represents a float value.
+    Float(f32),
+    /// Represents a double value.
+    Double(f64),
+    /// Represents a bytes value; defaults for fixed, decimal and interval codecs also use it.
+    Bytes(Vec<u8>),
+    /// Represents a string value; defaults for the UUID codec also use it.
+    String(String),
+    /// Represents an enum symbol, stored by name.
+    Enum(String),
+    /// Represents a JSON array default for an Avro array, containing element literals.
+    Array(Vec<AvroLiteral>),
+    /// Represents a JSON object default for an Avro map/struct, mapping string keys to value literals.
+    Map(IndexMap<String, AvroLiteral>),
+}
+
+/// Contains the necessary information to resolve a writer's record against a reader's record schema.
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct ResolvedRecord {
+    /// Indexed by the writer's field index: `Some(i)` is the corresponding reader field index,
+    /// `None` means the writer's field is not present in the reader's schema.
+    pub(crate) writer_to_reader: Arc<[Option<usize>]>,
+    /// A list of indices in the reader's schema for fields that have a default value.
+    pub(crate) default_fields: Arc<[usize]>,
+    /// Data types of fields present in the writer's schema but not the reader's, needed to skip
+    /// over them during deserialization (presumably `None` for fields that are read; TODO confirm).
+    pub(crate) skip_fields: Arc<[Option<AvroDataType>]>,
+}
+
+/// Defines the type of promotion to be applied during schema resolution.
+///
+/// Schema resolution may require promoting a writer's data type to a reader's data type,
+/// e.g. an `int` to a `long`, `float`, or `double`. `Display` renders `Writer->Reader` labels.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum Promotion {
+    /// Direct read with no data type promotion.
+    Direct,
+    /// Promotes an `int` to a `long`.
+    IntToLong,
+    /// Promotes an `int` to a `float`.
+    IntToFloat,
+    /// Promotes an `int` to a `double`.
+    IntToDouble,
+    /// Promotes a `long` to a `float`.
+    LongToFloat,
+    /// Promotes a `long` to a `double`.
+    LongToDouble,
+    /// Promotes a `float` to a `double`.
+    FloatToDouble,
+    /// Promotes a `string` to `bytes`.
+    StringToBytes,
+    /// Promotes `bytes` to a `string`.
+    BytesToString,
+}
+
+impl Display for Promotion {
+    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+        formatter.write_str(match self {
+            Self::Direct => "Direct",
+            Self::IntToLong => "Int->Long",
+            Self::IntToFloat => "Int->Float",
+            Self::IntToDouble => "Int->Double",
+            Self::LongToFloat => "Long->Float",
+            Self::LongToDouble => "Long->Double",
+            Self::FloatToDouble => "Float->Double",
+            Self::StringToBytes => "String->Bytes",
+            Self::BytesToString => "Bytes->String",
+        })
+    }
+}
+
+/// Information required to resolve a writer union against a reader union (or single type).
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct ResolvedUnion {
+    /// For each writer branch index, the reader branch index and the [`Promotion`] used to read it.
+    /// `None` means the writer branch doesn't resolve against the reader.
+    pub(crate) writer_to_reader: Arc<[Option<(usize, Promotion)>]>,
+    /// Whether the writer schema at this site is a union.
+    pub(crate) writer_is_union: bool,
+    /// Whether the reader schema at this site is a union.
+    pub(crate) reader_is_union: bool,
+}
 
-/// Avro types are not nullable, with nullability instead encoded as a union
-/// where one of the variants is the null type.
+/// Holds the mapping information for resolving Avro enums.
 ///
-/// To accommodate this we special case two-variant unions where one of the
-/// variants is the null type, and use this to derive arrow's notion of nullability
-#[derive(Debug, Copy, Clone)]
-pub enum Nullability {
-    /// The nulls are encoded as the first union variant
-    NullFirst,
-    /// The nulls are encoded as the second union variant
-    NullSecond,
+/// When resolving schemas, the writer's enum symbols must be mapped to the reader's symbols.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct EnumMapping {
+    /// A mapping from the writer's symbol index to the reader's symbol index.
+    pub(crate) mapping: Arc<[i32]>,
+    /// The index to use for a writer's symbol that is not present in the reader's enum
+    /// and a default value is specified in the reader's schema.
+    pub(crate) default_index: i32,
+}
+
+#[cfg(feature = "canonical_extension_types")]
+fn with_extension_type(codec: &Codec, field: Field) -> Field {
+    if matches!(codec, Codec::Uuid) {
+        return field.with_extension_type(arrow_schema::extension::Uuid);
+    }
+    field // every other codec leaves the field unchanged
+}
 
 /// An Avro datatype mapped to the arrow data model
-#[derive(Debug, Clone)]
-pub struct AvroDataType {
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct AvroDataType {
     nullability: Option<Nullability>,
     metadata: HashMap<String, String>,
     codec: Codec,
+    pub(crate) resolution: Option<ResolutionInfo>,
 }
 
 impl AvroDataType {
     /// Create a new [`AvroDataType`] with the given parts.
-    pub fn new(
+    pub(crate) fn new(
         codec: Codec,
         metadata: HashMap<String, String>,
         nullability: Option<Nullability>,
@@ -55,20 +188,49 @@ impl AvroDataType {
             codec,
             metadata,
             nullability,
+            resolution: None,
+        }
+    }
+
+    #[inline]
+    fn new_with_resolution(
+        codec: Codec,
+        metadata: HashMap<String, String>,
+        nullability: Option<Nullability>,
+        resolution: Option<ResolutionInfo>,
+    ) -> Self {
+        Self {
+            codec,
+            metadata,
+            nullability,
+            resolution,
         }
     }
 
     /// Returns an arrow [`Field`] with the given name
-    pub fn field_with_name(&self, name: &str) -> Field {
-        let d = self.codec.data_type();
-        Field::new(name, d, self.nullability.is_some()).with_metadata(self.metadata.clone())
+    pub(crate) fn field_with_name(&self, name: &str) -> Field {
+        let mut nullable = self.nullability.is_some();
+        if !nullable {
+            if let Codec::Union(children, _, _) = self.codec() {
+                // If any encoded branch is `null`, mark field as nullable
+                if children.iter().any(|c| matches!(c.codec(), Codec::Null)) {
+                    nullable = true;
+                }
+            }
+        }
+        let data_type = self.codec.data_type();
+        let field = Field::new(name, data_type, nullable).with_metadata(self.metadata.clone());
+        #[cfg(feature = "canonical_extension_types")]
+        return with_extension_type(&self.codec, field);
+        #[cfg(not(feature = "canonical_extension_types"))]
+        field
     }
 
     /// Returns a reference to the codec used by this data type
     ///
     /// The codec determines how Avro data is encoded and mapped to Arrow data types.
     /// This is useful when we need to inspect or use the specific encoding of a field.
-    pub fn codec(&self) -> &Codec {
+    pub(crate) fn codec(&self) -> &Codec {
         &self.codec
     }
 
@@ -79,26 +241,266 @@ impl AvroDataType {
     /// - `Some(Nullability::NullFirst)` - Nulls are encoded as the first union variant
     /// - `Some(Nullability::NullSecond)` - Nulls are encoded as the second union variant
     /// - `None` - The type is not nullable
-    pub fn nullability(&self) -> Option<Nullability> {
+    pub(crate) fn nullability(&self) -> Option<Nullability> {
         self.nullability
     }
+
+    #[inline]
+    fn parse_default_literal(&self, default_json: &Value) -> Result<AvroLiteral, ArrowError> {
+        fn expect_string<'v>(
+            default_json: &'v Value,
+            data_type: &str,
+        ) -> Result<&'v str, ArrowError> {
+            match default_json {
+                Value::String(s) => Ok(s.as_str()),
+                _ => Err(ArrowError::SchemaError(format!(
+                    "Default value must be a JSON string for {data_type}"
+                ))),
+            }
+        }
+        // Bytes/fixed defaults are JSON strings: each char must be <= U+00FF and becomes one byte.
+        fn parse_bytes_default(
+            default_json: &Value,
+            expected_len: Option<usize>,
+        ) -> Result<Vec<u8>, ArrowError> {
+            let s = expect_string(default_json, "bytes/fixed logical types")?;
+            let mut out = Vec::with_capacity(s.len());
+            for ch in s.chars() {
+                let cp = ch as u32;
+                if cp > 0xFF {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Invalid codepoint U+{cp:04X} in bytes/fixed default; must be ≤ 0xFF"
+                    )));
+                }
+                out.push(cp as u8); // lossless: cp <= 0xFF was checked above
+            }
+            if let Some(len) = expected_len {
+                if out.len() != len {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Default length {} does not match expected fixed size {len}",
+                        out.len(),
+                    )));
+                }
+            }
+            Ok(out)
+        }
+        // Accepts only JSON numbers that `as_i64` can represent; `data_type` only labels the error.
+        fn parse_json_i64(default_json: &Value, data_type: &str) -> Result<i64, ArrowError> {
+            match default_json {
+                Value::Number(n) => n.as_i64().ok_or_else(|| {
+                    ArrowError::SchemaError(format!("Default {data_type} must be an integer"))
+                }),
+                _ => Err(ArrowError::SchemaError(format!(
+                    "Default {data_type} must be a JSON integer"
+                ))),
+            }
+        }
+        // Accepts JSON numbers that `as_f64` can convert; `data_type` only labels the error.
+        fn parse_json_f64(default_json: &Value, data_type: &str) -> Result<f64, ArrowError> {
+            match default_json {
+                Value::Number(n) => n.as_f64().ok_or_else(|| {
+                    ArrowError::SchemaError(format!("Default {data_type} must be a number"))
+                }),
+                _ => Err(ArrowError::SchemaError(format!(
+                    "Default {data_type} must be a JSON number"
+                ))),
+            }
+        }
+        // JSON null is settled first; any other default is dispatched on this type's codec below.
+        // Handle JSON nulls per-spec: allowed only for `null` type or unions with null FIRST
+        if default_json.is_null() {
+            return match self.codec() {
+                Codec::Null => Ok(AvroLiteral::Null),
+                Codec::Union(encodings, _, _) if !encodings.is_empty()
+                    && matches!(encodings[0].codec(), Codec::Null) =>
+                    {
+                        Ok(AvroLiteral::Null)
+                    }
+                _ if self.nullability() == Some(Nullability::NullFirst) => Ok(AvroLiteral::Null),
+                _ => Err(ArrowError::SchemaError(
+                    "JSON null default is only valid for `null` type or for a union whose first branch is `null`"
+                        .to_string(),
+                )),
+            };
+        }
+        let lit = match self.codec() {
+            Codec::Null => {
+                return Err(ArrowError::SchemaError(
+                    "Default for `null` type must be JSON null".to_string(),
+                ));
+            }
+            Codec::Boolean => match default_json {
+                Value::Bool(b) => AvroLiteral::Boolean(*b),
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        "Boolean default must be a JSON boolean".to_string(),
+                    ));
+                }
+            },
+            Codec::Int32 | Codec::Date32 | Codec::TimeMillis => {
+                let i = parse_json_i64(default_json, "int")?;
+                if i < i32::MIN as i64 || i > i32::MAX as i64 {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Default int {i} out of i32 range"
+                    )));
+                }
+                AvroLiteral::Int(i as i32) // in i32 range, checked above
+            }
+            Codec::Int64
+            | Codec::TimeMicros
+            | Codec::TimestampMillis(_)
+            | Codec::TimestampMicros(_)
+            | Codec::TimestampNanos(_) => AvroLiteral::Long(parse_json_i64(default_json, "long")?),
+            #[cfg(feature = "avro_custom_types")]
+            Codec::DurationNanos
+            | Codec::DurationMicros
+            | Codec::DurationMillis
+            | Codec::DurationSeconds => AvroLiteral::Long(parse_json_i64(default_json, "long")?),
+            Codec::Float32 => {
+                let f = parse_json_f64(default_json, "float")?;
+                if !f.is_finite() || f < f32::MIN as f64 || f > f32::MAX as f64 {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Default float {f} out of f32 range or not finite"
+                    )));
+                }
+                AvroLiteral::Float(f as f32) // finite and within f32 bounds, checked above
+            }
+            Codec::Float64 => AvroLiteral::Double(parse_json_f64(default_json, "double")?),
+            Codec::Utf8 | Codec::Utf8View | Codec::Uuid => {
+                AvroLiteral::String(expect_string(default_json, "string/uuid")?.to_string())
+            }
+            Codec::Binary => AvroLiteral::Bytes(parse_bytes_default(default_json, None)?),
+            Codec::Fixed(sz) => {
+                AvroLiteral::Bytes(parse_bytes_default(default_json, Some(*sz as usize))?)
+            }
+            Codec::Decimal(_, _, fixed_size) => {
+                AvroLiteral::Bytes(parse_bytes_default(default_json, *fixed_size)?)
+            }
+            Codec::Enum(symbols) => {
+                let s = expect_string(default_json, "enum")?;
+                if symbols.iter().any(|sym| sym == s) {
+                    AvroLiteral::Enum(s.to_string())
+                } else {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Default enum symbol {s:?} not found in reader enum symbols"
+                    )));
+                }
+            }
+            Codec::Interval => AvroLiteral::Bytes(parse_bytes_default(default_json, Some(12))?),
+            Codec::List(item_dt) => match default_json {
+                Value::Array(items) => AvroLiteral::Array(
+                    items
+                        .iter()
+                        .map(|v| item_dt.parse_default_literal(v))
+                        .collect::<Result<_, _>>()?,
+                ),
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        "Default value must be a JSON array for Avro array type".to_string(),
+                    ));
+                }
+            },
+            Codec::Map(val_dt) => match default_json {
+                Value::Object(map) => {
+                    let mut out = IndexMap::with_capacity(map.len());
+                    for (k, v) in map {
+                        out.insert(k.clone(), val_dt.parse_default_literal(v)?);
+                    }
+                    AvroLiteral::Map(out)
+                }
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        "Default value must be a JSON object for Avro map type".to_string(),
+                    ));
+                }
+            },
+            Codec::Struct(fields) => match default_json {
+                Value::Object(obj) => {
+                    let mut out: IndexMap<String, AvroLiteral> =
+                        IndexMap::with_capacity(fields.len());
+                    for f in fields.as_ref() {
+                        let name = f.name().to_string();
+                        if let Some(sub) = obj.get(&name) {
+                            out.insert(name, f.data_type().parse_default_literal(sub)?);
+                        } else {
+                            // Subfield absent from the object: fall back to its own stored default
+                            let stored_default =
+                                f.data_type().metadata.get(AVRO_FIELD_DEFAULT_METADATA_KEY);
+                            if stored_default.is_none()
+                                && f.data_type().nullability() == Some(Nullability::default())
+                            {
+                                out.insert(name, AvroLiteral::Null);
+                            } else if let Some(default_json) = stored_default {
+                                let v: Value =
+                                    serde_json::from_str(default_json).map_err(|e| {
+                                        ArrowError::SchemaError(format!(
+                                            "Failed to parse stored subfield default JSON for '{}': {e}",
+                                            f.name(),
+                                        ))
+                                    })?;
+                                out.insert(name, f.data_type().parse_default_literal(&v)?);
+                            } else {
+                                return Err(ArrowError::SchemaError(format!(
+                                    "Record default missing required subfield '{}' with non-nullable type {:?}",
+                                    f.name(),
+                                    f.data_type().codec()
+                                )));
+                            }
+                        }
+                    }
+                    AvroLiteral::Map(out) // record defaults reuse the `Map` literal, keyed by field name
+                }
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        "Default value for record/struct must be a JSON object".to_string(),
+                    ));
+                }
+            },
+            Codec::Union(encodings, _, _) => {
+                let Some(default_encoding) = encodings.first() else {
+                    return Err(ArrowError::SchemaError(
+                        "Union with no branches cannot have a default".to_string(),
+                    ));
+                };
+                default_encoding.parse_default_literal(default_json)? // parsed as the first branch
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Codec::RunEndEncoded(values, _) => values.parse_default_literal(default_json)?,
+        };
+        Ok(lit)
+    }
+
+    fn store_default(&mut self, default_json: &Value) -> Result<(), ArrowError> {
+        let text = serde_json::to_string(default_json).map_err(|e| {
+            ArrowError::ParseError(format!("Failed to serialize default to JSON: {e}"))
+        })?;
+        let key = AVRO_FIELD_DEFAULT_METADATA_KEY.to_string();
+        self.metadata.insert(key, text); // replaces any default stored earlier under this key
+        Ok(())
+    }
+
+    fn parse_and_store_default(&mut self, default_json: &Value) -> Result<AvroLiteral, ArrowError> {
+        // Parse first, so a default that fails validation is never written to the metadata.
+        let parsed = self.parse_default_literal(default_json)?;
+        self.store_default(default_json).map(|()| parsed)
+    }
 }
 
 /// A named [`AvroDataType`]
-#[derive(Debug, Clone)]
-pub struct AvroField {
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct AvroField {
     name: String,
     data_type: AvroDataType,
 }
 
 impl AvroField {
     /// Returns the arrow [`Field`]
-    pub fn field(&self) -> Field {
+    pub(crate) fn field(&self) -> Field {
         self.data_type.field_with_name(&self.name)
     }
 
     /// Returns the [`AvroDataType`]
-    pub fn data_type(&self) -> &AvroDataType {
+    pub(crate) fn data_type(&self) -> &AvroDataType {
         &self.data_type
     }
 
@@ -110,7 +512,7 @@ impl AvroField {
     ///
     /// Returns a new `AvroField` with the same structure, but with string types
     /// converted to use `Utf8View` instead of `Utf8`.
-    pub fn with_utf8view(&self) -> Self {
+    pub(crate) fn with_utf8view(&self) -> Self {
         let mut field = self.clone();
         if let Codec::Utf8 = field.data_type.codec {
             field.data_type.codec = Codec::Utf8View;
@@ -122,7 +524,7 @@ impl AvroField {
     ///
     /// This is the field name as defined in the Avro schema.
     /// It's used to identify fields within a record structure.
-    pub fn name(&self) -> &str {
+    pub(crate) fn name(&self) -> &str {
         &self.name
     }
 }
@@ -133,8 +535,8 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField {
     fn try_from(schema: &Schema<'a>) -> Result<Self, Self::Error> {
         match schema {
             Schema::Complex(ComplexType::Record(r)) => {
-                let mut resolver = Resolver::default();
-                let data_type = make_data_type(schema, None, &mut resolver, false)?;
+                let mut resolver = Maker::new(false, false);
+                let data_type = resolver.make_data_type(schema, None, None)?;
                 Ok(AvroField {
                     data_type,
                     name: r.name.to_string(),
@@ -147,11 +549,73 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField {
     }
 }
 
+/// Builder for an [`AvroField`]
+#[derive(Debug)]
+pub(crate) struct AvroFieldBuilder<'a> {
+    writer_schema: &'a Schema<'a>, // schema to build from; `build` requires a record
+    reader_schema: Option<&'a Schema<'a>>, // optional schema the writer is resolved against
+    use_utf8view: bool, // forwarded to `Maker::new` by `build`
+    strict_mode: bool, // forwarded to `Maker::new` by `build`
+}
+
+impl<'a> AvroFieldBuilder<'a> {
+    /// Starts a builder for `writer_schema`; reader schema, Utf8View and strict mode are off.
+    pub(crate) fn new(writer_schema: &'a Schema<'a>) -> Self {
+        Self {
+            writer_schema,
+            reader_schema: None,
+            use_utf8view: false,
+            strict_mode: false,
+        }
+    }
+
+    /// Supplies the reader schema used for schema resolution.
+    ///
+    /// With a reader schema set, `build` resolves the writer schema against it so the
+    /// resulting `AvroField` can handle differences between the two schemas.
+    #[inline]
+    pub(crate) fn with_reader_schema(mut self, reader_schema: &'a Schema<'a>) -> Self {
+        self.reader_schema = Some(reader_schema);
+        self
+    }
+
+    /// Chooses whether string types use `Utf8View` (`true`) or `Utf8` (`false`).
+    pub(crate) fn with_utf8view(mut self, use_utf8view: bool) -> Self {
+        self.use_utf8view = use_utf8view;
+        self
+    }
+
+    /// Turns strict mode on or off.
+    pub(crate) fn with_strict_mode(mut self, strict_mode: bool) -> Self {
+        self.strict_mode = strict_mode;
+        self
+    }
+
+    /// Build an [`AvroField`] from the builder
+    ///
+    /// The field is named after the writer record. If a reader schema was set,
+    /// the writer schema is resolved against it; otherwise it is parsed alone.
+    ///
+    /// Returns a `ParseError` when the writer schema is not a record.
+    pub(crate) fn build(self) -> Result<AvroField, ArrowError> {
+        let Schema::Complex(ComplexType::Record(record)) = self.writer_schema else {
+            return Err(ArrowError::ParseError(format!(
+                "Expected a Record schema to build an AvroField, but got {:?}",
+                self.writer_schema
+            )));
+        };
+        let mut maker = Maker::new(self.use_utf8view, self.strict_mode);
+        let data_type = maker.make_data_type(self.writer_schema, self.reader_schema, None)?;
+        let name = record.name.to_string();
+        Ok(AvroField { name, data_type })
+    }
+}
+
 /// An Avro encoding
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#encodings>
-#[derive(Debug, Clone)]
-pub enum Codec {
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) enum Codec {
     /// Represents Avro null type, maps to Arrow's Null data type
     Null,
     /// Represents Avro boolean type, maps to Arrow's Boolean data type
@@ -189,9 +653,27 @@ pub enum Codec {
     /// Maps to Arrow's Timestamp(TimeUnit::Microsecond) data type
     /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false)
     TimestampMicros(bool),
+    /// Represents Avro timestamp-nanos or local-timestamp-nanos logical type
+    ///
+    /// Maps to Arrow's Timestamp(TimeUnit::Nanosecond) data type
+    /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false)
+    TimestampNanos(bool),
     /// Represents Avro fixed type, maps to Arrow's FixedSizeBinary data type
     /// The i32 parameter indicates the fixed binary size
     Fixed(i32),
+    /// Represents Avro decimal type, maps to Arrow's Decimal32, Decimal64, Decimal128, or Decimal256 data types
+    ///
+    /// The fields are `(precision, scale, fixed_size)`.
+    /// - `precision` (`usize`): Total number of digits.
+    /// - `scale` (`Option<usize>`): Number of fractional digits.
+    /// - `fixed_size` (`Option<usize>`): Size in bytes if backed by a `fixed` type, otherwise `None`.
+    Decimal(usize, Option<usize>, Option<usize>),
+    /// Represents Avro Uuid type, a FixedSizeBinary with a length of 16.
+    Uuid,
+    /// Represents an Avro enum, maps to Arrow's Dictionary(Int32, Utf8) type.
+    ///
+    /// The enclosed value contains the enum's symbols.
+    Enum(Arc<[String]>),
     /// Represents Avro array type, maps to Arrow's List data type
     List(Arc<AvroDataType>),
     /// Represents Avro record type, maps to Arrow's Struct data type
@@ -200,6 +682,22 @@ pub enum Codec {
     Map(Arc<AvroDataType>),
     /// Represents Avro duration logical type, maps to Arrow's Interval(IntervalUnit::MonthDayNano) data type
     Interval,
+    /// Represents Avro union type, maps to Arrow's Union data type
+    Union(Arc<[AvroDataType]>, UnionFields, UnionMode),
+    /// Represents Avro custom logical type to map to Arrow Duration(TimeUnit::Nanosecond)
+    #[cfg(feature = "avro_custom_types")]
+    DurationNanos,
+    /// Represents Avro custom logical type to map to Arrow Duration(TimeUnit::Microsecond)
+    #[cfg(feature = "avro_custom_types")]
+    DurationMicros,
+    /// Represents Avro custom logical type to map to Arrow Duration(TimeUnit::Millisecond)
+    #[cfg(feature = "avro_custom_types")]
+    DurationMillis,
+    /// Represents Avro custom logical type to map to Arrow Duration(TimeUnit::Second)
+    #[cfg(feature = "avro_custom_types")]
+    DurationSeconds,
+    #[cfg(feature = "avro_custom_types")]
+    RunEndEncoded(Arc<AvroDataType>, u8),
 }
 
 impl Codec {
@@ -223,16 +721,45 @@ impl Codec {
             Self::TimestampMicros(is_utc) => {
                 DataType::Timestamp(TimeUnit::Microsecond, is_utc.then(|| "+00:00".into()))
             }
+            Self::TimestampNanos(is_utc) => {
+                DataType::Timestamp(TimeUnit::Nanosecond, is_utc.then(|| "+00:00".into()))
+            }
             Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano),
             Self::Fixed(size) => DataType::FixedSizeBinary(*size),
+            Self::Decimal(precision, scale, _size) => {
+                let p = *precision as u8;
+                let s = scale.unwrap_or(0) as i8;
+                #[cfg(feature = "small_decimals")]
+                {
+                    if *precision <= DECIMAL32_MAX_PRECISION as usize {
+                        DataType::Decimal32(p, s)
+                    } else if *precision <= DECIMAL64_MAX_PRECISION as usize {
+                        DataType::Decimal64(p, s)
+                    } else if *precision <= DECIMAL128_MAX_PRECISION as usize {
+                        DataType::Decimal128(p, s)
+                    } else {
+                        DataType::Decimal256(p, s)
+                    }
+                }
+                #[cfg(not(feature = "small_decimals"))]
+                {
+                    if *precision <= DECIMAL128_MAX_PRECISION as usize {
+                        DataType::Decimal128(p, s)
+                    } else {
+                        DataType::Decimal256(p, s)
+                    }
+                }
+            }
+            Self::Uuid => DataType::FixedSizeBinary(16),
+            Self::Enum(_) => {
+                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
+            }
             Self::List(f) => {
                 DataType::List(Arc::new(f.field_with_name(Field::LIST_FIELD_DEFAULT_NAME)))
             }
             Self::Struct(f) => DataType::Struct(f.iter().map(|x| x.field()).collect()),
             Self::Map(value_type) => {
-                let val_dt = value_type.codec.data_type();
-                let val_field = Field::new("value", val_dt, value_type.nullability.is_some())
-                    .with_metadata(value_type.metadata.clone());
+                let val_field = value_type.field_with_name("value");
                 DataType::Map(
                     Arc::new(Field::new(
                         "entries",
@@ -245,8 +772,48 @@ impl Codec {
                     false,
                 )
             }
+            Self::Union(_, fields, mode) => DataType::Union(fields.clone(), *mode),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationNanos => DataType::Duration(TimeUnit::Nanosecond),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationMicros => DataType::Duration(TimeUnit::Microsecond),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationMillis => DataType::Duration(TimeUnit::Millisecond),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationSeconds => DataType::Duration(TimeUnit::Second),
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(values, bits) => {
+                let run_ends_dt = match *bits {
+                    16 => DataType::Int16,
+                    32 => DataType::Int32,
+                    64 => DataType::Int64,
+                    _ => unreachable!(),
+                };
+                DataType::RunEndEncoded(
+                    Arc::new(Field::new("run_ends", run_ends_dt, false)),
+                    Arc::new(Field::new("values", values.codec().data_type(), true)),
+                )
+            }
+        }
+    }
+
+    /// Switches a `Utf8` codec to `Utf8View` when requested.
+    ///
+    /// Every codec is returned unchanged unless both hold:
+    /// 1. `use_utf8view` is true
+    /// 2. the codec is currently `Utf8`
+    pub(crate) fn with_utf8view(self, use_utf8view: bool) -> Self {
+        match self {
+            // The guard keeps `Utf8` untouched while the flag is off.
+            Self::Utf8 if use_utf8view => Self::Utf8View,
+            unchanged => unchanged,
         }
     }
+
+    #[inline]
+    fn union_field_name(&self) -> String {
+        UnionFieldKind::from(self).as_ref().to_owned() // snake_case kind, e.g. "time_millis"
+    }
 }
 
 impl From<PrimitiveType> for Codec {
@@ -264,34 +831,175 @@ impl From<PrimitiveType> for Codec {
     }
 }
 
-impl Codec {
-    /// Converts a string codec to use Utf8View if requested
-    ///
-    /// The conversion only happens if both:
-    /// 1. `use_utf8view` is true
-    /// 2. The codec is currently `Utf8`
-    ///
-    /// # Example
-    /// ```
-    /// # use arrow_avro::codec::Codec;
-    /// let utf8_codec1 = Codec::Utf8;
-    /// let utf8_codec2 = Codec::Utf8;
-    ///
-    /// // Convert to Utf8View
-    /// let view_codec = utf8_codec1.with_utf8view(true);
-    /// assert!(matches!(view_codec, Codec::Utf8View));
-    ///
-    /// // Don't convert if use_utf8view is false
-    /// let unchanged_codec = utf8_codec2.with_utf8view(false);
-    /// assert!(matches!(unchanged_codec, Codec::Utf8));
-    /// ```
-    pub fn with_utf8view(self, use_utf8view: bool) -> Self {
-        if use_utf8view && matches!(self, Self::Utf8) {
-            Self::Utf8View
-        } else {
-            self
+/// Largest base-10 precision that an Avro `fixed` decimal of `n` bytes can hold,
+/// given that the unscaled value is a big-endian two's-complement integer.
+///
+/// The Avro spec (Decimal logical type) bounds a fixed of length `n` by
+/// max precision = ⌊log₁₀(2^(8n − 1) − 1)⌋.
+///
+/// Returns `None` when `n` is 0 or above 32: Arrow's widest decimal, Decimal256,
+/// is 32 bytes wide with a maximum precision of 76.
+const fn max_precision_for_fixed_bytes(n: usize) -> Option<usize> {
+    // Exact bounds for n = 1..=32; entry `n - 1` belongs to an `n`-byte fixed.
+    // 1:2, 2:4, 3:6, 4:9, 5:11, 6:14, 7:16, 8:18, 9:21, 10:23, 11:26, 12:28,
+    // 13:31, 14:33, 15:35, 16:38, 17:40, 18:43, 19:45, 20:47, 21:50, 22:52,
+    // 23:55, 24:57, 25:59, 26:62, 27:64, 28:67, 29:69, 30:71, 31:74, 32:76
+    const BOUNDS: [usize; 32] = [
+        2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55, 57,
+        59, 62, 64, 67, 69, 71, 74, 76,
+    ];
+    if n == 0 || n > BOUNDS.len() {
+        return None;
+    }
+    Some(BOUNDS[n - 1])
+}
+
+/// Reads the `precision`, `scale` and `size` attributes of an Avro decimal.
+///
+/// `precision` falls back to 10 unless `precision_required`, `scale` to 0 and
+/// `size` to `fallback_size`. Returns `(precision, scale, size)`, or a
+/// `ParseError` when precision is missing or any value is out of range.
+fn parse_decimal_attributes(
+    attributes: &Attributes,
+    fallback_size: Option<usize>,
+    precision_required: bool,
+) -> Result<(usize, usize, Option<usize>), ArrowError> {
+    let uint_attr = |key: &str| {
+        let value = attributes.additional.get(key)?;
+        value.as_u64().map(|v| v as usize)
+    };
+    let default_precision = if precision_required { None } else { Some(10) };
+    let Some(precision) = uint_attr("precision").or(default_precision) else {
+        return Err(ArrowError::ParseError(
+            "Decimal requires precision".to_string(),
+        ));
+    };
+    let scale = uint_attr("scale").unwrap_or(0);
+    let size = uint_attr("size").or(fallback_size);
+    if precision == 0 {
+        return Err(ArrowError::ParseError(
+            "Decimal requires precision > 0".to_string(),
+        ));
+    }
+    if scale > precision {
+        return Err(ArrowError::ParseError(format!(
+            "Decimal has invalid scale > precision: scale={scale}, precision={precision}"
+        )));
+    }
+    let arrow_max = DECIMAL256_MAX_PRECISION as usize;
+    if precision > arrow_max {
+        return Err(ArrowError::ParseError(format!(
+            "Decimal precision {precision} exceeds maximum supported by Arrow ({})",
+            DECIMAL256_MAX_PRECISION
+        )));
+    }
+    if let Some(sz) = size {
+        let Some(max_p) = max_precision_for_fixed_bytes(sz) else {
+            return Err(ArrowError::ParseError(format!(
+                "Invalid fixed size for decimal: {sz}, must be between 1 and 32 bytes"
+            )));
+        };
+        if precision > max_p {
+            return Err(ArrowError::ParseError(format!(
+                "Decimal precision {precision} exceeds capacity of fixed size {sz} bytes (max {max_p})"
+            )));
+        }
+    }
+    Ok((precision, scale, size))
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, AsRefStr)]
+#[strum(serialize_all = "snake_case")] // `as_ref()` yields e.g. "timestamp_millis_utc"
+enum UnionFieldKind {
+    Null,
+    Boolean,
+    Int,
+    Long,
+    Float,
+    Double,
+    Bytes,
+    String, // used for both the Utf8 and Utf8View codecs
+    Date,
+    TimeMillis,
+    TimeMicros,
+    TimestampMillisUtc,
+    TimestampMillisLocal,
+    TimestampMicrosUtc,
+    TimestampMicrosLocal,
+    TimestampNanosUtc,
+    TimestampNanosLocal,
+    Duration, // used for `Codec::Interval` and the custom `Duration*` codecs
+    Fixed,
+    Decimal,
+    Enum,
+    Array, // used for `Codec::List`
+    Record, // used for `Codec::Struct`
+    Map,
+    Uuid,
+    Union,
+}
+
+impl From<&Codec> for UnionFieldKind {
+    fn from(codec: &Codec) -> Self {
+        match codec {
+            Codec::Null => Self::Null,
+            Codec::Boolean => Self::Boolean,
+            Codec::Int32 => Self::Int,
+            Codec::Int64 => Self::Long,
+            Codec::Float32 => Self::Float,
+            Codec::Float64 => Self::Double,
+            Codec::Binary => Self::Bytes,
+            Codec::Utf8 | Codec::Utf8View => Self::String,
+            Codec::Fixed(_) => Self::Fixed,
+            Codec::Decimal(..) => Self::Decimal,
+            Codec::Uuid => Self::Uuid,
+            Codec::Enum(_) => Self::Enum,
+            Codec::Date32 => Self::Date,
+            Codec::TimeMillis => Self::TimeMillis,
+            Codec::TimeMicros => Self::TimeMicros,
+            Codec::TimestampMillis(false) => Self::TimestampMillisLocal,
+            Codec::TimestampMillis(true) => Self::TimestampMillisUtc,
+            Codec::TimestampMicros(false) => Self::TimestampMicrosLocal,
+            Codec::TimestampMicros(true) => Self::TimestampMicrosUtc,
+            Codec::TimestampNanos(false) => Self::TimestampNanosLocal,
+            Codec::TimestampNanos(true) => Self::TimestampNanosUtc,
+            Codec::Interval => Self::Duration,
+            #[cfg(feature = "avro_custom_types")]
+            Codec::DurationNanos
+            | Codec::DurationMicros
+            | Codec::DurationMillis
+            | Codec::DurationSeconds => Self::Duration,
+            Codec::List(_) => Self::Array,
+            Codec::Map(_) => Self::Map,
+            Codec::Struct(_) => Self::Record,
+            Codec::Union(..) => Self::Union,
+            #[cfg(feature = "avro_custom_types")]
+            Codec::RunEndEncoded(values, _) => Self::from(values.codec()),
+        }
+    }
+}
+
+fn union_branch_name(dt: &AvroDataType) -> String {
+    if let Some(name) = dt.metadata.get(AVRO_NAME_METADATA_KEY) {
+        if name.contains(".") {
+            // A dotted name is treated as already fully qualified and used as-is
+            return name.to_string();
+        }
+        if let Some(ns) = dt.metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+            return format!("{ns}.{name}"); // qualify with the recorded namespace
         }
+        return name.to_string(); // named type with no recorded namespace
     }
+    dt.codec.union_field_name() // no recorded name: fall back to the codec's kind name
+}
+
+/// Builds Arrow union fields for `encodings`, using each branch's index as its type id.
+fn build_union_fields(encodings: &[AvroDataType]) -> Result<UnionFields, ArrowError> {
+    let named = |dt: &AvroDataType| dt.field_with_name(&union_branch_name(dt));
+    let arrow_fields: Vec<Field> = encodings.iter().map(named).collect();
+    // Type ids follow branch order: the i-th branch gets id `i`.
+    let type_ids: Vec<i8> = (0..arrow_fields.len()).map(|i| i as i8).collect();
+    UnionFields::try_new(type_ids, arrow_fields)
 }
 
 /// Resolves Avro type names to [`AvroDataType`]
@@ -304,14 +1012,13 @@ struct Resolver<'a> {
 
 impl<'a> Resolver<'a> {
     fn register(&mut self, name: &'a str, namespace: Option<&'a str>, schema: AvroDataType) {
-        self.map.insert((name, namespace.unwrap_or("")), schema);
+        self.map.insert((namespace.unwrap_or(""), name), schema);
     }
 
     fn resolve(&self, name: &str, namespace: Option<&'a str>) -> Result<AvroDataType, ArrowError> {
         let (namespace, name) = name
             .rsplit_once('.')
             .unwrap_or_else(|| (namespace.unwrap_or(""), name));
-
         self.map
             .get(&(namespace, name))
             .ok_or_else(|| ArrowError::ParseError(format!("Failed to resolve {namespace}.{name}")))
@@ -319,169 +1026,973 @@ impl<'a> Resolver<'a> {
     }
 }
 
-/// Parses a [`AvroDataType`] from the provided [`Schema`] and the given `name` and `namespace`
-///
-/// `name`: is name used to refer to `schema` in its parent
-/// `namespace`: an optional qualifier used as part of a type hierarchy
-/// If the data type is a string, convert to use Utf8View if requested
-///
-/// This function is used during the schema conversion process to determine whether
-/// string data should be represented as StringArray (default) or StringViewArray.
-///
-/// `use_utf8view`: if true, use Utf8View instead of Utf8 for string types
+/// Collects the full name of `name` together with the full name of every alias.
+///
+/// `name` is qualified with its own namespace `ns`; each alias is resolved with
+/// `ns` passed as the enclosing namespace.
+fn full_name_set(name: &str, ns: Option<&str>, aliases: &[&str]) -> HashSet<String> {
+    let mut names = HashSet::with_capacity(1 + aliases.len());
+    names.insert(make_full_name(name, ns, None).0);
+    names.extend(aliases.iter().map(|a| make_full_name(a, None, ns).0));
+    names
+}
+
+/// Returns true when writer and reader share any full name, aliases included.
+fn names_match(
+    writer_name: &str,
+    writer_namespace: Option<&str>,
+    writer_aliases: &[&str],
+    reader_name: &str,
+    reader_namespace: Option<&str>,
+    reader_aliases: &[&str],
+) -> bool {
+    let writer_names = full_name_set(writer_name, writer_namespace, writer_aliases);
+    let reader_names = full_name_set(reader_name, reader_namespace, reader_aliases);
+    writer_names.intersection(&reader_names).next().is_some()
+}
+
+/// Fails with a `ParseError` mentioning `data_type` unless the two names match.
+fn ensure_names_match(
+    data_type: &str,
+    writer_name: &str,
+    writer_namespace: Option<&str>,
+    writer_aliases: &[&str],
+    reader_name: &str,
+    reader_namespace: Option<&str>,
+    reader_aliases: &[&str],
+) -> Result<(), ArrowError> {
+    let matched = names_match(
+        writer_name,
+        writer_namespace,
+        writer_aliases,
+        reader_name,
+        reader_namespace,
+        reader_aliases,
+    );
+    matched.then_some(()).ok_or_else(|| {
+        ArrowError::ParseError(format!(
+            "{data_type} name mismatch writer={writer_name}, reader={reader_name}"
+        ))
+    })
+}
+
+/// Returns the primitive named by `schema`, whether bare or wrapped in a `Type`.
+fn primitive_of(schema: &Schema) -> Option<PrimitiveType> {
+    let (Schema::TypeName(name) | Schema::Type(Type { r#type: name, .. })) = schema else {
+        return None;
+    };
+    let TypeName::Primitive(primitive) = name else {
+        return None;
+    };
+    Some(*primitive)
+}
+
+/// For a two-branch union with exactly one bare `null`, gives its position and the other branch.
+fn nullable_union_variants<'x, 'y>(
+    variant: &'y [Schema<'x>],
+) -> Option<(Nullability, &'y Schema<'x>)> {
+    let [first, second] = variant else {
+        return None;
+    };
+    let null = |s: &Schema<'x>| {
+        matches!(s, Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)))
+    };
+    if null(first) && !null(second) {
+        Some((Nullability::NullFirst, second))
+    } else if null(second) && !null(first) {
+        Some((Nullability::NullSecond, first))
+    } else {
+        None
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+enum UnionBranchKey {
+    Named(String), // full name of a record, enum, fixed or named-type reference
+    Primitive(PrimitiveType), // a primitive branch, keyed by its primitive type
+    Array, // any array branch
+    Map, // any map branch
+}
+
+fn branch_key_of<'a>(s: &Schema<'a>, enclosing_ns: Option<&'a str>) -> Option<UnionBranchKey> {
+    let (name, namespace) = match s {
+        Schema::TypeName(TypeName::Primitive(p))
+        | Schema::Type(Type {
+            r#type: TypeName::Primitive(p),
+            ..
+        }) => return Some(UnionBranchKey::Primitive(*p)),
+        Schema::TypeName(TypeName::Ref(name))
+        | Schema::Type(Type {
+            r#type: TypeName::Ref(name),
+            ..
+        }) => (name, None), // a reference supplies no namespace of its own
+        Schema::Complex(ComplexType::Array(_)) => return Some(UnionBranchKey::Array),
+        Schema::Complex(ComplexType::Map(_)) => return Some(UnionBranchKey::Map),
+        Schema::Complex(ComplexType::Record(r)) => (&r.name, r.namespace),
+        Schema::Complex(ComplexType::Enum(e)) => (&e.name, e.namespace),
+        Schema::Complex(ComplexType::Fixed(f)) => (&f.name, f.namespace),
+        Schema::Union(_) => return None, // a nested union gets no branch key
+    };
+    let (full, _) = make_full_name(name, namespace, enclosing_ns); // key by full name
+    Some(UnionBranchKey::Named(full))
+}
+
+/// Returns a description of the first union branch whose key duplicates an
+/// earlier branch, or `None` when all keyed branches are distinct. Branches
+/// without a key (nested unions) are ignored.
+fn union_first_duplicate<'a>(
+    branches: &'a [Schema<'a>],
+    enclosing_ns: Option<&'a str>,
+) -> Option<String> {
+    let mut seen = HashSet::with_capacity(branches.len());
+    // `insert` returns false exactly when the key was already present.
+    let duplicate = branches
+        .iter()
+        .filter_map(|schema| branch_key_of(schema, enclosing_ns))
+        .find(|key| !seen.insert(key.clone()))?;
+    Some(match duplicate {
+        UnionBranchKey::Named(full) => format!("named type {full}"),
+        UnionBranchKey::Primitive(p) => format!("primitive {}", p.as_ref()),
+        UnionBranchKey::Array => "array".to_string(),
+        UnionBranchKey::Map => "map".to_string(),
+    })
+}
+
+/// Builds [`AvroDataType`]s from Avro schemas, resolving named types along the way
 ///
-/// See [`Resolver`] for more information
-fn make_data_type<'a>(
-    schema: &Schema<'a>,
-    namespace: Option<&'a str>,
-    resolver: &mut Resolver<'a>,
+/// See <https://avro.apache.org/docs/1.11.1/specification/#names>
+struct Maker<'a> {
+    resolver: Resolver<'a>,
     use_utf8view: bool,
-) -> Result<AvroDataType, ArrowError> {
-    match schema {
-        Schema::TypeName(TypeName::Primitive(p)) => {
-            let codec: Codec = (*p).into();
-            let codec = codec.with_utf8view(use_utf8view);
-            Ok(AvroDataType {
-                nullability: None,
-                metadata: Default::default(),
-                codec,
-            })
+    strict_mode: bool,
+}
+
+impl<'a> Maker<'a> {
+    fn new(use_utf8view: bool, strict_mode: bool) -> Self {
+        Self {
+            resolver: Default::default(),
+            use_utf8view,
+            strict_mode,
         }
-        Schema::TypeName(TypeName::Ref(name)) => resolver.resolve(name, namespace),
-        Schema::Union(f) => {
-            // Special case the common case of nullable primitives
-            let null = f
-                .iter()
-                .position(|x| x == &Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)));
-            match (f.len() == 2, null) {
-                (true, Some(0)) => {
-                    let mut field = make_data_type(&f[1], namespace, resolver, use_utf8view)?;
-                    field.nullability = Some(Nullability::NullFirst);
-                    Ok(field)
+    }
+
+    /// Sets the nullability of a run-end-encoded codec's values type; other codecs are untouched.
+    #[cfg(feature = "avro_custom_types")]
+    #[inline]
+    fn propagate_nullability_into_ree(dt: &mut AvroDataType, nb: Nullability) {
+        // Borrow instead of cloning the whole codec; `make_mut` copies only if the Arc is shared.
+        if let Codec::RunEndEncoded(values, _) = &mut dt.codec {
+            Arc::make_mut(values).nullability = Some(nb);
+        }
+    }
+
+    fn make_data_type<'s>(
+        &mut self,
+        writer_schema: &'s Schema<'a>,
+        reader_schema: Option<&'s Schema<'a>>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        let Some(reader) = reader_schema else {
+            return self.parse_type(writer_schema, namespace); // no reader: parse as written
+        };
+        self.resolve_type(writer_schema, reader, namespace)
+    }
+
+    /// Parses a [`AvroDataType`] from the provided `Schema` and the given `name` and `namespace`
+    ///
+    /// `name`: is the name used to refer to `schema` in its parent
+    /// `namespace`: an optional qualifier used as part of a type hierarchy
+    /// If the data type is a string, convert to use Utf8View if requested
+    ///
+    /// This function is used during the schema conversion process to determine whether
+    /// string data should be represented as StringArray (default) or StringViewArray.
+    ///
+    /// `use_utf8view`: if true, use Utf8View instead of Utf8 for string types
+    ///
+    /// See [`Resolver`] for more information
+    fn parse_type<'s>(
+        &mut self,
+        schema: &'s Schema<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        match schema {
+            Schema::TypeName(TypeName::Primitive(p)) => Ok(AvroDataType::new(
+                Codec::from(*p).with_utf8view(self.use_utf8view),
+                Default::default(),
+                None,
+            )),
+            Schema::TypeName(TypeName::Ref(name)) => self.resolver.resolve(name, namespace),
+            Schema::Union(f) => {
+                let null = f
+                    .iter()
+                    .position(|x| x == &Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)));
+                match (f.len() == 2, null) {
+                    (true, Some(0)) => {
+                        let mut field = self.parse_type(&f[1], namespace)?;
+                        field.nullability = Some(Nullability::NullFirst);
+                        #[cfg(feature = "avro_custom_types")]
+                        Self::propagate_nullability_into_ree(&mut field, Nullability::NullFirst);
+                        return Ok(field);
+                    }
+                    (true, Some(1)) => {
+                        if self.strict_mode {
+                            return Err(ArrowError::SchemaError(
+                                "Found Avro union of the form ['T','null'], which is disallowed in strict_mode"
+                                    .to_string(),
+                            ));
+                        }
+                        let mut field = self.parse_type(&f[0], namespace)?;
+                        field.nullability = Some(Nullability::NullSecond);
+                        #[cfg(feature = "avro_custom_types")]
+                        Self::propagate_nullability_into_ree(&mut field, Nullability::NullSecond);
+                        return Ok(field);
+                    }
+                    _ => {}
                 }
-                (true, Some(1)) => {
-                    let mut field = make_data_type(&f[0], namespace, resolver, use_utf8view)?;
-                    field.nullability = Some(Nullability::NullSecond);
-                    Ok(field)
+                // Validate: unions may not immediately contain unions
+                if f.iter().any(|s| matches!(s, Schema::Union(_))) {
+                    return Err(ArrowError::SchemaError(
+                        "Avro unions may not immediately contain other unions".to_string(),
+                    ));
                 }
-                _ => Err(ArrowError::NotYetImplemented(format!(
-                    "Union of {f:?} not currently supported"
-                ))),
-            }
-        }
-        Schema::Complex(c) => match c {
-            ComplexType::Record(r) => {
-                let namespace = r.namespace.or(namespace);
-                let fields = r
-                    .fields
+                // Validate: duplicates (named by full name; non-named by kind)
+                if let Some(dup) = union_first_duplicate(f, namespace) {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Avro union contains duplicate branch type: {dup}"
+                    )));
+                }
+                // Parse all branches
+                let children: Vec<AvroDataType> = f
                     .iter()
-                    .map(|field| {
-                        Ok(AvroField {
-                            name: field.name.to_string(),
-                            data_type: make_data_type(
-                                &field.r#type,
-                                namespace,
-                                resolver,
-                                use_utf8view,
-                            )?,
+                    .map(|s| self.parse_type(s, namespace))
+                    .collect::<Result<_, _>>()?;
+                // Build Arrow layout once here
+                let union_fields = build_union_fields(&children)?;
+                Ok(AvroDataType::new(
+                    Codec::Union(Arc::from(children), union_fields, UnionMode::Dense),
+                    Default::default(),
+                    None,
+                ))
+            }
+            Schema::Complex(c) => match c {
+                ComplexType::Record(r) => {
+                    let namespace = r.namespace.or(namespace);
+                    let mut metadata = r.attributes.field_metadata();
+                    let fields = r
+                        .fields
+                        .iter()
+                        .map(|field| {
+                            Ok(AvroField {
+                                name: field.name.to_string(),
+                                data_type: self.parse_type(&field.r#type, namespace)?,
+                            })
                         })
+                        .collect::<Result<_, ArrowError>>()?;
+                    metadata.insert(AVRO_NAME_METADATA_KEY.to_string(), r.name.to_string());
+                    if let Some(ns) = namespace {
+                        metadata.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), ns.to_string());
+                    }
+                    let field = AvroDataType {
+                        nullability: None,
+                        codec: Codec::Struct(fields),
+                        metadata,
+                        resolution: None,
+                    };
+                    self.resolver.register(r.name, namespace, field.clone());
+                    Ok(field)
+                }
+                ComplexType::Array(a) => {
+                    let field = self.parse_type(a.items.as_ref(), namespace)?;
+                    Ok(AvroDataType {
+                        nullability: None,
+                        metadata: a.attributes.field_metadata(),
+                        codec: Codec::List(Arc::new(field)),
+                        resolution: None,
                     })
-                    .collect::<Result<_, ArrowError>>()?;
-
-                let field = AvroDataType {
-                    nullability: None,
-                    codec: Codec::Struct(fields),
-                    metadata: r.attributes.field_metadata(),
-                };
-                resolver.register(r.name, namespace, field.clone());
-                Ok(field)
-            }
-            ComplexType::Array(a) => {
-                let mut field =
-                    make_data_type(a.items.as_ref(), namespace, resolver, use_utf8view)?;
-                Ok(AvroDataType {
-                    nullability: None,
-                    metadata: a.attributes.field_metadata(),
-                    codec: Codec::List(Arc::new(field)),
-                })
-            }
-            ComplexType::Fixed(f) => {
-                let size = f.size.try_into().map_err(|e| {
-                    ArrowError::ParseError(format!("Overflow converting size to i32: {e}"))
-                })?;
-
-                let field = AvroDataType {
-                    nullability: None,
-                    metadata: f.attributes.field_metadata(),
-                    codec: Codec::Fixed(size),
-                };
-                resolver.register(f.name, namespace, field.clone());
-                Ok(field)
-            }
-            ComplexType::Enum(e) => Err(ArrowError::NotYetImplemented(format!(
-                "Enum of {e:?} not currently supported"
-            ))),
-            ComplexType::Map(m) => {
-                let val = make_data_type(&m.values, namespace, resolver, use_utf8view)?;
-                Ok(AvroDataType {
-                    nullability: None,
-                    metadata: m.attributes.field_metadata(),
-                    codec: Codec::Map(Arc::new(val)),
-                })
-            }
-        },
-        Schema::Type(t) => {
-            let mut field = make_data_type(
-                &Schema::TypeName(t.r#type.clone()),
-                namespace,
-                resolver,
-                use_utf8view,
-            )?;
-
-            // https://avro.apache.org/docs/1.11.1/specification/#logical-types
-            match (t.attributes.logical_type, &mut field.codec) {
-                (Some("decimal"), c @ Codec::Fixed(_)) => {
-                    return Err(ArrowError::NotYetImplemented(
-                        "Decimals are not currently supported".to_string(),
-                    ))
                 }
-                (Some("date"), c @ Codec::Int32) => *c = Codec::Date32,
-                (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis,
-                (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros,
-                (Some("timestamp-millis"), c @ Codec::Int64) => *c = Codec::TimestampMillis(true),
-                (Some("timestamp-micros"), c @ Codec::Int64) => *c = Codec::TimestampMicros(true),
-                (Some("local-timestamp-millis"), c @ Codec::Int64) => {
-                    *c = Codec::TimestampMillis(false)
+                ComplexType::Fixed(f) => {
+                    let size = f.size.try_into().map_err(|e| {
+                        ArrowError::ParseError(format!("Overflow converting size to i32: {e}"))
+                    })?;
+                    let namespace = f.namespace.or(namespace);
+                    let mut metadata = f.attributes.field_metadata();
+                    metadata.insert(AVRO_NAME_METADATA_KEY.to_string(), f.name.to_string());
+                    if let Some(ns) = namespace {
+                        metadata.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), ns.to_string());
+                    }
+                    let field = match f.attributes.logical_type {
+                        Some("decimal") => {
+                            let (precision, scale, _) =
+                                parse_decimal_attributes(&f.attributes, Some(size as usize), true)?;
+                            AvroDataType {
+                                nullability: None,
+                                metadata,
+                                codec: Codec::Decimal(precision, Some(scale), Some(size as usize)),
+                                resolution: None,
+                            }
+                        }
+                        Some("duration") => {
+                            if size != 12 {
+                                return Err(ArrowError::ParseError(format!(
+                                    "Invalid fixed size for Duration: {size}, must be 12"
+                                )));
+                            };
+                            AvroDataType {
+                                nullability: None,
+                                metadata,
+                                codec: Codec::Interval,
+                                resolution: None,
+                            }
+                        }
+                        _ => AvroDataType {
+                            nullability: None,
+                            metadata,
+                            codec: Codec::Fixed(size),
+                            resolution: None,
+                        },
+                    };
+                    self.resolver.register(f.name, namespace, field.clone());
+                    Ok(field)
+                }
+                ComplexType::Enum(e) => {
+                    let namespace = e.namespace.or(namespace);
+                    let symbols = e
+                        .symbols
+                        .iter()
+                        .map(|s| s.to_string())
+                        .collect::<Arc<[String]>>();
+                    let mut metadata = e.attributes.field_metadata();
+                    let symbols_json = serde_json::to_string(&e.symbols).map_err(|e| {
+                        ArrowError::ParseError(format!("Failed to serialize enum symbols: {e}"))
+                    })?;
+                    metadata.insert(AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(), symbols_json);
+                    metadata.insert(AVRO_NAME_METADATA_KEY.to_string(), e.name.to_string());
+                    if let Some(ns) = namespace {
+                        metadata.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), ns.to_string());
+                    }
+                    let field = AvroDataType {
+                        nullability: None,
+                        metadata,
+                        codec: Codec::Enum(symbols),
+                        resolution: None,
+                    };
+                    self.resolver.register(e.name, namespace, field.clone());
+                    Ok(field)
                 }
-                (Some("local-timestamp-micros"), c @ Codec::Int64) => {
-                    *c = Codec::TimestampMicros(false)
+                ComplexType::Map(m) => {
+                    let val = self.parse_type(&m.values, namespace)?;
+                    Ok(AvroDataType {
+                        nullability: None,
+                        metadata: m.attributes.field_metadata(),
+                        codec: Codec::Map(Arc::new(val)),
+                        resolution: None,
+                    })
                 }
-                (Some("duration"), c @ Codec::Fixed(12)) => *c = Codec::Interval,
-                (Some(logical), _) => {
-                    // Insert unrecognized logical type into metadata map
-                    field.metadata.insert("logicalType".into(), logical.into());
+            },
+            Schema::Type(t) => {
+                let mut field = self.parse_type(&Schema::TypeName(t.r#type.clone()), namespace)?;
+                // https://avro.apache.org/docs/1.11.1/specification/#logical-types
+                match (t.attributes.logical_type, &mut field.codec) {
+                    (Some("decimal"), c @ Codec::Binary) => {
+                        let (prec, sc, _) = parse_decimal_attributes(&t.attributes, None, false)?;
+                        *c = Codec::Decimal(prec, Some(sc), None);
+                    }
+                    (Some("date"), c @ Codec::Int32) => *c = Codec::Date32,
+                    (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis,
+                    (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros,
+                    (Some("timestamp-millis"), c @ Codec::Int64) => {
+                        *c = Codec::TimestampMillis(true)
+                    }
+                    (Some("timestamp-micros"), c @ Codec::Int64) => {
+                        *c = Codec::TimestampMicros(true)
+                    }
+                    (Some("local-timestamp-millis"), c @ Codec::Int64) => {
+                        *c = Codec::TimestampMillis(false)
+                    }
+                    (Some("local-timestamp-micros"), c @ Codec::Int64) => {
+                        *c = Codec::TimestampMicros(false)
+                    }
+                    (Some("timestamp-nanos"), c @ Codec::Int64) => *c = Codec::TimestampNanos(true),
+                    (Some("local-timestamp-nanos"), c @ Codec::Int64) => {
+                        *c = Codec::TimestampNanos(false)
+                    }
+                    (Some("uuid"), c @ Codec::Utf8) => {
+                        // Map Avro string+logicalType=uuid into the UUID Codec,
+                        // and preserve the logicalType in Arrow field metadata
+                        // so writers can round-trip it correctly.
+                        *c = Codec::Uuid;
+                        field.metadata.insert("logicalType".into(), "uuid".into());
+                    }
+                    #[cfg(feature = "avro_custom_types")]
+                    (Some("arrow.duration-nanos"), c @ Codec::Int64) => *c = Codec::DurationNanos,
+                    #[cfg(feature = "avro_custom_types")]
+                    (Some("arrow.duration-micros"), c @ Codec::Int64) => *c = Codec::DurationMicros,
+                    #[cfg(feature = "avro_custom_types")]
+                    (Some("arrow.duration-millis"), c @ Codec::Int64) => *c = Codec::DurationMillis,
+                    #[cfg(feature = "avro_custom_types")]
+                    (Some("arrow.duration-seconds"), c @ Codec::Int64) => {
+                        *c = Codec::DurationSeconds
+                    }
+                    #[cfg(feature = "avro_custom_types")]
+                    (Some("arrow.run-end-encoded"), _) => {
+                        let bits_u8: u8 = t
+                            .attributes
+                            .additional
+                            .get("arrow.runEndIndexBits")
+                            .and_then(|v| v.as_u64())
+                            .and_then(|n| u8::try_from(n).ok())
+                            .ok_or_else(|| ArrowError::ParseError(
+                                "arrow.run-end-encoded requires 'arrow.runEndIndexBits' (one of 16, 32, or 64)"
+                                    .to_string(),
+                            ))?;
+                        if bits_u8 != 16 && bits_u8 != 32 && bits_u8 != 64 {
+                            return Err(ArrowError::ParseError(format!(
+                                "Invalid 'arrow.runEndIndexBits' value {bits_u8}; must be 16, 32, or 64"
+                            )));
+                        }
+                        // Wrap the parsed underlying site as REE
+                        let values_site = field.clone();
+                        field.codec = Codec::RunEndEncoded(Arc::new(values_site), bits_u8);
+                    }
+                    (Some(logical), _) => {
+                        // Insert unrecognized logical type into metadata map
+                        field.metadata.insert("logicalType".into(), logical.into());
+                    }
+                    (None, _) => {}
                 }
-                (None, _) => {}
-            }
-
-            if !t.attributes.additional.is_empty() {
-                for (k, v) in &t.attributes.additional {
-                    field.metadata.insert(k.to_string(), v.to_string());
+                if matches!(field.codec, Codec::Int64) {
+                    if let Some(unit) = t
+                        .attributes
+                        .additional
+                        .get("arrowTimeUnit")
+                        .and_then(|v| v.as_str())
+                    {
+                        if unit == "nanosecond" {
+                            field.codec = Codec::TimestampNanos(false);
+                        }
+                    }
                 }
+                if !t.attributes.additional.is_empty() {
+                    for (k, v) in &t.attributes.additional {
+                        field.metadata.insert(k.to_string(), v.to_string());
+                    }
+                }
+                Ok(field)
             }
-            Ok(field)
         }
     }
-}
 
-#[cfg(test)]
+    fn resolve_type<'s>(
+        &mut self,
+        writer_schema: &'s Schema<'a>,
+        reader_schema: &'s Schema<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        if let (Some(write_primitive), Some(read_primitive)) =
+            (primitive_of(writer_schema), primitive_of(reader_schema))
+        {
+            return self.resolve_primitives(write_primitive, read_primitive, reader_schema);
+        }
+        match (writer_schema, reader_schema) {
+            (Schema::Union(writer_variants), Schema::Union(reader_variants)) => {
+                let writer_variants = writer_variants.as_slice();
+                let reader_variants = reader_variants.as_slice();
+                match (
+                    nullable_union_variants(writer_variants),
+                    nullable_union_variants(reader_variants),
+                ) {
+                    (Some((w_nb, w_nonnull)), Some((_r_nb, r_nonnull))) => {
+                        let mut dt = self.make_data_type(w_nonnull, Some(r_nonnull), namespace)?;
+                        dt.nullability = Some(w_nb);
+                        #[cfg(feature = "avro_custom_types")]
+                        Self::propagate_nullability_into_ree(&mut dt, w_nb);
+                        Ok(dt)
+                    }
+                    _ => self.resolve_unions(writer_variants, reader_variants, namespace),
+                }
+            }
+            (Schema::Union(writer_variants), reader_non_union) => {
+                let writer_to_reader: Vec<Option<(usize, Promotion)>> = writer_variants
+                    .iter()
+                    .map(|writer| {
+                        self.resolve_type(writer, reader_non_union, namespace)
+                            .ok()
+                            .map(|tmp| (0usize, Self::coercion_from(&tmp)))
+                    })
+                    .collect();
+                let mut dt = self.parse_type(reader_non_union, namespace)?;
+                dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
+                    writer_to_reader: Arc::from(writer_to_reader),
+                    writer_is_union: true,
+                    reader_is_union: false,
+                }));
+                Ok(dt)
+            }
+            (writer_non_union, Schema::Union(reader_variants)) => {
+                let promo = self.find_best_promotion(
+                    writer_non_union,
+                    reader_variants.as_slice(),
+                    namespace,
+                );
+                let Some((reader_index, promotion)) = promo else {
+                    return Err(ArrowError::SchemaError(
+                        "Writer schema does not match any reader union branch".to_string(),
+                    ));
+                };
+                let mut dt = self.parse_type(reader_schema, namespace)?;
+                dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
+                    writer_to_reader: Arc::from(vec![Some((reader_index, promotion))]),
+                    writer_is_union: false,
+                    reader_is_union: true,
+                }));
+                Ok(dt)
+            }
+            (
+                Schema::Complex(ComplexType::Array(writer_array)),
+                Schema::Complex(ComplexType::Array(reader_array)),
+            ) => self.resolve_array(writer_array, reader_array, namespace),
+            (
+                Schema::Complex(ComplexType::Map(writer_map)),
+                Schema::Complex(ComplexType::Map(reader_map)),
+            ) => self.resolve_map(writer_map, reader_map, namespace),
+            (
+                Schema::Complex(ComplexType::Fixed(writer_fixed)),
+                Schema::Complex(ComplexType::Fixed(reader_fixed)),
+            ) => self.resolve_fixed(writer_fixed, reader_fixed, reader_schema, namespace),
+            (
+                Schema::Complex(ComplexType::Record(writer_record)),
+                Schema::Complex(ComplexType::Record(reader_record)),
+            ) => self.resolve_records(writer_record, reader_record, namespace),
+            (
+                Schema::Complex(ComplexType::Enum(writer_enum)),
+                Schema::Complex(ComplexType::Enum(reader_enum)),
+            ) => self.resolve_enums(writer_enum, reader_enum, reader_schema, namespace),
+            (Schema::TypeName(TypeName::Ref(_)), _) => self.parse_type(reader_schema, namespace),
+            (_, Schema::TypeName(TypeName::Ref(_))) => self.parse_type(reader_schema, namespace),
+            _ => Err(ArrowError::NotYetImplemented(
+                "Other resolutions not yet implemented".to_string(),
+            )),
+        }
+    }
+
+    #[inline]
+    fn coercion_from(dt: &AvroDataType) -> Promotion {
+        match dt.resolution.as_ref() {
+            Some(ResolutionInfo::Promotion(promotion)) => *promotion,
+            _ => Promotion::Direct,
+        }
+    }
+
+    fn find_best_promotion(
+        &mut self,
+        writer: &Schema<'a>,
+        reader_variants: &[Schema<'a>],
+        namespace: Option<&'a str>,
+    ) -> Option<(usize, Promotion)> {
+        let mut first_promotion: Option<(usize, Promotion)> = None;
+        for (reader_index, reader) in reader_variants.iter().enumerate() {
+            if let Ok(tmp) = self.resolve_type(writer, reader, namespace) {
+                let promotion = Self::coercion_from(&tmp);
+                if promotion == Promotion::Direct {
+                    // An exact match is best, return immediately.
+                    return Some((reader_index, promotion));
+                } else if first_promotion.is_none() {
+                    // Store the first valid promotion but keep searching for a direct match.
+                    first_promotion = Some((reader_index, promotion));
+                }
+            }
+        }
+        first_promotion
+    }
+
+    fn resolve_unions<'s>(
+        &mut self,
+        writer_variants: &'s [Schema<'a>],
+        reader_variants: &'s [Schema<'a>],
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        let reader_encodings: Vec<AvroDataType> = reader_variants
+            .iter()
+            .map(|reader_schema| self.parse_type(reader_schema, namespace))
+            .collect::<Result<_, _>>()?;
+        let mut writer_to_reader: Vec<Option<(usize, Promotion)>> =
+            Vec::with_capacity(writer_variants.len());
+        for writer in writer_variants {
+            writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace));
+        }
+        let union_fields = build_union_fields(&reader_encodings)?;
+        let mut dt = AvroDataType::new(
+            Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense),
+            Default::default(),
+            None,
+        );
+        dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
+            writer_to_reader: Arc::from(writer_to_reader),
+            writer_is_union: true,
+            reader_is_union: true,
+        }));
+        Ok(dt)
+    }
+
+    fn resolve_array(
+        &mut self,
+        writer_array: &Array<'a>,
+        reader_array: &Array<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        Ok(AvroDataType {
+            nullability: None,
+            metadata: reader_array.attributes.field_metadata(),
+            codec: Codec::List(Arc::new(self.make_data_type(
+                writer_array.items.as_ref(),
+                Some(reader_array.items.as_ref()),
+                namespace,
+            )?)),
+            resolution: None,
+        })
+    }
+
+    fn resolve_map(
+        &mut self,
+        writer_map: &Map<'a>,
+        reader_map: &Map<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        Ok(AvroDataType {
+            nullability: None,
+            metadata: reader_map.attributes.field_metadata(),
+            codec: Codec::Map(Arc::new(self.make_data_type(
+                &writer_map.values,
+                Some(&reader_map.values),
+                namespace,
+            )?)),
+            resolution: None,
+        })
+    }
+
+    fn resolve_fixed<'s>(
+        &mut self,
+        writer_fixed: &Fixed<'a>,
+        reader_fixed: &Fixed<'a>,
+        reader_schema: &'s Schema<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        ensure_names_match(
+            "Fixed",
+            writer_fixed.name,
+            writer_fixed.namespace,
+            &writer_fixed.aliases,
+            reader_fixed.name,
+            reader_fixed.namespace,
+            &reader_fixed.aliases,
+        )?;
+        if writer_fixed.size != reader_fixed.size {
+            return Err(ArrowError::SchemaError(format!(
+                "Fixed size mismatch for {}: writer={}, reader={}",
+                reader_fixed.name, writer_fixed.size, reader_fixed.size
+            )));
+        }
+        self.parse_type(reader_schema, namespace)
+    }
+
+    fn resolve_primitives(
+        &mut self,
+        write_primitive: PrimitiveType,
+        read_primitive: PrimitiveType,
+        reader_schema: &Schema<'a>,
+    ) -> Result<AvroDataType, ArrowError> {
+        if write_primitive == read_primitive {
+            return self.parse_type(reader_schema, None);
+        }
+        let promotion = match (write_primitive, read_primitive) {
+            (PrimitiveType::Int, PrimitiveType::Long) => Promotion::IntToLong,
+            (PrimitiveType::Int, PrimitiveType::Float) => Promotion::IntToFloat,
+            (PrimitiveType::Int, PrimitiveType::Double) => Promotion::IntToDouble,
+            (PrimitiveType::Long, PrimitiveType::Float) => Promotion::LongToFloat,
+            (PrimitiveType::Long, PrimitiveType::Double) => Promotion::LongToDouble,
+            (PrimitiveType::Float, PrimitiveType::Double) => Promotion::FloatToDouble,
+            (PrimitiveType::String, PrimitiveType::Bytes) => Promotion::StringToBytes,
+            (PrimitiveType::Bytes, PrimitiveType::String) => Promotion::BytesToString,
+            _ => {
+                return Err(ArrowError::ParseError(format!(
+                    "Illegal promotion {write_primitive:?} to {read_primitive:?}"
+                )));
+            }
+        };
+        let mut datatype = self.parse_type(reader_schema, None)?;
+        datatype.resolution = Some(ResolutionInfo::Promotion(promotion));
+        Ok(datatype)
+    }
+
+    // Resolve writer vs. reader enum schemas according to Avro 1.11.1.
+    //
+    // # How enums resolve (writer to reader)
+    // Per “Schema Resolution”:
+    // * The two schemas must refer to the same (unqualified) enum name (or match
+    //   via alias rewriting).
+    // * If the writer’s symbol is not present in the reader’s enum and the reader
+    //   enum has a `default`, that `default` symbol must be used; otherwise,
+    //   error.
+    //   https://avro.apache.org/docs/1.11.1/specification/#schema-resolution
+    // * Avro “Aliases” are applied from the reader side to rewrite the writer’s
+    //   names during resolution. For robustness across ecosystems, we also accept
+    //   symmetry here (see note below).
+    //   https://avro.apache.org/docs/1.11.1/specification/#aliases
+    //
+    // # Rationale for this code path
+    // 1. Do the work once at schema‑resolution time. Avro serializes an enum as a
+    //    writer‑side position. Mapping positions on the hot decoder path is expensive
+    //    if done with string lookups. This method builds a `writer_index to reader_index`
+    //    vector once, so decoding just does an O(1) table lookup.
+    // 2. Adopt the reader’s symbol set and order. We return an Arrow
+    //    `Dictionary(Int32, Utf8)` whose dictionary values are the reader enum
+    //    symbols. This makes downstream semantics match the reader schema, including
+    //    Avro’s sort order rule that orders enums by symbol position in the schema.
+    //    https://avro.apache.org/docs/1.11.1/specification/#sort-order
+    // 3. Honor Avro’s `default` for enums. Avro 1.9+ allows a type‑level default
+    //    on the enum. When the writer emits a symbol unknown to the reader, we map it
+    //    to the reader’s validated `default` symbol if present; otherwise we signal an
+    //    error at decoding time.
+    //    https://avro.apache.org/docs/1.11.1/specification/#enums
+    //
+    // # Implementation notes
+    // * We first check that enum names match or are alias‑equivalent. The Avro
+    //   spec describes alias rewriting using reader aliases; this implementation
+    //   additionally treats writer aliases as acceptable for name matching to be
+    //   resilient with schemas produced by different tooling.
+    // * We build `EnumMapping`:
+    //   - `mapping[i]` = reader index of the writer symbol at writer index `i`.
+    //   - If the writer symbol is absent and the reader has a default, we store the
+    //     reader index of that default.
+    //   - Otherwise we store `-1` as a sentinel meaning unresolvable; the decoder must
+    //     treat such a value as an error, per the spec (`strict_mode` rejects it up front).
+    // * We persist the reader symbol list in field metadata under
+    //   `AVRO_ENUM_SYMBOLS_METADATA_KEY`, so consumers can inspect the dictionary
+    //   without needing the original Avro schema.
+    // * The Arrow representation is `Dictionary(Int32, Utf8)`, which aligns with
+    //   Avro’s integer index encoding for enums.
+    //
+    // # Examples
+    // * Writer `["A","B","C"]`, Reader `["A","B"]`, Reader default `"A"`
+    //     `mapping = [0, 1, 0]`, `default_index = 0`.
+    // * Writer `["A","B"]`, Reader `["B","A"]` (no default)
+    //     `mapping = [1, 0]`, `default_index = -1`.
+    // * Writer `["A","B","C"]`, Reader `["A","B"]` (no default)
+    //     `mapping = [0, 1, -1]` (decode must error on `"C"`).
+    fn resolve_enums(
+        &mut self,
+        writer_enum: &Enum<'a>,
+        reader_enum: &Enum<'a>,
+        reader_schema: &Schema<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        ensure_names_match(
+            "Enum",
+            writer_enum.name,
+            writer_enum.namespace,
+            &writer_enum.aliases,
+            reader_enum.name,
+            reader_enum.namespace,
+            &reader_enum.aliases,
+        )?;
+        if writer_enum.symbols == reader_enum.symbols {
+            return self.parse_type(reader_schema, namespace);
+        }
+        let reader_index: HashMap<&str, i32> = reader_enum
+            .symbols
+            .iter()
+            .enumerate()
+            .map(|(index, &symbol)| (symbol, index as i32))
+            .collect();
+        let default_index: i32 = match reader_enum.default {
+            Some(symbol) => *reader_index.get(symbol).ok_or_else(|| {
+                ArrowError::SchemaError(format!(
+                    "Reader enum '{}' default symbol '{symbol}' not found in symbols list",
+                    reader_enum.name,
+                ))
+            })?,
+            None => -1,
+        };
+        let mapping: Vec<i32> = writer_enum
+            .symbols
+            .iter()
+            .map(|&write_symbol| {
+                reader_index
+                    .get(write_symbol)
+                    .copied()
+                    .unwrap_or(default_index)
+            })
+            .collect();
+        if self.strict_mode && mapping.iter().any(|&m| m < 0) {
+            return Err(ArrowError::SchemaError(format!(
+                "Reader enum '{}' does not cover all writer symbols and no default is provided",
+                reader_enum.name
+            )));
+        }
+        let mut dt = self.parse_type(reader_schema, namespace)?;
+        dt.resolution = Some(ResolutionInfo::EnumMapping(EnumMapping {
+            mapping: Arc::from(mapping),
+            default_index,
+        }));
+        let reader_ns = reader_enum.namespace.or(namespace);
+        self.resolver
+            .register(reader_enum.name, reader_ns, dt.clone());
+        Ok(dt)
+    }
+
+    #[inline]
+    fn build_writer_lookup(
+        writer_record: &Record<'a>,
+    ) -> (HashMap<&'a str, usize>, HashSet<&'a str>) {
+        let mut map: HashMap<&str, usize> = HashMap::with_capacity(writer_record.fields.len() * 2);
+        for (idx, wf) in writer_record.fields.iter().enumerate() {
+            // Avro field names are unique, so last-in-wins is acceptable and matches previous behavior.
+            map.insert(wf.name, idx);
+        }
+        // Track ambiguous writer aliases (alias used by multiple writer fields)
+        let mut ambiguous: HashSet<&str> = HashSet::new();
+        for (idx, wf) in writer_record.fields.iter().enumerate() {
+            for &alias in &wf.aliases {
+                match map.entry(alias) {
+                    Entry::Occupied(e) if *e.get() != idx => {
+                        ambiguous.insert(alias);
+                    }
+                    Entry::Vacant(e) => {
+                        e.insert(idx);
+                    }
+                    _ => {}
+                }
+            }
+        }
+        (map, ambiguous)
+    }
+
+    fn resolve_records(
+        &mut self,
+        writer_record: &Record<'a>,
+        reader_record: &Record<'a>,
+        namespace: Option<&'a str>,
+    ) -> Result<AvroDataType, ArrowError> {
+        ensure_names_match(
+            "Record",
+            writer_record.name,
+            writer_record.namespace,
+            &writer_record.aliases,
+            reader_record.name,
+            reader_record.namespace,
+            &reader_record.aliases,
+        )?; // propagate the error when the record names do not match
+        let writer_ns = writer_record.namespace.or(namespace); // own namespace, else inherited
+        let reader_ns = reader_record.namespace.or(namespace);
+        let reader_md = reader_record.attributes.field_metadata();
+        // Index writer fields by name and alias; track aliases shared by several writer fields.
+        let (writer_lookup, ambiguous_writer_aliases) = Self::build_writer_lookup(writer_record);
+        let mut writer_to_reader: Vec<Option<usize>> = vec![None; writer_record.fields.len()];
+        let mut reader_fields: Vec<AvroField> = Vec::with_capacity(reader_record.fields.len());
+        // Reader indices that take a default instead of writer data (filled in the loop below).
+        let mut default_fields: Vec<usize> = Vec::new();
+        for (reader_idx, r_field) in reader_record.fields.iter().enumerate() {
+            // Reader name first, then reader aliases; the lookup already holds writer aliases.
+            let mut match_idx = writer_lookup.get(r_field.name).copied();
+            let mut matched_via_alias: Option<&str> = None;
+            if match_idx.is_none() {
+                for &alias in &r_field.aliases {
+                    if let Some(i) = writer_lookup.get(alias).copied() {
+                        if self.strict_mode && ambiguous_writer_aliases.contains(alias) {
+                            return Err(ArrowError::SchemaError(format!(
+                                "Ambiguous alias '{alias}' on reader field '{}' matches multiple writer fields",
+                                r_field.name
+                            )));
+                        }
+                        match_idx = Some(i);
+                        matched_via_alias = Some(alias);
+                        break; // the first alias that hits decides the match
+                    }
+                }
+            }
+            if let Some(wi) = match_idx {
+                if writer_to_reader[wi].is_none() {
+                    let w_schema = &writer_record.fields[wi].r#type;
+                    let dt = self.make_data_type(w_schema, Some(&r_field.r#type), reader_ns)?;
+                    writer_to_reader[wi] = Some(reader_idx);
+                    reader_fields.push(AvroField {
+                        name: r_field.name.to_owned(),
+                        data_type: dt,
+                    });
+                    continue;
+                } else if self.strict_mode {
+                    // Strict mode: a writer field may feed only one reader field.
+                    let existing_reader = writer_to_reader[wi].unwrap();
+                    let via = matched_via_alias
+                        .map(|a| format!("alias '{a}'"))
+                        .unwrap_or_else(|| "name match".to_string());
+                    return Err(ArrowError::SchemaError(format!(
+                        "Multiple reader fields map to the same writer field '{}' via {via} (existing reader index {existing_reader}, new reader index {reader_idx})",
+                        writer_record.fields[wi].name
+                    )));
+                }
+                // Non-strict: the earlier mapping stands; this field falls through to defaulting.
+            }
+            // Unmatched, or lost a non-strict conflict: the value must come from a default.
+            let mut dt = self.parse_type(&r_field.r#type, reader_ns)?;
+            if let Some(default_json) = r_field.default.as_ref() {
+                dt.resolution = Some(ResolutionInfo::DefaultValue(
+                    dt.parse_and_store_default(default_json)?,
+                ));
+                default_fields.push(reader_idx);
+            } else if dt.nullability() == Some(Nullability::NullFirst) {
+                // No explicit default: only a null-first union may fall back to null implicitly.
+                dt.resolution = Some(ResolutionInfo::DefaultValue(
+                    dt.parse_and_store_default(&Value::Null)?,
+                ));
+                default_fields.push(reader_idx);
+            } else {
+                return Err(ArrowError::SchemaError(format!(
+                    "Reader field '{}' not present in writer schema must have a default value",
+                    r_field.name
+                )));
+            }
+            reader_fields.push(AvroField {
+                name: r_field.name.to_owned(),
+                data_type: dt,
+            });
+        }
+        // Per writer field, in order: `None` when mapped, otherwise its parsed writer type.
+        let mut skip_fields: Vec<Option<AvroDataType>> =
+            Vec::with_capacity(writer_record.fields.len());
+        for (writer_index, writer_field) in writer_record.fields.iter().enumerate() {
+            if writer_to_reader[writer_index].is_some() {
+                skip_fields.push(None);
+            } else {
+                skip_fields.push(Some(self.parse_type(&writer_field.r#type, writer_ns)?));
+            }
+        }
+        let resolved = AvroDataType::new_with_resolution(
+            Codec::Struct(Arc::from(reader_fields)),
+            reader_md,
+            None,
+            Some(ResolutionInfo::Record(ResolvedRecord {
+                writer_to_reader: Arc::from(writer_to_reader),
+                default_fields: Arc::from(default_fields),
+                skip_fields: Arc::from(skip_fields),
+            })),
+        );
+        // Register under the reader's name and namespace so later named references can find it.
+        self.resolver
+            .register(reader_record.name, reader_ns, resolved.clone());
+        Ok(resolved)
+    }
+}
+
+#[cfg(test)]
 mod tests {
     use super::*;
     use crate::schema::{
-        Attributes, ComplexType, Fixed, PrimitiveType, Record, Schema, Type, TypeName,
+        AVRO_ROOT_RECORD_DEFAULT_NAME, Array, Attributes, ComplexType, Field as AvroFieldSchema,
+        Fixed, PrimitiveType, Record, Schema, Type, TypeName,
     };
-    use serde_json;
-    use std::collections::HashMap;
+    use indexmap::IndexMap;
+    use serde_json::{self, Value};
 
     fn create_schema_with_logical_type(
         primitive_type: PrimitiveType,
@@ -498,27 +2009,28 @@ mod tests {
         })
     }
 
-    fn create_fixed_schema(size: usize, logical_type: &'static str) -> Schema<'static> {
-        let attributes = Attributes {
-            logical_type: Some(logical_type),
-            additional: Default::default(),
-        };
+    fn resolve_promotion(writer: PrimitiveType, reader: PrimitiveType) -> AvroDataType {
+        // Wrap both primitives as bare schemas; both `Maker` flags stay off.
+        let from = Schema::TypeName(TypeName::Primitive(writer));
+        let into = Schema::TypeName(TypeName::Primitive(reader));
+        let mut maker = Maker::new(false, false);
+        let outcome = maker.make_data_type(&from, Some(&into), None);
+        outcome.expect("promotion should resolve")
+    }
 
-        Schema::Complex(ComplexType::Fixed(Fixed {
-            name: "fixed_type",
-            namespace: None,
-            aliases: Vec::new(),
-            size,
-            attributes,
-        }))
+    fn mk_primitive(pt: PrimitiveType) -> Schema<'static> {
+        Schema::TypeName(TypeName::Primitive(pt)) // bare primitive type as a schema
+    }
+    fn mk_union(branches: Vec<Schema<'_>>) -> Schema<'_> {
+        Schema::Union(branches)
     }
 
     #[test]
     fn test_date_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Int, "date");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::Date32));
     }
@@ -527,8 +2039,8 @@ mod tests {
     fn test_time_millis_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Int, "time-millis");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimeMillis));
     }
@@ -537,8 +2049,8 @@ mod tests {
     fn test_time_micros_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Long, "time-micros");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimeMicros));
     }
@@ -547,8 +2059,8 @@ mod tests {
     fn test_timestamp_millis_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimestampMillis(true)));
     }
@@ -557,8 +2069,8 @@ mod tests {
     fn test_timestamp_micros_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimestampMicros(true)));
     }
@@ -567,8 +2079,8 @@ mod tests {
     fn test_local_timestamp_millis_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-millis");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimestampMillis(false)));
     }
@@ -577,12 +2089,21 @@ mod tests {
     fn test_local_timestamp_micros_logical_type() {
         let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-micros");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::TimestampMicros(false)));
     }
 
+    #[test]
+    fn test_uuid_type() {
+        let mut codec = Codec::Fixed(16); // begin with a 16-byte fixed codec
+        if let c @ Codec::Fixed(16) = &mut codec {
+            *c = Codec::Uuid; // rewritten in place only when the size is exactly 16
+        }
+        assert!(matches!(codec, Codec::Uuid));
+    }
+
     #[test]
     fn test_duration_logical_type() {
         let mut codec = Codec::Fixed(12);
@@ -596,7 +2117,7 @@ mod tests {
 
     #[test]
     fn test_decimal_logical_type_not_implemented() {
-        let mut codec = Codec::Fixed(16);
+        let codec = Codec::Fixed(16);
 
         let process_decimal = || -> Result<(), ArrowError> {
             if let Codec::Fixed(_) = codec {
@@ -616,13 +2137,12 @@ mod tests {
             panic!("Expected NotYetImplemented error");
         }
     }
-
     #[test]
     fn test_unknown_logical_type_added_to_metadata() {
         let schema = create_schema_with_logical_type(PrimitiveType::Int, "custom-type");
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert_eq!(
             result.metadata.get("logicalType"),
@@ -634,8 +2154,8 @@ mod tests {
     fn test_string_with_utf8view_enabled() {
         let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String));
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, true).unwrap();
+        let mut maker = Maker::new(true, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::Utf8View));
     }
@@ -644,8 +2164,8 @@ mod tests {
     fn test_string_without_utf8view_enabled() {
         let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String));
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, false).unwrap();
+        let mut maker = Maker::new(false, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         assert!(matches!(result.codec, Codec::Utf8));
     }
@@ -659,6 +2179,7 @@ mod tests {
             r#type: field_schema,
             default: None,
             doc: None,
+            aliases: vec![],
         };
 
         let record = Record {
@@ -672,8 +2193,8 @@ mod tests {
 
         let schema = Schema::Complex(ComplexType::Record(record));
 
-        let mut resolver = Resolver::default();
-        let result = make_data_type(&schema, None, &mut resolver, true).unwrap();
+        let mut maker = Maker::new(true, false);
+        let result = maker.make_data_type(&schema, None, None).unwrap();
 
         if let Codec::Struct(fields) = &result.codec {
             let first_field_codec = &fields[0].data_type().codec;
@@ -682,4 +2203,957 @@ mod tests {
             panic!("Expected Struct codec");
         }
     }
+
+    #[test]
+    fn test_union_with_strict_mode() {
+        // A null-second union is the shape this test expects strict mode to refuse.
+        let string_then_null = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
+        ]);
+
+        // The second `Maker::new` flag turns strict mode on; the first stays off.
+        let mut maker = Maker::new(false, true);
+        let outcome = maker.make_data_type(&string_then_null, None, None);
+        assert!(outcome.is_err());
+
+        let Err(ArrowError::SchemaError(msg)) = outcome else {
+            panic!("Expected SchemaError");
+        };
+        assert!(msg.contains(
+            "Found Avro union of the form ['T','null'], which is disallowed in strict_mode"
+        ));
+    }
+
+    #[test]
+    fn test_resolve_int_to_float_promotion() {
+        // Writer `int` read as reader `float`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Int, PrimitiveType::Float);
+        assert!(matches!(resolved.codec, Codec::Float32));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::IntToFloat));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_int_to_double_promotion() {
+        // Writer `int` read as reader `double`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Int, PrimitiveType::Double);
+        assert!(matches!(resolved.codec, Codec::Float64));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::IntToDouble));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_long_to_float_promotion() {
+        // Writer `long` read as reader `float`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Long, PrimitiveType::Float);
+        assert!(matches!(resolved.codec, Codec::Float32));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::LongToFloat));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_long_to_double_promotion() {
+        // Writer `long` read as reader `double`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Long, PrimitiveType::Double);
+        assert!(matches!(resolved.codec, Codec::Float64));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::LongToDouble));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_float_to_double_promotion() {
+        // Writer `float` read as reader `double`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Float, PrimitiveType::Double);
+        assert!(matches!(resolved.codec, Codec::Float64));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::FloatToDouble));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_string_to_bytes_promotion() {
+        // Writer `string` read as reader `bytes`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::String, PrimitiveType::Bytes);
+        assert!(matches!(resolved.codec, Codec::Binary));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::StringToBytes));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_bytes_to_string_promotion() {
+        // Writer `bytes` read as reader `string`: the codec follows the reader type.
+        let resolved = resolve_promotion(PrimitiveType::Bytes, PrimitiveType::String);
+        assert!(matches!(resolved.codec, Codec::Utf8));
+        // The resolution must record which promotion was applied.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::BytesToString));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_resolve_illegal_promotion_double_to_float_errors() {
+        // Narrowing a writer `double` into a reader `float` must be refused.
+        let wide = Schema::TypeName(TypeName::Primitive(PrimitiveType::Double));
+        let narrow = Schema::TypeName(TypeName::Primitive(PrimitiveType::Float));
+        let mut maker = Maker::new(false, false);
+        let outcome = maker.make_data_type(&wide, Some(&narrow), None);
+        assert!(outcome.is_err());
+        // The refusal has to surface as a parse error that names the illegal promotion.
+        let Err(ArrowError::ParseError(msg)) = outcome else {
+            panic!("Expected ParseError for illegal promotion Double -> Float");
+        };
+        assert!(msg.contains("Illegal promotion"));
+    }
+
+    #[test]
+    fn test_promotion_within_nullable_union_keeps_writer_null_ordering() {
+        // The writer puts null first, the reader puts it second.
+        let from = mk_union(vec![
+            mk_primitive(PrimitiveType::Null),
+            mk_primitive(PrimitiveType::Int),
+        ]);
+        let into = mk_union(vec![
+            mk_primitive(PrimitiveType::Double),
+            mk_primitive(PrimitiveType::Null),
+        ]);
+        let mut maker = Maker::new(false, false);
+        let resolved = maker.make_data_type(&from, Some(&into), None).unwrap();
+        assert!(matches!(resolved.codec, Codec::Float64));
+        let promotion = Some(ResolutionInfo::Promotion(Promotion::IntToDouble));
+        assert_eq!(resolved.resolution, promotion);
+        // Null ordering is expected to come from the writer, not the reader.
+        assert_eq!(resolved.nullability, Some(Nullability::NullFirst));
+    }
+
+    #[test]
+    fn test_resolve_writer_union_to_reader_non_union_partial_coverage() {
+        // Writer is a `[string, long]` union; the reader is plain `bytes`.
+        let from = mk_union(vec![
+            mk_primitive(PrimitiveType::String),
+            mk_primitive(PrimitiveType::Long),
+        ]);
+        let into = mk_primitive(PrimitiveType::Bytes);
+        let mut maker = Maker::new(false, false);
+        let dt = maker.make_data_type(&from, Some(&into), None).unwrap();
+        assert!(matches!(dt.codec(), Codec::Binary));
+        let info = match dt.resolution {
+            Some(ResolutionInfo::Union(info)) => info,
+            other => panic!("expected union resolution info, got {other:?}"),
+        };
+        assert!(info.writer_is_union && !info.reader_is_union);
+        // One entry per writer branch: `string` maps with a promotion, `long` maps to nothing.
+        let expected = [Some((0, Promotion::StringToBytes)), None];
+        assert_eq!(info.writer_to_reader.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_resolve_writer_non_union_to_reader_union_prefers_direct_over_promotion() {
+        // The reader offers both `long` and `double`; the exact `long` branch must win.
+        let from = mk_primitive(PrimitiveType::Long);
+        let into = mk_union(vec![
+            mk_primitive(PrimitiveType::Long),
+            mk_primitive(PrimitiveType::Double),
+        ]);
+        let mut maker = Maker::new(false, false);
+        let dt = maker.make_data_type(&from, Some(&into), None).unwrap();
+        let info = match dt.resolution {
+            Some(ResolutionInfo::Union(info)) => info,
+            other => panic!("expected union resolution info, got {other:?}"),
+        };
+        assert!(!info.writer_is_union && info.reader_is_union);
+        // Reader branch 0 is expected, with no conversion.
+        let expected = [Some((0, Promotion::Direct))];
+        assert_eq!(info.writer_to_reader.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_resolve_writer_non_union_to_reader_union_uses_promotion_when_needed() {
+        // No reader branch is `int`, so the writer `int` has to promote.
+        let from = mk_primitive(PrimitiveType::Int);
+        let into = mk_union(vec![
+            mk_primitive(PrimitiveType::Null),
+            mk_primitive(PrimitiveType::Long),
+            mk_primitive(PrimitiveType::String),
+        ]);
+        let mut maker = Maker::new(false, false);
+        let dt = maker.make_data_type(&from, Some(&into), None).unwrap();
+        let info = match dt.resolution {
+            Some(ResolutionInfo::Union(info)) => info,
+            other => panic!("expected union resolution info, got {other:?}"),
+        };
+        // Reader branch 1 (`long`) is expected, reached through int-to-long.
+        let expected = [Some((1, Promotion::IntToLong))];
+        assert_eq!(info.writer_to_reader.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_resolve_both_nullable_unions_direct_match() {
+        let writer = mk_union(vec![
+            mk_primitive(PrimitiveType::Null),
+            mk_primitive(PrimitiveType::String),
+        ]); // writer: null first
+        let reader = mk_union(vec![
+            mk_primitive(PrimitiveType::String),
+            mk_primitive(PrimitiveType::Null),
+        ]); // reader: null second
+        let mut maker = Maker::new(false, false);
+        let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap();
+        assert!(matches!(dt.codec(), Codec::Utf8));
+        assert_eq!(dt.nullability, Some(Nullability::NullFirst)); // writer's null position
+        assert!(dt.resolution.is_none()); // same value type on both sides
+    }
+
+    #[test]
+    fn test_resolve_both_nullable_unions_with_promotion() {
+        // Null sits in different positions on each side and `int` must widen to `double`.
+        let from = mk_union(vec![
+            mk_primitive(PrimitiveType::Null),
+            mk_primitive(PrimitiveType::Int),
+        ]);
+        let into = mk_union(vec![
+            mk_primitive(PrimitiveType::Double),
+            mk_primitive(PrimitiveType::Null),
+        ]);
+        let mut maker = Maker::new(false, false);
+        let dt = maker.make_data_type(&from, Some(&into), None).unwrap();
+        assert!(matches!(dt.codec(), Codec::Float64));
+        // Nullability is expected to keep the writer's null-first ordering.
+        assert_eq!(dt.nullability, Some(Nullability::NullFirst));
+        let promotion = Some(ResolutionInfo::Promotion(Promotion::IntToDouble));
+        assert_eq!(dt.resolution, promotion);
+    }
+
+    #[test]
+    fn test_resolve_type_promotion() {
+        // Writer `int` read as reader `long`.
+        let from = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
+        let into = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long));
+
+        let mut maker = Maker::new(false, false);
+        let resolved = maker.make_data_type(&from, Some(&into), None).unwrap();
+        assert!(matches!(resolved.codec, Codec::Int64));
+
+        // The promotion is recorded next to the reader codec.
+        let expected = Some(ResolutionInfo::Promotion(Promotion::IntToLong));
+        assert_eq!(resolved.resolution, expected);
+    }
+
+    #[test]
+    fn test_nested_record_type_reuse_without_namespace() {
+        let schema_str = r#"
+        {
+          "type": "record",
+          "name": "Record",
+          "fields": [
+            {
+              "name": "nested",
+              "type": {
+                "type": "record",
+                "name": "Nested",
+                "fields": [
+                  { "name": "nested_int", "type": "int" }
+                ]
+              }
+            },
+            { "name": "nestedRecord", "type": "Nested" },
+            { "name": "nestedArray", "type": { "type": "array", "items": "Nested" } },
+            { "name": "nestedMap", "type": { "type": "map", "values": "Nested" } }
+          ]
+        }
+        "#;
+
+        let schema: Schema = serde_json::from_str(schema_str).unwrap();
+
+        let mut maker = Maker::new(false, false);
+        let avro_data_type = maker.make_data_type(&schema, None, None).unwrap();
+
+        if let Codec::Struct(fields) = avro_data_type.codec() {
+            assert_eq!(fields.len(), 4);
+
+            // nested: the field that defines the `Nested` record inline
+            assert_eq!(fields[0].name(), "nested");
+            let nested_data_type = fields[0].data_type();
+            if let Codec::Struct(nested_fields) = nested_data_type.codec() {
+                assert_eq!(nested_fields.len(), 1);
+                assert_eq!(nested_fields[0].name(), "nested_int");
+                assert!(matches!(nested_fields[0].data_type().codec(), Codec::Int32));
+            } else {
+                panic!(
+                    "'nested' field is not a struct but {:?}",
+                    nested_data_type.codec()
+                );
+            }
+
+            // nestedRecord: refers to `Nested` by name; must yield the same data type
+            assert_eq!(fields[1].name(), "nestedRecord");
+            let nested_record_data_type = fields[1].data_type();
+            assert_eq!(
+                nested_record_data_type.codec().data_type(),
+                nested_data_type.codec().data_type()
+            );
+
+            // nestedArray: array whose items refer to `Nested` by name
+            assert_eq!(fields[2].name(), "nestedArray");
+            if let Codec::List(item_type) = fields[2].data_type().codec() {
+                assert_eq!(
+                    item_type.codec().data_type(),
+                    nested_data_type.codec().data_type()
+                );
+            } else {
+                panic!("'nestedArray' field is not a list");
+            }
+
+            // nestedMap: map whose values refer to `Nested` by name
+            assert_eq!(fields[3].name(), "nestedMap");
+            if let Codec::Map(value_type) = fields[3].data_type().codec() {
+                assert_eq!(
+                    value_type.codec().data_type(),
+                    nested_data_type.codec().data_type()
+                );
+            } else {
+                panic!("'nestedMap' field is not a map");
+            }
+        } else {
+            panic!("Top-level schema is not a struct");
+        }
+    }
+
+    #[test]
+    fn test_nested_enum_type_reuse_with_namespace() {
+        let schema_str = r#"
+        {
+          "type": "record",
+          "name": "Record",
+          "namespace": "record_ns",
+          "fields": [
+            {
+              "name": "status",
+              "type": {
+                "type": "enum",
+                "name": "Status",
+                "namespace": "enum_ns",
+                "symbols": ["ACTIVE", "INACTIVE", "PENDING"]
+              }
+            },
+            { "name": "backupStatus", "type": "enum_ns.Status" },
+            { "name": "statusHistory", "type": { "type": "array", "items": "enum_ns.Status" } },
+            { "name": "statusMap", "type": { "type": "map", "values": "enum_ns.Status" } }
+          ]
+        }
+        "#;
+
+        let schema: Schema = serde_json::from_str(schema_str).unwrap();
+
+        let mut maker = Maker::new(false, false);
+        let avro_data_type = maker.make_data_type(&schema, None, None).unwrap();
+
+        if let Codec::Struct(fields) = avro_data_type.codec() {
+            assert_eq!(fields.len(), 4);
+
+            // status: the field that defines the `enum_ns.Status` enum inline
+            assert_eq!(fields[0].name(), "status");
+            let status_data_type = fields[0].data_type();
+            if let Codec::Enum(symbols) = status_data_type.codec() {
+                assert_eq!(symbols.as_ref(), &["ACTIVE", "INACTIVE", "PENDING"]);
+            } else {
+                panic!(
+                    "'status' field is not an enum but {:?}",
+                    status_data_type.codec()
+                );
+            }
+
+            // backupStatus: refers to `enum_ns.Status` by full name; same data type expected
+            assert_eq!(fields[1].name(), "backupStatus");
+            let backup_status_data_type = fields[1].data_type();
+            assert_eq!(
+                backup_status_data_type.codec().data_type(),
+                status_data_type.codec().data_type()
+            );
+
+            // statusHistory: array whose items refer to `enum_ns.Status` by full name
+            assert_eq!(fields[2].name(), "statusHistory");
+            if let Codec::List(item_type) = fields[2].data_type().codec() {
+                assert_eq!(
+                    item_type.codec().data_type(),
+                    status_data_type.codec().data_type()
+                );
+            } else {
+                panic!("'statusHistory' field is not a list");
+            }
+
+            // statusMap: map whose values refer to `enum_ns.Status` by full name
+            assert_eq!(fields[3].name(), "statusMap");
+            if let Codec::Map(value_type) = fields[3].data_type().codec() {
+                assert_eq!(
+                    value_type.codec().data_type(),
+                    status_data_type.codec().data_type()
+                );
+            } else {
+                panic!("'statusMap' field is not a map");
+            }
+        } else {
+            panic!("Top-level schema is not a struct");
+        }
+    }
+
+    #[test]
+    fn test_resolve_from_writer_and_reader_defaults_root_name_for_non_record_reader() {
+        // Resolve two identical `string` schemas; neither side is a record.
+        let from = Schema::TypeName(TypeName::Primitive(PrimitiveType::String));
+        let into = Schema::TypeName(TypeName::Primitive(PrimitiveType::String));
+        let mut maker = Maker::new(false, false);
+        let resolution = maker.make_data_type(&from, Some(&into), None);
+        let data_type = resolution.expect("resolution should succeed");
+
+        // Wrap the result in a field that carries the default root record name.
+        let name = AVRO_ROOT_RECORD_DEFAULT_NAME.to_string();
+        let field = AvroField { name, data_type };
+        assert_eq!(field.name(), AVRO_ROOT_RECORD_DEFAULT_NAME);
+        assert!(matches!(field.data_type().codec(), Codec::Utf8));
+    }
+
+    fn json_string(s: &str) -> Value {
+        Value::from(s) // `From<&str>` builds a `Value::String` holding an owned copy
+    }
+
+    fn assert_default_stored(dt: &AvroDataType, default_json: &Value) {
+        // The expected metadata entry is the default serialized as JSON text.
+        let expected = serde_json::to_string(default_json).unwrap();
+
+        // A missing entry becomes the empty string, which can never equal `expected`.
+        let stored = dt.metadata.get(AVRO_FIELD_DEFAULT_METADATA_KEY).cloned();
+        let stored = stored.unwrap_or_default();
+        assert_eq!(stored, expected, "stored default metadata should match");
+    }
+
+    #[test]
+    fn test_validate_and_store_default_null_and_nullability_rules() {
+        let needle = "JSON null default is only valid for `null` type";
+        // A `null` codec accepts a JSON null default and records it.
+        let mut dt_null = AvroDataType::new(Codec::Null, HashMap::new(), None);
+        let lit = dt_null.parse_and_store_default(&Value::Null).unwrap();
+        assert_eq!(lit, AvroLiteral::Null);
+        assert_default_stored(&dt_null, &Value::Null);
+
+        // A non-nullable `int` rejects a JSON null default.
+        let mut dt_int = AvroDataType::new(Codec::Int32, HashMap::new(), None);
+        let err = dt_int.parse_and_store_default(&Value::Null).unwrap_err();
+        assert!(err.to_string().contains(needle), "unexpected error: {err}");
+
+        // A null-first nullable `int` accepts it and records it.
+        let mut dt_int_nf =
+            AvroDataType::new(Codec::Int32, HashMap::new(), Some(Nullability::NullFirst));
+        let lit2 = dt_int_nf.parse_and_store_default(&Value::Null).unwrap();
+        assert_eq!(lit2, AvroLiteral::Null);
+        assert_default_stored(&dt_int_nf, &Value::Null);
+
+        // A null-second nullable `int` rejects it with the same message.
+        let mut dt_int_ns =
+            AvroDataType::new(Codec::Int32, HashMap::new(), Some(Nullability::NullSecond));
+        let err2 = dt_int_ns.parse_and_store_default(&Value::Null).unwrap_err();
+        assert!(err2.to_string().contains(needle), "unexpected error: {err2}");
+    }
+
+    #[test]
+    fn test_validate_and_store_default_primitives_and_temporal() {
+        let mut dt_bool = AvroDataType::new(Codec::Boolean, HashMap::new(), None);
+        let lit = dt_bool.parse_and_store_default(&Value::Bool(true)).unwrap();
+        assert_eq!(lit, AvroLiteral::Boolean(true));
+        assert_default_stored(&dt_bool, &Value::Bool(true));
+        let mut dt_i32 = AvroDataType::new(Codec::Int32, HashMap::new(), None);
+        let lit = dt_i32
+            .parse_and_store_default(&serde_json::json!(123))
+            .unwrap();
+        assert_eq!(lit, AvroLiteral::Int(123));
+        assert_default_stored(&dt_i32, &serde_json::json!(123));
+        let err = dt_i32
+            .parse_and_store_default(&serde_json::json!(i64::from(i32::MAX) + 1))
+            .unwrap_err();
+        assert!(format!("{err}").contains("out of i32 range"));
+        let mut dt_i64 = AvroDataType::new(Codec::Int64, HashMap::new(), None);
+        let lit = dt_i64
+            .parse_and_store_default(&serde_json::json!(1234567890))
+            .unwrap();
+        assert_eq!(lit, AvroLiteral::Long(1234567890));
+        assert_default_stored(&dt_i64, &serde_json::json!(1234567890));
+        let mut dt_f32 = AvroDataType::new(Codec::Float32, HashMap::new(), None);
+        let lit = dt_f32
+            .parse_and_store_default(&serde_json::json!(1.25))
+            .unwrap();
+        assert_eq!(lit, AvroLiteral::Float(1.25));
+        assert_default_stored(&dt_f32, &serde_json::json!(1.25));
+        let err = dt_f32
+            .parse_and_store_default(&serde_json::json!(1e39))
+            .unwrap_err();
+        assert!(format!("{err}").contains("out of f32 range"));
+        let mut dt_f64 = AvroDataType::new(Codec::Float64, HashMap::new(), None);
+        let lit = dt_f64
+            .parse_and_store_default(&serde_json::json!(std::f64::consts::PI))
+            .unwrap();
+        assert_eq!(lit, AvroLiteral::Double(std::f64::consts::PI));
+        assert_default_stored(&dt_f64, &serde_json::json!(std::f64::consts::PI));
+        let mut dt_str = AvroDataType::new(Codec::Utf8, HashMap::new(), None);
+        let l = dt_str
+            .parse_and_store_default(&json_string("hello"))
+            .unwrap();
+        assert_eq!(l, AvroLiteral::String("hello".into()));
+        assert_default_stored(&dt_str, &json_string("hello"));
+        let mut dt_strv = AvroDataType::new(Codec::Utf8View, HashMap::new(), None);
+        let l = dt_strv
+            .parse_and_store_default(&json_string("view"))
+            .unwrap();
+        assert_eq!(l, AvroLiteral::String("view".into()));
+        assert_default_stored(&dt_strv, &json_string("view"));
+        let mut dt_uuid = AvroDataType::new(Codec::Uuid, HashMap::new(), None);
+        let l = dt_uuid
+            .parse_and_store_default(&json_string("00000000-0000-0000-0000-000000000000"))
+            .unwrap();
+        assert_eq!(
+            l,
+            AvroLiteral::String("00000000-0000-0000-0000-000000000000".into())
+        );
+        let mut dt_bin = AvroDataType::new(Codec::Binary, HashMap::new(), None);
+        let l = dt_bin.parse_and_store_default(&json_string("ABC")).unwrap();
+        assert_eq!(l, AvroLiteral::Bytes(vec![65, 66, 67]));
+        let err = dt_bin
+            .parse_and_store_default(&json_string("€")) // U+20AC
+            .unwrap_err();
+        assert!(format!("{err}").contains("Invalid codepoint"));
+        let mut dt_date = AvroDataType::new(Codec::Date32, HashMap::new(), None);
+        let ld = dt_date
+            .parse_and_store_default(&serde_json::json!(1))
+            .unwrap();
+        assert_eq!(ld, AvroLiteral::Int(1));
+        let mut dt_tmill = AvroDataType::new(Codec::TimeMillis, HashMap::new(), None);
+        let lt = dt_tmill
+            .parse_and_store_default(&serde_json::json!(86_400_000))
+            .unwrap();
+        assert_eq!(lt, AvroLiteral::Int(86_400_000));
+        let mut dt_tmicros = AvroDataType::new(Codec::TimeMicros, HashMap::new(), None);
+        let ltm = dt_tmicros
+            .parse_and_store_default(&serde_json::json!(1_000_000))
+            .unwrap();
+        assert_eq!(ltm, AvroLiteral::Long(1_000_000));
+        let mut dt_ts_milli = AvroDataType::new(Codec::TimestampMillis(true), HashMap::new(), None);
+        let l1 = dt_ts_milli
+            .parse_and_store_default(&serde_json::json!(123))
+            .unwrap();
+        assert_eq!(l1, AvroLiteral::Long(123));
+        let mut dt_ts_micro =
+            AvroDataType::new(Codec::TimestampMicros(false), HashMap::new(), None);
+        let l2 = dt_ts_micro
+            .parse_and_store_default(&serde_json::json!(456))
+            .unwrap();
+        assert_eq!(l2, AvroLiteral::Long(456));
+    }
+
+    #[test]
+    fn test_validate_and_store_default_fixed_decimal_interval() {
+        let mut dt_fixed = AvroDataType::new(Codec::Fixed(4), HashMap::new(), None);
+        let l = dt_fixed
+            .parse_and_store_default(&json_string("WXYZ"))
+            .unwrap();
+        assert_eq!(l, AvroLiteral::Bytes(vec![87, 88, 89, 90]));
+        let err = dt_fixed
+            .parse_and_store_default(&json_string("TOO LONG"))
+            .unwrap_err();
+        assert!(err.to_string().contains("Default length"));
+        let mut dt_dec_fixed =
+            AvroDataType::new(Codec::Decimal(10, Some(2), Some(3)), HashMap::new(), None);
+        let l = dt_dec_fixed
+            .parse_and_store_default(&json_string("abc"))
+            .unwrap();
+        assert_eq!(l, AvroLiteral::Bytes(vec![97, 98, 99]));
+        let err = dt_dec_fixed
+            .parse_and_store_default(&json_string("toolong"))
+            .unwrap_err();
+        assert!(err.to_string().contains("Default length"));
+        let mut dt_dec_bytes =
+            AvroDataType::new(Codec::Decimal(10, Some(2), None), HashMap::new(), None);
+        let l = dt_dec_bytes
+            .parse_and_store_default(&json_string("freeform"))
+            .unwrap();
+        assert_eq!(
+            l,
+            AvroLiteral::Bytes("freeform".bytes().collect::<Vec<_>>())
+        );
+        let mut dt_interval = AvroDataType::new(Codec::Interval, HashMap::new(), None);
+        let l = dt_interval
+            .parse_and_store_default(&json_string("ABCDEFGHIJKL"))
+            .unwrap();
+        assert_eq!(
+            l,
+            AvroLiteral::Bytes("ABCDEFGHIJKL".bytes().collect::<Vec<_>>())
+        );
+        let err = dt_interval
+            .parse_and_store_default(&json_string("short"))
+            .unwrap_err();
+        assert!(err.to_string().contains("Default length"));
+    }
+
+    #[test]
+    fn test_validate_and_store_default_enum_list_map_struct() {
+        let symbols: Arc<[String]> = ["RED".to_string(), "GREEN".to_string(), "BLUE".to_string()]
+            .into_iter()
+            .collect();
+        let mut dt_enum = AvroDataType::new(Codec::Enum(symbols), HashMap::new(), None);
+        let l = dt_enum
+            .parse_and_store_default(&json_string("GREEN"))
+            .unwrap();
+        assert_eq!(l, AvroLiteral::Enum("GREEN".into()));
+        let err = dt_enum
+            .parse_and_store_default(&json_string("YELLOW"))
+            .unwrap_err();
+        assert!(err.to_string().contains("Default enum symbol"));
+        let item = AvroDataType::new(Codec::Int64, HashMap::new(), None);
+        let mut dt_list = AvroDataType::new(Codec::List(Arc::new(item)), HashMap::new(), None);
+        let val = serde_json::json!([1, 2, 3]);
+        let l = dt_list.parse_and_store_default(&val).unwrap();
+        assert_eq!(
+            l,
+            AvroLiteral::Array(vec![
+                AvroLiteral::Long(1),
+                AvroLiteral::Long(2),
+                AvroLiteral::Long(3)
+            ])
+        );
+        let err = dt_list
+            .parse_and_store_default(&serde_json::json!({"not":"array"}))
+            .unwrap_err();
+        assert!(err.to_string().contains("JSON array"));
+        let val_dt = AvroDataType::new(Codec::Float64, HashMap::new(), None);
+        let mut dt_map = AvroDataType::new(Codec::Map(Arc::new(val_dt)), HashMap::new(), None);
+        let mv = serde_json::json!({"x": 1.5, "y": 2.5});
+        let l = dt_map.parse_and_store_default(&mv).unwrap();
+        let mut expected = IndexMap::new();
+        expected.insert("x".into(), AvroLiteral::Double(1.5));
+        expected.insert("y".into(), AvroLiteral::Double(2.5));
+        assert_eq!(l, AvroLiteral::Map(expected));
+        // A map default that is not a JSON object must be rejected
+        let err = dt_map
+            .parse_and_store_default(&serde_json::json!(123))
+            .unwrap_err();
+        assert!(err.to_string().contains("JSON object"));
+        let mut field_a = AvroField {
+            name: "a".into(),
+            data_type: AvroDataType::new(Codec::Int32, HashMap::new(), None),
+        };
+        let field_b = AvroField {
+            name: "b".into(),
+            data_type: AvroDataType::new(
+                Codec::Int64,
+                HashMap::new(),
+                Some(Nullability::NullFirst),
+            ),
+        };
+        let mut c_md = HashMap::new();
+        c_md.insert(AVRO_FIELD_DEFAULT_METADATA_KEY.into(), "\"xyz\"".into());
+        let field_c = AvroField {
+            name: "c".into(),
+            data_type: AvroDataType::new(Codec::Utf8, c_md, None),
+        };
+        field_a.data_type.metadata.insert("doc".into(), "na".into());
+        let struct_fields: Arc<[AvroField]> = Arc::from(vec![field_a, field_b, field_c]);
+        let mut dt_struct = AvroDataType::new(Codec::Struct(struct_fields), HashMap::new(), None);
+        let default_obj = serde_json::json!({"a": 7});
+        let l = dt_struct.parse_and_store_default(&default_obj).unwrap();
+        let mut expected = IndexMap::new();
+        expected.insert("a".into(), AvroLiteral::Int(7));
+        expected.insert("b".into(), AvroLiteral::Null);
+        expected.insert("c".into(), AvroLiteral::String("xyz".into()));
+        assert_eq!(l, AvroLiteral::Map(expected));
+        assert_default_stored(&dt_struct, &default_obj);
+        let req_field = AvroField {
+            name: "req".into(),
+            data_type: AvroDataType::new(Codec::Boolean, HashMap::new(), None),
+        };
+        let mut dt_bad = AvroDataType::new(
+            Codec::Struct(Arc::from(vec![req_field])),
+            HashMap::new(),
+            None,
+        );
+        let err = dt_bad
+            .parse_and_store_default(&serde_json::json!({}))
+            .unwrap_err();
+        assert!(
+            err.to_string().contains("missing required subfield 'req'"),
+            "unexpected error: {err}"
+        );
+        let err = dt_struct
+            .parse_and_store_default(&serde_json::json!(10))
+            .unwrap_err();
+        assert!(err.to_string().contains("must be a JSON object"));
+    }
+
+    #[test]
+    fn test_resolve_array_promotion_and_reader_metadata() {
+        let mut w_add: HashMap<&str, Value> = HashMap::new();
+        w_add.insert("who", json_string("writer"));
+        let mut r_add: HashMap<&str, Value> = HashMap::new();
+        r_add.insert("who", json_string("reader"));
+        let writer_schema = Schema::Complex(ComplexType::Array(Array {
+            items: Box::new(Schema::TypeName(TypeName::Primitive(PrimitiveType::Int))),
+            attributes: Attributes {
+                logical_type: None,
+                additional: w_add,
+            },
+        }));
+        let reader_schema = Schema::Complex(ComplexType::Array(Array {
+            items: Box::new(Schema::TypeName(TypeName::Primitive(PrimitiveType::Long))),
+            attributes: Attributes {
+                logical_type: None,
+                additional: r_add,
+            },
+        }));
+        let mut maker = Maker::new(false, false);
+        let dt = maker
+            .make_data_type(&writer_schema, Some(&reader_schema), None)
+            .unwrap();
+        assert_eq!(dt.metadata.get("who"), Some(&"\"reader\"".to_string()));
+        if let Codec::List(inner) = dt.codec() {
+            assert!(matches!(inner.codec(), Codec::Int64));
+            assert_eq!(
+                inner.resolution,
+                Some(ResolutionInfo::Promotion(Promotion::IntToLong))
+            );
+        } else {
+            panic!("expected list codec");
+        }
+    }
+
+    #[test]
+    fn test_resolve_fixed_success_name_and_size_match_and_alias() {
+        let writer_schema = Schema::Complex(ComplexType::Fixed(Fixed {
+            name: "MD5",
+            namespace: None,
+            aliases: vec!["Hash16"],
+            size: 16,
+            attributes: Attributes::default(),
+        }));
+        let reader_schema = Schema::Complex(ComplexType::Fixed(Fixed {
+            name: "Hash16",
+            namespace: None,
+            aliases: vec![],
+            size: 16,
+            attributes: Attributes::default(),
+        }));
+        let mut maker = Maker::new(false, false);
+        let dt = maker
+            .make_data_type(&writer_schema, Some(&reader_schema), None)
+            .unwrap();
+        assert!(matches!(dt.codec(), Codec::Fixed(16)));
+    }
+
+    #[test]
+    fn test_resolve_records_mapping_default_fields_and_skip_fields() {
+        let writer = Schema::Complex(ComplexType::Record(Record {
+            name: "R",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![
+                crate::schema::Field {
+                    name: "a",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+                    default: None,
+                    aliases: vec![],
+                },
+                crate::schema::Field {
+                    name: "skipme",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+                    default: None,
+                    aliases: vec![],
+                },
+                crate::schema::Field {
+                    name: "b",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+                    default: None,
+                    aliases: vec![],
+                },
+            ],
+            attributes: Attributes::default(),
+        }));
+        let reader = Schema::Complex(ComplexType::Record(Record {
+            name: "R",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![
+                crate::schema::Field {
+                    name: "b",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+                    default: None,
+                    aliases: vec![],
+                },
+                crate::schema::Field {
+                    name: "a",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+                    default: None,
+                    aliases: vec![],
+                },
+                crate::schema::Field {
+                    name: "name",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+                    default: Some(json_string("anon")),
+                    aliases: vec![],
+                },
+                crate::schema::Field {
+                    name: "opt",
+                    doc: None,
+                    r#type: Schema::Union(vec![
+                        Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
+                        Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+                    ]),
+                    default: None, // should default to null because NullFirst
+                    aliases: vec![],
+                },
+            ],
+            attributes: Attributes::default(),
+        }));
+        let mut maker = Maker::new(false, false);
+        let dt = maker
+            .make_data_type(&writer, Some(&reader), None)
+            .expect("record resolution");
+        let fields = match dt.codec() {
+            Codec::Struct(f) => f,
+            other => panic!("expected struct, got {other:?}"),
+        };
+        assert_eq!(fields.len(), 4);
+        assert_eq!(fields[0].name(), "b");
+        assert_eq!(fields[1].name(), "a");
+        assert_eq!(fields[2].name(), "name");
+        assert_eq!(fields[3].name(), "opt");
+        assert!(matches!(
+            fields[1].data_type().resolution,
+            Some(ResolutionInfo::Promotion(Promotion::IntToLong))
+        ));
+        let rec = match dt.resolution {
+            Some(ResolutionInfo::Record(ref r)) => r.clone(),
+            other => panic!("expected record resolution, got {other:?}"),
+        };
+        assert_eq!(rec.writer_to_reader.as_ref(), &[Some(1), None, Some(0)]);
+        assert_eq!(rec.default_fields.as_ref(), &[2usize, 3usize]);
+        assert!(rec.skip_fields[0].is_none());
+        assert!(rec.skip_fields[2].is_none());
+        let skip1 = rec.skip_fields[1].as_ref().expect("skip field present");
+        assert!(matches!(skip1.codec(), Codec::Utf8));
+        let name_md = &fields[2].data_type().metadata;
+        assert_eq!(
+            name_md.get(AVRO_FIELD_DEFAULT_METADATA_KEY),
+            Some(&"\"anon\"".to_string())
+        );
+        let opt_md = &fields[3].data_type().metadata;
+        assert_eq!(
+            opt_md.get(AVRO_FIELD_DEFAULT_METADATA_KEY),
+            Some(&"null".to_string())
+        );
+    }
+
+    #[test]
+    fn test_named_type_alias_resolution_record_cross_namespace() {
+        let writer_record = Record {
+            name: "PersonV2",
+            namespace: Some("com.example.v2"),
+            doc: None,
+            aliases: vec!["com.example.Person"],
+            fields: vec![
+                AvroFieldSchema {
+                    name: "name",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+                    default: None,
+                    aliases: vec![],
+                },
+                AvroFieldSchema {
+                    name: "age",
+                    doc: None,
+                    r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+                    default: None,
+                    aliases: vec![],
+                },
+            ],
+            attributes: Attributes::default(),
+        };
+        let reader_record = Record {
+            name: "Person",
+            namespace: Some("com.example"),
+            doc: None,
+            aliases: vec![],
+            fields: writer_record.fields.clone(),
+            attributes: Attributes::default(),
+        };
+        let writer_schema = Schema::Complex(ComplexType::Record(writer_record));
+        let reader_schema = Schema::Complex(ComplexType::Record(reader_record));
+        let mut maker = Maker::new(false, false);
+        let result = maker
+            .make_data_type(&writer_schema, Some(&reader_schema), None)
+            .expect("record alias resolution should succeed");
+        match result.codec {
+            Codec::Struct(ref fields) => assert_eq!(fields.len(), 2),
+            other => panic!("expected struct, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_named_type_alias_resolution_enum_cross_namespace() {
+        let writer_enum = Enum {
+            name: "ColorV2",
+            namespace: Some("org.example.v2"),
+            doc: None,
+            aliases: vec!["org.example.Color"],
+            symbols: vec!["RED", "GREEN", "BLUE"],
+            default: None,
+            attributes: Attributes::default(),
+        };
+        let reader_enum = Enum {
+            name: "Color",
+            namespace: Some("org.example"),
+            doc: None,
+            aliases: vec![],
+            symbols: vec!["RED", "GREEN", "BLUE"],
+            default: None,
+            attributes: Attributes::default(),
+        };
+        let writer_schema = Schema::Complex(ComplexType::Enum(writer_enum));
+        let reader_schema = Schema::Complex(ComplexType::Enum(reader_enum));
+        let mut maker = Maker::new(false, false);
+        maker
+            .make_data_type(&writer_schema, Some(&reader_schema), None)
+            .expect("enum alias resolution should succeed");
+    }
+
+    #[test]
+    fn test_named_type_alias_resolution_fixed_cross_namespace() {
+        let writer_fixed = Fixed {
+            name: "Fx10V2",
+            namespace: Some("ns.v2"),
+            aliases: vec!["ns.Fx10"],
+            size: 10,
+            attributes: Attributes::default(),
+        };
+        let reader_fixed = Fixed {
+            name: "Fx10",
+            namespace: Some("ns"),
+            aliases: vec![],
+            size: 10,
+            attributes: Attributes::default(),
+        };
+        let writer_schema = Schema::Complex(ComplexType::Fixed(writer_fixed));
+        let reader_schema = Schema::Complex(ComplexType::Fixed(reader_fixed));
+        let mut maker = Maker::new(false, false);
+        maker
+            .make_data_type(&writer_schema, Some(&reader_schema), None)
+            .expect("fixed alias resolution should succeed");
+    }
 }
diff --git a/arrow-avro/src/compression.rs b/arrow-avro/src/compression.rs
index 69aee634977a..0cb2878a132d 100644
--- a/arrow-avro/src/compression.rs
+++ b/arrow-avro/src/compression.rs
@@ -16,8 +16,13 @@
 // under the License.
 
 use arrow_schema::ArrowError;
-use std::io;
-use std::io::Read;
+#[cfg(any(
+    feature = "deflate",
+    feature = "zstd",
+    feature = "bzip2",
+    feature = "xz"
+))]
+use std::io::{Read, Write};
 
 /// The metadata key used for storing the JSON encoded [`CompressionCodec`]
 pub const CODEC_METADATA_KEY: &str = "avro.codec";
@@ -34,9 +39,14 @@ pub enum CompressionCodec {
     Snappy,
     /// ZStandard compression
     ZStandard,
+    /// Bzip2 compression
+    Bzip2,
+    /// Xz compression
+    Xz,
 }
 
 impl CompressionCodec {
+    #[allow(unused_variables)]
     pub(crate) fn decompress(&self, block: &[u8]) -> Result<Vec<u8>, ArrowError> {
         match self {
             #[cfg(feature = "deflate")]
@@ -84,6 +94,102 @@ impl CompressionCodec {
             CompressionCodec::ZStandard => Err(ArrowError::ParseError(
                 "ZStandard codec requires zstd feature".to_string(),
             )),
+            #[cfg(feature = "bzip2")]
+            CompressionCodec::Bzip2 => {
+                let mut decoder = bzip2::read::BzDecoder::new(block);
+                let mut out = Vec::new();
+                decoder.read_to_end(&mut out)?;
+                Ok(out)
+            }
+            #[cfg(not(feature = "bzip2"))]
+            CompressionCodec::Bzip2 => Err(ArrowError::ParseError(
+                "Bzip2 codec requires bzip2 feature".to_string(),
+            )),
+            #[cfg(feature = "xz")]
+            CompressionCodec::Xz => {
+                let mut decoder = xz::read::XzDecoder::new(block);
+                let mut out = Vec::new();
+                decoder.read_to_end(&mut out)?;
+                Ok(out)
+            }
+            #[cfg(not(feature = "xz"))]
+            CompressionCodec::Xz => Err(ArrowError::ParseError(
+                "XZ codec requires xz feature".to_string(),
+            )),
+        }
+    }
+
+    #[allow(unused_variables)]
+    pub(crate) fn compress(&self, data: &[u8]) -> Result<Vec<u8>, ArrowError> {
+        match self {
+            #[cfg(feature = "deflate")]
+            CompressionCodec::Deflate => {
+                let mut encoder =
+                    flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default());
+                encoder.write_all(data)?;
+                let compressed = encoder.finish()?;
+                Ok(compressed)
+            }
+            #[cfg(not(feature = "deflate"))]
+            CompressionCodec::Deflate => Err(ArrowError::ParseError(
+                "Deflate codec requires deflate feature".to_string(),
+            )),
+
+            #[cfg(feature = "snappy")]
+            CompressionCodec::Snappy => {
+                let mut encoder = snap::raw::Encoder::new();
+                // Allocate and compress in one step for efficiency
+                let mut compressed = encoder
+                    .compress_vec(data)
+                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+                // Compute CRC32 (ISO‑HDLC poly) of **uncompressed** data
+                let crc_val = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC).checksum(data);
+                compressed.extend_from_slice(&crc_val.to_be_bytes());
+                Ok(compressed)
+            }
+            #[cfg(not(feature = "snappy"))]
+            CompressionCodec::Snappy => Err(ArrowError::ParseError(
+                "Snappy codec requires snappy feature".to_string(),
+            )),
+
+            #[cfg(feature = "zstd")]
+            CompressionCodec::ZStandard => {
+                let mut encoder = zstd::Encoder::new(Vec::new(), 0)
+                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+                encoder.write_all(data)?;
+                let compressed = encoder
+                    .finish()
+                    .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+                Ok(compressed)
+            }
+            #[cfg(not(feature = "zstd"))]
+            CompressionCodec::ZStandard => Err(ArrowError::ParseError(
+                "ZStandard codec requires zstd feature".to_string(),
+            )),
+
+            #[cfg(feature = "bzip2")]
+            CompressionCodec::Bzip2 => {
+                let mut encoder =
+                    bzip2::write::BzEncoder::new(Vec::new(), bzip2::Compression::default());
+                encoder.write_all(data)?;
+                let compressed = encoder.finish()?;
+                Ok(compressed)
+            }
+            #[cfg(not(feature = "bzip2"))]
+            CompressionCodec::Bzip2 => Err(ArrowError::ParseError(
+                "Bzip2 codec requires bzip2 feature".to_string(),
+            )),
+            #[cfg(feature = "xz")]
+            CompressionCodec::Xz => {
+                let mut encoder = xz::write::XzEncoder::new(Vec::new(), 6);
+                encoder.write_all(data)?;
+                let compressed = encoder.finish()?;
+                Ok(compressed)
+            }
+            #[cfg(not(feature = "xz"))]
+            CompressionCodec::Xz => Err(ArrowError::ParseError(
+                "XZ codec requires xz feature".to_string(),
+            )),
         }
     }
 }
diff --git a/arrow-avro/src/lib.rs b/arrow-avro/src/lib.rs
index e413e0aa9173..032ad683ff77 100644
--- a/arrow-avro/src/lib.rs
+++ b/arrow-avro/src/lib.rs
@@ -15,28 +15,173 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Convert data to / from the [Apache Arrow] memory format and [Apache Avro]
+//! Convert data to / from the [Apache Arrow] memory format and [Apache Avro].
 //!
-//! [Apache Arrow]: https://arrow.apache.org
+//! This crate provides:
+//! - a [`reader`] that decodes Avro (Object Container Files, Avro Single‑Object encoding,
+//!   and Confluent Schema Registry wire format) into Arrow `RecordBatch`es,
+//! - and a [`writer`] that encodes Arrow `RecordBatch`es into Avro (OCF or SOE).
+//!
+//! If you’re new to Arrow or Avro, see:
+//! - Arrow project site: <https://arrow.apache.org/>
+//! - Avro 1.11.1 specification: <https://avro.apache.org/docs/1.11.1/specification/>
+//!
+//! ## Example: OCF (Object Container File) round‑trip *(runnable)*
+//!
+//! The example below creates an Arrow table, writes an **Avro OCF** fully in memory,
+//! and then reads it back. OCF is a self‑describing file format that embeds the Avro
+//! schema in a header with optional compression and block sync markers.
+//! Spec: <https://avro.apache.org/docs/1.11.1/specification/#object-container-files>
+//!
+//! ```
+//! use std::io::Cursor;
+//! use std::sync::Arc;
+//! use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::writer::AvroWriter;
+//! use arrow_avro::reader::ReaderBuilder;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Build a tiny Arrow batch
+//! let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(schema.clone()),
+//!     vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
+//! )?;
+//!
+//! // Write an Avro **Object Container File** (OCF) to a Vec<u8>
+//! let sink: Vec<u8> = Vec::new();
+//! let mut w = AvroWriter::new(sink, schema.clone())?;
+//! w.write(&batch)?;
+//! w.finish()?;
+//! let bytes = w.into_inner();
+//! assert!(!bytes.is_empty());
+//!
+//! // Read it back
+//! let mut r = ReaderBuilder::new().build(Cursor::new(bytes))?;
+//! let out = r.next().unwrap()?;
+//! assert_eq!(out.num_rows(), 3);
+//! # Ok(()) }
+//! ```
+//!
+//! ## Quickstart: SOE (Single‑Object Encoding) round‑trip *(runnable)*
+//!
+//! Avro **Single‑Object Encoding (SOE)** prefixes an Avro body with a 2‑byte marker
+//! `0xC3 0x01` followed by the **8‑byte little‑endian CRC‑64‑AVRO Rabin fingerprint** of the
+//! writer schema. Spec:
+//! <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+//!
+//! This example registers the writer schema (computing a Rabin fingerprint), writes a single‑row
+//! batch with `AvroStreamWriter` (which adds the SOE prefix itself), and decodes the frame back to Arrow.
+//!
+//! ```
+//! use std::collections::HashMap;
+//! use std::sync::Arc;
+//! use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::writer::{AvroStreamWriter, WriterBuilder};
+//! use arrow_avro::reader::ReaderBuilder;
+//! use arrow_avro::schema::{AvroSchema, SchemaStore, FingerprintStrategy, SCHEMA_METADATA_KEY};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Writer schema: { "type":"record","name":"User","fields":[{"name":"x","type":"long"}] }
+//! let writer_json = r#"{"type":"record","name":"User","fields":[{"name":"x","type":"long"}]}"#;
+//! let mut store = SchemaStore::new(); // Rabin CRC‑64‑AVRO by default
+//! let _fp = store.register(AvroSchema::new(writer_json.to_string()))?;
+//!
+//! // Build an Arrow schema that references the same Avro JSON
+//! let mut md = HashMap::new();
+//! md.insert(SCHEMA_METADATA_KEY.to_string(), writer_json.to_string());
+//! let schema = Schema::new_with_metadata(
+//!     vec![Field::new("x", DataType::Int64, false)],
+//!     md,
+//! );
+//!
+//! // One‑row batch: { x: 7 }
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(schema.clone()),
+//!     vec![Arc::new(Int64Array::from(vec![7])) as ArrayRef],
+//! )?;
+//!
+//! // Stream‑write a single record; the writer adds **SOE** (C3 01 + Rabin) automatically.
+//! let sink: Vec<u8> = Vec::new();
+//! let mut w: AvroStreamWriter<Vec<u8>> = WriterBuilder::new(schema.clone())
+//!     .with_fingerprint_strategy(FingerprintStrategy::Rabin)
+//!     .build(sink)?;
+//! w.write(&batch)?;
+//! w.finish()?;
+//! let frame = w.into_inner(); // already: C3 01 + 8B LE Rabin + Avro body
+//! assert!(frame.len() > 10);
+//!
+//! // Decode
+//! let mut dec = ReaderBuilder::new()
+//!   .with_writer_schema_store(store)
+//!   .build_decoder()?;
+//! dec.decode(&frame)?;
+//! let out = dec.flush()?.expect("one row");
+//! assert_eq!(out.num_rows(), 1);
+//! # Ok(()) }
+//! ```
+//!
+//! ---
+//!
+//! ### Modules
+//!
+//! - [`reader`]: read Avro (OCF, SOE, Confluent) into Arrow `RecordBatch`es.
+//! - [`writer`]: write Arrow `RecordBatch`es as Avro (OCF, SOE, Confluent, Apicurio).
+//! - [`schema`]: Avro schema parsing / fingerprints / registries.
+//! - [`compression`]: codecs used for **OCF block compression** (i.e., Deflate, Snappy, Zstandard, BZip2, and XZ).
+//! - [`codec`]: internal Avro-Arrow type conversion and row decode/encode plans.
+//!
+//! ### Features
+//!
+//! **OCF compression (enabled by default)**
+//! - `deflate` — enable DEFLATE block compression (via `flate2`).
+//! - `snappy` — enable Snappy block compression with 4‑byte BE CRC32 (per Avro).
+//! - `zstd` — enable Zstandard block compression.
+//! - `bzip2` — enable BZip2 block compression.
+//! - `xz` — enable XZ/LZMA block compression.
+//!
+//! **Schema fingerprints & helpers (opt‑in)**
+//! - `md5` — enable MD5 writer‑schema fingerprints.
+//! - `sha256` — enable SHA‑256 writer‑schema fingerprints.
+//! - `small_decimals` — support for compact Arrow representations of small Avro decimals (`Decimal32` and `Decimal64`).
+//! - `avro_custom_types` — interpret Avro fields annotated with Arrow‑specific logical
+//!   types such as `arrow.duration-nanos`, `arrow.duration-micros`,
+//!   `arrow.duration-millis`, or `arrow.duration-seconds` as Arrow `Duration(TimeUnit)`.
+//! - `canonical_extension_types` — enable support for Arrow [canonical extension types]
+//!   from `arrow-schema` so `arrow-avro` can respect them during Avro↔Arrow mapping.
+//!
+//! **Notes**
+//! - OCF compression codecs apply only to **Object Container Files**; they do not affect Avro
+//!   single object encodings.
+//!
+//! [canonical extension types]: https://arrow.apache.org/docs/format/CanonicalExtensions.html
+//!
+//! [Apache Arrow]: https://arrow.apache.org/
 //! [Apache Avro]: https://avro.apache.org/
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
-#![allow(unused)] // Temporary
 
 /// Core functionality for reading Avro data into Arrow arrays
 ///
 /// Implements the primary reader interface and record decoding logic.
 pub mod reader;
 
-// Avro schema parsing and representation
-//
-// Provides types for parsing and representing Avro schema definitions.
-mod schema;
+/// Core functionality for writing Arrow arrays as Avro data
+///
+/// Implements the primary writer interface and record encoding logic.
+pub mod writer;
+
+/// Avro schema parsing and representation
+///
+/// Provides types for parsing and representing Avro schema definitions.
+pub mod schema;
 
 /// Compression codec implementations for Avro
 ///
@@ -50,8 +195,6 @@ pub mod compression;
 /// Avro data types and Arrow data types.
 pub mod codec;
 
-pub use reader::ReadOptions;
-
 /// Extension trait for AvroField to add Utf8View support
 ///
 /// This trait adds methods for working with Utf8View support to the AvroField struct.
diff --git a/arrow-avro/src/reader/cursor.rs b/arrow-avro/src/reader/cursor.rs
index 4b6a5a4d65db..23d9e503339d 100644
--- a/arrow-avro/src/reader/cursor.rs
+++ b/arrow-avro/src/reader/cursor.rs
@@ -85,7 +85,7 @@ impl<'a> AvroCursor<'a> {
             ArrowError::ParseError("offset overflow reading avro bytes".to_string())
         })?;
 
-        if (self.buf.len() < len) {
+        if self.buf.len() < len {
             return Err(ArrowError::ParseError(
                 "Unexpected EOF reading bytes".to_string(),
             ));
@@ -97,7 +97,7 @@ impl<'a> AvroCursor<'a> {
 
     #[inline]
     pub(crate) fn get_float(&mut self) -> Result<f32, ArrowError> {
-        if (self.buf.len() < 4) {
+        if self.buf.len() < 4 {
             return Err(ArrowError::ParseError(
                 "Unexpected EOF reading float".to_string(),
             ));
@@ -109,7 +109,7 @@ impl<'a> AvroCursor<'a> {
 
     #[inline]
     pub(crate) fn get_double(&mut self) -> Result<f64, ArrowError> {
-        if (self.buf.len() < 8) {
+        if self.buf.len() < 8 {
             return Err(ArrowError::ParseError(
                 "Unexpected EOF reading float".to_string(),
             ));
@@ -118,4 +118,16 @@ impl<'a> AvroCursor<'a> {
         self.buf = &self.buf[8..];
         Ok(ret)
     }
+
+    /// Consume and return the next `n` bytes of the buffer (e.g. an Avro `fixed` value),
+    /// failing with an "Unexpected EOF" parse error when fewer than `n` bytes remain.
+    pub(crate) fn get_fixed(&mut self, n: usize) -> Result<&'a [u8], ArrowError> {
+        if n > self.buf.len() {
+            let msg = "Unexpected EOF reading fixed".to_string();
+            return Err(ArrowError::ParseError(msg));
+        }
+        let (head, tail) = self.buf.split_at(n);
+        self.buf = tail;
+        Ok(head)
+    }
 }
diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs
index 98c285171bf3..aac267f50e9e 100644
--- a/arrow-avro/src/reader/header.rs
+++ b/arrow-avro/src/reader/header.rs
@@ -17,10 +17,31 @@
 
 //! Decoder for [`Header`]
 
-use crate::compression::{CompressionCodec, CODEC_METADATA_KEY};
+use crate::compression::{CODEC_METADATA_KEY, CompressionCodec};
 use crate::reader::vlq::VLQDecoder;
-use crate::schema::{Schema, SCHEMA_METADATA_KEY};
+use crate::schema::{SCHEMA_METADATA_KEY, Schema};
 use arrow_schema::ArrowError;
+use std::io::BufRead;
+
+/// Decode the Avro file header (magic, metadata, sync marker) from the front of `reader`,
+/// returning an "Unexpected EOF" parse error if no complete header could be decoded.
+pub(crate) fn read_header<R: BufRead>(mut reader: R) -> Result<Header, ArrowError> {
+    let mut header_decoder = HeaderDecoder::default();
+    loop {
+        let chunk = reader.fill_buf()?;
+        let available = chunk.len();
+        if available == 0 {
+            break; // underlying reader is exhausted
+        }
+        let consumed = header_decoder.decode(chunk)?;
+        reader.consume(consumed);
+        if consumed != available {
+            break; // decoder stopped before the end of the chunk
+        }
+    }
+    let eof = || ArrowError::ParseError("Unexpected EOF while reading Avro header".to_string());
+    header_decoder.flush().ok_or_else(eof)
+}
 
 #[derive(Debug)]
 enum HeaderDecoderState {
@@ -77,12 +98,13 @@ impl Header {
     /// Returns the [`CompressionCodec`] if any
     pub fn compression(&self) -> Result<Option<CompressionCodec>, ArrowError> {
         let v = self.get(CODEC_METADATA_KEY);
-
         match v {
             None | Some(b"null") => Ok(None),
             Some(b"deflate") => Ok(Some(CompressionCodec::Deflate)),
             Some(b"snappy") => Ok(Some(CompressionCodec::Snappy)),
             Some(b"zstandard") => Ok(Some(CompressionCodec::ZStandard)),
+            Some(b"bzip2") => Ok(Some(CompressionCodec::Bzip2)),
+            Some(b"xz") => Ok(Some(CompressionCodec::Xz)),
             Some(v) => Err(ArrowError::ParseError(format!(
                 "Unrecognized compression codec \'{}\'",
                 String::from_utf8_lossy(v)
@@ -90,8 +112,8 @@ impl Header {
         }
     }
 
-    /// Returns the [`Schema`] if any
-    pub fn schema(&self) -> Result<Option<Schema<'_>>, ArrowError> {
+    /// Returns the `Schema` if any
+    pub(crate) fn schema(&self) -> Result<Option<Schema<'_>>, ArrowError> {
         self.get(SCHEMA_METADATA_KEY)
             .map(|x| {
                 serde_json::from_slice(x).map_err(|e| {
@@ -264,13 +286,16 @@ impl HeaderDecoder {
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::codec::{AvroDataType, AvroField};
+    use crate::codec::AvroField;
     use crate::reader::read_header;
-    use crate::schema::SCHEMA_METADATA_KEY;
+    use crate::schema::{
+        AVRO_NAME_METADATA_KEY, AVRO_ROOT_RECORD_DEFAULT_NAME, SCHEMA_METADATA_KEY,
+    };
     use crate::test_util::arrow_test_data;
     use arrow_schema::{DataType, Field, Fields, TimeUnit};
+    use std::collections::HashMap;
     use std::fs::File;
-    use std::io::{BufRead, BufReader};
+    use std::io::BufReader;
 
     #[test]
     fn test_header_decode() {
@@ -290,7 +315,7 @@ mod test {
 
     fn decode_file(file: &str) -> Header {
         let file = File::open(file).unwrap();
-        read_header(BufReader::with_capacity(100, file)).unwrap()
+        read_header(BufReader::with_capacity(1000, file)).unwrap()
     }
 
     #[test]
@@ -325,6 +350,10 @@ mod test {
                 ])),
                 false
             )
+            .with_metadata(HashMap::from([(
+                AVRO_NAME_METADATA_KEY.to_string(),
+                AVRO_ROOT_RECORD_DEFAULT_NAME.to_string()
+            )]))
         );
 
         assert_eq!(
diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs
index 61e3e8511caa..546650faf568 100644
--- a/arrow-avro/src/reader/mod.rs
+++ b/arrow-avro/src/reader/mod.rs
@@ -15,11 +15,482 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Read Avro data to Arrow
-
-use crate::reader::block::{Block, BlockDecoder};
-use crate::reader::header::{Header, HeaderDecoder};
-use arrow_schema::ArrowError;
+//! Avro reader
+//!
+//! Facilities to read Apache Avro–encoded data into Arrow's `RecordBatch` format.
+//!
+//! ### Limitations
+//!
+//! - **Avro unions with > 127 branches are not supported.**
+//!   When decoding Avro unions to Arrow `UnionArray`, Arrow stores the union
+//!   type identifiers in an **8‑bit signed** buffer (`i8`). This implies a
+//!   practical limit of **127** distinct branch ids. Inputs that resolve to
+//!   more than 127 branches will return an error. If you truly need more,
+//!   model the schema as a **union of unions**, per the Arrow format spec.
+//!
+//!   See: Arrow Columnar Format — Dense Union (“types buffer: 8‑bit signed;
+//!   a union with more than 127 possible types can be modeled as a union of
+//!   unions”).
+//!
+//! This module exposes three layers of the API surface, from highest to lowest-level:
+//!
+//! * [`ReaderBuilder`](crate::reader::ReaderBuilder): configures how Avro is read (batch size, strict union handling,
+//!   string representation, reader schema, etc.) and produces either:
+//!   * a `Reader` for **Avro Object Container Files (OCF)** read from any `BufRead`, or
+//!   * a low-level `Decoder` for **single‑object encoded** Avro bytes and Confluent
+//!     **Schema Registry** framed messages.
+//! * [`Reader`](crate::reader::Reader): a convenient, synchronous iterator over `RecordBatch` decoded from an OCF
+//!   input. Implements [`Iterator<Item = Result<RecordBatch, ArrowError>>`] and
+//!   `RecordBatchReader`.
+//! * [`Decoder`](crate::reader::Decoder): a push‑based row decoder that consumes SOE framed Avro bytes and yields ready
+//!   `RecordBatch` values when batches fill. This is suitable for integrating with async
+//!   byte streams, network protocols, or other custom data sources.
+//!
+//! ## Encodings and when to use which type
+//!
+//! * **Object Container File (OCF)**: A self‑describing file format with a header containing
+//!   the writer schema, optional compression codec, and a sync marker, followed by one or
+//!   more data blocks. Use `Reader` for this format. See the Avro 1.11.1 specification
+//!   (“Object Container Files”). <https://avro.apache.org/docs/1.11.1/specification/#object-container-files>
+//! * **Single‑Object Encoding**: A stream‑friendly framing that prefixes each record body with
+//!   the 2‑byte marker `0xC3 0x01` followed by the **8‑byte little‑endian CRC‑64‑AVRO Rabin
+//!   fingerprint** of the writer schema, then the Avro binary body. Use `Decoder` with a
+//!   populated `SchemaStore` to resolve fingerprints to full schemas.
+//!   See “Single object encoding” in the Avro 1.11.1 spec.
+//!   <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+//! * **Confluent Schema Registry wire format**: A 1‑byte magic `0x00`, a **4‑byte big‑endian**
+//!   schema ID, then the Avro‑encoded body. Use `Decoder` with a `SchemaStore` configured
+//!   for `FingerprintAlgorithm::Id` and entries keyed by `Fingerprint::Id`. See
+//!   Confluent’s “Wire format” documentation.
+//!   <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+//! * **Apicurio Schema Registry wire format**: A 1‑byte magic `0x00`, an **8‑byte big‑endian**
+//!   global schema ID, then the Avro‑encoded body. Use `Decoder` with a `SchemaStore` configured
+//!   for `FingerprintAlgorithm::Id64` and entries keyed by `Fingerprint::Id64`. See
+//!   Apicurio’s “Avro SerDe” documentation.
+//!   <https://www.apicur.io/registry/docs/apicurio-registry/1.3.3.Final/getting-started/assembly-using-kafka-client-serdes.html#registry-serdes-types-avro-registry>
+//!
+//! ## Basic file usage (OCF)
+//!
+//! Use `ReaderBuilder::build` to construct a `Reader` from any `BufRead`. The doctest below
+//! creates a tiny OCF in memory using `AvroWriter` and then reads it back.
+//!
+//! ```
+//! use std::io::Cursor;
+//! use std::sync::Arc;
+//! use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::writer::AvroWriter;
+//! use arrow_avro::reader::ReaderBuilder;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Build a minimal Arrow schema and batch
+//! let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(schema.clone()),
+//!     vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
+//! )?;
+//!
+//! // Write an Avro OCF to memory
+//! let buffer: Vec<u8> = Vec::new();
+//! let mut writer = AvroWriter::new(buffer, schema.clone())?;
+//! writer.write(&batch)?;
+//! writer.finish()?;
+//! let bytes = writer.into_inner();
+//!
+//! // Read it back with ReaderBuilder
+//! let mut reader = ReaderBuilder::new().build(Cursor::new(bytes))?;
+//! let out = reader.next().unwrap()?;
+//! assert_eq!(out.num_rows(), 3);
+//! # Ok(()) }
+//! ```
+//!
+//! ## Streaming usage (single‑object / Confluent / Apicurio)
+//!
+//! The `Decoder` lets you integrate Avro decoding with **any** source of bytes by
+//! periodically calling `Decoder::decode` with new data and calling `Decoder::flush`
+//! to get a `RecordBatch` once at least one row is complete.
+//!
+//! The example below shows how to decode from an arbitrary stream of `bytes::Bytes` using
+//! `futures` utilities. Note: this is illustrative and keeps a single in‑memory `Bytes`
+//! buffer for simplicity—real applications typically maintain a rolling buffer.
+//!
+//! ```
+//! use bytes::{Buf, Bytes};
+//! use futures::{Stream, StreamExt};
+//! use std::task::{Poll, ready};
+//! use arrow_array::RecordBatch;
+//! use arrow_schema::ArrowError;
+//! use arrow_avro::reader::Decoder;
+//!
+//! /// Decode a stream of Avro-framed bytes into RecordBatch values.
+//! fn decode_stream<S: Stream<Item = Bytes> + Unpin>(
+//!     mut decoder: Decoder,
+//!     mut input: S,
+//! ) -> impl Stream<Item = Result<RecordBatch, ArrowError>> {
+//!     let mut buffered = Bytes::new();
+//!     futures::stream::poll_fn(move |cx| {
+//!         loop {
+//!             if buffered.is_empty() {
+//!                 buffered = match ready!(input.poll_next_unpin(cx)) {
+//!                     Some(b) => b,
+//!                     None => break, // EOF
+//!                 };
+//!             }
+//!             // Feed as much as possible
+//!             let decoded = match decoder.decode(buffered.as_ref()) {
+//!                 Ok(n) => n,
+//!                 Err(e) => return Poll::Ready(Some(Err(e))),
+//!             };
+//!             let read = buffered.len();
+//!             buffered.advance(decoded);
+//!             if decoded != read {
+//!                 // decoder made partial progress; request more bytes
+//!                 break
+//!             }
+//!         }
+//!         // Return a batch if one or more rows are complete
+//!         Poll::Ready(decoder.flush().transpose())
+//!     })
+//! }
+//! ```
+//!
+//! ### Building and using a `Decoder` for **single‑object encoding** (Rabin fingerprints)
+//!
+//! The doctest below **writes** a single‑object framed record using the Avro writer
+//! (no manual varints) for the writer schema
+//! (`{"type":"record","name":"User","fields":[{"name":"id","type":"long"}]}`)
+//! and then decodes it into a `RecordBatch`.
+//!
+//! ```
+//! use std::sync::Arc;
+//! use std::collections::HashMap;
+//! use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::schema::{AvroSchema, SchemaStore, SCHEMA_METADATA_KEY, FingerprintStrategy};
+//! use arrow_avro::writer::{WriterBuilder, format::AvroSoeFormat};
+//! use arrow_avro::reader::ReaderBuilder;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Register the writer schema (Rabin fingerprint by default).
+//! let mut store = SchemaStore::new();
+//! let avro_schema = AvroSchema::new(r#"{"type":"record","name":"User","fields":[
+//!   {"name":"id","type":"long"}]}"#.to_string());
+//! let _fp = store.register(avro_schema.clone())?;
+//!
+//! // Create a single-object framed record { id: 42 } with the Avro writer.
+//! let mut md = HashMap::new();
+//! md.insert(SCHEMA_METADATA_KEY.to_string(), avro_schema.json_string.clone());
+//! let arrow = Schema::new_with_metadata(vec![Field::new("id", DataType::Int64, false)], md);
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(arrow.clone()),
+//!     vec![Arc::new(Int64Array::from(vec![42])) as ArrayRef],
+//! )?;
+//! let mut w = WriterBuilder::new(arrow)
+//!     .with_fingerprint_strategy(FingerprintStrategy::Rabin) // SOE prefix
+//!     .build::<_, AvroSoeFormat>(Vec::new())?;
+//! w.write(&batch)?;
+//! w.finish()?;
+//! let frame = w.into_inner(); // C3 01 + fp + Avro body
+//!
+//! // Decode with a `Decoder`
+//! let mut dec = ReaderBuilder::new()
+//!   .with_writer_schema_store(store)
+//!   .with_batch_size(1024)
+//!   .build_decoder()?;
+//!
+//! dec.decode(&frame)?;
+//! let out = dec.flush()?.expect("one batch");
+//! assert_eq!(out.num_rows(), 1);
+//! # Ok(()) }
+//! ```
+//!
+//! See Avro 1.11.1 “Single object encoding” for details of the 2‑byte marker
+//! and little‑endian CRC‑64‑AVRO fingerprint:
+//! <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+//!
+//! ### Building and using a `Decoder` for **Confluent Schema Registry** framing
+//!
+//! The Confluent wire format is: 1‑byte magic `0x00`, then a **4‑byte big‑endian** schema ID,
+//! then the Avro body. The doctest below crafts two messages for the same schema ID and
+//! decodes them into a single `RecordBatch` with two rows.
+//!
+//! ```
+//! use std::sync::Arc;
+//! use std::collections::HashMap;
+//! use arrow_array::{ArrayRef, Int64Array, StringArray, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::schema::{AvroSchema, SchemaStore, Fingerprint, FingerprintAlgorithm, SCHEMA_METADATA_KEY, FingerprintStrategy};
+//! use arrow_avro::writer::{WriterBuilder, format::AvroSoeFormat};
+//! use arrow_avro::reader::ReaderBuilder;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Set up a store keyed by numeric IDs (Confluent).
+//! let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+//! let schema_id = 7u32;
+//! let avro_schema = AvroSchema::new(r#"{"type":"record","name":"User","fields":[
+//!   {"name":"id","type":"long"}, {"name":"name","type":"string"}]}"#.to_string());
+//! store.set(Fingerprint::Id(schema_id), avro_schema.clone())?;
+//!
+//! // Write two Confluent-framed messages {id:1,name:"a"} and {id:2,name:"b"}.
+//! fn msg(id: i64, name: &str, schema: &AvroSchema, schema_id: u32) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
+//!     let mut md = HashMap::new();
+//!     md.insert(SCHEMA_METADATA_KEY.to_string(), schema.json_string.clone());
+//!     let arrow = Schema::new_with_metadata(
+//!         vec![Field::new("id", DataType::Int64, false), Field::new("name", DataType::Utf8, false)],
+//!         md,
+//!     );
+//!     let batch = RecordBatch::try_new(
+//!         Arc::new(arrow.clone()),
+//!         vec![
+//!           Arc::new(Int64Array::from(vec![id])) as ArrayRef,
+//!           Arc::new(StringArray::from(vec![name])) as ArrayRef,
+//!         ],
+//!     )?;
+//!     let mut w = WriterBuilder::new(arrow)
+//!         .with_fingerprint_strategy(FingerprintStrategy::Id(schema_id)) // 0x00 + ID + body
+//!         .build::<_, AvroSoeFormat>(Vec::new())?;
+//!     w.write(&batch)?; w.finish()?;
+//!     Ok(w.into_inner())
+//! }
+//! let m1 = msg(1, "a", &avro_schema, schema_id)?;
+//! let m2 = msg(2, "b", &avro_schema, schema_id)?;
+//!
+//! // Decode both into a single batch.
+//! let mut dec = ReaderBuilder::new()
+//!   .with_writer_schema_store(store)
+//!   .with_batch_size(1024)
+//!   .build_decoder()?;
+//! dec.decode(&m1)?;
+//! dec.decode(&m2)?;
+//! let batch = dec.flush()?.expect("batch");
+//! assert_eq!(batch.num_rows(), 2);
+//! # Ok(()) }
+//! ```
+//!
+//! See Confluent’s “Wire format” notes: magic byte `0x00`, 4‑byte **big‑endian** schema ID,
+//! then the Avro‑encoded payload.
+//! <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+//!
+//! ## Schema resolution (reader vs. writer schemas)
+//!
+//! Avro supports resolving data written with one schema (“writer”) into another (“reader”)
+//! using rules like **field aliases**, **default values**, and **numeric promotions**.
+//! In practice this lets you evolve schemas over time while remaining compatible with old data.
+//!
+//! *Spec background:* See Avro’s **Schema Resolution** (aliases, defaults) and the Confluent
+//! **Wire format** (magic `0x00` + big‑endian schema id + Avro body).
+//! <https://avro.apache.org/docs/1.11.1/specification/#schema-resolution>
+//! <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+//!
+//! ### OCF example: rename a field and add a default via a reader schema
+//!
+//! Below we write an OCF with a *writer schema* having fields `id: long`, `name: string`.
+//! We then read it with a *reader schema* that:
+//! - **renames** `name` to `full_name` via `aliases`, and
+//! - **adds** `is_active: boolean` with a **default** value `true`.
+//!
+//! ```
+//! use std::io::Cursor;
+//! use std::sync::Arc;
+//! use arrow_array::{ArrayRef, Int64Array, StringArray, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//! use arrow_avro::writer::AvroWriter;
+//! use arrow_avro::reader::ReaderBuilder;
+//! use arrow_avro::schema::AvroSchema;
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! // Writer (past version): { id: long, name: string }
+//! let writer_arrow = Schema::new(vec![
+//!     Field::new("id", DataType::Int64, false),
+//!     Field::new("name", DataType::Utf8, false),
+//! ]);
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(writer_arrow.clone()),
+//!     vec![
+//!         Arc::new(Int64Array::from(vec![1, 2])) as ArrayRef,
+//!         Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
+//!     ],
+//! )?;
+//!
+//! // Write an OCF entirely in memory
+//! let mut w = AvroWriter::new(Vec::<u8>::new(), writer_arrow)?;
+//! w.write(&batch)?;
+//! w.finish()?;
+//! let bytes = w.into_inner();
+//!
+//! // Reader (current version):
+//! //  - record name "topLevelRecord" matches the crate's default for OCF
+//! //  - rename `name` -> `full_name` using aliases (optional)
+//! let reader_json = r#"
+//! {
+//!   "type": "record",
+//!   "name": "topLevelRecord",
+//!   "fields": [
+//!     { "name": "id", "type": "long" },
+//!     { "name": "full_name", "type": ["null","string"], "aliases": ["name"], "default": null },
+//!     { "name": "is_active", "type": "boolean", "default": true }
+//!   ]
+//! }"#;
+//!
+//! let mut reader = ReaderBuilder::new()
+//!   .with_reader_schema(AvroSchema::new(reader_json.to_string()))
+//!   .build(Cursor::new(bytes))?;
+//!
+//! let out = reader.next().unwrap()?;
+//! assert_eq!(out.num_rows(), 2);
+//! # Ok(()) }
+//! ```
+//!
+//! ### Confluent single‑object example: resolve *past* writer versions to the topic’s **current** reader schema
+//!
+//! In this scenario, the **reader schema** is the topic’s *current* schema, while the two
+//! **writer schemas** registered under Confluent IDs **0** and **1** represent *past versions*.
+//! The decoder uses the reader schema to resolve both versions.
+//!
+//! ```
+//! use std::sync::Arc;
+//! use std::collections::HashMap;
+//! use arrow_avro::reader::ReaderBuilder;
+//! use arrow_avro::schema::{
+//!     AvroSchema, Fingerprint, FingerprintAlgorithm, SchemaStore,
+//!     SCHEMA_METADATA_KEY, FingerprintStrategy,
+//! };
+//! use arrow_array::{ArrayRef, Int32Array, Int64Array, StringArray, RecordBatch};
+//! use arrow_schema::{DataType, Field, Schema};
+//!
+//! fn main() -> Result<(), Box<dyn std::error::Error>> {
+//!     // Reader: current topic schema (no reader-added fields)
+//!     //   {"type":"record","name":"User","fields":[
+//!     //     {"name":"id","type":"long"},
+//!     //     {"name":"name","type":"string"}]}
+//!     let reader_schema = AvroSchema::new(
+//!         r#"{"type":"record","name":"User",
+//!             "fields":[{"name":"id","type":"long"},{"name":"name","type":"string"}]}"#
+//!             .to_string(),
+//!     );
+//!
+//!     // Register two *writer* schemas under Confluent IDs 0 and 1
+//!     let writer_v0 = AvroSchema::new(
+//!         r#"{"type":"record","name":"User",
+//!             "fields":[{"name":"id","type":"int"},{"name":"name","type":"string"}]}"#
+//!             .to_string(),
+//!     );
+//!     let writer_v1 = AvroSchema::new(
+//!         r#"{"type":"record","name":"User",
+//!             "fields":[{"name":"id","type":"long"},{"name":"name","type":"string"},
+//!                       {"name":"email","type":["null","string"],"default":null}]}"#
+//!             .to_string(),
+//!     );
+//!
+//!     let id_v0: u32 = 0;
+//!     let id_v1: u32 = 1;
+//!
+//!     let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id); // integer IDs
+//!     store.set(Fingerprint::Id(id_v0), writer_v0.clone())?;
+//!     store.set(Fingerprint::Id(id_v1), writer_v1.clone())?;
+//!
+//!     // Write two Confluent-framed messages using each writer version
+//!     // frame0: writer v0 body {id:1001_i32, name:"v0-alice"}
+//!     let mut md0 = HashMap::new();
+//!     md0.insert(SCHEMA_METADATA_KEY.to_string(), writer_v0.json_string.clone());
+//!     let arrow0 = Schema::new_with_metadata(
+//!         vec![Field::new("id", DataType::Int32, false),
+//!              Field::new("name", DataType::Utf8, false)], md0);
+//!     let batch0 = RecordBatch::try_new(
+//!         Arc::new(arrow0.clone()),
+//!         vec![Arc::new(Int32Array::from(vec![1001])) as ArrayRef,
+//!              Arc::new(StringArray::from(vec!["v0-alice"])) as ArrayRef])?;
+//!     let mut w0 = arrow_avro::writer::WriterBuilder::new(arrow0)
+//!         .with_fingerprint_strategy(FingerprintStrategy::Id(id_v0))
+//!         .build::<_, arrow_avro::writer::format::AvroSoeFormat>(Vec::new())?;
+//!     w0.write(&batch0)?; w0.finish()?;
+//!     let frame0 = w0.into_inner(); // 0x00 + id_v0 + body
+//!
+//!     // frame1: writer v1 body {id:2002_i64, name:"v1-bob", email: Some("bob@example.com")}
+//!     let mut md1 = HashMap::new();
+//!     md1.insert(SCHEMA_METADATA_KEY.to_string(), writer_v1.json_string.clone());
+//!     let arrow1 = Schema::new_with_metadata(
+//!         vec![Field::new("id", DataType::Int64, false),
+//!              Field::new("name", DataType::Utf8, false),
+//!              Field::new("email", DataType::Utf8, true)], md1);
+//!     let batch1 = RecordBatch::try_new(
+//!         Arc::new(arrow1.clone()),
+//!         vec![Arc::new(Int64Array::from(vec![2002])) as ArrayRef,
+//!              Arc::new(StringArray::from(vec!["v1-bob"])) as ArrayRef,
+//!              Arc::new(StringArray::from(vec![Some("bob@example.com")])) as ArrayRef])?;
+//!     let mut w1 = arrow_avro::writer::WriterBuilder::new(arrow1)
+//!         .with_fingerprint_strategy(FingerprintStrategy::Id(id_v1))
+//!         .build::<_, arrow_avro::writer::format::AvroSoeFormat>(Vec::new())?;
+//!     w1.write(&batch1)?; w1.finish()?;
+//!     let frame1 = w1.into_inner(); // 0x00 + id_v1 + body
+//!
+//!     // Build a streaming Decoder that understands Confluent framing
+//!     let mut decoder = ReaderBuilder::new()
+//!         .with_reader_schema(reader_schema)
+//!         .with_writer_schema_store(store)
+//!         .with_batch_size(8) // small demo batches
+//!         .build_decoder()?;
+//!
+//!     // Decode each whole frame, then drain completed rows with flush()
+//!     let mut total_rows = 0usize;
+//!
+//!     let consumed0 = decoder.decode(&frame0)?;
+//!     assert_eq!(consumed0, frame0.len(), "decoder must consume the whole frame");
+//!     while let Some(batch) = decoder.flush()? { total_rows += batch.num_rows(); }
+//!
+//!     let consumed1 = decoder.decode(&frame1)?;
+//!     assert_eq!(consumed1, frame1.len(), "decoder must consume the whole frame");
+//!     while let Some(batch) = decoder.flush()? { total_rows += batch.num_rows(); }
+//!
+//!     // We sent 2 records so we should get 2 rows (possibly one per flush)
+//!     assert_eq!(total_rows, 2);
+//!     Ok(())
+//! }
+//! ```
+//!
+//! ## Schema evolution and batch boundaries
+//!
+//! `Decoder` supports mid‑stream schema changes when the input framing carries a schema
+//! fingerprint (single‑object or Confluent). When a new fingerprint is observed:
+//!
+//! * If the current `RecordBatch` is **empty**, the decoder switches to the new schema
+//!   immediately.
+//! * If not, the decoder finishes the current batch first and only then switches.
+//!
+//! Consequently, the schema of batches produced by `Decoder::flush` may change over time,
+//! and `Decoder` intentionally does **not** implement `RecordBatchReader`. In contrast,
+//! `Reader` (OCF) has a single writer schema for the entire file and therefore implements
+//! `RecordBatchReader`.
+//!
+//! ## Performance & memory
+//!
+//! * `batch_size` controls the maximum number of rows per `RecordBatch`. Larger batches
+//!   amortize per‑batch overhead; smaller batches reduce peak memory usage and latency.
+//! * When `utf8_view` is enabled, string columns use Arrow’s `StringViewArray`, which can
+//!   reduce allocations for short strings.
+//! * For OCF, blocks may be compressed; `Reader` will decompress using the codec specified
+//!   in the file header and feed uncompressed bytes to the row `Decoder`.
+//!
+//! ## Error handling
+//!
+//! * Incomplete inputs return parse errors with "Unexpected EOF"; callers typically provide
+//!   more bytes and try again.
+//! * If a fingerprint is unknown to the provided `SchemaStore`, decoding fails with a
+//!   descriptive error. Populate the store up front to avoid this.
+//!
+//! ---
+use crate::codec::AvroFieldBuilder;
+use crate::reader::header::read_header;
+use crate::schema::{
+    AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, SINGLE_OBJECT_MAGIC, Schema,
+    SchemaStore,
+};
+use arrow_array::{RecordBatch, RecordBatchReader};
+use arrow_schema::{ArrowError, SchemaRef};
+use block::BlockDecoder;
+use header::Header;
+use indexmap::IndexMap;
+use record::RecordDecoder;
 use std::io::BufRead;
 
 mod block;
@@ -28,259 +499,2284 @@ mod header;
 mod record;
 mod vlq;
 
-/// Configuration options for reading Avro data into Arrow arrays
+fn is_incomplete_data(err: &ArrowError) -> bool {
+    matches!(
+        err,
+        ArrowError::ParseError(msg)
+            if msg.contains("Unexpected EOF")
+    )
+}
+
+/// A low‑level, push‑based decoder from Avro bytes to Arrow `RecordBatch`.
+///
+/// `Decoder` is designed for **streaming** scenarios:
+///
+/// * You *feed* freshly received bytes using `Self::decode`, potentially multiple times,
+///   until at least one row is complete.
+/// * You then *drain* completed rows with `Self::flush`, which yields a `RecordBatch`
+///   if any rows were finished since the last flush.
+///
+/// Unlike `Reader`, which is specialized for Avro **Object Container Files**, `Decoder`
+/// understands **framed single‑object** inputs and **Confluent Schema Registry** messages,
+/// switching schemas mid‑stream when the framing indicates a new fingerprint.
+///
+/// ### Supported prefixes
+///
+/// On each new row boundary, `Decoder` tries to match one of the following "prefixes":
+///
+/// * **Single‑Object encoding**: magic `0xC3 0x01` + schema fingerprint (length depends on
+///   the configured `FingerprintAlgorithm`); see `SINGLE_OBJECT_MAGIC`.
+/// * **Confluent wire format**: magic `0x00` + 4‑byte big‑endian schema id; see
+///   `CONFLUENT_MAGIC`.
+///
+/// The active fingerprint determines which cached row decoder is used to decode the following
+/// record body bytes.
+///
+/// ### Schema switching semantics
+///
+/// When a new fingerprint is observed:
+///
+/// * If the current batch is empty, the decoder switches immediately;
+/// * Otherwise, the current batch is finalized on the next `flush` and only then
+///   does the decoder switch to the new schema. This guarantees that a single `RecordBatch`
+///   never mixes rows with different schemas.
+///
+/// ### Examples
+///
+/// Build and use a `Decoder` for single‑object encoding:
+///
+/// ```
+/// use arrow_avro::schema::{AvroSchema, SchemaStore};
+/// use arrow_avro::reader::ReaderBuilder;
+///
+/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// // Use a record schema at the top level so we can build an Arrow RecordBatch
+/// let mut store = SchemaStore::new(); // Rabin fingerprinting by default
+/// let avro = AvroSchema::new(
+///     r#"{"type":"record","name":"E","fields":[{"name":"x","type":"long"}]}"#.to_string()
+/// );
+/// let fp = store.register(avro)?;
+///
+/// // --- Hidden: write a single-object framed row {x:7} ---
+/// # use std::sync::Arc;
+/// # use std::collections::HashMap;
+/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # use arrow_avro::schema::{SCHEMA_METADATA_KEY, FingerprintStrategy};
+/// # use arrow_avro::writer::{WriterBuilder, format::AvroSoeFormat};
+/// # let mut md = HashMap::new();
+/// # md.insert(SCHEMA_METADATA_KEY.to_string(),
+/// #     r#"{"type":"record","name":"E","fields":[{"name":"x","type":"long"}]}"#.to_string());
+/// # let arrow = Schema::new_with_metadata(vec![Field::new("x", DataType::Int64, false)], md);
+/// # let batch = RecordBatch::try_new(Arc::new(arrow.clone()), vec![Arc::new(Int64Array::from(vec![7])) as ArrayRef])?;
+/// # let mut w = WriterBuilder::new(arrow)
+/// #     .with_fingerprint_strategy(fp.into())
+/// #     .build::<_, AvroSoeFormat>(Vec::new())?;
+/// # w.write(&batch)?; w.finish()?; let frame = w.into_inner();
+///
+/// let mut decoder = ReaderBuilder::new()
+///     .with_writer_schema_store(store)
+///     .with_batch_size(16)
+///     .build_decoder()?;
+///
+/// # decoder.decode(&frame)?;
+/// let batch = decoder.flush()?.expect("one row");
+/// assert_eq!(batch.num_rows(), 1);
+/// # Ok(()) }
+/// ```
+///
+/// *Background:* Avro's single‑object encoding is defined as `0xC3 0x01` + 8‑byte
+/// little‑endian CRC‑64‑AVRO fingerprint of the **writer schema** + Avro binary body.
+/// See the Avro 1.11.1 spec for details. <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+///
+/// Build and use a `Decoder` for Confluent Registry messages:
+///
+/// ```
+/// use arrow_avro::schema::{AvroSchema, SchemaStore, Fingerprint, FingerprintAlgorithm};
+/// use arrow_avro::reader::ReaderBuilder;
+///
+/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+/// store.set(Fingerprint::Id(1234), AvroSchema::new(r#"{"type":"record","name":"E","fields":[{"name":"x","type":"long"}]}"#.to_string()))?;
+///
+/// // --- Hidden: encode two Confluent-framed messages {x:1} and {x:2} ---
+/// # use std::sync::Arc;
+/// # use std::collections::HashMap;
+/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # use arrow_avro::schema::{SCHEMA_METADATA_KEY, FingerprintStrategy};
+/// # use arrow_avro::writer::{WriterBuilder, format::AvroSoeFormat};
+/// # fn msg(x: i64) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
+/// #   let mut md = HashMap::new();
+/// #   md.insert(SCHEMA_METADATA_KEY.to_string(),
+/// #     r#"{"type":"record","name":"E","fields":[{"name":"x","type":"long"}]}"#.to_string());
+/// #   let arrow = Schema::new_with_metadata(vec![Field::new("x", DataType::Int64, false)], md);
+/// #   let batch = RecordBatch::try_new(Arc::new(arrow.clone()), vec![Arc::new(Int64Array::from(vec![x])) as ArrayRef])?;
+/// #   let mut w = WriterBuilder::new(arrow)
+/// #       .with_fingerprint_strategy(FingerprintStrategy::Id(1234))
+/// #       .build::<_, AvroSoeFormat>(Vec::new())?;
+/// #   w.write(&batch)?; w.finish()?; Ok(w.into_inner())
+/// # }
+/// # let m1 = msg(1)?;
+/// # let m2 = msg(2)?;
+///
+/// let mut decoder = ReaderBuilder::new()
+///     .with_writer_schema_store(store)
+///     .build_decoder()?;
+/// # decoder.decode(&m1)?;
+/// # decoder.decode(&m2)?;
+/// let batch = decoder.flush()?.expect("two rows");
+/// assert_eq!(batch.num_rows(), 2);
+/// # Ok(()) }
+/// ```
+#[derive(Debug)]
+pub struct Decoder {
+    active_decoder: RecordDecoder,
+    active_fingerprint: Option<Fingerprint>,
+    batch_size: usize,
+    remaining_capacity: usize,
+    cache: IndexMap<Fingerprint, RecordDecoder>,
+    fingerprint_algorithm: FingerprintAlgorithm,
+    pending_schema: Option<(Fingerprint, RecordDecoder)>,
+    awaiting_body: bool,
+}
+
+impl Decoder {
+    /// Returns the Arrow schema for the rows decoded by this decoder.
+    ///
+    /// **Note:** With single‑object or Confluent framing, the schema may change
+    /// at a row boundary when the input indicates a new fingerprint.
+    pub fn schema(&self) -> SchemaRef {
+        self.active_decoder.schema().clone()
+    }
+
+    /// Returns the configured maximum number of rows per batch.
+    pub fn batch_size(&self) -> usize {
+        self.batch_size
+    }
+
+    /// Feed a chunk of bytes into the decoder.
+    ///
+    /// This will:
+    ///
+    /// * Decode at most `Self::batch_size` rows;
+    /// * Return the number of input bytes **consumed** from `data` (which may be 0 if more
+    ///   bytes are required, or less than `data.len()` if a prefix/body straddles the
+    ///   chunk boundary);
+    /// * Defer producing a `RecordBatch` until you call `Self::flush`.
+    ///
+    /// # Returns
+    /// The number of bytes consumed from `data`.
+    ///
+    /// # Errors
+    /// Returns an error if:
+    ///
+    /// * The input indicates an unknown fingerprint (not present in the provided
+    ///   `SchemaStore`);
+    /// * The Avro body is malformed;
+    /// * A strict‑mode union rule is violated (see `ReaderBuilder::with_strict_mode`).
+    pub fn decode(&mut self, data: &[u8]) -> Result<usize, ArrowError> {
+        let mut total_consumed = 0usize;
+        while total_consumed < data.len() && self.remaining_capacity > 0 {
+            if self.awaiting_body {
+                match self.active_decoder.decode(&data[total_consumed..], 1) {
+                    Ok(n) => {
+                        self.remaining_capacity -= 1;
+                        total_consumed += n;
+                        self.awaiting_body = false;
+                        continue;
+                    }
+                    Err(ref e) if is_incomplete_data(e) => break,
+                    err => return err,
+                };
+            }
+            match self.handle_prefix(&data[total_consumed..])? {
+                Some(0) => break, // Insufficient bytes
+                Some(n) => {
+                    total_consumed += n;
+                    self.apply_pending_schema_if_batch_empty();
+                    self.awaiting_body = true;
+                }
+                None => {
+                    return Err(ArrowError::ParseError(
+                        "Missing magic bytes and fingerprint".to_string(),
+                    ));
+                }
+            }
+        }
+        Ok(total_consumed)
+    }
+
+    // Attempt to handle a prefix at the current position.
+    // * Ok(None) – buffer does not start with the prefix.
+    // * Ok(Some(0)) – prefix detected, but the buffer is too short; caller should await more bytes.
+    // * Ok(Some(n)) – consumed `n > 0` bytes of a complete prefix (magic and fingerprint).
+    fn handle_prefix(&mut self, buf: &[u8]) -> Result<Option<usize>, ArrowError> {
+        match self.fingerprint_algorithm {
+            FingerprintAlgorithm::Rabin => {
+                self.handle_prefix_common(buf, &SINGLE_OBJECT_MAGIC, |bytes| {
+                    Fingerprint::Rabin(u64::from_le_bytes(bytes))
+                })
+            }
+            FingerprintAlgorithm::Id => self.handle_prefix_common(buf, &CONFLUENT_MAGIC, |bytes| {
+                Fingerprint::Id(u32::from_be_bytes(bytes))
+            }),
+            FingerprintAlgorithm::Id64 => {
+                self.handle_prefix_common(buf, &CONFLUENT_MAGIC, |bytes| {
+                    Fingerprint::Id64(u64::from_be_bytes(bytes))
+                })
+            }
+            #[cfg(feature = "md5")]
+            FingerprintAlgorithm::MD5 => {
+                self.handle_prefix_common(buf, &SINGLE_OBJECT_MAGIC, |bytes| {
+                    Fingerprint::MD5(bytes)
+                })
+            }
+            #[cfg(feature = "sha256")]
+            FingerprintAlgorithm::SHA256 => {
+                self.handle_prefix_common(buf, &SINGLE_OBJECT_MAGIC, |bytes| {
+                    Fingerprint::SHA256(bytes)
+                })
+            }
+        }
+    }
+
+    /// This method checks for the provided `magic` bytes at the start of `buf` and, if present,
+    /// attempts to read the following fingerprint of `N` bytes, converting it to a
+    /// `Fingerprint` using `fingerprint_from`.
+    fn handle_prefix_common<const MAGIC_LEN: usize, const N: usize>(
+        &mut self,
+        buf: &[u8],
+        magic: &[u8; MAGIC_LEN],
+        fingerprint_from: impl FnOnce([u8; N]) -> Fingerprint,
+    ) -> Result<Option<usize>, ArrowError> {
+        // Need at least the magic bytes to decide
+        // 2 bytes for Avro Spec and 1 byte for Confluent Wire Protocol.
+        if buf.len() < MAGIC_LEN {
+            return Ok(Some(0));
+        }
+        // Bail out early if the magic does not match.
+        if &buf[..MAGIC_LEN] != magic {
+            return Ok(None);
+        }
+        // Try to parse the fingerprint that follows the magic.
+        let consumed_fp = self.handle_fingerprint(&buf[MAGIC_LEN..], fingerprint_from)?;
+        // Convert the inner result into a “bytes consumed” count.
+        // NOTE: Incomplete fingerprint consumes no bytes.
+        Ok(Some(consumed_fp.map_or(0, |n| n + MAGIC_LEN)))
+    }
+
+    // Attempts to read and install a new fingerprint of `N` bytes.
+    //
+    // * Ok(None) – insufficient bytes (`buf.len() < N`).
+    // * Ok(Some(N)) – fingerprint consumed (always `N`).
+    fn handle_fingerprint<const N: usize>(
+        &mut self,
+        buf: &[u8],
+        fingerprint_from: impl FnOnce([u8; N]) -> Fingerprint,
+    ) -> Result<Option<usize>, ArrowError> {
+        // Need enough bytes to get fingerprint (next N bytes)
+        let Some(fingerprint_bytes) = buf.get(..N) else {
+            return Ok(None); // insufficient bytes
+        };
+        // Cannot fail: `get(..N)` above returned a slice of exactly `N` bytes.
+        let new_fingerprint = fingerprint_from(fingerprint_bytes.try_into().unwrap());
+        // If the fingerprint indicates a schema change, prepare to switch decoders.
+        if self.active_fingerprint != Some(new_fingerprint) {
+            let Some(new_decoder) = self.cache.shift_remove(&new_fingerprint) else {
+                return Err(ArrowError::ParseError(format!(
+                    "Unknown fingerprint: {new_fingerprint:?}"
+                )));
+            };
+            self.pending_schema = Some((new_fingerprint, new_decoder));
+            // If there are already decoded rows, we must flush them first.
+            // Setting `remaining_capacity` to 0 stops `decode` until the caller calls `flush`.
+            if self.remaining_capacity < self.batch_size {
+                self.remaining_capacity = 0;
+            }
+        }
+        Ok(Some(N))
+    }
+
+    fn apply_pending_schema(&mut self) {
+        if let Some((new_fingerprint, new_decoder)) = self.pending_schema.take() {
+            if let Some(old_fingerprint) = self.active_fingerprint.replace(new_fingerprint) {
+                let old_decoder = std::mem::replace(&mut self.active_decoder, new_decoder);
+                self.cache.shift_remove(&old_fingerprint);
+                self.cache.insert(old_fingerprint, old_decoder);
+            } else {
+                self.active_decoder = new_decoder;
+            }
+        }
+    }
+
+    fn apply_pending_schema_if_batch_empty(&mut self) {
+        if self.batch_is_empty() {
+            self.apply_pending_schema();
+        }
+    }
+
+    fn flush_and_reset(&mut self) -> Result<Option<RecordBatch>, ArrowError> {
+        if self.batch_is_empty() {
+            return Ok(None);
+        }
+        let batch = self.active_decoder.flush()?;
+        self.remaining_capacity = self.batch_size;
+        Ok(Some(batch))
+    }
+
+    /// Produce a `RecordBatch` if at least one row is fully decoded, returning
+    /// `Ok(None)` if no new rows are available.
+    ///
+    /// If a schema change was detected while decoding rows for the current batch, the
+    /// schema switch is applied **after** flushing this batch, so the **next** batch
+    /// (if any) may have a different schema.
+    pub fn flush(&mut self) -> Result<Option<RecordBatch>, ArrowError> {
+        // We must flush the active decoder before switching to the pending one.
+        let batch = self.flush_and_reset();
+        self.apply_pending_schema();
+        batch
+    }
+
+    /// Returns the number of rows that can be added to this decoder before it is full.
+    pub fn capacity(&self) -> usize {
+        self.remaining_capacity
+    }
+
+    /// Returns true if the decoder has reached its capacity for the current batch.
+    pub fn batch_is_full(&self) -> bool {
+        self.remaining_capacity == 0
+    }
+
+    /// Returns true if no rows are buffered for the current batch (i.e., the batch is empty).
+    pub fn batch_is_empty(&self) -> bool {
+        self.remaining_capacity == self.batch_size
+    }
+
+    // Decode `min(count, remaining capacity)` records from `data` (an OCF block payload).
+    //
+    // Returns the number of bytes consumed from `data` along with the number of records decoded.
+    fn decode_block(&mut self, data: &[u8], count: usize) -> Result<(usize, usize), ArrowError> {
+        // OCF decoding never interleaves records across blocks, so no chunking.
+        let to_decode = std::cmp::min(count, self.remaining_capacity);
+        if to_decode == 0 {
+            return Ok((0, 0));
+        }
+        let consumed = self.active_decoder.decode(data, to_decode)?;
+        self.remaining_capacity -= to_decode;
+        Ok((consumed, to_decode))
+    }
+
+    // Produce a `RecordBatch` if at least one row is fully decoded, returning
+    // `Ok(None)` if no new rows are available.
+    fn flush_block(&mut self) -> Result<Option<RecordBatch>, ArrowError> {
+        self.flush_and_reset()
+    }
+}
+
+/// A builder that configures and constructs Avro readers and decoders.
+///
+/// `ReaderBuilder` is the primary entry point for this module. It supports:
+///
+/// * OCF reading via `Self::build`, returning a `Reader` over any `BufRead`;
+/// * streaming decoding via `Self::build_decoder`, returning a `Decoder`.
+///
+/// ### Options
+///
+/// * **`batch_size`**: Max rows per `RecordBatch` (default: `1024`). See `Self::with_batch_size`.
+/// * **`utf8_view`**: Use Arrow `StringViewArray` for string columns (default: `false`).
+///   See `Self::with_utf8_view`.
+/// * **`strict_mode`**: Opt‑in to stricter union handling (default: `false`).
+///   See `Self::with_strict_mode`.
+/// * **`reader_schema`**: Optional reader schema (projection / evolution) used when decoding
+///   values (default: `None`). See `Self::with_reader_schema`.
+/// * **`writer_schema_store`**: Required for building a `Decoder` for single‑object or
+///   Confluent framing. Maps fingerprints to Avro schemas. See `Self::with_writer_schema_store`.
+/// * **`active_fingerprint`**: Optional starting fingerprint for streaming decode when the
+///   first frame omits one (rare). See `Self::with_active_fingerprint`.
+///
+/// ### Examples
+///
+/// Read an OCF file in batches of 4096 rows:
 ///
-/// This struct contains configuration options that control how Avro data is
-/// converted into Arrow arrays. It allows customizing various aspects of the
-/// data conversion process.
+/// ```no_run
+/// use std::fs::File;
+/// use std::io::BufReader;
+/// use arrow_avro::reader::ReaderBuilder;
 ///
-/// # Examples
+/// let file = File::open("data.avro")?;
+/// let mut reader = ReaderBuilder::new()
+///     .with_batch_size(4096)
+///     .build(BufReader::new(file))?;
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+///
+/// Build a `Decoder` for Confluent messages:
 ///
 /// ```
-/// # use arrow_avro::reader::ReadOptions;
-/// // Use default options (regular StringArray for strings)
-/// let default_options = ReadOptions::default();
+/// use arrow_avro::schema::{AvroSchema, SchemaStore, Fingerprint, FingerprintAlgorithm};
+/// use arrow_avro::reader::ReaderBuilder;
 ///
-/// // Enable Utf8View support for better string performance
-/// let options = ReadOptions::default()
-///     .with_utf8view(true);
+/// let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+/// store.set(Fingerprint::Id(1234), AvroSchema::new(r#"{"type":"record","name":"E","fields":[]}"#.to_string()))?;
+///
+/// let decoder = ReaderBuilder::new()
+///     .with_writer_schema_store(store)
+///     .build_decoder()?;
+/// # Ok::<(), Box<dyn std::error::Error>>(())
 /// ```
-#[derive(Default, Debug, Clone)]
-pub struct ReadOptions {
-    use_utf8view: bool,
+#[derive(Debug)]
+pub struct ReaderBuilder {
+    batch_size: usize,
+    strict_mode: bool,
+    utf8_view: bool,
+    reader_schema: Option<AvroSchema>,
+    writer_schema_store: Option<SchemaStore>,
+    active_fingerprint: Option<Fingerprint>,
+}
+
+impl Default for ReaderBuilder {
+    fn default() -> Self {
+        Self {
+            batch_size: 1024,
+            strict_mode: false,
+            utf8_view: false,
+            reader_schema: None,
+            writer_schema_store: None,
+            active_fingerprint: None,
+        }
+    }
 }
 
-impl ReadOptions {
-    /// Create a new `ReadOptions` with default values
+impl ReaderBuilder {
+    /// Creates a new `ReaderBuilder` with defaults:
+    ///
+    /// * `batch_size = 1024`
+    /// * `strict_mode = false`
+    /// * `utf8_view = false`
+    /// * `reader_schema = None`
+    /// * `writer_schema_store = None`
+    /// * `active_fingerprint = None`
     pub fn new() -> Self {
         Self::default()
     }
 
-    /// Set whether to use StringViewArray for string data
+    fn make_record_decoder(
+        &self,
+        writer_schema: &Schema,
+        reader_schema: Option<&Schema>,
+    ) -> Result<RecordDecoder, ArrowError> {
+        let mut builder = AvroFieldBuilder::new(writer_schema);
+        if let Some(reader_schema) = reader_schema {
+            builder = builder.with_reader_schema(reader_schema);
+        }
+        let root = builder
+            .with_utf8view(self.utf8_view)
+            .with_strict_mode(self.strict_mode)
+            .build()?;
+        RecordDecoder::try_new_with_options(root.data_type())
+    }
+
+    fn make_record_decoder_from_schemas(
+        &self,
+        writer_schema: &Schema,
+        reader_schema: Option<&AvroSchema>,
+    ) -> Result<RecordDecoder, ArrowError> {
+        let reader_schema_raw = reader_schema.map(|s| s.schema()).transpose()?;
+        self.make_record_decoder(writer_schema, reader_schema_raw.as_ref())
+    }
+
+    fn make_decoder_with_parts(
+        &self,
+        active_decoder: RecordDecoder,
+        active_fingerprint: Option<Fingerprint>,
+        cache: IndexMap<Fingerprint, RecordDecoder>,
+        fingerprint_algorithm: FingerprintAlgorithm,
+    ) -> Decoder {
+        Decoder {
+            batch_size: self.batch_size,
+            remaining_capacity: self.batch_size,
+            active_fingerprint,
+            active_decoder,
+            cache,
+            fingerprint_algorithm,
+            pending_schema: None,
+            awaiting_body: false,
+        }
+    }
+
+    fn make_decoder(
+        &self,
+        header: Option<&Header>,
+        reader_schema: Option<&AvroSchema>,
+    ) -> Result<Decoder, ArrowError> {
+        if let Some(hdr) = header {
+            let writer_schema = hdr
+                .schema()
+                .map_err(|e| ArrowError::ExternalError(Box::new(e)))?
+                .ok_or_else(|| {
+                    ArrowError::ParseError("No Avro schema present in file header".into())
+                })?;
+            let record_decoder =
+                self.make_record_decoder_from_schemas(&writer_schema, reader_schema)?;
+            return Ok(self.make_decoder_with_parts(
+                record_decoder,
+                None,
+                IndexMap::new(),
+                FingerprintAlgorithm::Rabin,
+            ));
+        }
+        let store = self.writer_schema_store.as_ref().ok_or_else(|| {
+            ArrowError::ParseError("Writer schema store required for raw Avro".into())
+        })?;
+        let fingerprints = store.fingerprints();
+        if fingerprints.is_empty() {
+            return Err(ArrowError::ParseError(
+                "Writer schema store must contain at least one schema".into(),
+            ));
+        }
+        let start_fingerprint = self
+            .active_fingerprint
+            .or_else(|| fingerprints.first().copied())
+            .ok_or_else(|| {
+                ArrowError::ParseError("Could not determine initial schema fingerprint".into())
+            })?;
+        let mut cache = IndexMap::with_capacity(fingerprints.len().saturating_sub(1));
+        let mut active_decoder: Option<RecordDecoder> = None;
+        for fingerprint in store.fingerprints() {
+            let avro_schema = match store.lookup(&fingerprint) {
+                Some(schema) => schema,
+                None => {
+                    return Err(ArrowError::ComputeError(format!(
+                        "Fingerprint {fingerprint:?} not found in schema store",
+                    )));
+                }
+            };
+            let writer_schema = avro_schema.schema()?;
+            let record_decoder =
+                self.make_record_decoder_from_schemas(&writer_schema, reader_schema)?;
+            if fingerprint == start_fingerprint {
+                active_decoder = Some(record_decoder);
+            } else {
+                cache.insert(fingerprint, record_decoder);
+            }
+        }
+        let active_decoder = active_decoder.ok_or_else(|| {
+            ArrowError::ComputeError(format!(
+                "Initial fingerprint {start_fingerprint:?} not found in schema store"
+            ))
+        })?;
+        Ok(self.make_decoder_with_parts(
+            active_decoder,
+            Some(start_fingerprint),
+            cache,
+            store.fingerprint_algorithm(),
+        ))
+    }
+
+    /// Sets the **row‑based batch size**.
+    ///
+    /// Each call to `Decoder::flush` or each iteration of `Reader` yields a batch with
+    /// *up to* this many rows. Larger batches can reduce overhead; smaller batches can
+    /// reduce peak memory usage and latency.
+    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+        self.batch_size = batch_size;
+        self
+    }
+
+    /// Choose Arrow's `StringViewArray` for UTF‑8 string data.
     ///
-    /// When enabled, string data from Avro files will be loaded into
-    /// Arrow's StringViewArray instead of the standard StringArray.
-    pub fn with_utf8view(mut self, use_utf8view: bool) -> Self {
-        self.use_utf8view = use_utf8view;
+    /// When enabled, textual Avro fields are loaded into Arrow’s **StringViewArray**
+    /// instead of the standard `StringArray`. This can improve performance for workloads
+    /// with many short strings by reducing allocations.
+    pub fn with_utf8_view(mut self, utf8_view: bool) -> Self {
+        self.utf8_view = utf8_view;
         self
     }
 
-    /// Get whether StringViewArray is enabled for string data
+    /// Returns whether `StringViewArray` is enabled for string data.
     pub fn use_utf8view(&self) -> bool {
-        self.use_utf8view
+        self.utf8_view
     }
-}
 
-/// Read a [`Header`] from the provided [`BufRead`]
-fn read_header<R: BufRead>(mut reader: R) -> Result<Header, ArrowError> {
-    let mut decoder = HeaderDecoder::default();
-    loop {
-        let buf = reader.fill_buf()?;
-        if buf.is_empty() {
-            break;
-        }
-        let read = buf.len();
-        let decoded = decoder.decode(buf)?;
-        reader.consume(decoded);
-        if decoded != read {
-            break;
+    /// Enable stricter behavior for certain Avro unions (e.g., `[T, "null"]`).
+    ///
+    /// When `true`, ambiguous or lossy unions that would otherwise be coerced may instead
+    /// produce a descriptive error. Use this to catch schema issues early during ingestion.
+    pub fn with_strict_mode(mut self, strict_mode: bool) -> Self {
+        self.strict_mode = strict_mode;
+        self
+    }
+
+    /// Sets the **reader schema** used during decoding.
+    ///
+    /// If not provided, the writer schema from the OCF header (for `Reader`) or the
+    /// schema looked up from the fingerprint (for `Decoder`) is used directly.
+    ///
+    /// A reader schema can be used for **schema evolution** or **projection**.
+    pub fn with_reader_schema(mut self, schema: AvroSchema) -> Self {
+        self.reader_schema = Some(schema);
+        self
+    }
+
+    /// Sets the `SchemaStore` used to resolve writer schemas by fingerprint.
+    ///
+    /// This is required when building a `Decoder` for **single‑object encoding** or the
+    /// **Confluent** wire format. The store maps a fingerprint (Rabin / MD5 / SHA‑256 /
+    /// ID) to a full Avro schema.
+    ///
+    /// Defaults to `None`.
+    pub fn with_writer_schema_store(mut self, store: SchemaStore) -> Self {
+        self.writer_schema_store = Some(store);
+        self
+    }
+
+    /// Sets the initial schema fingerprint for stream decoding.
+    ///
+    /// This can be useful for streams that **do not include** a fingerprint before the first
+    /// record body (uncommon). If not set, the first fingerprint in the schema store is used.
+    pub fn with_active_fingerprint(mut self, fp: Fingerprint) -> Self {
+        self.active_fingerprint = Some(fp);
+        self
+    }
+
+    /// Build a `Reader` (OCF) from this builder and a `BufRead`.
+    ///
+    /// This reads and validates the OCF header, initializes an internal row decoder from
+    /// the discovered writer (and optional reader) schema, and prepares to iterate blocks,
+    /// decompressing if necessary.
+    pub fn build<R: BufRead>(self, mut reader: R) -> Result<Reader<R>, ArrowError> {
+        let header = read_header(&mut reader)?;
+        let decoder = self.make_decoder(Some(&header), self.reader_schema.as_ref())?;
+        Ok(Reader {
+            reader,
+            header,
+            decoder,
+            block_decoder: BlockDecoder::default(),
+            block_data: Vec::new(),
+            block_count: 0,
+            block_cursor: 0,
+            finished: false,
+        })
+    }
+
+    /// Build a streaming `Decoder` from this builder.
+    ///
+    /// # Requirements
+    /// * `SchemaStore` **must** be provided via `Self::with_writer_schema_store`.
+    /// * The store should contain **all** fingerprints that may appear on the stream.
+    ///
+    /// # Errors
+    /// * Returns [`ArrowError::InvalidArgumentError`] if the schema store is missing
+    pub fn build_decoder(self) -> Result<Decoder, ArrowError> {
+        if self.writer_schema_store.is_none() {
+            return Err(ArrowError::InvalidArgumentError(
+                "Building a decoder requires a writer schema store".to_string(),
+            ));
         }
+        self.make_decoder(None, self.reader_schema.as_ref())
     }
+}
 
-    decoder
-        .flush()
-        .ok_or_else(|| ArrowError::ParseError("Unexpected EOF".to_string()))
+/// A high‑level Avro **Object Container File** reader.
+///
+/// `Reader` pulls blocks from a `BufRead` source, handles optional block compression,
+/// and decodes them row‑by‑row into Arrow `RecordBatch` values using an internal
+/// `Decoder`. It implements both:
+///
+/// * [`Iterator<Item = Result<RecordBatch, ArrowError>>`], and
+/// * `RecordBatchReader`, guaranteeing a consistent schema across all produced batches.
+///
+#[derive(Debug)]
+pub struct Reader<R: BufRead> {
+    reader: R,
+    header: Header,
+    decoder: Decoder,
+    block_decoder: BlockDecoder,
+    block_data: Vec<u8>,
+    block_count: usize,
+    block_cursor: usize,
+    finished: bool,
 }
 
-/// Return an iterator of [`Block`] from the provided [`BufRead`]
-fn read_blocks<R: BufRead>(mut reader: R) -> impl Iterator<Item = Result<Block, ArrowError>> {
-    let mut decoder = BlockDecoder::default();
+impl<R: BufRead> Reader<R> {
+    /// Returns the Arrow schema discovered from the Avro file header (or derived via
+    /// the optional reader schema).
+    ///
+    /// Delegates to the internal `Decoder`.
+    pub fn schema(&self) -> SchemaRef {
+        self.decoder.schema()
+    }
 
-    let mut try_next = move || {
-        loop {
-            let buf = reader.fill_buf()?;
-            if buf.is_empty() {
-                break;
+    /// Returns a reference to the parsed Avro container‑file header (magic, metadata, codec, sync).
+    pub fn avro_header(&self) -> &Header {
+        &self.header
+    }
+
+    /// Reads the next `RecordBatch` from the Avro file, or `Ok(None)` on EOF.
+    ///
+    /// Batches are bounded by `batch_size`; a single OCF block may yield multiple batches,
+    /// and a batch may also span multiple blocks.
+    ///
+    /// # Errors
+    ///
+    /// Propagates I/O, block-decoding, decompression and row-decoding errors, and returns
+    /// `ArrowError::ParseError` when the block decoder consumes nothing from a non-empty
+    /// buffer without completing a block.
+    fn read(&mut self) -> Result<Option<RecordBatch>, ArrowError> {
+        // Keep going until the input is exhausted or the decoder reports a full batch.
+        'outer: while !self.finished && !self.decoder.batch_is_full() {
+            // The current block has been fully consumed: pull bytes until another block
+            // with a non-empty payload is available.
+            while self.block_cursor == self.block_data.len() {
+                let buf = self.reader.fill_buf()?;
+                if buf.is_empty() {
+                    // End of input: leave both loops and flush the rows decoded so far.
+                    self.finished = true;
+                    break 'outer;
+                }
+                // Try to decode another block from the buffered reader.
+                let consumed = self.block_decoder.decode(buf)?;
+                self.reader.consume(consumed);
+                if let Some(block) = self.block_decoder.flush() {
+                    // Successfully decoded a block.
+                    // NOTE(review): `header.compression()` is re-evaluated for every block;
+                    // confirm it is cheap or cache the codec.
+                    self.block_data = if let Some(ref codec) = self.header.compression()? {
+                        codec.decompress(&block.data)?
+                    } else {
+                        block.data
+                    };
+                    self.block_count = block.count;
+                    self.block_cursor = 0;
+                } else if consumed == 0 {
+                    // The block decoder made no progress on a non-empty buffer.
+                    return Err(ArrowError::ParseError(
+                        "Could not decode next Avro block from partial data".to_string(),
+                    ));
+                }
             }
-            let read = buf.len();
-            let decoded = decoder.decode(buf)?;
-            reader.consume(decoded);
-            if decoded != read {
-                break;
+            // Decode as many rows as will fit in the current batch
+            if self.block_cursor < self.block_data.len() {
+                let (consumed, records_decoded) = self
+                    .decoder
+                    .decode_block(&self.block_data[self.block_cursor..], self.block_count)?;
+                // Advance past the decoded bytes and track the records still pending in
+                // this block so the next call resumes where this one stopped.
+                self.block_cursor += consumed;
+                self.block_count -= records_decoded;
             }
         }
-        Ok(decoder.flush())
-    };
-    std::iter::from_fn(move || try_next().transpose())
+        self.decoder.flush_block()
+    }
+}
+
+impl<R: BufRead> Iterator for Reader<R> {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    /// Yields the result of `read`: a batch, an error, or `None` once `read`
+    /// returns `Ok(None)`.
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.read() {
+            Ok(Some(batch)) => Some(Ok(batch)),
+            Ok(None) => None,
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+impl<R: BufRead> RecordBatchReader for Reader<R> {
+    /// Reports the same schema as the inherent `Reader::schema`.
+    fn schema(&self) -> SchemaRef {
+        // Name the inherent method explicitly so this does not read as self-recursion.
+        Reader::schema(self)
+    }
 }
 
 #[cfg(test)]
 mod test {
-    use crate::codec::{AvroDataType, AvroField, Codec};
-    use crate::compression::CompressionCodec;
+    use crate::codec::AvroFieldBuilder;
     use crate::reader::record::RecordDecoder;
-    use crate::reader::{read_blocks, read_header};
+    use crate::reader::{Decoder, Reader, ReaderBuilder};
+    use crate::schema::{
+        AVRO_ENUM_SYMBOLS_METADATA_KEY, AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY,
+        AvroSchema, CONFLUENT_MAGIC, Fingerprint, FingerprintAlgorithm, PrimitiveType,
+        SINGLE_OBJECT_MAGIC, SchemaStore,
+    };
     use crate::test_util::arrow_test_data;
+    use crate::writer::AvroWriter;
+    use arrow_array::builder::{
+        ArrayBuilder, BooleanBuilder, Float32Builder, Int32Builder, Int64Builder, ListBuilder,
+        MapBuilder, StringBuilder, StructBuilder,
+    };
+    #[cfg(feature = "snappy")]
+    use arrow_array::builder::{Float64Builder, MapFieldNames};
+    use arrow_array::cast::AsArray;
+    #[cfg(not(feature = "avro_custom_types"))]
+    use arrow_array::types::Int64Type;
+    #[cfg(feature = "avro_custom_types")]
+    use arrow_array::types::{
+        DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
+        DurationSecondType,
+    };
+    use arrow_array::types::{Int32Type, IntervalMonthDayNanoType};
     use arrow_array::*;
-    use arrow_schema::{DataType, Field};
+    #[cfg(feature = "snappy")]
+    use arrow_buffer::{Buffer, NullBuffer};
+    use arrow_buffer::{IntervalMonthDayNano, OffsetBuffer, ScalarBuffer, i256};
+    #[cfg(feature = "avro_custom_types")]
+    use arrow_schema::{
+        ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, Schema, TimeUnit, UnionFields,
+        UnionMode,
+    };
+    #[cfg(not(feature = "avro_custom_types"))]
+    use arrow_schema::{
+        ArrowError, DataType, Field, FieldRef, Fields, IntervalUnit, Schema, UnionFields, UnionMode,
+    };
+    use bytes::Bytes;
+    use futures::executor::block_on;
+    use futures::{Stream, StreamExt, TryStreamExt, stream};
+    use serde_json::{Value, json};
     use std::collections::HashMap;
     use std::fs::File;
-    use std::io::BufReader;
+    use std::io::{BufReader, Cursor};
     use std::sync::Arc;
 
-    fn read_file(file: &str, batch_size: usize) -> RecordBatch {
-        read_file_with_options(file, batch_size, &crate::ReadOptions::default())
+    /// Yields the `alltypes_plain` test-data paths whose `cfg(feature = ...)` gate is
+    /// enabled in this build; the iterator is empty when none of the features are on.
+    fn files() -> impl Iterator<Item = &'static str> {
+        [
+            // TODO: avoid requiring snappy for this file
+            #[cfg(feature = "snappy")]
+            "avro/alltypes_plain.avro",
+            #[cfg(feature = "snappy")]
+            "avro/alltypes_plain.snappy.avro",
+            #[cfg(feature = "zstd")]
+            "avro/alltypes_plain.zstandard.avro",
+            #[cfg(feature = "bzip2")]
+            "avro/alltypes_plain.bzip2.avro",
+            #[cfg(feature = "xz")]
+            "avro/alltypes_plain.xz.avro",
+        ]
+        .into_iter()
+    }
+
+    /// Reads every batch of the Avro file at `path` with the given `batch_size` and
+    /// `utf8_view` setting and concatenates them into a single `RecordBatch`.
+    ///
+    /// Panics if the file cannot be opened, read, decoded or concatenated.
+    fn read_file(path: &str, batch_size: usize, utf8_view: bool) -> RecordBatch {
+        let input = BufReader::new(File::open(path).unwrap());
+        let builder = ReaderBuilder::new()
+            .with_batch_size(batch_size)
+            .with_utf8_view(utf8_view);
+        let reader = builder.build(input).unwrap();
+        let schema = reader.schema();
+        let batches: Vec<RecordBatch> = reader.collect::<Result<_, _>>().unwrap();
+        arrow::compute::concat_batches(&schema, &batches).unwrap()
     }
 
-    fn read_file_with_options(
-        file: &str,
+    /// Opens the Avro file at `path` and builds a `Reader` over it with strict mode
+    /// enabled, using the given `batch_size` and `utf8_view` setting.
+    ///
+    /// Returns the error instead of panicking when the file cannot be opened or the
+    /// reader cannot be built.
+    fn read_file_strict(
+        path: &str,
         batch_size: usize,
-        options: &crate::ReadOptions,
-    ) -> RecordBatch {
-        let file = File::open(file).unwrap();
-        let mut reader = BufReader::new(file);
-        let header = read_header(&mut reader).unwrap();
-        let compression = header.compression().unwrap();
-        let schema = header.schema().unwrap().unwrap();
-        let root = AvroField::try_from(&schema).unwrap();
-
-        let mut decoder =
-            RecordDecoder::try_new_with_options(root.data_type(), options.clone()).unwrap();
-
-        for result in read_blocks(reader) {
-            let block = result.unwrap();
-            assert_eq!(block.sync, header.sync());
-            if let Some(c) = compression {
-                let decompressed = c.decompress(&block.data).unwrap();
-
-                let mut offset = 0;
-                let mut remaining = block.count;
-                while remaining > 0 {
-                    let to_read = remaining.max(batch_size);
-                    offset += decoder
-                        .decode(&decompressed[offset..], block.count)
-                        .unwrap();
+        utf8_view: bool,
+    ) -> Result<Reader<BufReader<File>>, ArrowError> {
+        let file = File::open(path)?;
+        ReaderBuilder::new()
+            .with_batch_size(batch_size)
+            .with_utf8_view(utf8_view)
+            .with_strict_mode(true)
+            .build(BufReader::new(file))
+    }
 
-                    remaining -= to_read;
+    /// Feeds every chunk yielded by `input` into `decoder`, then yields the batch
+    /// returned by a final `flush`, if there is one.
+    ///
+    /// The stream fails with `ArrowError::ParseError` as soon as `decode` leaves part of
+    /// a chunk unconsumed, and propagates any error from `decode` or `flush`.
+    fn decode_stream<S: Stream<Item = Bytes> + Unpin>(
+        mut decoder: Decoder,
+        mut input: S,
+    ) -> impl Stream<Item = Result<RecordBatch, ArrowError>> {
+        async_stream::try_stream! {
+            // Drain the whole input: polling it only once would silently drop every
+            // chunk after the first.
+            while let Some(data) = input.next().await {
+                let consumed = decoder.decode(&data)?;
+                if consumed < data.len() {
+                    Err(ArrowError::ParseError(
+                        "did not consume all bytes".to_string(),
+                    ))?;
                 }
-                assert_eq!(offset, decompressed.len());
+            }
+            if let Some(batch) = decoder.flush()? {
+                yield batch
             }
         }
-        decoder.flush().unwrap()
     }
 
-    #[test]
-    fn test_utf8view_support() {
-        let schema_json = r#"{
-            "type": "record",
-            "name": "test",
-            "fields": [{
-                "name": "str_field",
-                "type": "string"
-            }]
-        }"#;
+    /// Builds the schema of a record named `TestRecord` with a single field `a` whose
+    /// type is the primitive `pt`.
+    fn make_record_schema(pt: PrimitiveType) -> AvroSchema {
+        let type_name = pt.as_ref();
+        AvroSchema::new(format!(
+            r#"{{"type":"record","name":"TestRecord","fields":[{{"name":"a","type":"{type_name}"}}]}}"#
+        ))
+    }
+
+    /// Registers an `int` and a `long` single-field record schema in a fresh
+    /// `SchemaStore`.
+    ///
+    /// Returns `(store, int fingerprint, long fingerprint, int schema, long schema)`.
+    fn make_two_schema_store() -> (
+        SchemaStore,
+        Fingerprint,
+        Fingerprint,
+        AvroSchema,
+        AvroSchema,
+    ) {
+        let int_schema = make_record_schema(PrimitiveType::Int);
+        let long_schema = make_record_schema(PrimitiveType::Long);
+        let mut store = SchemaStore::new();
+        let int_fp = store
+            .register(int_schema.clone())
+            .expect("register int schema");
+        let long_fp = store
+            .register(long_schema.clone())
+            .expect("register long schema");
+        (store, int_fp, long_fp, int_schema, long_schema)
+    }
+
+    /// Builds a message prefix made of `SINGLE_OBJECT_MAGIC` followed by the
+    /// little-endian bytes of a Rabin fingerprint.
+    ///
+    /// Panics for every other `Fingerprint` variant.
+    fn make_prefix(fp: Fingerprint) -> Vec<u8> {
+        match fp {
+            Fingerprint::Rabin(v) => {
+                // Reserve room for the magic bytes plus the fingerprint bytes.
+                let mut out = Vec::with_capacity(2 + 8);
+                out.extend_from_slice(&SINGLE_OBJECT_MAGIC);
+                out.extend_from_slice(&v.to_le_bytes());
+                out
+            }
+            Fingerprint::Id(v) => {
+                panic!("make_prefix expects a Rabin fingerprint, got ({v})");
+            }
+            Fingerprint::Id64(v) => {
+                panic!("make_prefix expects a Rabin fingerprint, got ({v})");
+            }
+            #[cfg(feature = "md5")]
+            Fingerprint::MD5(v) => {
+                panic!("make_prefix expects a Rabin fingerprint, got ({v:?})");
+            }
+            #[cfg(feature = "sha256")]
+            Fingerprint::SHA256(id) => {
+                panic!("make_prefix expects a Rabin fingerprint, got ({id:?})");
+            }
+        }
+    }
 
-        let schema: crate::schema::Schema = serde_json::from_str(schema_json).unwrap();
-        let avro_field = AvroField::try_from(&schema).unwrap();
+    /// Builds a `Decoder` with batch size 8 that uses `reader_schema`, resolves writer
+    /// schemas through a clone of `store`, and starts with `fp` as the active fingerprint.
+    ///
+    /// Panics if the decoder cannot be built.
+    fn make_decoder(store: &SchemaStore, fp: Fingerprint, reader_schema: &AvroSchema) -> Decoder {
+        let builder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(reader_schema.clone())
+            .with_writer_schema_store(store.clone())
+            .with_active_fingerprint(fp);
+        builder.build_decoder().expect("decoder")
+    }
 
-        let data_type = avro_field.data_type();
+    /// Returns `CONFLUENT_MAGIC` followed by the big-endian bytes of `id`, in a vector
+    /// with capacity reserved for `additional` more bytes.
+    fn make_id_prefix(id: u32, additional: usize) -> Vec<u8> {
+        let id_bytes = id.to_be_bytes();
+        let mut prefix =
+            Vec::with_capacity(CONFLUENT_MAGIC.len() + size_of::<u32>() + additional);
+        prefix.extend_from_slice(&CONFLUENT_MAGIC);
+        prefix.extend_from_slice(&id_bytes);
+        prefix
+    }
 
-        struct TestHelper;
-        impl TestHelper {
-            fn with_utf8view(field: &Field) -> Field {
-                match field.data_type() {
-                    DataType::Utf8 => {
-                        Field::new(field.name(), DataType::Utf8View, field.is_nullable())
-                            .with_metadata(field.metadata().clone())
+    /// Builds a message: the `u32` id prefix followed by `value` in zig-zag varint form.
+    fn make_message_id(id: u32, value: i64) -> Vec<u8> {
+        let payload = encode_zigzag(value);
+        let mut message = make_id_prefix(id, payload.len());
+        message.extend_from_slice(&payload);
+        message
+    }
+
+    /// Returns `CONFLUENT_MAGIC` followed by the big-endian bytes of the 64-bit `id`, in
+    /// a vector with capacity reserved for `additional` more bytes.
+    fn make_id64_prefix(id: u64, additional: usize) -> Vec<u8> {
+        let id_bytes = id.to_be_bytes();
+        let mut prefix =
+            Vec::with_capacity(CONFLUENT_MAGIC.len() + size_of::<u64>() + additional);
+        prefix.extend_from_slice(&CONFLUENT_MAGIC);
+        prefix.extend_from_slice(&id_bytes);
+        prefix
+    }
+
+    /// Builds a message: the `u64` id prefix followed by `value` in zig-zag varint form.
+    fn make_message_id64(id: u64, value: i64) -> Vec<u8> {
+        let payload = encode_zigzag(value);
+        let mut message = make_id64_prefix(id, payload.len());
+        message.extend_from_slice(&payload);
+        message
+    }
+
+    /// Builds the schema of a record named `S` with a single field `v` whose type is the
+    /// primitive `pt`.
+    fn make_value_schema(pt: PrimitiveType) -> AvroSchema {
+        let type_name = pt.as_ref();
+        AvroSchema::new(format!(
+            r#"{{"type":"record","name":"S","fields":[{{"name":"v","type":"{type_name}"}}]}}"#
+        ))
+    }
+
+    /// Encodes `value` as a zig-zag varint: the sign is folded into the lowest bit, then
+    /// the result is emitted in 7-bit groups, least-significant first, with the high bit
+    /// set on every byte except the last.
+    fn encode_zigzag(value: i64) -> Vec<u8> {
+        let mut remaining = ((value << 1) ^ (value >> 63)) as u64;
+        let mut bytes = Vec::new();
+        // Emit continuation bytes while more than seven significant bits remain.
+        while remaining >= 0x80 {
+            bytes.push(((remaining as u8) & 0x7F) | 0x80);
+            remaining >>= 7;
+        }
+        bytes.push(remaining as u8);
+        bytes
+    }
+
+    /// Builds a message: the prefix for `fp` followed by `value` in zig-zag varint form.
+    fn make_message(fp: Fingerprint, value: i64) -> Vec<u8> {
+        let mut message = make_prefix(fp);
+        message.extend(encode_zigzag(value));
+        message
+    }
+
+    /// Reads the header of the Avro file at `path` and returns its schema as a JSON
+    /// value.
+    ///
+    /// Panics if the file cannot be opened, the header cannot be read, or the header
+    /// has no schema.
+    fn load_writer_schema_json(path: &str) -> Value {
+        let input = BufReader::new(File::open(path).unwrap());
+        let header = super::read_header(input).unwrap();
+        let writer_schema = header.schema().unwrap().unwrap();
+        serde_json::to_value(&writer_schema).unwrap()
+    }
+
+    /// Derives a reader schema from the writer schema of the file at `path`, replacing
+    /// the type of each top-level field named in `promotions` with the mapped type name.
+    ///
+    /// A plain or object-typed field is replaced wholesale. For a union, only the first
+    /// branch that is an object or a non-`"null"` string is replaced. Fields without a
+    /// string `name` and fields not listed in `promotions` are left untouched.
+    ///
+    /// Panics if the writer schema is not a record with a `fields` array, or if a
+    /// promoted field has no `type`.
+    fn make_reader_schema_with_promotions(
+        path: &str,
+        promotions: &HashMap<&str, &str>,
+    ) -> AvroSchema {
+        let mut root = load_writer_schema_json(path);
+        assert_eq!(root["type"], "record", "writer schema must be a record");
+        let fields = root
+            .get_mut("fields")
+            .and_then(|f| f.as_array_mut())
+            .expect("record has fields");
+        for f in fields.iter_mut() {
+            let Some(name) = f.get("name").and_then(|n| n.as_str()) else {
+                continue;
+            };
+            if let Some(new_ty) = promotions.get(name) {
+                let ty = f.get_mut("type").expect("field has a type");
+                match ty {
+                    Value::String(_) => {
+                        *ty = Value::String((*new_ty).to_string());
                     }
-                    _ => field.clone(),
+                    // Union
+                    Value::Array(arr) => {
+                        // Replace only the first non-null branch, then stop.
+                        for b in arr.iter_mut() {
+                            match b {
+                                Value::String(s) if s != "null" => {
+                                    *b = Value::String((*new_ty).to_string());
+                                    break;
+                                }
+                                Value::Object(_) => {
+                                    *b = Value::String((*new_ty).to_string());
+                                    break;
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                    Value::Object(_) => {
+                        *ty = Value::String((*new_ty).to_string());
+                    }
+                    _ => {}
                 }
             }
         }
+        AvroSchema::new(root.to_string())
+    }
 
-        let field = TestHelper::with_utf8view(&Field::new("str_field", DataType::Utf8, false));
+    /// Derives a reader schema from the writer schema of the file at `path`, replacing
+    /// the `symbols` list of each top-level enum field named in `remap`.
+    ///
+    /// The field's type may be the enum object itself or a union containing enum
+    /// objects; every enum branch of such a union receives the new symbols. Fields whose
+    /// type is not an enum are left unchanged.
+    ///
+    /// Panics if the writer schema is not a record with a `fields` array, or if a
+    /// remapped field has no `type`.
+    fn make_reader_schema_with_enum_remap(
+        path: &str,
+        remap: &HashMap<&str, Vec<&str>>,
+    ) -> AvroSchema {
+        let mut root = load_writer_schema_json(path);
+        assert_eq!(root["type"], "record", "writer schema must be a record");
+        let fields = root
+            .get_mut("fields")
+            .and_then(|f| f.as_array_mut())
+            .expect("record has fields");
 
-        assert_eq!(field.data_type(), &DataType::Utf8View);
+        // Converts a list of symbol names into a JSON array of strings.
+        fn to_symbols_array(symbols: &[&str]) -> Value {
+            Value::Array(symbols.iter().map(|s| Value::String((*s).into())).collect())
+        }
 
-        let array = StringViewArray::from(vec!["test1", "test2"]);
-        let batch =
-            RecordBatch::try_from_iter(vec![("str_field", Arc::new(array) as ArrayRef)]).unwrap();
+        // Overwrites `symbols` on `ty` when it is an enum object, or on every enum object
+        // directly inside `ty` when it is a union array.
+        fn update_enum_symbols(ty: &mut Value, symbols: &Value) {
+            match ty {
+                Value::Object(map) => {
+                    if matches!(map.get("type"), Some(Value::String(t)) if t == "enum") {
+                        map.insert("symbols".to_string(), symbols.clone());
+                    }
+                }
+                Value::Array(arr) => {
+                    for b in arr.iter_mut() {
+                        if let Value::Object(map) = b {
+                            if matches!(map.get("type"), Some(Value::String(t)) if t == "enum") {
+                                map.insert("symbols".to_string(), symbols.clone());
+                            }
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+        for f in fields.iter_mut() {
+            let Some(name) = f.get("name").and_then(|n| n.as_str()) else {
+                continue;
+            };
+            if let Some(new_symbols) = remap.get(name) {
+                let symbols_val = to_symbols_array(new_symbols);
+                let ty = f.get_mut("type").expect("field has a type");
+                update_enum_symbols(ty, &symbols_val);
+            }
+        }
+        AvroSchema::new(root.to_string())
+    }
 
-        assert!(batch.column(0).as_any().is::<StringViewArray>());
+    /// Reads the whole Avro file at `path` with `reader_schema` applied (batch size 1024,
+    /// `utf8_view` disabled) and concatenates the batches into a single `RecordBatch`.
+    ///
+    /// Panics if the file cannot be opened, read, decoded or concatenated.
+    fn read_alltypes_with_reader_schema(path: &str, reader_schema: AvroSchema) -> RecordBatch {
+        let input = BufReader::new(File::open(path).unwrap());
+        let builder = ReaderBuilder::new()
+            .with_batch_size(1024)
+            .with_utf8_view(false)
+            .with_reader_schema(reader_schema);
+        let reader = builder.build(input).unwrap();
+        let schema = reader.schema();
+        let batches: Vec<RecordBatch> = reader.collect::<Result<_, _>>().unwrap();
+        arrow::compute::concat_batches(&schema, &batches).unwrap()
     }
 
-    #[test]
-    fn test_alltypes() {
-        let files = [
-            "avro/alltypes_plain.avro",
-            "avro/alltypes_plain.snappy.avro",
-            "avro/alltypes_plain.zstandard.avro",
-        ];
+    /// Derives a reader schema from the writer schema of the file at `path` that keeps
+    /// only the fields named in `selected`, in exactly that order.
+    ///
+    /// Panics if the writer schema is not a record with a `fields` array, or if a
+    /// selected name is not among the writer fields.
+    fn make_reader_schema_with_selected_fields_in_order(
+        path: &str,
+        selected: &[&str],
+    ) -> AvroSchema {
+        let mut root = load_writer_schema_json(path);
+        assert_eq!(root["type"], "record", "writer schema must be a record");
+        let writer_fields = root
+            .get("fields")
+            .and_then(|f| f.as_array())
+            .expect("record has fields");
+        // Index the writer fields by name; fields without a string name are skipped.
+        let mut by_name: HashMap<String, Value> = HashMap::with_capacity(writer_fields.len());
+        by_name.extend(writer_fields.iter().filter_map(|field| {
+            let name = field.get("name")?.as_str()?;
+            Some((name.to_string(), field.clone()))
+        }));
+        let reordered: Vec<Value> = selected
+            .iter()
+            .map(|name| {
+                by_name
+                    .get(*name)
+                    .cloned()
+                    .unwrap_or_else(|| panic!("field '{name}' not found in writer schema"))
+            })
+            .collect();
+        root["fields"] = Value::Array(reordered);
+        AvroSchema::new(root.to_string())
+    }
 
-        let expected = RecordBatch::try_from_iter_with_nullable([
-            (
-                "id",
-                Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1])) as _,
-                true,
-            ),
-            (
-                "bool_col",
-                Arc::new(BooleanArray::from_iter((0..8).map(|x| Some(x % 2 == 0)))) as _,
-                true,
-            ),
-            (
-                "tinyint_col",
-                Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
-                true,
-            ),
-            (
-                "smallint_col",
-                Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
-                true,
-            ),
-            (
-                "int_col",
-                Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
-                true,
-            ),
-            (
-                "bigint_col",
-                Arc::new(Int64Array::from_iter_values((0..8).map(|x| (x % 2) * 10))) as _,
-                true,
-            ),
-            (
-                "float_col",
-                Arc::new(Float32Array::from_iter_values(
-                    (0..8).map(|x| (x % 2) as f32 * 1.1),
-                )) as _,
-                true,
-            ),
-            (
-                "double_col",
-                Arc::new(Float64Array::from_iter_values(
-                    (0..8).map(|x| (x % 2) as f64 * 10.1),
-                )) as _,
-                true,
-            ),
-            (
-                "date_string_col",
-                Arc::new(BinaryArray::from_iter_values([
-                    [48, 51, 47, 48, 49, 47, 48, 57],
-                    [48, 51, 47, 48, 49, 47, 48, 57],
-                    [48, 52, 47, 48, 49, 47, 48, 57],
-                    [48, 52, 47, 48, 49, 47, 48, 57],
-                    [48, 50, 47, 48, 49, 47, 48, 57],
-                    [48, 50, 47, 48, 49, 47, 48, 57],
-                    [48, 49, 47, 48, 49, 47, 48, 57],
-                    [48, 49, 47, 48, 49, 47, 48, 57],
-                ])) as _,
-                true,
-            ),
-            (
-                "string_col",
-                Arc::new(BinaryArray::from_iter_values((0..8).map(|x| [48 + x % 2]))) as _,
-                true,
-            ),
+    /// Writes `batches` through an `AvroWriter` created for `schema` and returns the
+    /// bytes it produced.
+    ///
+    /// Panics if the writer cannot be created or if any write or the final `finish` fails.
+    fn write_ocf(schema: &Schema, batches: &[RecordBatch]) -> Vec<u8> {
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone()).expect("writer");
+        batches.iter().for_each(|batch| {
+            writer.write(batch).expect("write");
+        });
+        writer.finish().expect("finish");
+        writer.into_inner()
+    }
+
+    /// A writer `name: string` field must be readable through a nullable reader field
+    /// `full_name` that lists `name` as an alias.
+    #[test]
+    fn writer_string_reader_nullable_with_alias() -> Result<(), Box<dyn std::error::Error>> {
+        // Writer: { id: long, name: string }
+        let writer_schema = Schema::new(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("name", DataType::Utf8, false),
+        ]);
+        let ids: ArrayRef = Arc::new(Int64Array::from(vec![1, 2]));
+        let names: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
+        let batch = RecordBatch::try_new(Arc::new(writer_schema.clone()), vec![ids, names])?;
+        let ocf = write_ocf(&writer_schema, &[batch]);
+        // Reader: renames `name` to `full_name` via an alias and adds a defaulted field.
+        let reader_json = r#"
+    {
+      "type": "record",
+      "name": "topLevelRecord",
+      "fields": [
+        { "name": "id", "type": "long" },
+        { "name": "full_name", "type": ["null","string"], "aliases": ["name"], "default": null },
+        { "name": "is_active", "type": "boolean", "default": true }
+      ]
+    }"#;
+        let reader_schema = AvroSchema::new(reader_json.to_string());
+        let mut reader = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(Cursor::new(ocf))?;
+        let out = reader.next().unwrap()?;
+        // The aliased column must carry the values the writer stored under `name`.
+        let full_name = out.column(1).as_string::<i32>();
+        assert_eq!(full_name.value(0), "a");
+        assert_eq!(full_name.value(1), "b");
+
+        Ok(())
+    }
+
+    /// A writer `string` field must be readable through a reader union that lists
+    /// `"string"` before `"null"`.
+    #[test]
+    fn writer_string_reader_string_null_order_second() -> Result<(), Box<dyn std::error::Error>> {
+        // Writer: { name: string }
+        let writer_schema = Schema::new(vec![Field::new("name", DataType::Utf8, false)]);
+        let names: ArrayRef = Arc::new(StringArray::from(vec!["x", "y"]));
+        let batch = RecordBatch::try_new(Arc::new(writer_schema.clone()), vec![names])?;
+        let ocf = write_ocf(&writer_schema, &[batch]);
+
+        // Reader: ["string","null"] (NullSecond)
+        let reader_json = r#"
+    {
+      "type":"record", "name":"topLevelRecord",
+      "fields":[ { "name":"name", "type":["string","null"], "default":"x" } ]
+    }"#;
+        let reader_schema = AvroSchema::new(reader_json.to_string());
+
+        let mut reader = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(Cursor::new(ocf))?;
+        let out = reader.next().unwrap()?;
+        assert_eq!(out.num_rows(), 2);
+
+        // Writer non-union -> reader union: the strings come back unchanged.
+        let name = out.column(0).as_string::<i32>();
+        assert_eq!(name.value(0), "x");
+        assert_eq!(name.value(1), "y");
+
+        Ok(())
+    }
+
+    /// A writer `int` field read through a reader `["null","long"]` union must come back
+    /// as `Int64` values with no validity bitmap.
+    #[test]
+    fn promotion_writer_int_reader_nullable_long() -> Result<(), Box<dyn std::error::Error>> {
+        // Writer: { v: int }
+        let writer_schema = Schema::new(vec![Field::new("v", DataType::Int32, false)]);
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let batch = RecordBatch::try_new(Arc::new(writer_schema.clone()), vec![values])?;
+        let ocf = write_ocf(&writer_schema, &[batch]);
+
+        // Reader: { v: ["null","long"] }
+        let reader_json = r#"
+    {
+      "type":"record", "name":"topLevelRecord",
+      "fields":[ { "name":"v", "type":["null","long"], "default": null } ]
+    }"#;
+        let reader_schema = AvroSchema::new(reader_json.to_string());
+
+        let mut reader = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(Cursor::new(ocf))?;
+        let out = reader.next().unwrap()?;
+        assert_eq!(out.num_rows(), 3);
+
+        // Promoted to Int64 and non-null (the writer emitted no union tag).
+        let column = out.column(0);
+        let v = column.as_primitive::<arrow_array::types::Int64Type>();
+        assert_eq!(v.values(), &[1, 2, 3]);
+        assert!(
+            column.nulls().is_none(),
+            "expected no validity bitmap for all-valid column"
+        );
+
+        Ok(())
+    }
+
+    /// Reads every enabled `alltypes_plain` file through a reader schema that changes
+    /// the type of eight fields at once and compares the result with a hand-built batch.
+    ///
+    /// `bool_col`, `double_col` and `timestamp_col` are not listed in `promotions`.
+    #[test]
+    fn test_alltypes_schema_promotion_mixed() {
+        for file in files() {
+            let file = arrow_test_data(file);
+            // Field name -> type name requested in the reader schema.
+            let mut promotions: HashMap<&str, &str> = HashMap::new();
+            promotions.insert("id", "long");
+            promotions.insert("tinyint_col", "float");
+            promotions.insert("smallint_col", "double");
+            promotions.insert("int_col", "double");
+            promotions.insert("bigint_col", "double");
+            promotions.insert("float_col", "double");
+            promotions.insert("date_string_col", "string");
+            promotions.insert("string_col", "string");
+            let reader_schema = make_reader_schema_with_promotions(&file, &promotions);
+            let batch = read_alltypes_with_reader_schema(&file, reader_schema);
+            let expected = RecordBatch::try_from_iter_with_nullable([
+                (
+                    "id",
+                    Arc::new(Int64Array::from(vec![4i64, 5, 6, 7, 2, 3, 0, 1])) as _,
+                    true,
+                ),
+                (
+                    "bool_col",
+                    Arc::new(BooleanArray::from_iter((0..8).map(|x| Some(x % 2 == 0)))) as _,
+                    true,
+                ),
+                (
+                    "tinyint_col",
+                    Arc::new(Float32Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f32),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "smallint_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f64),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "int_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f64),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "bigint_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| ((x % 2) * 10) as f64),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "float_col",
+                    // Computed in f32 first and then widened, so the expectation carries
+                    // the f32 rounding of 1.1.
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| ((x % 2) as f32 * 1.1f32) as f64),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "double_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f64 * 10.1),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "date_string_col",
+                    Arc::new(StringArray::from(vec![
+                        "03/01/09", "03/01/09", "04/01/09", "04/01/09", "02/01/09", "02/01/09",
+                        "01/01/09", "01/01/09",
+                    ])) as _,
+                    true,
+                ),
+                (
+                    "string_col",
+                    Arc::new(StringArray::from(
+                        (0..8)
+                            .map(|x| if x % 2 == 0 { "0" } else { "1" })
+                            .collect::<Vec<_>>(),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "timestamp_col",
+                    Arc::new(
+                        TimestampMicrosecondArray::from_iter_values([
+                            1235865600000000, // 2009-03-01T00:00:00.000
+                            1235865660000000, // 2009-03-01T00:01:00.000
+                            1238544000000000, // 2009-04-01T00:00:00.000
+                            1238544060000000, // 2009-04-01T00:01:00.000
+                            1233446400000000, // 2009-02-01T00:00:00.000
+                            1233446460000000, // 2009-02-01T00:01:00.000
+                            1230768000000000, // 2009-01-01T00:00:00.000
+                            1230768060000000, // 2009-01-01T00:01:00.000
+                        ])
+                        .with_timezone("+00:00"),
+                    ) as _,
+                    true,
+                ),
+            ])
+            .unwrap();
+            assert_eq!(batch, expected, "mismatch for file {file}");
+        }
+    }
+
+    #[test]
+    fn test_alltypes_schema_promotion_long_to_float_only() { // reader schema changes only bigint_col (to "float")
+        for file in files() { // one pass per path yielded by files() (defined elsewhere)
+            let file = arrow_test_data(file);
+            let mut promotions: HashMap<&str, &str> = HashMap::new();
+            promotions.insert("bigint_col", "float"); // the single promoted column in this test
+            let reader_schema = make_reader_schema_with_promotions(&file, &promotions);
+            let batch = read_alltypes_with_reader_schema(&file, reader_schema);
+            let expected = RecordBatch::try_from_iter_with_nullable([ // tuples are (name, array, nullable)
+                (
+                    "id",
+                    Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1])) as _,
+                    true,
+                ),
+                (
+                    "bool_col",
+                    Arc::new(BooleanArray::from_iter((0..8).map(|x| Some(x % 2 == 0)))) as _,
+                    true,
+                ),
+                (
+                    "tinyint_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "smallint_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "int_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "bigint_col",
+                    Arc::new(Float32Array::from_iter_values( // promoted column: expected as Float32 values 0.0 / 10.0
+                        (0..8).map(|x| ((x % 2) * 10) as f32),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "float_col",
+                    Arc::new(Float32Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f32 * 1.1),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "double_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f64 * 10.1),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "date_string_col",
+                    Arc::new(BinaryArray::from_iter_values([ // not promoted: stays binary; rows are ASCII bytes
+                        [48, 51, 47, 48, 49, 47, 48, 57], // "03/01/09"
+                        [48, 51, 47, 48, 49, 47, 48, 57], // "03/01/09"
+                        [48, 52, 47, 48, 49, 47, 48, 57], // "04/01/09"
+                        [48, 52, 47, 48, 49, 47, 48, 57], // "04/01/09"
+                        [48, 50, 47, 48, 49, 47, 48, 57], // "02/01/09"
+                        [48, 50, 47, 48, 49, 47, 48, 57], // "02/01/09"
+                        [48, 49, 47, 48, 49, 47, 48, 57], // "01/01/09"
+                        [48, 49, 47, 48, 49, 47, 48, 57], // "01/01/09"
+                    ])) as _,
+                    true,
+                ),
+                (
+                    "string_col",
+                    Arc::new(BinaryArray::from_iter_values((0..8).map(|x| [48 + x % 2]))) as _, // single byte: ASCII '0' or '1'
+                    true,
+                ),
+                (
+                    "timestamp_col",
+                    Arc::new(
+                        TimestampMicrosecondArray::from_iter_values([
+                            1235865600000000, // 2009-03-01T00:00:00.000
+                            1235865660000000, // 2009-03-01T00:01:00.000
+                            1238544000000000, // 2009-04-01T00:00:00.000
+                            1238544060000000, // 2009-04-01T00:01:00.000
+                            1233446400000000, // 2009-02-01T00:00:00.000
+                            1233446460000000, // 2009-02-01T00:01:00.000
+                            1230768000000000, // 2009-01-01T00:00:00.000
+                            1230768060000000, // 2009-01-01T00:01:00.000
+                        ])
+                        .with_timezone("+00:00"),
+                    ) as _,
+                    true,
+                ),
+            ])
+            .unwrap();
+            assert_eq!(batch, expected, "mismatch for file {file}"); // compares the full batch against the expected one
+        }
+    }
+
+    #[test]
+    fn test_alltypes_schema_promotion_bytes_to_string_only() { // reader schema changes the two byte columns to "string"
+        for file in files() { // one pass per path yielded by files() (defined elsewhere)
+            let file = arrow_test_data(file);
+            let mut promotions: HashMap<&str, &str> = HashMap::new();
+            promotions.insert("date_string_col", "string"); // promoted column 1 of 2
+            promotions.insert("string_col", "string"); // promoted column 2 of 2
+            let reader_schema = make_reader_schema_with_promotions(&file, &promotions);
+            let batch = read_alltypes_with_reader_schema(&file, reader_schema);
+            let expected = RecordBatch::try_from_iter_with_nullable([ // tuples are (name, array, nullable)
+                (
+                    "id",
+                    Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1])) as _,
+                    true,
+                ),
+                (
+                    "bool_col",
+                    Arc::new(BooleanArray::from_iter((0..8).map(|x| Some(x % 2 == 0)))) as _,
+                    true,
+                ),
+                (
+                    "tinyint_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "smallint_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "int_col",
+                    Arc::new(Int32Array::from_iter_values((0..8).map(|x| x % 2))) as _,
+                    true,
+                ),
+                (
+                    "bigint_col",
+                    Arc::new(Int64Array::from_iter_values((0..8).map(|x| (x % 2) * 10))) as _, // not promoted: stays Int64
+                    true,
+                ),
+                (
+                    "float_col",
+                    Arc::new(Float32Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f32 * 1.1),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "double_col",
+                    Arc::new(Float64Array::from_iter_values(
+                        (0..8).map(|x| (x % 2) as f64 * 10.1),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "date_string_col",
+                    Arc::new(StringArray::from(vec![ // promoted column: expected as a StringArray
+                        "03/01/09", "03/01/09", "04/01/09", "04/01/09", "02/01/09", "02/01/09",
+                        "01/01/09", "01/01/09",
+                    ])) as _,
+                    true,
+                ),
+                (
+                    "string_col",
+                    Arc::new(StringArray::from( // promoted column: alternating "0" / "1"
+                        (0..8)
+                            .map(|x| if x % 2 == 0 { "0" } else { "1" })
+                            .collect::<Vec<_>>(),
+                    )) as _,
+                    true,
+                ),
+                (
+                    "timestamp_col",
+                    Arc::new(
+                        TimestampMicrosecondArray::from_iter_values([
+                            1235865600000000, // 2009-03-01T00:00:00.000
+                            1235865660000000, // 2009-03-01T00:01:00.000
+                            1238544000000000, // 2009-04-01T00:00:00.000
+                            1238544060000000, // 2009-04-01T00:01:00.000
+                            1233446400000000, // 2009-02-01T00:00:00.000
+                            1233446460000000, // 2009-02-01T00:01:00.000
+                            1230768000000000, // 2009-01-01T00:00:00.000
+                            1230768060000000, // 2009-01-01T00:01:00.000
+                        ])
+                        .with_timezone("+00:00"),
+                    ) as _,
+                    true,
+                ),
+            ])
+            .unwrap();
+            assert_eq!(batch, expected, "mismatch for file {file}"); // compares the full batch against the expected one
+        }
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_alltypes_illegal_promotion_bool_to_double_errors() {
+        // Requests bool_col as "double" in the reader schema (an illegal promotion)
+        // and expects building the reader to fail with a matching message.
+        let path = arrow_test_data("avro/alltypes_plain.avro");
+        let promotions: HashMap<&str, &str> = HashMap::from([("bool_col", "double")]);
+        let reader_schema = make_reader_schema_with_promotions(&path, &promotions);
+        let input = BufReader::new(File::open(&path).unwrap());
+        let msg = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(input)
+            .expect_err("expected illegal promotion to error")
+            .to_string();
+        let accepted = ["Illegal promotion", "illegal promotion"];
+        let recognised = accepted.iter().any(|needle| msg.contains(*needle));
+        assert!(recognised, "unexpected error: {msg}");
+    }
+
+    #[test]
+    fn test_simple_enum_with_reader_schema_mapping() {
+        // Reads simple_enum.avro through a reader schema whose enum symbol lists
+        // come from `remap`, then checks keys, dictionary values and field
+        // metadata of the three resulting dictionary columns.
+        let file = arrow_test_data("avro/simple_enum.avro");
+        let remap: HashMap<&str, Vec<&str>> = HashMap::from([
+            ("f1", vec!["d", "c", "b", "a"]),
+            ("f2", vec!["h", "g", "f", "e"]),
+            ("f3", vec!["k", "i", "j"]),
+        ]);
+        let reader_schema = make_reader_schema_with_enum_remap(&file, &remap);
+        let actual = read_alltypes_with_reader_schema(&file, reader_schema);
+
+        // Builds the expected Arrow field for one enum column: dictionary type plus
+        // the symbol list and Avro name/namespace recorded as field metadata.
+        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
+        let enum_field = |name: &str, symbols: &str, avro_name: &str, ns: &str, nullable: bool| {
+            let metadata = HashMap::from([
+                (
+                    AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                    symbols.to_string(),
+                ),
+                ("avro.name".to_string(), avro_name.to_string()),
+                ("avro.namespace".to_string(), ns.to_string()),
+            ]);
+            Field::new(name, dict_type.clone(), nullable).with_metadata(metadata)
+        };
+        // Builds the expected dictionary array from keys and the remapped symbols.
+        let enum_column = |keys: Int32Array, symbols: Vec<&str>| {
+            let values: ArrayRef = Arc::new(StringArray::from(symbols));
+            DictionaryArray::<Int32Type>::try_new(keys, values).unwrap()
+        };
+
+        // Keys index into the reader-side symbol order given in `remap`
+        // (for example key 3 in f1 selects "a").
+        let f1 = enum_column(Int32Array::from(vec![3, 2, 1, 0]), vec!["d", "c", "b", "a"]);
+        let f2 = enum_column(Int32Array::from(vec![1, 0, 3, 2]), vec!["h", "g", "f", "e"]);
+        // f3 is the only nullable column; its third row is null.
+        let f3 = enum_column(
+            Int32Array::from(vec![Some(2), Some(0), None, Some(1)]),
+            vec!["k", "i", "j"],
+        );
+
+        let expected_schema = Arc::new(Schema::new(vec![
+            enum_field("f1", r#"["d","c","b","a"]"#, "enum1", "ns1", false),
+            enum_field("f2", r#"["h","g","f","e"]"#, "enum2", "ns2", false),
+            enum_field("f3", r#"["k","i","j"]"#, "enum3", "ns1", true),
+        ]));
+        let expected = RecordBatch::try_new(
+            expected_schema,
+            vec![Arc::new(f1) as ArrayRef, Arc::new(f2), Arc::new(f3)],
+        )
+        .unwrap();
+
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_schema_store_register_lookup() {
+        let mut store = SchemaStore::new(); // default store; its algorithm is asserted to be Rabin below
+        let int_schema = make_record_schema(PrimitiveType::Int);
+        let long_schema = make_record_schema(PrimitiveType::Long);
+        let int_fp = store.register(int_schema.clone()).unwrap();
+        let long_fp = store.register(long_schema.clone()).unwrap();
+        assert_eq!(store.lookup(&int_fp), Some(&int_schema));
+        assert_eq!(store.lookup(&long_fp), Some(&long_schema));
+        assert_eq!(store.fingerprint_algorithm(), FingerprintAlgorithm::Rabin);
+    }
+
+    #[test]
+    fn test_unknown_fingerprint_is_error() {
+        // Decoding a prefix whose fingerprint is (presumably) absent from the store must fail.
+        let (store, fp_int, _fp_long, _schema_int, schema_long) = make_two_schema_store();
+        let mut decoder = make_decoder(&store, fp_int, &schema_long);
+        let prefix = make_prefix(Fingerprint::Rabin(0xDEAD_BEEF_DEAD_BEEF));
+        let msg = decoder
+            .decode(&prefix)
+            .expect_err("decode should error")
+            .to_string();
+        let found = msg.contains("Unknown fingerprint");
+        assert!(found, "unexpected message: {msg}");
+    }
+
+    #[test]
+    fn test_handle_prefix_incomplete_magic() {
+        let (store, fp_int, _fp_long, _schema_int, schema_long) = make_two_schema_store();
+        let mut decoder = make_decoder(&store, fp_int, &schema_long);
+        // Only the first magic byte is supplied: expect Some(0) and no pending schema.
+        let consumed = decoder.handle_prefix(&SINGLE_OBJECT_MAGIC[..1]).unwrap();
+        assert_eq!(consumed, Some(0));
+        assert!(decoder.pending_schema.is_none());
+    }
+
+    #[test]
+    fn test_handle_prefix_magic_mismatch() {
+        let (store, fp_int, _fp_long, _schema_int, schema_long) = make_two_schema_store();
+        let mut decoder = make_decoder(&store, fp_int, &schema_long);
+        // Bytes expected not to match the magic prefix: handle_prefix should yield None.
+        let outcome = decoder.handle_prefix(&[0xFFu8, 0x00u8, 0x01u8]).unwrap();
+        assert!(outcome.is_none());
+    }
+
+    #[test]
+    fn test_handle_prefix_incomplete_fingerprint() {
+        // Supplies the magic followed by only the first 4 fingerprint bytes and
+        // expects handle_prefix to report Some(0) with no schema switch pending.
+        let (store, fp_int, fp_long, _schema_int, schema_long) = make_two_schema_store();
+        let mut decoder = make_decoder(&store, fp_int, &schema_long);
+        // let-else instead of an exhaustive match: the test no longer needs one
+        // (possibly feature-gated) arm per Fingerprint variant. A non-Rabin value
+        // is reported through its Debug form.
+        let Fingerprint::Rabin(rabin) = fp_long else {
+            panic!("expected Rabin fingerprint, got ({fp_long:?})");
+        };
+        let long_bytes = rabin.to_le_bytes();
+        let mut buf = Vec::from(SINGLE_OBJECT_MAGIC);
+        buf.extend_from_slice(&long_bytes[..4]);
+        let res = decoder.handle_prefix(&buf).unwrap();
+        assert_eq!(res, Some(0));
+        assert!(decoder.pending_schema.is_none());
+    }
+
+    #[test]
+    fn test_handle_prefix_valid_prefix_switches_schema() {
+        // Pre-populates the decoder cache with a decoder for the Long schema, then
+        // feeds a complete magic + fingerprint prefix and expects the whole prefix
+        // to be consumed with the Long fingerprint recorded in pending_schema.
+        let (store, fp_int, fp_long, _schema_int, schema_long) = make_two_schema_store();
+        let mut decoder = make_decoder(&store, fp_int, &schema_long);
+        let writer_schema_long = schema_long.schema().unwrap();
+        let root_long = AvroFieldBuilder::new(&writer_schema_long).build().unwrap();
+        let long_decoder = RecordDecoder::try_new_with_options(root_long.data_type()).unwrap();
+        let _ = decoder.cache.insert(fp_long, long_decoder);
+        // let-else avoids one (possibly feature-gated) panic arm per Fingerprint
+        // variant; a non-Rabin value is reported through its Debug form.
+        let Fingerprint::Rabin(rabin) = fp_long else {
+            panic!("expected Rabin fingerprint, got ({fp_long:?})");
+        };
+        let mut buf = Vec::from(SINGLE_OBJECT_MAGIC);
+        buf.extend_from_slice(&rabin.to_le_bytes());
+        let consumed = decoder.handle_prefix(&buf).unwrap().unwrap();
+        assert_eq!(consumed, buf.len());
+        assert!(decoder.pending_schema.is_some());
+        assert_eq!(decoder.pending_schema.as_ref().unwrap().0, fp_long);
+    }
+
+    #[test]
+    fn test_two_messages_same_schema() {
+        // Two messages with the same fingerprint, decoded in one call, must flush as one 2-row batch.
+        let writer_schema = make_value_schema(PrimitiveType::Int);
+        let reader_schema = writer_schema.clone();
+        let mut store = SchemaStore::new();
+        let fp = store.register(writer_schema).unwrap();
+        // The message buffers are moved straight into the concatenation; no clones needed.
+        let input = [make_message(fp, 42), make_message(fp, 11)].concat();
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(reader_schema)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(fp)
+            .build_decoder()
+            .unwrap();
+        let _ = decoder.decode(&input).unwrap();
+        let batch = decoder.flush().unwrap().expect("batch");
+        assert_eq!(batch.num_rows(), 2);
+        let col = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(col.value(0), 42);
+        assert_eq!(col.value(1), 11);
+    }
+
+    #[test]
+    fn test_two_messages_schema_switch() {
+        // Decodes an Int-schema message and then a Long-schema message with the same
+        // decoder; each flush should yield one row typed by that message's schema.
+        let mut store = SchemaStore::new();
+        let fp_int = store
+            .register(make_value_schema(PrimitiveType::Int))
+            .unwrap();
+        let fp_long = store
+            .register(make_value_schema(PrimitiveType::Long))
+            .unwrap();
+        let int_message = make_message(fp_int, 1);
+        let long_message = make_message(fp_long, 123456789_i64);
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(fp_int)
+            .build_decoder()
+            .unwrap();
+        // First message: built with the fingerprint that is active from the start.
+        let _ = decoder.decode(&int_message).unwrap();
+        let first = decoder.flush().unwrap().expect("batch1");
+        assert_eq!(first.num_rows(), 1);
+        let ints = first
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(ints.value(0), 1);
+        // Second message: built with the Long fingerprint, so the column must be Int64.
+        let _ = decoder.decode(&long_message).unwrap();
+        let second = decoder.flush().unwrap().expect("batch2");
+        assert_eq!(second.num_rows(), 1);
+        let longs = second
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        assert_eq!(longs.value(0), 123456789_i64);
+    }
+
+    #[test]
+    fn test_two_messages_same_schema_id() {
+        // Two messages tagged with the same u32 schema id must flush as one 2-row batch.
+        let writer_schema = make_value_schema(PrimitiveType::Int);
+        let reader_schema = writer_schema.clone();
+        let id = 100u32;
+        // Set up a store using the `Id` fingerprint algorithm and register the schema under `id`
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+        let _ = store
+            .set(Fingerprint::Id(id), writer_schema)
+            .expect("set id schema");
+        // The message buffers are moved straight into the concatenation; no clones needed.
+        let input = [make_message_id(id, 21), make_message_id(id, 22)].concat();
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(reader_schema)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(Fingerprint::Id(id))
+            .build_decoder()
+            .unwrap();
+        let _ = decoder.decode(&input).unwrap();
+        let batch = decoder.flush().unwrap().expect("batch");
+        assert_eq!(batch.num_rows(), 2);
+        let col = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(col.value(0), 21);
+        assert_eq!(col.value(1), 22);
+    }
+
+    #[test]
+    fn test_unknown_id_fingerprint_is_error() {
+        // Only id 7 is set in the store; a prefix built for id 9 must make decode fail.
+        const KNOWN_ID: u32 = 7;
+        const UNKNOWN_ID: u32 = 9;
+        let schema = make_value_schema(PrimitiveType::Int);
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+        let _ = store
+            .set(Fingerprint::Id(KNOWN_ID), schema.clone())
+            .expect("set id schema");
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(schema)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(Fingerprint::Id(KNOWN_ID))
+            .build_decoder()
+            .unwrap();
+        let msg = decoder
+            .decode(&make_id_prefix(UNKNOWN_ID, 0))
+            .expect_err("decode should error")
+            .to_string();
+        let reported = msg.contains("Unknown fingerprint");
+        assert!(reported, "unexpected message: {msg}");
+    }
+
+    #[test]
+    fn test_handle_prefix_id_incomplete_magic() {
+        // With an Id-keyed store, an empty buffer must be reported as Some(0)
+        // and must not leave a schema switch pending.
+        let schema = make_value_schema(PrimitiveType::Int);
+        let active = Fingerprint::Id(5u32);
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+        let _ = store.set(active, schema.clone()).expect("set id schema");
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(schema)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(active)
+            .build_decoder()
+            .unwrap();
+        let empty = &CONFLUENT_MAGIC[..0]; // empty incomplete magic
+        let res = decoder.handle_prefix(empty).unwrap();
+        assert_eq!(res, Some(0));
+        assert!(decoder.pending_schema.is_none());
+    }
+
+    #[test]
+    fn test_two_messages_same_schema_id64() {
+        // Two messages tagged with the same u64 schema id must flush as one 2-row batch.
+        let writer_schema = make_value_schema(PrimitiveType::Int);
+        let reader_schema = writer_schema.clone();
+        let id = 100u64;
+        // Set up a store using the `Id64` fingerprint algorithm and register the schema under `id`
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id64);
+        let _ = store
+            .set(Fingerprint::Id64(id), writer_schema)
+            .expect("set id schema");
+        // The message buffers are moved straight into the concatenation; no clones needed.
+        let input = [make_message_id64(id, 21), make_message_id64(id, 22)].concat();
+        let mut decoder = ReaderBuilder::new()
+            .with_batch_size(8)
+            .with_reader_schema(reader_schema)
+            .with_writer_schema_store(store)
+            .with_active_fingerprint(Fingerprint::Id64(id))
+            .build_decoder()
+            .unwrap();
+        let _ = decoder.decode(&input).unwrap();
+        let batch = decoder.flush().unwrap().expect("batch");
+        assert_eq!(batch.num_rows(), 2);
+        let col = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(col.value(0), 21);
+        assert_eq!(col.value(1), 22);
+    }
+
+    #[test]
+    fn test_decode_stream_with_schema() {
+        // Table-driven check of `decode_stream`: the same single-message payload (a
+        // length-prefixed string) is decoded under each writer schema in `tests`.
+        struct TestCase<'a> {
+            name: &'a str,
+            schema: &'a str,
+            expected_error: Option<&'a str>,
+        }
+        let record_val = "some_string";
+        let tests = vec![
+            TestCase {
+                name: "success",
+                schema: r#"{"type":"record","name":"test","fields":[{"name":"f2","type":"string"}]}"#,
+                expected_error: None,
+            },
+            TestCase {
+                name: "valid schema invalid data",
+                schema: r#"{"type":"record","name":"test","fields":[{"name":"f2","type":"long"}]}"#,
+                expected_error: Some("did not consume all bytes"),
+            },
+        ];
+        for test in tests {
+            let mut store = SchemaStore::new();
+            let fp = store
+                .register(AvroSchema::new(test.schema.to_string()))
+                .unwrap();
+            // Payload: prefix for `fp`, one length byte (`len << 1`), then the bytes.
+            let mut body = make_prefix(fp);
+            body.push((record_val.len() as u8) << 1);
+            body.extend_from_slice(record_val.as_bytes());
+            let built = ReaderBuilder::new()
+                .with_batch_size(1)
+                .with_writer_schema_store(store)
+                .with_active_fingerprint(fp)
+                .build_decoder();
+            // A build failure is acceptable only when the case expects an error.
+            let decoder = match (built, test.expected_error) {
+                (Ok(d), _) => d,
+                (Err(e), Some(expected)) => {
+                    assert!(
+                        e.to_string().contains(expected),
+                        "Test '{}' failed at build – expected '{expected}', got '{e}'",
+                        test.name
+                    );
+                    continue;
+                }
+                (Err(e), None) => panic!("Test '{}' failed during build: {e}", test.name),
+            };
+            let input = Box::pin(stream::once(async { Bytes::from(body) }));
+            let outcome: Result<Vec<RecordBatch>, ArrowError> =
+                block_on(decode_stream(decoder, input).try_collect());
+            match (outcome, test.expected_error) {
+                (Ok(batches), None) => {
+                    let batch =
+                        arrow::compute::concat_batches(&batches[0].schema(), &batches).unwrap();
+                    let f2 = Field::new("f2", DataType::Utf8, false);
+                    let expected_schema = Arc::new(Schema::new(vec![f2]));
+                    let column = Arc::new(StringArray::from(vec![record_val]));
+                    let expected_batch =
+                        RecordBatch::try_new(expected_schema, vec![column]).unwrap();
+                    assert_eq!(batch, expected_batch, "Test '{}'", test.name);
+                }
+                (Err(e), Some(expected)) => {
+                    assert!(
+                        e.to_string().contains(expected),
+                        "Test '{}' – expected error containing '{expected}', got '{e}'",
+                        test.name
+                    );
+                }
+                (Ok(_), Some(expected)) => {
+                    panic!(
+                        "Test '{}' expected failure ('{expected}') but succeeded",
+                        test.name
+                    );
+                }
+                (Err(e), None) => {
+                    panic!("Test '{}' unexpectedly failed with '{e}'", test.name);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_utf8view_support() {
+        // Arrow-level sanity check only: nothing here goes through the Avro reader.
+        // It verifies that a Utf8 field can be rewritten to Utf8View and that a
+        // StringViewArray survives being placed in a RecordBatch.
+        fn with_utf8view(field: &Field) -> Field {
+            // Non-Utf8 fields are returned unchanged.
+            if !matches!(field.data_type(), DataType::Utf8) {
+                return field.clone();
+            }
+            // Same name, nullability and metadata; only the data type changes.
+            Field::new(field.name(), DataType::Utf8View, field.is_nullable())
+                .with_metadata(field.metadata().clone())
+        }
+
+        let source = Field::new("str_field", DataType::Utf8, false);
+        let field = with_utf8view(&source);
+        assert_eq!(field.data_type(), &DataType::Utf8View);
+
+        let array = StringViewArray::from(vec!["test1", "test2"]);
+        let columns = vec![("str_field", Arc::new(array) as ArrayRef)];
+        let batch = RecordBatch::try_from_iter(columns).unwrap();
+        // The column must still downcast to the view-typed array.
+        assert!(batch.column(0).as_any().is::<StringViewArray>());
+    }
+
+    /// Loads the writer schema at `path` and swaps its `fields` for `default_fields`.
+    fn make_reader_schema_with_default_fields(
+        path: &str,
+        default_fields: Vec<Value>,
+    ) -> AvroSchema {
+        let mut schema_json = load_writer_schema_json(path);
+        assert_eq!(schema_json["type"], "record", "writer schema must be a record");
+        let record = schema_json.as_object_mut().expect("schema is a JSON object");
+        record.insert("fields".to_string(), Value::Array(default_fields));
+        AvroSchema::new(schema_json.to_string())
+    }
+
+    #[test]
+    fn test_schema_resolution_defaults_all_supported_types() {
+        let path = "test/data/skippable_types.avro";
+        let duration_default = "\u{0000}".repeat(12);
+        let reader_schema = make_reader_schema_with_default_fields(
+            path,
+            vec![
+                serde_json::json!({"name":"d_bool","type":"boolean","default":true}),
+                serde_json::json!({"name":"d_int","type":"int","default":42}),
+                serde_json::json!({"name":"d_long","type":"long","default":12345}),
+                serde_json::json!({"name":"d_float","type":"float","default":1.5}),
+                serde_json::json!({"name":"d_double","type":"double","default":2.25}),
+                serde_json::json!({"name":"d_bytes","type":"bytes","default":"XYZ"}),
+                serde_json::json!({"name":"d_string","type":"string","default":"hello"}),
+                serde_json::json!({"name":"d_date","type":{"type":"int","logicalType":"date"},"default":0}),
+                serde_json::json!({"name":"d_time_ms","type":{"type":"int","logicalType":"time-millis"},"default":1000}),
+                serde_json::json!({"name":"d_time_us","type":{"type":"long","logicalType":"time-micros"},"default":2000}),
+                serde_json::json!({"name":"d_ts_ms","type":{"type":"long","logicalType":"local-timestamp-millis"},"default":0}),
+                serde_json::json!({"name":"d_ts_us","type":{"type":"long","logicalType":"local-timestamp-micros"},"default":0}),
+                serde_json::json!({"name":"d_decimal","type":{"type":"bytes","logicalType":"decimal","precision":10,"scale":2},"default":""}),
+                serde_json::json!({"name":"d_fixed","type":{"type":"fixed","name":"F4","size":4},"default":"ABCD"}),
+                serde_json::json!({"name":"d_enum","type":{"type":"enum","name":"E","symbols":["A","B","C"]},"default":"A"}),
+                serde_json::json!({"name":"d_duration","type":{"type":"fixed","name":"Dur","size":12,"logicalType":"duration"},"default":duration_default}),
+                serde_json::json!({"name":"d_uuid","type":{"type":"string","logicalType":"uuid"},"default":"00000000-0000-0000-0000-000000000000"}),
+                serde_json::json!({"name":"d_array","type":{"type":"array","items":"int"},"default":[1,2,3]}),
+                serde_json::json!({"name":"d_map","type":{"type":"map","values":"long"},"default":{"a":1,"b":2}}),
+                serde_json::json!({"name":"d_record","type":{
+              "type":"record","name":"DefaultRec","fields":[
+                  {"name":"x","type":"int"},
+                  {"name":"y","type":["null","string"],"default":null}
+              ]
+        },"default":{"x":7}}),
+                serde_json::json!({"name":"d_nullable_null","type":["null","int"],"default":null}),
+                serde_json::json!({"name":"d_nullable_value","type":["int","null"],"default":123}),
+            ],
+        );
+        let actual = read_alltypes_with_reader_schema(path, reader_schema);
+        let num_rows = actual.num_rows();
+        assert!(num_rows > 0, "skippable_types.avro should contain rows");
+        assert_eq!(
+            actual.num_columns(),
+            22,
+            "expected exactly our defaulted fields"
+        );
+        let mut arrays: Vec<Arc<dyn Array>> = Vec::with_capacity(22);
+        arrays.push(Arc::new(BooleanArray::from_iter(std::iter::repeat_n(
+            Some(true),
+            num_rows,
+        ))));
+        arrays.push(Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(
+            42, num_rows,
+        ))));
+        arrays.push(Arc::new(Int64Array::from_iter_values(std::iter::repeat_n(
+            12345, num_rows,
+        ))));
+        arrays.push(Arc::new(Float32Array::from_iter_values(
+            std::iter::repeat_n(1.5f32, num_rows),
+        )));
+        arrays.push(Arc::new(Float64Array::from_iter_values(
+            std::iter::repeat_n(2.25f64, num_rows),
+        )));
+        arrays.push(Arc::new(BinaryArray::from_iter_values(
+            std::iter::repeat_n(b"XYZ".as_ref(), num_rows),
+        )));
+        arrays.push(Arc::new(StringArray::from_iter_values(
+            std::iter::repeat_n("hello", num_rows),
+        )));
+        arrays.push(Arc::new(Date32Array::from_iter_values(
+            std::iter::repeat_n(0, num_rows),
+        )));
+        arrays.push(Arc::new(Time32MillisecondArray::from_iter_values(
+            std::iter::repeat_n(1_000, num_rows),
+        )));
+        arrays.push(Arc::new(Time64MicrosecondArray::from_iter_values(
+            std::iter::repeat_n(2_000i64, num_rows),
+        )));
+        arrays.push(Arc::new(TimestampMillisecondArray::from_iter_values(
+            std::iter::repeat_n(0i64, num_rows),
+        )));
+        arrays.push(Arc::new(TimestampMicrosecondArray::from_iter_values(
+            std::iter::repeat_n(0i64, num_rows),
+        )));
+        #[cfg(feature = "small_decimals")]
+        let decimal = Decimal64Array::from_iter_values(std::iter::repeat_n(0i64, num_rows))
+            .with_precision_and_scale(10, 2)
+            .unwrap();
+        #[cfg(not(feature = "small_decimals"))]
+        let decimal = Decimal128Array::from_iter_values(std::iter::repeat_n(0i128, num_rows))
+            .with_precision_and_scale(10, 2)
+            .unwrap();
+        arrays.push(Arc::new(decimal));
+        let fixed_iter = std::iter::repeat_n(Some(*b"ABCD"), num_rows);
+        arrays.push(Arc::new(
+            FixedSizeBinaryArray::try_from_sparse_iter_with_size(fixed_iter, 4).unwrap(),
+        ));
+        let enum_keys = Int32Array::from_iter_values(std::iter::repeat_n(0, num_rows));
+        let enum_values = StringArray::from_iter_values(["A", "B", "C"]);
+        let enum_arr =
+            DictionaryArray::<Int32Type>::try_new(enum_keys, Arc::new(enum_values)).unwrap();
+        arrays.push(Arc::new(enum_arr));
+        let duration_values = std::iter::repeat_n(
+            Some(IntervalMonthDayNanoType::make_value(0, 0, 0)),
+            num_rows,
+        );
+        let duration_arr: IntervalMonthDayNanoArray = duration_values.collect();
+        arrays.push(Arc::new(duration_arr));
+        let uuid_bytes = [0u8; 16];
+        let uuid_iter = std::iter::repeat_n(Some(uuid_bytes), num_rows);
+        arrays.push(Arc::new(
+            FixedSizeBinaryArray::try_from_sparse_iter_with_size(uuid_iter, 16).unwrap(),
+        ));
+        let item_field = Arc::new(Field::new(
+            Field::LIST_FIELD_DEFAULT_NAME,
+            DataType::Int32,
+            false,
+        ));
+        let mut list_builder = ListBuilder::new(Int32Builder::new()).with_field(item_field);
+        for _ in 0..num_rows {
+            list_builder.values().append_value(1);
+            list_builder.values().append_value(2);
+            list_builder.values().append_value(3);
+            list_builder.append(true);
+        }
+        arrays.push(Arc::new(list_builder.finish()));
+        let values_field = Arc::new(Field::new("value", DataType::Int64, false));
+        let mut map_builder = MapBuilder::new(
+            Some(builder::MapFieldNames {
+                entry: "entries".to_string(),
+                key: "key".to_string(),
+                value: "value".to_string(),
+            }),
+            StringBuilder::new(),
+            Int64Builder::new(),
+        )
+        .with_values_field(values_field);
+        for _ in 0..num_rows {
+            let (keys, vals) = map_builder.entries();
+            keys.append_value("a");
+            vals.append_value(1);
+            keys.append_value("b");
+            vals.append_value(2);
+            map_builder.append(true).unwrap();
+        }
+        arrays.push(Arc::new(map_builder.finish()));
+        let rec_fields: Fields = Fields::from(vec![
+            Field::new("x", DataType::Int32, false),
+            Field::new("y", DataType::Utf8, true),
+        ]);
+        let mut sb = StructBuilder::new(
+            rec_fields.clone(),
+            vec![
+                Box::new(Int32Builder::new()),
+                Box::new(StringBuilder::new()),
+            ],
+        );
+        for _ in 0..num_rows {
+            sb.field_builder::<Int32Builder>(0).unwrap().append_value(7);
+            sb.field_builder::<StringBuilder>(1).unwrap().append_null();
+            sb.append(true);
+        }
+        arrays.push(Arc::new(sb.finish()));
+        arrays.push(Arc::new(Int32Array::from_iter(std::iter::repeat_n(
+            None::<i32>,
+            num_rows,
+        ))));
+        arrays.push(Arc::new(Int32Array::from_iter_values(std::iter::repeat_n(
+            123, num_rows,
+        ))));
+        let expected = RecordBatch::try_new(actual.schema(), arrays).unwrap();
+        assert_eq!(
+            actual, expected,
+            "defaults should materialize correctly for all fields"
+        );
+    }
+
+    #[test]
+    fn test_schema_resolution_default_enum_invalid_symbol_errors() {
+        // The added reader field defaults to "Z", which is not one of the declared
+        // symbols A/B/C, so the test expects building the reader to fail.
+        let avro_path = "test/data/skippable_types.avro";
+        let extra_field = serde_json::json!({
+            "name":"bad_enum",
+            "type":{"type":"enum","name":"E","symbols":["A","B","C"]},
+            "default":"Z"
+        });
+        let reader_schema = make_reader_schema_with_default_fields(avro_path, vec![extra_field]);
+        let input = BufReader::new(File::open(avro_path).unwrap());
+        let build_error = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(input)
+            .expect_err("expected enum default validation to fail");
+        let msg = build_error.to_string();
+        let lowered = msg.to_lowercase();
+        // Match loosely rather than pinning exact wording: the message must mention
+        // the enum plus either its symbol or its default.
+        let names_enum = lowered.contains("enum");
+        let names_cause = lowered.contains("symbol") || lowered.contains("default");
+        assert!(names_enum && names_cause, "unexpected error: {msg}");
+    }
+
+    #[test]
+    fn test_schema_resolution_default_fixed_size_mismatch_errors() {
+        // The added reader field declares a fixed of size 4 but defaults to the
+        // three-character "ABC", so the test expects building the reader to fail.
+        let avro_path = "test/data/skippable_types.avro";
+        let extra_field = serde_json::json!({
+            "name":"bad_fixed",
+            "type":{"type":"fixed","name":"F","size":4},
+            "default":"ABC"
+        });
+        let reader_schema = make_reader_schema_with_default_fields(avro_path, vec![extra_field]);
+        let input = BufReader::new(File::open(avro_path).unwrap());
+        let build_error = ReaderBuilder::new()
+            .with_reader_schema(reader_schema)
+            .build(input)
+            .expect_err("expected fixed default validation to fail");
+        let msg = build_error.to_string();
+        let lowered = msg.to_lowercase();
+        // Match loosely rather than pinning exact wording: the message must mention
+        // the fixed type plus some description of the size mismatch.
+        let names_fixed = lowered.contains("fixed");
+        let names_cause = ["size", "length", "does not match"]
+            .iter()
+            .any(|needle| lowered.contains(*needle));
+        assert!(names_fixed && names_cause, "unexpected error: {msg}");
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_alltypes_skip_writer_fields_keep_double_only() {
+        // Read with a reader schema that selects only `double_col`.
+        let avro_path = arrow_test_data("avro/alltypes_plain.avro");
+        let only_double =
+            make_reader_schema_with_selected_fields_in_order(&avro_path, &["double_col"]);
+        let actual = read_alltypes_with_reader_schema(&avro_path, only_double);
+        let alternating = (0..8).map(|row| (row % 2) as f64 * 10.1);
+        let expected = RecordBatch::try_from_iter_with_nullable([(
+            "double_col",
+            Arc::new(Float64Array::from_iter_values(alternating)) as _,
+            true,
+        )])
+        .unwrap();
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_alltypes_skip_writer_fields_reorder_and_skip_many() {
+        let file = arrow_test_data("avro/alltypes_plain.avro");
+        let reader_schema =
+            make_reader_schema_with_selected_fields_in_order(&file, &["timestamp_col", "id"]);
+        let batch = read_alltypes_with_reader_schema(&file, reader_schema);
+        let expected = RecordBatch::try_from_iter_with_nullable([
             (
                 "timestamp_col",
                 Arc::new(
@@ -298,14 +2794,5920 @@ mod test {
                 ) as _,
                 true,
             ),
+            (
+                "id",
+                Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1])) as _,
+                true,
+            ),
         ])
         .unwrap();
+        assert_eq!(batch, expected);
+    }
 
-        for file in files {
-            let file = arrow_test_data(file);
+    #[test] // each writer field, projected alone, must equal its column in the full read
+    fn test_skippable_types_project_each_field_individually() {
+        let path = "test/data/skippable_types.avro";
+        let full = read_file(path, 1024, false); // baseline batch the projections are compared to
+        let schema_full = full.schema();
+        let num_rows = full.num_rows();
+        let writer_json = load_writer_schema_json(path);
+        assert_eq!(
+            writer_json["type"], "record",
+            "writer schema must be a record"
+        );
+        let fields_json = writer_json
+            .get("fields")
+            .and_then(|f| f.as_array())
+            .expect("record has fields");
+        assert_eq!(
+            schema_full.fields().len(),
+            fields_json.len(),
+            "full read column count vs writer fields"
+        );
+        fn rebuild_list_array_with_element( // same offsets/values/nulls, new element field
+            col: &ArrayRef,
+            new_elem: Arc<Field>,
+            is_large: bool, // true => downcast to LargeListArray, false => ListArray
+        ) -> ArrayRef {
+            if is_large {
+                let list = col
+                    .as_any()
+                    .downcast_ref::<LargeListArray>()
+                    .expect("expected LargeListArray");
+                let offsets = list.offsets().clone();
+                let values = list.values().clone();
+                let validity = list.nulls().cloned();
+                Arc::new(LargeListArray::try_new(new_elem, offsets, values, validity).unwrap())
+            } else {
+                let list = col
+                    .as_any()
+                    .downcast_ref::<ListArray>()
+                    .expect("expected ListArray");
+                let offsets = list.offsets().clone();
+                let values = list.values().clone();
+                let validity = list.nulls().cloned();
+                Arc::new(ListArray::try_new(new_elem, offsets, values, validity).unwrap())
+            }
+        }
+        for (idx, f) in fields_json.iter().enumerate() { // idx also indexes the full-read columns
+            let name = f
+                .get("name")
+                .and_then(|n| n.as_str())
+                .unwrap_or_else(|| panic!("field at index {idx} has no name"));
+            let reader_schema = make_reader_schema_with_selected_fields_in_order(path, &[name]);
+            let projected = read_alltypes_with_reader_schema(path, reader_schema);
+            assert_eq!(
+                projected.num_columns(),
+                1,
+                "projected batch should contain exactly the selected column '{name}'"
+            );
+            assert_eq!(
+                projected.num_rows(),
+                num_rows,
+                "row count mismatch for projected column '{name}'"
+            );
+            let col_full = full.column(idx).clone();
+            let full_field = schema_full.field(idx).as_ref().clone();
+            let proj_field_ref = projected.schema().field(0).clone();
+            let proj_field = proj_field_ref.as_ref();
+            let top_meta = proj_field.metadata().clone(); // expected field reuses this metadata
+            let (expected_field_ref, expected_col): (Arc<Field>, ArrayRef) =
+                match (full_field.data_type(), proj_field.data_type()) {
+                    // Lists: take the projected data type, rebuild the full-read data around it.
+                    (&DataType::List(_), DataType::List(proj_elem)) => {
+                        let new_col =
+                            rebuild_list_array_with_element(&col_full, proj_elem.clone(), false);
+                        let nf = Field::new(
+                            full_field.name().clone(),
+                            proj_field.data_type().clone(),
+                            full_field.is_nullable(),
+                        )
+                        .with_metadata(top_meta);
+                        (Arc::new(nf), new_col)
+                    }
+                    (&DataType::LargeList(_), DataType::LargeList(proj_elem)) => {
+                        let new_col =
+                            rebuild_list_array_with_element(&col_full, proj_elem.clone(), true);
+                        let nf = Field::new(
+                            full_field.name().clone(),
+                            proj_field.data_type().clone(),
+                            full_field.is_nullable(),
+                        )
+                        .with_metadata(top_meta);
+                        (Arc::new(nf), new_col)
+                    }
+                    _ => { // everything else: full-read field and column, projected metadata
+                        let nf = full_field.with_metadata(top_meta);
+                        (Arc::new(nf), col_full)
+                    }
+                };
+
+            let expected = RecordBatch::try_new(
+                Arc::new(Schema::new(vec![expected_field_ref])),
+                vec![expected_col],
+            )
+            .unwrap();
+            assert_eq!(
+                projected, expected,
+                "projected column '{name}' mismatch vs full read column"
+            );
+        }
+    }
+
+    #[test] // reads union_fields.avro and spot-checks its nullable and general union columns
+    fn test_union_fields_avro_nullable_and_general_unions() {
+        let path = "test/data/union_fields.avro";
+        let batch = read_file(path, 1024, false);
+        let schema = batch.schema();
+        let idx = schema.index_of("nullable_int_nullfirst").unwrap();
+        let a = batch.column(idx).as_primitive::<Int32Type>();
+        assert_eq!(a.len(), 4);
+        assert!(a.is_null(0));
+        assert_eq!(a.value(1), 42);
+        assert!(a.is_null(2));
+        assert_eq!(a.value(3), 0);
+        let idx = schema.index_of("nullable_string_nullsecond").unwrap();
+        let s = batch
+            .column(idx)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("nullable_string_nullsecond should be Utf8");
+        assert_eq!(s.len(), 4);
+        assert_eq!(s.value(0), "s1");
+        assert!(s.is_null(1));
+        assert_eq!(s.value(2), "s3");
+        assert!(s.is_valid(3)); // empty string, not null
+        assert_eq!(s.value(3), "");
+        let idx = schema.index_of("union_prim").unwrap();
+        let u = batch
+            .column(idx)
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("union_prim should be Union");
+        let fields = match u.data_type() {
+            DataType::Union(fields, mode) => {
+                assert!(matches!(mode, UnionMode::Dense), "expect dense unions");
+                fields
+            }
+            other => panic!("expected Union, got {other:?}"),
+        };
+        let tid_by_name = |name: &str| -> i8 { // type id of the union child named `name`
+            for (tid, f) in fields.iter() {
+                if f.name() == name {
+                    return tid;
+                }
+            }
+            panic!("union child '{name}' not found");
+        };
+        let expected_type_ids = vec![ // expected branch of each row, in row order
+            tid_by_name("long"),
+            tid_by_name("int"),
+            tid_by_name("float"),
+            tid_by_name("double"),
+        ];
+        let type_ids: Vec<i8> = u.type_ids().iter().copied().collect();
+        assert_eq!(
+            type_ids, expected_type_ids,
+            "branch selection for union_prim rows"
+        );
+        let longs = u
+            .child(tid_by_name("long"))
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        assert_eq!(longs.len(), 1);
+        let ints = u
+            .child(tid_by_name("int"))
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(ints.len(), 1);
+        let floats = u
+            .child(tid_by_name("float"))
+            .as_any()
+            .downcast_ref::<Float32Array>()
+            .unwrap();
+        assert_eq!(floats.len(), 1);
+        let doubles = u
+            .child(tid_by_name("double"))
+            .as_any()
+            .downcast_ref::<Float64Array>()
+            .unwrap();
+        assert_eq!(doubles.len(), 1);
+        let idx = schema.index_of("union_bytes_vs_string").unwrap();
+        let u = batch
+            .column(idx)
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("union_bytes_vs_string should be Union");
+        let fields = match u.data_type() {
+            DataType::Union(fields, _) => fields,
+            other => panic!("expected Union, got {other:?}"),
+        };
+        let tid_by_name = |name: &str| -> i8 { // same lookup, bound to this column's children
+            for (tid, f) in fields.iter() {
+                if f.name() == name {
+                    return tid;
+                }
+            }
+            panic!("union child '{name}' not found");
+        };
+        let tid_bytes = tid_by_name("bytes");
+        let tid_string = tid_by_name("string");
+        let type_ids: Vec<i8> = u.type_ids().iter().copied().collect();
+        assert_eq!(
+            type_ids,
+            vec![tid_bytes, tid_string, tid_string, tid_bytes],
+            "branch selection for bytes/string union"
+        );
+        let s_child = u
+            .child(tid_string)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(s_child.len(), 2);
+        assert_eq!(s_child.value(0), "hello");
+        assert_eq!(s_child.value(1), "world");
+        let b_child = u
+            .child(tid_bytes)
+            .as_any()
+            .downcast_ref::<BinaryArray>()
+            .unwrap();
+        assert_eq!(b_child.len(), 2);
+        assert_eq!(b_child.value(0), &[0x00, 0xFF, 0x7F]);
+        assert_eq!(b_child.value(1), b""); // second bytes value is empty
+        let idx = schema.index_of("union_enum_records_array_map").unwrap();
+        let u = batch
+            .column(idx)
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("union_enum_records_array_map should be Union");
+        let fields = match u.data_type() {
+            DataType::Union(fields, _) => fields,
+            other => panic!("expected Union, got {other:?}"),
+        };
+        let mut tid_enum: Option<i8> = None; // children here are located by data type, not name
+        let mut tid_rec_a: Option<i8> = None;
+        let mut tid_rec_b: Option<i8> = None;
+        let mut tid_array: Option<i8> = None;
+        for (tid, f) in fields.iter() {
+            match f.data_type() {
+                DataType::Dictionary(_, _) => tid_enum = Some(tid),
+                DataType::Struct(childs) => { // the two records are told apart by field names
+                    if childs.len() == 2 && childs[0].name() == "a" && childs[1].name() == "b" {
+                        tid_rec_a = Some(tid);
+                    } else if childs.len() == 2
+                        && childs[0].name() == "x"
+                        && childs[1].name() == "y"
+                    {
+                        tid_rec_b = Some(tid);
+                    }
+                }
+                DataType::List(_) => tid_array = Some(tid),
+                _ => {}
+            }
+        }
+        let (tid_enum, tid_rec_a, tid_rec_b, tid_array) = (
+            tid_enum.expect("enum child"),
+            tid_rec_a.expect("RecA child"),
+            tid_rec_b.expect("RecB child"),
+            tid_array.expect("array<long> child"),
+        );
+        let type_ids: Vec<i8> = u.type_ids().iter().copied().collect();
+        assert_eq!(
+            type_ids,
+            vec![tid_enum, tid_rec_a, tid_rec_b, tid_array],
+            "branch selection for complex union"
+        );
+        let dict = u
+            .child(tid_enum)
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        assert_eq!(dict.len(), 1);
+        assert!(dict.is_valid(0));
+        let rec_a = u
+            .child(tid_rec_a)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        assert_eq!(rec_a.len(), 1);
+        let a_val = rec_a
+            .column_by_name("a")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(a_val.value(0), 7);
+        let b_val = rec_a
+            .column_by_name("b")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(b_val.value(0), "x");
+        // RecB row: {"x": 123456789, "y": b"\xFF\x00"}
+        let rec_b = u
+            .child(tid_rec_b)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        let x_val = rec_b
+            .column_by_name("x")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        assert_eq!(x_val.value(0), 123_456_789_i64);
+        let y_val = rec_b
+            .column_by_name("y")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryArray>()
+            .unwrap();
+        assert_eq!(y_val.value(0), &[0xFF, 0x00]);
+        let arr = u
+            .child(tid_array)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+        assert_eq!(arr.len(), 1);
+        let first_values = arr.value(0);
+        let longs = first_values.as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.len(), 3);
+        assert_eq!(longs.value(0), 1);
+        assert_eq!(longs.value(1), 2);
+        assert_eq!(longs.value(2), 3);
+        let idx = schema.index_of("union_date_or_fixed4").unwrap();
+        let u = batch
+            .column(idx)
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("union_date_or_fixed4 should be Union");
+        let fields = match u.data_type() {
+            DataType::Union(fields, _) => fields,
+            other => panic!("expected Union, got {other:?}"),
+        };
+        let mut tid_date: Option<i8> = None; // again located by data type
+        let mut tid_fixed: Option<i8> = None;
+        for (tid, f) in fields.iter() {
+            match f.data_type() {
+                DataType::Date32 => tid_date = Some(tid),
+                DataType::FixedSizeBinary(4) => tid_fixed = Some(tid),
+                _ => {}
+            }
+        }
+        let (tid_date, tid_fixed) = (tid_date.expect("date"), tid_fixed.expect("fixed(4)"));
+        let type_ids: Vec<i8> = u.type_ids().iter().copied().collect();
+        assert_eq!(
+            type_ids,
+            vec![tid_date, tid_fixed, tid_date, tid_fixed],
+            "branch selection for date/fixed4 union"
+        );
+        let dates = u
+            .child(tid_date)
+            .as_any()
+            .downcast_ref::<Date32Array>()
+            .unwrap();
+        assert_eq!(dates.len(), 2);
+        assert_eq!(dates.value(0), 19_000); // 19_000 days after the epoch = 2022-01-08
+        assert_eq!(dates.value(1), 0); // epoch
+        let fixed = u
+            .child(tid_fixed)
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        assert_eq!(fixed.len(), 2);
+        assert_eq!(fixed.value(0), b"ABCD");
+        assert_eq!(fixed.value(1), &[0x00, 0x11, 0x22, 0x33]);
+    }
 
-            assert_eq!(read_file(&file, 8), expected);
-            assert_eq!(read_file(&file, 3), expected);
+    #[test]
+    fn test_union_schema_resolution_all_type_combinations() {
+        let path = "test/data/union_fields.avro";
+        let baseline = read_file(path, 1024, false);
+        let baseline_schema = baseline.schema();
+        let mut root = load_writer_schema_json(path);
+        assert_eq!(root["type"], "record", "writer schema must be a record");
+        let fields = root
+            .get_mut("fields")
+            .and_then(|f| f.as_array_mut())
+            .expect("record has fields");
+        fn is_named_type(obj: &Value, ty: &str, nm: &str) -> bool {
+            obj.get("type").and_then(|v| v.as_str()) == Some(ty)
+                && obj.get("name").and_then(|v| v.as_str()) == Some(nm)
+        }
+        fn is_logical(obj: &Value, prim: &str, lt: &str) -> bool {
+            obj.get("type").and_then(|v| v.as_str()) == Some(prim)
+                && obj.get("logicalType").and_then(|v| v.as_str()) == Some(lt)
+        }
+        fn find_first(arr: &[Value], pred: impl Fn(&Value) -> bool) -> Option<Value> {
+            arr.iter().find(|v| pred(v)).cloned()
+        }
+        fn prim(s: &str) -> Value {
+            Value::String(s.to_string())
+        }
+        for f in fields.iter_mut() {
+            let Some(name) = f.get("name").and_then(|n| n.as_str()) else {
+                continue;
+            };
+            match name {
+                // Flip null ordering – should not affect values
+                "nullable_int_nullfirst" => {
+                    f["type"] = json!(["int", "null"]);
+                }
+                "nullable_string_nullsecond" => {
+                    f["type"] = json!(["null", "string"]);
+                }
+                "union_prim" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let long = prim("long");
+                    let double = prim("double");
+                    let string = prim("string");
+                    let bytes = prim("bytes");
+                    let boolean = prim("boolean");
+                    assert!(orig.contains(&long));
+                    assert!(orig.contains(&double));
+                    assert!(orig.contains(&string));
+                    assert!(orig.contains(&bytes));
+                    assert!(orig.contains(&boolean));
+                    f["type"] = json!([long, double, string, bytes, boolean]);
+                }
+                "union_bytes_vs_string" => {
+                    f["type"] = json!(["string", "bytes"]);
+                }
+                "union_fixed_dur_decfix" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let fx8 = find_first(&orig, |o| is_named_type(o, "fixed", "Fx8")).unwrap();
+                    let dur12 = find_first(&orig, |o| is_named_type(o, "fixed", "Dur12")).unwrap();
+                    let decfix16 =
+                        find_first(&orig, |o| is_named_type(o, "fixed", "DecFix16")).unwrap();
+                    f["type"] = json!([decfix16, dur12, fx8]);
+                }
+                "union_enum_records_array_map" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let enum_color = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("enum")
+                    })
+                    .unwrap();
+                    let rec_a = find_first(&orig, |o| is_named_type(o, "record", "RecA")).unwrap();
+                    let rec_b = find_first(&orig, |o| is_named_type(o, "record", "RecB")).unwrap();
+                    let arr = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("array")
+                    })
+                    .unwrap();
+                    let map = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("map")
+                    })
+                    .unwrap();
+                    f["type"] = json!([arr, map, rec_b, rec_a, enum_color]);
+                }
+                "union_date_or_fixed4" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let date = find_first(&orig, |o| is_logical(o, "int", "date")).unwrap();
+                    let fx4 = find_first(&orig, |o| is_named_type(o, "fixed", "Fx4")).unwrap();
+                    f["type"] = json!([fx4, date]);
+                }
+                "union_time_millis_or_enum" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let time_ms =
+                        find_first(&orig, |o| is_logical(o, "int", "time-millis")).unwrap();
+                    let en = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("enum")
+                    })
+                    .unwrap();
+                    f["type"] = json!([en, time_ms]);
+                }
+                "union_time_micros_or_string" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let time_us =
+                        find_first(&orig, |o| is_logical(o, "long", "time-micros")).unwrap();
+                    f["type"] = json!(["string", time_us]);
+                }
+                "union_ts_millis_utc_or_array" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let ts_ms =
+                        find_first(&orig, |o| is_logical(o, "long", "timestamp-millis")).unwrap();
+                    let arr = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("array")
+                    })
+                    .unwrap();
+                    f["type"] = json!([arr, ts_ms]);
+                }
+                "union_ts_micros_local_or_bytes" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let lts_us =
+                        find_first(&orig, |o| is_logical(o, "long", "local-timestamp-micros"))
+                            .unwrap();
+                    f["type"] = json!(["bytes", lts_us]);
+                }
+                "union_uuid_or_fixed10" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let uuid = find_first(&orig, |o| is_logical(o, "string", "uuid")).unwrap();
+                    let fx10 = find_first(&orig, |o| is_named_type(o, "fixed", "Fx10")).unwrap();
+                    f["type"] = json!([fx10, uuid]);
+                }
+                "union_dec_bytes_or_dec_fixed" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let dec_bytes = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("bytes")
+                            && o.get("logicalType").and_then(|v| v.as_str()) == Some("decimal")
+                    })
+                    .unwrap();
+                    let dec_fix = find_first(&orig, |o| {
+                        is_named_type(o, "fixed", "DecFix20")
+                            && o.get("logicalType").and_then(|v| v.as_str()) == Some("decimal")
+                    })
+                    .unwrap();
+                    f["type"] = json!([dec_fix, dec_bytes]);
+                }
+                "union_null_bytes_string" => {
+                    f["type"] = json!(["bytes", "string", "null"]);
+                }
+                "array_of_union" => {
+                    let obj = f
+                        .get_mut("type")
+                        .expect("array type")
+                        .as_object_mut()
+                        .unwrap();
+                    obj.insert("items".to_string(), json!(["string", "long"]));
+                }
+                "map_of_union" => {
+                    let obj = f
+                        .get_mut("type")
+                        .expect("map type")
+                        .as_object_mut()
+                        .unwrap();
+                    obj.insert("values".to_string(), json!(["double", "null"]));
+                }
+                "record_with_union_field" => {
+                    let rec = f
+                        .get_mut("type")
+                        .expect("record type")
+                        .as_object_mut()
+                        .unwrap();
+                    let rec_fields = rec.get_mut("fields").unwrap().as_array_mut().unwrap();
+                    let mut found = false;
+                    for rf in rec_fields.iter_mut() {
+                        if rf.get("name").and_then(|v| v.as_str()) == Some("u") {
+                            rf["type"] = json!(["string", "long"]); // rely on int→long promotion
+                            found = true;
+                            break;
+                        }
+                    }
+                    assert!(found, "field 'u' expected in HasUnion");
+                }
+                "union_ts_micros_utc_or_map" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let ts_us =
+                        find_first(&orig, |o| is_logical(o, "long", "timestamp-micros")).unwrap();
+                    let map = find_first(&orig, |o| {
+                        o.get("type").and_then(|v| v.as_str()) == Some("map")
+                    })
+                    .unwrap();
+                    f["type"] = json!([map, ts_us]);
+                }
+                "union_ts_millis_local_or_string" => {
+                    let orig = f["type"].as_array().unwrap().clone();
+                    let lts_ms =
+                        find_first(&orig, |o| is_logical(o, "long", "local-timestamp-millis"))
+                            .unwrap();
+                    f["type"] = json!(["string", lts_ms]);
+                }
+                "union_bool_or_string" => {
+                    f["type"] = json!(["string", "boolean"]);
+                }
+                _ => {}
+            }
+        }
+        let reader_schema = AvroSchema::new(root.to_string());
+        let resolved = read_alltypes_with_reader_schema(path, reader_schema);
+
+        /// Maps an Arrow `DataType` to a short token naming the kind of union branch
+        /// it stands for, so branches can be compared by kind instead of by type id.
+        ///
+        /// Types without a dedicated token fall back to their `Debug` rendering.
+        fn branch_token(dt: &DataType) -> String {
+            let token: &str = match dt {
+                DataType::Null => "null",
+                DataType::Boolean => "boolean",
+                DataType::Int32 => "int",
+                DataType::Int64 => "long",
+                DataType::Float32 => "float",
+                DataType::Float64 => "double",
+                DataType::Binary => "bytes",
+                DataType::Utf8 => "string",
+                DataType::Date32 => "date",
+                DataType::Time32(arrow_schema::TimeUnit::Millisecond) => "time-millis",
+                DataType::Time64(arrow_schema::TimeUnit::Microsecond) => "time-micros",
+                // A timestamp carrying a timezone gets the plain token; one without
+                // a timezone gets the `local-` token.
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, Some(_)) => {
+                    "timestamp-millis"
+                }
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None) => {
+                    "local-timestamp-millis"
+                }
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some(_)) => {
+                    "timestamp-micros"
+                }
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None) => {
+                    "local-timestamp-micros"
+                }
+                DataType::Interval(IntervalUnit::MonthDayNano) => "duration",
+                DataType::Dictionary(_, _) => "enum",
+                DataType::List(_) => "array",
+                DataType::Map(_, _) => "map",
+                // Two-field structs are told apart by their field names.
+                DataType::Struct(fields) => {
+                    let names: Vec<&str> = fields.iter().map(|f| f.name().as_str()).collect();
+                    match names.as_slice() {
+                        ["a", "b"] => "record:RecA",
+                        ["x", "y"] => "record:RecB",
+                        _ => "record",
+                    }
+                }
+                // Parameterised tokens are formatted and returned directly.
+                DataType::FixedSizeBinary(n) => return format!("fixed{n}"),
+                DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => {
+                    return format!("decimal({p},{s})")
+                }
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(p, s) => return format!("decimal({p},{s})"),
+                other => return format!("{other:?}"),
+            };
+            token.to_string()
+        }
+
+        /// Returns the per-row type ids of `u` together with a lookup table from
+        /// each declared type id to the `branch_token` of that child's data type.
+        ///
+        /// Panics if the array's data type is not `DataType::Union`.
+        fn union_tokens(u: &UnionArray) -> (Vec<i8>, HashMap<i8, String>) {
+            let children = match u.data_type() {
+                DataType::Union(children, _) => children,
+                other => panic!("expected Union, got {other:?}"),
+            };
+            let tokens_by_id: HashMap<i8, String> = children
+                .iter()
+                .map(|(type_id, child)| (type_id, branch_token(child.data_type())))
+                .collect();
+            let row_ids: Vec<i8> = u.type_ids().to_vec();
+            (row_ids, tokens_by_id)
+        }
+
+        /// Returns the branch token the resolved column is expected to use for a
+        /// value that the baseline decoded as `writer_token`.
+        ///
+        /// `union_prim` maps `int` to `long` and `float` to `double`;
+        /// `record_with_union_field.u` maps `int` to `long`. Every other
+        /// combination keeps the writer token unchanged.
+        fn expected_token(field_name: &str, writer_token: &str) -> String {
+            let resolved = match (field_name, writer_token) {
+                ("union_prim" | "record_with_union_field.u", "int") => "long",
+                ("union_prim", "float") => "double",
+                (_, unchanged) => unchanged,
+            };
+            resolved.to_owned()
+        }
+
+        /// Looks up column `fname` in `rb` through `schema` and returns it as a
+        /// `UnionArray`.
+        ///
+        /// Panics if the schema has no such field or the column is not a union.
+        fn get_union<'a>(
+            rb: &'a RecordBatch,
+            schema: arrow_schema::SchemaRef,
+            fname: &str,
+        ) -> &'a UnionArray {
+            let position = schema.index_of(fname).unwrap();
+            let column = rb.column(position);
+            match column.as_any().downcast_ref::<UnionArray>() {
+                Some(union) => union,
+                None => panic!("{fname} should be a Union"),
+            }
+        }
+
+        /// Asserts that two union arrays select equivalent branches row by row.
+        ///
+        /// Branches are compared by `branch_token`, not by raw type id. The
+        /// writer-side token is first passed through `expected_token` for
+        /// `field_name`, and the reader-side token must equal the result.
+        fn assert_union_equivalent(field_name: &str, u_writer: &UnionArray, u_reader: &UnionArray) {
+            let (writer_ids, writer_tokens) = union_tokens(u_writer);
+            let (reader_ids, reader_tokens) = union_tokens(u_reader);
+            assert_eq!(
+                writer_ids.len(),
+                reader_ids.len(),
+                "{field_name}: row count mismatch between baseline and resolved"
+            );
+            // Translate each row's type id into its branch token on both sides.
+            let writer_rows = writer_ids.iter().map(|id| writer_tokens.get(id).unwrap());
+            let reader_rows = reader_ids.iter().map(|id| reader_tokens.get(id).unwrap());
+            for (i, (w_tok, got)) in writer_rows.zip(reader_rows).enumerate() {
+                let want = expected_token(field_name, w_tok);
+                assert_eq!(
+                    got, &want,
+                    "{field_name}: row {i} resolved to wrong union branch (writer={w_tok}, expected={want}, got={got})"
+                );
+            }
+        }
+
+        // The two nullable columns are expected to decode as plain Int32 / Utf8
+        // columns rather than unions. The baseline and resolved columns are
+        // therefore compared directly for equality.
+        for (fname, dt) in [
+            ("nullable_int_nullfirst", DataType::Int32),
+            ("nullable_string_nullsecond", DataType::Utf8),
+        ] {
+            let idx_b = baseline_schema.index_of(fname).unwrap();
+            let idx_r = resolved.schema().index_of(fname).unwrap();
+            let col_b = baseline.column(idx_b);
+            let col_r = resolved.column(idx_r);
+            assert_eq!(
+                col_b.data_type(),
+                &dt,
+                "baseline {fname} should decode as non-union with nullability"
+            );
+            assert_eq!(
+                col_b.as_ref(),
+                col_r.as_ref(),
+                "{fname}: values must be identical regardless of null-branch order"
+            );
+        }
+        // Top-level union columns: each must be a UnionArray in both batches and
+        // select equivalent branches row by row.
+        let union_fields = [
+            "union_prim",
+            "union_bytes_vs_string",
+            "union_fixed_dur_decfix",
+            "union_enum_records_array_map",
+            "union_date_or_fixed4",
+            "union_time_millis_or_enum",
+            "union_time_micros_or_string",
+            "union_ts_millis_utc_or_array",
+            "union_ts_micros_local_or_bytes",
+            "union_uuid_or_fixed10",
+            "union_dec_bytes_or_dec_fixed",
+            "union_null_bytes_string",
+            "union_ts_micros_utc_or_map",
+            "union_ts_millis_local_or_string",
+            "union_bool_or_string",
+        ];
+        for fname in union_fields {
+            // `get_union` takes the schema by value, hence the clone of the baseline schema.
+            let u_b = get_union(&baseline, baseline_schema.clone(), fname);
+            let u_r = get_union(&resolved, resolved.schema(), fname);
+            assert_union_equivalent(fname, u_b, u_r);
+        }
+        {
+            // `array_of_union`: the list layout must be unchanged in the resolved
+            // batch, and every list item must select the same kind of union branch
+            // as in the baseline.
+            let fname = "array_of_union";
+            let idx_b = baseline_schema.index_of(fname).unwrap();
+            let idx_r = resolved.schema().index_of(fname).unwrap();
+            let arr_b = baseline
+                .column(idx_b)
+                .as_any()
+                .downcast_ref::<ListArray>()
+                .expect("array_of_union should be a List");
+            let arr_r = resolved
+                .column(idx_r)
+                .as_any()
+                .downcast_ref::<ListArray>()
+                .expect("array_of_union should be a List");
+            assert_eq!(
+                arr_b.value_offsets(),
+                arr_r.value_offsets(),
+                "{fname}: list offsets changed after resolution"
+            );
+            let u_b = arr_b
+                .values()
+                .as_any()
+                .downcast_ref::<UnionArray>()
+                .expect("array items should be Union");
+            let u_r = arr_r
+                .values()
+                .as_any()
+                .downcast_ref::<UnionArray>()
+                .expect("array items should be Union");
+            // Use the shared helper (as the `map_of_union` check does) instead of
+            // repeating its per-row comparison here. `expected_token` returns the
+            // writer token unchanged for this field name, so each item's branch
+            // token must match exactly.
+            assert_union_equivalent(fname, u_b, u_r);
+        }
+        {
+            // `map_of_union`: the map layout must be unchanged in the resolved batch.
+            // The value column is then checked in one of two ways, depending on
+            // whether it decoded as a union in both batches.
+            let fname = "map_of_union";
+            let idx_b = baseline_schema.index_of(fname).unwrap();
+            let idx_r = resolved.schema().index_of(fname).unwrap();
+            let map_b = baseline
+                .column(idx_b)
+                .as_any()
+                .downcast_ref::<MapArray>()
+                .expect("map_of_union should be a Map");
+            let map_r = resolved
+                .column(idx_r)
+                .as_any()
+                .downcast_ref::<MapArray>()
+                .expect("map_of_union should be a Map");
+            assert_eq!(
+                map_b.value_offsets(),
+                map_r.value_offsets(),
+                "{fname}: map value offsets changed after resolution"
+            );
+            // Column 1 of the entries struct holds the map values (column 0 the keys).
+            let ent_b = map_b.entries();
+            let ent_r = map_r.entries();
+            let val_b_any = ent_b.column(1).as_ref();
+            let val_r_any = ent_r.column(1).as_ref();
+            let b_union = val_b_any.as_any().downcast_ref::<UnionArray>();
+            let r_union = val_r_any.as_any().downcast_ref::<UnionArray>();
+            if let (Some(u_b), Some(u_r)) = (b_union, r_union) {
+                // Both sides are unions: compare the selected branches row by row.
+                assert_union_equivalent(fname, u_b, u_r);
+            } else {
+                // At least one side is not a union: the value columns must then be
+                // identical in type and content.
+                assert_eq!(
+                    val_b_any.data_type(),
+                    val_r_any.data_type(),
+                    "{fname}: value data types differ after resolution"
+                );
+                assert_eq!(
+                    val_b_any, val_r_any,
+                    "{fname}: value arrays differ after resolution (nullable value column case)"
+                );
+                // Reads the nullability flag of the "value" field from the map's
+                // entries struct, checking the struct has the key/value shape.
+                let value_nullable = |m: &MapArray| -> bool {
+                    match m.data_type() {
+                        DataType::Map(entries_field, _sorted) => match entries_field.data_type() {
+                            DataType::Struct(fields) => {
+                                assert_eq!(fields.len(), 2, "entries struct must have 2 fields");
+                                assert_eq!(fields[0].name(), "key");
+                                assert_eq!(fields[1].name(), "value");
+                                fields[1].is_nullable()
+                            }
+                            other => panic!("Map entries field must be Struct, got {other:?}"),
+                        },
+                        other => panic!("expected Map data type, got {other:?}"),
+                    }
+                };
+                assert!(
+                    value_nullable(map_b),
+                    "{fname}: baseline Map value field should be nullable per Arrow spec"
+                );
+                assert!(
+                    value_nullable(map_r),
+                    "{fname}: resolved Map value field should be nullable per Arrow spec"
+                );
+            }
+        }
+        {
+            // `record_with_union_field`: the column must be a struct in both batches
+            // and its child `u` must be a union selecting equivalent branches.
+            let fname = "record_with_union_field";
+            let idx_b = baseline_schema.index_of(fname).unwrap();
+            let idx_r = resolved.schema().index_of(fname).unwrap();
+            let rec_b = baseline
+                .column(idx_b)
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .expect("record_with_union_field should be a Struct");
+            let rec_r = resolved
+                .column(idx_r)
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .expect("record_with_union_field should be a Struct");
+            let u_b = rec_b
+                .column_by_name("u")
+                .unwrap()
+                .as_any()
+                .downcast_ref::<UnionArray>()
+                .expect("field 'u' should be Union (baseline)");
+            let u_r = rec_r
+                .column_by_name("u")
+                .unwrap()
+                .as_any()
+                .downcast_ref::<UnionArray>()
+                .expect("field 'u' should be Union (resolved)");
+            // The dotted name selects the `expected_token` rule for this nested
+            // field, which maps a writer `int` to `long`.
+            assert_union_equivalent("record_with_union_field.u", u_b, u_r);
+        }
+    }
+
+    #[test]
+    fn test_union_fields_end_to_end_expected_arrays() {
+        /// Returns the type id of the first union child whose field name equals
+        /// `want`; panics if there is no such child.
+        fn tid_by_name(fields: &UnionFields, want: &str) -> i8 {
+            fields
+                .iter()
+                .find_map(|(tid, child)| (child.name() == want).then_some(tid))
+                .unwrap_or_else(|| panic!("union child '{want}' not found"))
+        }
+
+        /// Returns the type id of the first union child whose data type satisfies
+        /// `pred`; panics if no child matches.
+        fn tid_by_dt(fields: &UnionFields, pred: impl Fn(&DataType) -> bool) -> i8 {
+            fields
+                .iter()
+                .find_map(|(tid, child)| pred(child.data_type()).then_some(tid))
+                .expect("no union child matches predicate")
+        }
+
+        /// Decodes a textual UUID into its 16 raw bytes.
+        ///
+        /// A `-` found where a byte would start is skipped; otherwise two hex
+        /// digits (either case) form one byte, high nibble first. Panics on a
+        /// non-hex character or if the input does not decode to exactly 16 bytes.
+        fn uuid16_from_str(s: &str) -> [u8; 16] {
+            // Value of a single ASCII hex digit.
+            fn nibble(c: u8) -> u8 {
+                match c {
+                    b'0'..=b'9' => c - b'0',
+                    b'a'..=b'f' => c - b'a' + 10,
+                    b'A'..=b'F' => c - b'A' + 10,
+                    _ => panic!("invalid hex"),
+                }
+            }
+            let raw = s.as_bytes();
+            let mut decoded = [0u8; 16];
+            let mut filled = 0;
+            let mut pos = 0;
+            while let Some(&lead) = raw.get(pos) {
+                if lead == b'-' {
+                    // Separators are only recognised at the start of a byte.
+                    pos += 1;
+                } else {
+                    let high = nibble(lead);
+                    let low = nibble(raw[pos + 1]);
+                    decoded[filled] = (high << 4) | low;
+                    filled += 1;
+                    pos += 2;
+                }
+            }
+            assert_eq!(filled, 16, "uuid must decode to 16 bytes");
+            decoded
+        }
+
+        /// Builds a zero-length array of data type `dt`.
+        ///
+        /// `mk_dense_union` uses this as the placeholder for union children that
+        /// the caller does not supply. Only the data types listed below are
+        /// supported, and list items and map values are limited to a few types.
+        /// Anything else panics with a message naming the type.
+        fn empty_child_for(dt: &DataType) -> Arc<dyn Array> {
+            /// Builds a zero-length dense union whose children are empty arrays of
+            /// the types declared in `fields`.
+            fn empty_dense_union(fields: &UnionFields) -> ArrayRef {
+                let children: Vec<ArrayRef> = fields
+                    .iter()
+                    .map(|(_, f)| empty_child_for(f.data_type()))
+                    .collect();
+                Arc::new(
+                    UnionArray::try_new(
+                        fields.clone(),
+                        ScalarBuffer::<i8>::from(Vec::<i8>::new()),
+                        Some(ScalarBuffer::<i32>::from(Vec::<i32>::new())),
+                        children,
+                    )
+                    .unwrap(),
+                ) as ArrayRef
+            }
+
+            match dt {
+                DataType::Null => Arc::new(NullArray::new(0)),
+                DataType::Boolean => Arc::new(BooleanArray::from(Vec::<bool>::new())),
+                DataType::Int32 => Arc::new(Int32Array::from(Vec::<i32>::new())),
+                DataType::Int64 => Arc::new(Int64Array::from(Vec::<i64>::new())),
+                DataType::Float32 => Arc::new(arrow_array::Float32Array::from(Vec::<f32>::new())),
+                DataType::Float64 => Arc::new(arrow_array::Float64Array::from(Vec::<f64>::new())),
+                DataType::Binary => Arc::new(BinaryArray::from(Vec::<&[u8]>::new())),
+                DataType::Utf8 => Arc::new(StringArray::from(Vec::<&str>::new())),
+                DataType::Date32 => Arc::new(arrow_array::Date32Array::from(Vec::<i32>::new())),
+                DataType::Time32(arrow_schema::TimeUnit::Millisecond) => {
+                    Arc::new(Time32MillisecondArray::from(Vec::<i32>::new()))
+                }
+                DataType::Time64(arrow_schema::TimeUnit::Microsecond) => {
+                    Arc::new(Time64MicrosecondArray::from(Vec::<i64>::new()))
+                }
+                // The timezone, when present, is copied onto the empty array so its
+                // data type matches `dt`.
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, tz) => {
+                    let a = TimestampMillisecondArray::from(Vec::<i64>::new());
+                    Arc::new(if let Some(tz) = tz {
+                        a.with_timezone(tz.clone())
+                    } else {
+                        a
+                    })
+                }
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, tz) => {
+                    let a = TimestampMicrosecondArray::from(Vec::<i64>::new());
+                    Arc::new(if let Some(tz) = tz {
+                        a.with_timezone(tz.clone())
+                    } else {
+                        a
+                    })
+                }
+                DataType::Interval(IntervalUnit::MonthDayNano) => {
+                    Arc::new(arrow_array::IntervalMonthDayNanoArray::from(Vec::<
+                        IntervalMonthDayNano,
+                    >::new(
+                    )))
+                }
+                DataType::FixedSizeBinary(n) => Arc::new(FixedSizeBinaryArray::new_null(*n, 0)),
+                DataType::Dictionary(k, v) => {
+                    assert_eq!(**k, DataType::Int32, "expect int32 keys for enums");
+                    let keys = Int32Array::from(Vec::<i32>::new());
+                    let values = match v.as_ref() {
+                        DataType::Utf8 => {
+                            Arc::new(StringArray::from(Vec::<&str>::new())) as ArrayRef
+                        }
+                        other => panic!("unexpected dictionary value type {other:?}"),
+                    };
+                    Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                }
+                DataType::List(field) => {
+                    let values: ArrayRef = match field.data_type() {
+                        DataType::Int32 => {
+                            Arc::new(Int32Array::from(Vec::<i32>::new())) as ArrayRef
+                        }
+                        DataType::Int64 => {
+                            Arc::new(Int64Array::from(Vec::<i64>::new())) as ArrayRef
+                        }
+                        DataType::Utf8 => {
+                            Arc::new(StringArray::from(Vec::<&str>::new())) as ArrayRef
+                        }
+                        DataType::Union(uf, _) => empty_dense_union(uf),
+                        other => panic!("unsupported list item type: {other:?}"),
+                    };
+                    // A single offset of 0 describes a list array with zero lists.
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                    Arc::new(ListArray::try_new(field.clone(), offsets, values, None).unwrap())
+                }
+                DataType::Map(entry_field, ordered) => {
+                    let DataType::Struct(childs) = entry_field.data_type() else {
+                        panic!("map entries must be struct")
+                    };
+                    let key_field = &childs[0];
+                    let val_field = &childs[1];
+                    assert_eq!(key_field.data_type(), &DataType::Utf8);
+                    let keys = StringArray::from(Vec::<&str>::new());
+                    let vals: ArrayRef = match val_field.data_type() {
+                        DataType::Float64 => {
+                            Arc::new(arrow_array::Float64Array::from(Vec::<f64>::new())) as ArrayRef
+                        }
+                        DataType::Int64 => {
+                            Arc::new(Int64Array::from(Vec::<i64>::new())) as ArrayRef
+                        }
+                        DataType::Utf8 => {
+                            Arc::new(StringArray::from(Vec::<&str>::new())) as ArrayRef
+                        }
+                        DataType::Union(uf, _) => empty_dense_union(uf),
+                        other => panic!("unsupported map value type: {other:?}"),
+                    };
+                    let entries = StructArray::new(
+                        Fields::from(vec![key_field.as_ref().clone(), val_field.as_ref().clone()]),
+                        vec![Arc::new(keys) as ArrayRef, vals],
+                        None,
+                    );
+                    // A single offset of 0 describes a map array with zero maps.
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                    Arc::new(MapArray::new(
+                        entry_field.clone(),
+                        offsets,
+                        entries,
+                        None,
+                        *ordered,
+                    ))
+                }
+                other => panic!("empty_child_for: unhandled type {other:?}"),
+            }
+        }
+
+        /// Assembles a dense `UnionArray` over `fields` from per-row `type_ids`
+        /// and `offsets`.
+        ///
+        /// `provide` is asked for each child's data in field order. Any child it
+        /// returns `None` for is filled with an empty array from `empty_child_for`.
+        /// Panics if `UnionArray::try_new` rejects the inputs.
+        fn mk_dense_union(
+            fields: &UnionFields,
+            type_ids: Vec<i8>,
+            offsets: Vec<i32>,
+            provide: impl Fn(&Field) -> Option<ArrayRef>,
+        ) -> ArrayRef {
+            let mut children: Vec<ArrayRef> = Vec::with_capacity(fields.len());
+            for (_, child) in fields.iter() {
+                let data = match provide(child) {
+                    Some(array) => array,
+                    None => empty_child_for(child.data_type()),
+                };
+                children.push(data);
+            }
+
+            let union = UnionArray::try_new(
+                fields.clone(),
+                ScalarBuffer::<i8>::from(type_ids),
+                Some(ScalarBuffer::<i32>::from(offsets)),
+                children,
+            )
+            .unwrap();
+            Arc::new(union) as ArrayRef
+        }
+
+        // Dates / times / timestamps from the Avro content block:
+        let date_a: i32 = 19_000;
+        let time_ms_a: i32 = 13 * 3_600_000 + 45 * 60_000 + 30_000 + 123;
+        let time_us_b: i64 = 23 * 3_600_000_000 + 59 * 60_000_000 + 59 * 1_000_000 + 999_999;
+        let ts_ms_2024_01_01: i64 = 1_704_067_200_000;
+        let ts_us_2024_01_01: i64 = ts_ms_2024_01_01 * 1000;
+        // Fixed / bytes-like values:
+        let fx8_a: [u8; 8] = *b"ABCDEFGH";
+        let fx4_abcd: [u8; 4] = *b"ABCD";
+        let fx4_misc: [u8; 4] = [0x00, 0x11, 0x22, 0x33];
+        let fx10_ascii: [u8; 10] = *b"0123456789";
+        let fx10_aa: [u8; 10] = [0xAA; 10];
+        // Duration logical values as MonthDayNano:
+        let dur_a = IntervalMonthDayNanoType::make_value(1, 2, 3_000_000_000);
+        let dur_b = IntervalMonthDayNanoType::make_value(12, 31, 999_000_000);
+        // UUID logical values (stored as 16-byte FixedSizeBinary in Arrow):
+        let uuid1 = uuid16_from_str("fe7bc30b-4ce8-4c5e-b67c-2234a2d38e66");
+        let uuid2 = uuid16_from_str("0826cc06-d2e3-4599-b4ad-af5fa6905cdb");
+        // Decimals from Avro content:
+        let dec_b_scale2_pos: i128 = 123_456; // "1234.56" bytes-decimal -> (precision=10, scale=2)
+        let dec_fix16_neg: i128 = -101; // "-1.01" fixed(16) decimal(10,2)
+        let dec_fix20_s4: i128 = 1_234_567_891_234; // "123456789.1234" fixed(20) decimal(20,4)
+        let dec_fix20_s4_neg: i128 = -123; // "-0.0123" fixed(20) decimal(20,4)
+        let path = "test/data/union_fields.avro";
+        let actual = read_file(path, 1024, false);
+        let schema = actual.schema();
+        // Helper to fetch union metadata for a column
+        let get_union = |name: &str| -> (UnionFields, UnionMode) {
+            let idx = schema.index_of(name).unwrap();
+            match schema.field(idx).data_type() {
+                DataType::Union(f, m) => (f.clone(), *m),
+                other => panic!("{name} should be a Union, got {other:?}"),
+            }
+        };
+        let mut expected_cols: Vec<ArrayRef> = Vec::with_capacity(schema.fields().len());
+        // 1) ["null","int"]: Int32 (nullable)
+        expected_cols.push(Arc::new(Int32Array::from(vec![
+            None,
+            Some(42),
+            None,
+            Some(0),
+        ])));
+        // 2) ["string","null"]: Utf8 (nullable)
+        expected_cols.push(Arc::new(StringArray::from(vec![
+            Some("s1"),
+            None,
+            Some("s3"),
+            Some(""),
+        ])));
+        // 3) union_prim: ["boolean","int","long","float","double","bytes","string"]
+        // Expected rows, in order: long, int, float, double. Each of those children
+        // holds exactly one value, so every row uses offset 0.
+        {
+            let (uf, mode) = get_union("union_prim");
+            assert!(matches!(mode, UnionMode::Dense));
+            // Union children must be named after their Avro primitive types.
+            let generated_names: Vec<&str> = uf.iter().map(|(_, f)| f.name().as_str()).collect();
+            let expected_names = vec![
+                "boolean", "int", "long", "float", "double", "bytes", "string",
+            ];
+            assert_eq!(
+                generated_names, expected_names,
+                "Field names for union_prim are incorrect"
+            );
+            let tids = vec![
+                tid_by_name(&uf, "long"),
+                tid_by_name(&uf, "int"),
+                tid_by_name(&uf, "float"),
+                tid_by_name(&uf, "double"),
+            ];
+            let offs = vec![0, 0, 0, 0];
+            // Children not listed here (boolean, bytes, string) are left empty.
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.name().as_str() {
+                "int" => Some(Arc::new(Int32Array::from(vec![-1])) as ArrayRef),
+                "long" => Some(Arc::new(Int64Array::from(vec![1_234_567_890_123i64])) as ArrayRef),
+                "float" => {
+                    Some(Arc::new(arrow_array::Float32Array::from(vec![1.25f32])) as ArrayRef)
+                }
+                "double" => {
+                    Some(Arc::new(arrow_array::Float64Array::from(vec![-2.5f64])) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 4) union_bytes_vs_string: ["bytes","string"]
+        // Expected rows, in order: bytes, string, string, bytes. Each child holds two
+        // values, so the offsets are 0, 0, 1, 1.
+        {
+            let (uf, _) = get_union("union_bytes_vs_string");
+            let tids = vec![
+                tid_by_name(&uf, "bytes"),
+                tid_by_name(&uf, "string"),
+                tid_by_name(&uf, "string"),
+                tid_by_name(&uf, "bytes"),
+            ];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.name().as_str() {
+                // The second bytes value is an empty byte string.
+                "bytes" => Some(
+                    Arc::new(BinaryArray::from(vec![&[0x00, 0xFF, 0x7F][..], &[][..]])) as ArrayRef,
+                ),
+                "string" => Some(Arc::new(StringArray::from(vec!["hello", "world"])) as ArrayRef),
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 5) union_fixed_dur_decfix: [Fx8, Dur12, DecFix16(decimal(10,2))]
+        // Expected rows, in order: fixed(8), duration, decimal, duration. The duration
+        // child holds two values, so the last row uses offset 1.
+        {
+            let (uf, _) = get_union("union_fixed_dur_decfix");
+            let tid_fx8 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(8)));
+            let tid_dur = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano)
+                )
+            });
+            // Any decimal width with precision 10 and scale 2 is accepted; Decimal64
+            // is only considered when the `small_decimals` feature is enabled.
+            let tid_dec = tid_by_dt(&uf, |dt| match dt {
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(10, 2) => true,
+                DataType::Decimal128(10, 2) | DataType::Decimal256(10, 2) => true,
+                _ => false,
+            });
+            let tids = vec![tid_fx8, tid_dur, tid_dec, tid_dur];
+            let offs = vec![0, 0, 0, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::FixedSizeBinary(8) => {
+                    let it = [Some(fx8_a)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 8).unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::Interval(IntervalUnit::MonthDayNano) => {
+                    Some(Arc::new(arrow_array::IntervalMonthDayNanoArray::from(vec![
+                        dur_a, dur_b,
+                    ])) as ArrayRef)
+                }
+                // The decimal child is built at whichever width the decoded schema uses.
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(10, 2) => {
+                    let a = arrow_array::Decimal64Array::from_iter_values([dec_fix16_neg as i64]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal128(10, 2) => {
+                    let a = arrow_array::Decimal128Array::from_iter_values([dec_fix16_neg]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal256(10, 2) => {
+                    let a = arrow_array::Decimal256Array::from_iter_values([i256::from_i128(
+                        dec_fix16_neg,
+                    )]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                _ => None,
+            });
+            // Union children are expected to be named "Fx8", "Dur12" and "DecFix16".
+            let generated_names: Vec<&str> = uf.iter().map(|(_, f)| f.name().as_str()).collect();
+            let expected_names = vec!["Fx8", "Dur12", "DecFix16"];
+            assert_eq!(
+                generated_names, expected_names,
+                "Data type names were not generated correctly for union_fixed_dur_decfix"
+            );
+            expected_cols.push(arr);
+        }
+        // 6) union_enum_records_array_map: [enum ColorU, record RecA, record RecB, array<long>, map<string>]
+        // Expected rows, in order: enum, RecA, RecB, array. No row selects the map
+        // child, so it is left empty.
+        {
+            let (uf, _) = get_union("union_enum_records_array_map");
+            let tid_enum = tid_by_dt(&uf, |dt| matches!(dt, DataType::Dictionary(_, _)));
+            // The two struct children are told apart by their field names.
+            let tid_reca = tid_by_dt(&uf, |dt| {
+                if let DataType::Struct(fs) = dt {
+                    fs.len() == 2 && fs[0].name() == "a" && fs[1].name() == "b"
+                } else {
+                    false
+                }
+            });
+            let tid_recb = tid_by_dt(&uf, |dt| {
+                if let DataType::Struct(fs) = dt {
+                    fs.len() == 2 && fs[0].name() == "x" && fs[1].name() == "y"
+                } else {
+                    false
+                }
+            });
+            let tid_arr = tid_by_dt(&uf, |dt| matches!(dt, DataType::List(_)));
+            let tids = vec![tid_enum, tid_reca, tid_recb, tid_arr];
+            let offs = vec![0, 0, 0, 0];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Dictionary(_, _) => {
+                    let keys = Int32Array::from(vec![0i32]); // "RED"
+                    let values =
+                        Arc::new(StringArray::from(vec!["RED", "GREEN", "BLUE"])) as ArrayRef;
+                    Some(
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                            as ArrayRef,
+                    )
+                }
+                // Struct with fields (a, b): one row, a = 7 and b = "x".
+                DataType::Struct(fs)
+                    if fs.len() == 2 && fs[0].name() == "a" && fs[1].name() == "b" =>
+                {
+                    let a = Int32Array::from(vec![7]);
+                    let b = StringArray::from(vec!["x"]);
+                    Some(Arc::new(StructArray::new(
+                        fs.clone(),
+                        vec![Arc::new(a), Arc::new(b)],
+                        None,
+                    )) as ArrayRef)
+                }
+                // Struct with fields (x, y): one row, x = 123456789 and y = [0xFF, 0x00].
+                DataType::Struct(fs)
+                    if fs.len() == 2 && fs[0].name() == "x" && fs[1].name() == "y" =>
+                {
+                    let x = Int64Array::from(vec![123_456_789i64]);
+                    let y = BinaryArray::from(vec![&[0xFF, 0x00][..]]);
+                    Some(Arc::new(StructArray::new(
+                        fs.clone(),
+                        vec![Arc::new(x), Arc::new(y)],
+                        None,
+                    )) as ArrayRef)
+                }
+                // A single list holding the three values 1, 2, 3.
+                DataType::List(field) => {
+                    let values = Int64Array::from(vec![1i64, 2, 3]);
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3]));
+                    Some(Arc::new(
+                        ListArray::try_new(field.clone(), offsets, Arc::new(values), None).unwrap(),
+                    ) as ArrayRef)
+                }
+                // Returning None makes `mk_dense_union` fill the map child with an
+                // empty array.
+                DataType::Map(_, _) => None,
+                other => panic!("unexpected child {other:?}"),
+            });
+            expected_cols.push(arr);
+        }
+        // 7) union_date_or_fixed4: [date32, fixed(4)]
+        {
+            let (uf, _) = get_union("union_date_or_fixed4");
+            let tid_date = tid_by_dt(&uf, |dt| matches!(dt, DataType::Date32));
+            let tid_fx4 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(4)));
+            let tids = vec![tid_date, tid_fx4, tid_date, tid_fx4];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Date32 => {
+                    Some(Arc::new(arrow_array::Date32Array::from(vec![date_a, 0])) as ArrayRef)
+                }
+                DataType::FixedSizeBinary(4) => {
+                    let it = [Some(fx4_abcd), Some(fx4_misc)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 4).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 8) union_time_millis_or_enum: [time-millis, enum OnOff]
+        {
+            let (uf, _) = get_union("union_time_millis_or_enum");
+            let tid_ms = tid_by_dt(&uf, |dt| {
+                matches!(dt, DataType::Time32(arrow_schema::TimeUnit::Millisecond))
+            });
+            let tid_en = tid_by_dt(&uf, |dt| matches!(dt, DataType::Dictionary(_, _)));
+            let tids = vec![tid_ms, tid_en, tid_en, tid_ms];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Time32(arrow_schema::TimeUnit::Millisecond) => {
+                    Some(Arc::new(Time32MillisecondArray::from(vec![time_ms_a, 0])) as ArrayRef)
+                }
+                DataType::Dictionary(_, _) => {
+                    let keys = Int32Array::from(vec![0i32, 1]); // "ON", "OFF"
+                    let values = Arc::new(StringArray::from(vec!["ON", "OFF"])) as ArrayRef;
+                    Some(
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                            as ArrayRef,
+                    )
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 9) union_time_micros_or_string: [time-micros, string]
+        {
+            let (uf, _) = get_union("union_time_micros_or_string");
+            let tid_us = tid_by_dt(&uf, |dt| {
+                matches!(dt, DataType::Time64(arrow_schema::TimeUnit::Microsecond))
+            });
+            let tid_s = tid_by_name(&uf, "string");
+            let tids = vec![tid_s, tid_us, tid_s, tid_s];
+            let offs = vec![0, 0, 1, 2];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Time64(arrow_schema::TimeUnit::Microsecond) => {
+                    Some(Arc::new(Time64MicrosecondArray::from(vec![time_us_b])) as ArrayRef)
+                }
+                DataType::Utf8 => {
+                    Some(Arc::new(StringArray::from(vec!["evening", "night", ""])) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 10) union_ts_millis_utc_or_array: [timestamp-millis(TZ), array<int>]
+        {
+            let (uf, _) = get_union("union_ts_millis_utc_or_array");
+            let tid_ts = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, _)
+                )
+            });
+            let tid_arr = tid_by_dt(&uf, |dt| matches!(dt, DataType::List(_)));
+            let tids = vec![tid_ts, tid_arr, tid_arr, tid_ts];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, tz) => {
+                    let a = TimestampMillisecondArray::from(vec![
+                        ts_ms_2024_01_01,
+                        ts_ms_2024_01_01 + 86_400_000,
+                    ]);
+                    Some(Arc::new(if let Some(tz) = tz {
+                        a.with_timezone(tz.clone())
+                    } else {
+                        a
+                    }) as ArrayRef)
+                }
+                DataType::List(field) => {
+                    let values = Int32Array::from(vec![0, 1, 2, -1, 0, 1]);
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 6]));
+                    Some(Arc::new(
+                        ListArray::try_new(field.clone(), offsets, Arc::new(values), None).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 11) union_ts_micros_local_or_bytes: [local-timestamp-micros, bytes]
+        {
+            let (uf, _) = get_union("union_ts_micros_local_or_bytes");
+            let tid_lts = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
+                )
+            });
+            let tid_b = tid_by_name(&uf, "bytes");
+            let tids = vec![tid_b, tid_lts, tid_b, tid_b];
+            let offs = vec![0, 0, 1, 2];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None) => Some(Arc::new(
+                    TimestampMicrosecondArray::from(vec![ts_us_2024_01_01]),
+                )
+                    as ArrayRef),
+                DataType::Binary => Some(Arc::new(BinaryArray::from(vec![
+                    &b"\x11\x22\x33"[..],
+                    &b"\x00"[..],
+                    &b"\x10\x20\x30\x40"[..],
+                ])) as ArrayRef),
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 12) union_uuid_or_fixed10: [uuid(string)->fixed(16), fixed(10)]
+        {
+            let (uf, _) = get_union("union_uuid_or_fixed10");
+            let tid_fx16 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(16)));
+            let tid_fx10 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(10)));
+            let tids = vec![tid_fx16, tid_fx10, tid_fx16, tid_fx10];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::FixedSizeBinary(16) => {
+                    let it = [Some(uuid1), Some(uuid2)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::FixedSizeBinary(10) => {
+                    let it = [Some(fx10_ascii), Some(fx10_aa)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 10).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 13) union_dec_bytes_or_dec_fixed: [bytes dec(10,2), fixed(20) dec(20,4)]
+        {
+            let (uf, _) = get_union("union_dec_bytes_or_dec_fixed");
+            let tid_b10s2 = tid_by_dt(&uf, |dt| match dt {
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(10, 2) => true,
+                DataType::Decimal128(10, 2) | DataType::Decimal256(10, 2) => true,
+                _ => false,
+            });
+            let tid_f20s4 = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Decimal128(20, 4) | DataType::Decimal256(20, 4)
+                )
+            });
+            let tids = vec![tid_b10s2, tid_f20s4, tid_b10s2, tid_f20s4];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(10, 2) => {
+                    let a = Decimal64Array::from_iter_values([dec_b_scale2_pos as i64, 0i64]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal128(10, 2) => {
+                    let a = Decimal128Array::from_iter_values([dec_b_scale2_pos, 0]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal256(10, 2) => {
+                    let a = Decimal256Array::from_iter_values([
+                        i256::from_i128(dec_b_scale2_pos),
+                        i256::from(0),
+                    ]);
+                    Some(Arc::new(a.with_precision_and_scale(10, 2).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal128(20, 4) => {
+                    let a = Decimal128Array::from_iter_values([dec_fix20_s4_neg, dec_fix20_s4]);
+                    Some(Arc::new(a.with_precision_and_scale(20, 4).unwrap()) as ArrayRef)
+                }
+                DataType::Decimal256(20, 4) => {
+                    let a = Decimal256Array::from_iter_values([
+                        i256::from_i128(dec_fix20_s4_neg),
+                        i256::from_i128(dec_fix20_s4),
+                    ]);
+                    Some(Arc::new(a.with_precision_and_scale(20, 4).unwrap()) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 14) union_null_bytes_string: ["null","bytes","string"]
+        {
+            let (uf, _) = get_union("union_null_bytes_string");
+            let tid_n = tid_by_name(&uf, "null");
+            let tid_b = tid_by_name(&uf, "bytes");
+            let tid_s = tid_by_name(&uf, "string");
+            let tids = vec![tid_n, tid_b, tid_s, tid_s];
+            let offs = vec![0, 0, 0, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.name().as_str() {
+                "null" => Some(Arc::new(arrow_array::NullArray::new(1)) as ArrayRef),
+                "bytes" => Some(Arc::new(BinaryArray::from(vec![&b"\x01\x02"[..]])) as ArrayRef),
+                "string" => Some(Arc::new(StringArray::from(vec!["text", "u"])) as ArrayRef),
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 15) array_of_union: array<[long,string]>
+        {
+            let idx = schema.index_of("array_of_union").unwrap();
+            let dt = schema.field(idx).data_type().clone();
+            let (item_field, _) = match &dt {
+                DataType::List(f) => (f.clone(), ()),
+                other => panic!("array_of_union must be List, got {other:?}"),
+            };
+            let (uf, _) = match item_field.data_type() {
+                DataType::Union(f, m) => (f.clone(), m),
+                other => panic!("array_of_union items must be Union, got {other:?}"),
+            };
+            let tid_l = tid_by_name(&uf, "long");
+            let tid_s = tid_by_name(&uf, "string");
+            let type_ids = vec![tid_l, tid_s, tid_l, tid_s, tid_l, tid_l, tid_s, tid_l];
+            let offsets = vec![0, 0, 1, 1, 2, 3, 2, 4];
+            let values_union =
+                mk_dense_union(&uf, type_ids, offsets, |f| match f.name().as_str() {
+                    "long" => {
+                        Some(Arc::new(Int64Array::from(vec![1i64, -5, 42, -1, 0])) as ArrayRef)
+                    }
+                    "string" => Some(Arc::new(StringArray::from(vec!["a", "", "z"])) as ArrayRef),
+                    _ => None,
+                });
+            let list_offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 5, 6, 8]));
+            expected_cols.push(Arc::new(
+                ListArray::try_new(item_field.clone(), list_offsets, values_union, None).unwrap(),
+            ));
+        }
+        // 16) map_of_union: map<[null,double]>
+        {
+            let idx = schema.index_of("map_of_union").unwrap();
+            let dt = schema.field(idx).data_type().clone();
+            let (entry_field, ordered) = match &dt {
+                DataType::Map(f, ordered) => (f.clone(), *ordered),
+                other => panic!("map_of_union must be Map, got {other:?}"),
+            };
+            let DataType::Struct(entry_fields) = entry_field.data_type() else {
+                panic!("map entries must be struct")
+            };
+            let key_field = entry_fields[0].clone();
+            let val_field = entry_fields[1].clone();
+            let keys = StringArray::from(vec!["a", "b", "x", "pi"]);
+            let rounded_pi = (std::f64::consts::PI * 100_000.0).round() / 100_000.0;
+            let values: ArrayRef = match val_field.data_type() {
+                DataType::Union(uf, _) => {
+                    let tid_n = tid_by_name(uf, "null");
+                    let tid_d = tid_by_name(uf, "double");
+                    let tids = vec![tid_n, tid_d, tid_d, tid_d];
+                    let offs = vec![0, 0, 1, 2];
+                    mk_dense_union(uf, tids, offs, |f| match f.name().as_str() {
+                        "null" => Some(Arc::new(NullArray::new(1)) as ArrayRef),
+                        "double" => Some(Arc::new(arrow_array::Float64Array::from(vec![
+                            2.5f64, -0.5f64, rounded_pi,
+                        ])) as ArrayRef),
+                        _ => None,
+                    })
+                }
+                DataType::Float64 => Arc::new(arrow_array::Float64Array::from(vec![
+                    None,
+                    Some(2.5),
+                    Some(-0.5),
+                    Some(rounded_pi),
+                ])),
+                other => panic!("unexpected map value type {other:?}"),
+            };
+            let entries = StructArray::new(
+                Fields::from(vec![key_field.as_ref().clone(), val_field.as_ref().clone()]),
+                vec![Arc::new(keys) as ArrayRef, values],
+                None,
+            );
+            let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 3, 4]));
+            expected_cols.push(Arc::new(MapArray::new(
+                entry_field,
+                offsets,
+                entries,
+                None,
+                ordered,
+            )));
+        }
+        // 17) record_with_union_field: struct { id:int, u:[int,string] }
+        {
+            let idx = schema.index_of("record_with_union_field").unwrap();
+            let DataType::Struct(rec_fields) = schema.field(idx).data_type() else {
+                panic!("record_with_union_field should be Struct")
+            };
+            let id = Int32Array::from(vec![1, 2, 3, 4]);
+            let u_field = rec_fields.iter().find(|f| f.name() == "u").unwrap();
+            let DataType::Union(uf, _) = u_field.data_type() else {
+                panic!("u must be Union")
+            };
+            let tid_i = tid_by_name(uf, "int");
+            let tid_s = tid_by_name(uf, "string");
+            let tids = vec![tid_s, tid_i, tid_i, tid_s];
+            let offs = vec![0, 0, 1, 1];
+            let u = mk_dense_union(uf, tids, offs, |f| match f.name().as_str() {
+                "int" => Some(Arc::new(Int32Array::from(vec![99, 0])) as ArrayRef),
+                "string" => Some(Arc::new(StringArray::from(vec!["one", "four"])) as ArrayRef),
+                _ => None,
+            });
+            let rec = StructArray::new(rec_fields.clone(), vec![Arc::new(id) as ArrayRef, u], None);
+            expected_cols.push(Arc::new(rec));
+        }
+        // 18) union_ts_micros_utc_or_map: [timestamp-micros(TZ), map<long>]
+        {
+            let (uf, _) = get_union("union_ts_micros_utc_or_map");
+            let tid_ts = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some(_))
+                )
+            });
+            let tid_map = tid_by_dt(&uf, |dt| matches!(dt, DataType::Map(_, _)));
+            let tids = vec![tid_ts, tid_map, tid_ts, tid_map];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, tz) => {
+                    let a = TimestampMicrosecondArray::from(vec![ts_us_2024_01_01, 0i64]);
+                    Some(Arc::new(if let Some(tz) = tz {
+                        a.with_timezone(tz.clone())
+                    } else {
+                        a
+                    }) as ArrayRef)
+                }
+                DataType::Map(entry_field, ordered) => {
+                    let DataType::Struct(fs) = entry_field.data_type() else {
+                        panic!("map entries must be struct")
+                    };
+                    let key_field = fs[0].clone();
+                    let val_field = fs[1].clone();
+                    assert_eq!(key_field.data_type(), &DataType::Utf8);
+                    assert_eq!(val_field.data_type(), &DataType::Int64);
+                    let keys = StringArray::from(vec!["k1", "k2", "n"]);
+                    let vals = Int64Array::from(vec![1i64, 2, 0]);
+                    let entries = StructArray::new(
+                        Fields::from(vec![key_field.as_ref().clone(), val_field.as_ref().clone()]),
+                        vec![Arc::new(keys) as ArrayRef, Arc::new(vals) as ArrayRef],
+                        None,
+                    );
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3]));
+                    Some(Arc::new(MapArray::new(
+                        entry_field.clone(),
+                        offsets,
+                        entries,
+                        None,
+                        *ordered,
+                    )) as ArrayRef)
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 19) union_ts_millis_local_or_string: [local-timestamp-millis, string]
+        {
+            let (uf, _) = get_union("union_ts_millis_local_or_string");
+            let tid_ts = tid_by_dt(&uf, |dt| {
+                matches!(
+                    dt,
+                    DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None)
+                )
+            });
+            let tid_s = tid_by_name(&uf, "string");
+            let tids = vec![tid_s, tid_ts, tid_s, tid_s];
+            let offs = vec![0, 0, 1, 2];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None) => Some(Arc::new(
+                    TimestampMillisecondArray::from(vec![ts_ms_2024_01_01]),
+                )
+                    as ArrayRef),
+                DataType::Utf8 => {
+                    Some(
+                        Arc::new(StringArray::from(vec!["local midnight", "done", ""])) as ArrayRef,
+                    )
+                }
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        // 20) union_bool_or_string: ["boolean","string"]
+        {
+            let (uf, _) = get_union("union_bool_or_string");
+            let tid_b = tid_by_name(&uf, "boolean");
+            let tid_s = tid_by_name(&uf, "string");
+            let tids = vec![tid_b, tid_s, tid_b, tid_s];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.name().as_str() {
+                "boolean" => Some(Arc::new(BooleanArray::from(vec![true, false])) as ArrayRef),
+                "string" => Some(Arc::new(StringArray::from(vec!["no", "yes"])) as ArrayRef),
+                _ => None,
+            });
+            expected_cols.push(arr);
+        }
+        let expected = RecordBatch::try_new(schema.clone(), expected_cols).unwrap();
+        assert_eq!(
+            actual, expected,
+            "full end-to-end equality for union_fields.avro"
+        );
+    }
+
+    /// Reads `test/data/zero_byte.avro` with batch size 3 and checks that its single
+    /// nullable `Binary` column `data` decodes to three rows.
+    #[test]
+    fn test_read_zero_byte_avro_file() {
+        let decoded = read_file("test/data/zero_byte.avro", 3, false);
+        let decoded_schema = decoded.schema();
+        assert_eq!(decoded_schema.fields().len(), 1);
+        let data_field = decoded_schema.field(0);
+        assert_eq!(data_field.name(), "data");
+        assert_eq!(data_field.data_type(), &DataType::Binary);
+        assert!(data_field.is_nullable());
+        assert_eq!(decoded.num_rows(), 3);
+        assert_eq!(decoded.num_columns(), 1);
+        let any_col = decoded.column(0).as_any();
+        let data_col = any_col.downcast_ref::<BinaryArray>().unwrap();
+        // Expected rows: null, the empty byte string, then b"some bytes".
+        assert!(data_col.is_null(0));
+        assert!(data_col.is_valid(1));
+        assert_eq!(data_col.value(1), b"");
+        assert!(data_col.is_valid(2));
+        assert_eq!(data_col.value(2), b"some bytes");
+    }
+
+    /// Decodes every file yielded by `files()` with batch sizes 8 and 3 and
+    /// compares each decoded batch against one shared eight-row expectation.
+    #[test]
+    fn test_alltypes() {
+        // tinyint_col, smallint_col and int_col all hold the same 0, 1, 0, 1, ... values.
+        let zero_one = || -> ArrayRef {
+            Arc::new(Int32Array::from_iter_values((0..8).map(|v| v % 2)))
+        };
+        let ids: ArrayRef = Arc::new(Int32Array::from(vec![4, 5, 6, 7, 2, 3, 0, 1]));
+        let bools: ArrayRef = Arc::new(BooleanArray::from_iter((0..8).map(|v| Some(v % 2 == 0))));
+        let bigints: ArrayRef =
+            Arc::new(Int64Array::from_iter_values((0..8).map(|v| (v % 2) * 10)));
+        let floats: ArrayRef =
+            Arc::new(Float32Array::from_iter_values((0..8).map(|v| (v % 2) as f32 * 1.1)));
+        let doubles: ArrayRef =
+            Arc::new(Float64Array::from_iter_values((0..8).map(|v| (v % 2) as f64 * 10.1)));
+        // Each row is an eight-byte ASCII "MM/01/09" date kept as raw bytes.
+        let date_strings: ArrayRef = Arc::new(BinaryArray::from_iter_values([
+            b"03/01/09",
+            b"03/01/09",
+            b"04/01/09",
+            b"04/01/09",
+            b"02/01/09",
+            b"02/01/09",
+            b"01/01/09",
+            b"01/01/09",
+        ]));
+        // One-byte values alternating between ASCII '0' (48) and '1' (49).
+        let strings: ArrayRef =
+            Arc::new(BinaryArray::from_iter_values((0..8u8).map(|v| [b'0' + v % 2])));
+        let timestamps: ArrayRef = Arc::new(
+            TimestampMicrosecondArray::from_iter_values([
+                1235865600000000, // 2009-03-01T00:00:00.000
+                1235865660000000, // 2009-03-01T00:01:00.000
+                1238544000000000, // 2009-04-01T00:00:00.000
+                1238544060000000, // 2009-04-01T00:01:00.000
+                1233446400000000, // 2009-02-01T00:00:00.000
+                1233446460000000, // 2009-02-01T00:01:00.000
+                1230768000000000, // 2009-01-01T00:00:00.000
+                1230768060000000, // 2009-01-01T00:01:00.000
+            ])
+            .with_timezone("+00:00"),
+        );
+        // Every column is declared nullable in the expected schema.
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("id", ids, true),
+            ("bool_col", bools, true),
+            ("tinyint_col", zero_one(), true),
+            ("smallint_col", zero_one(), true),
+            ("int_col", zero_one(), true),
+            ("bigint_col", bigints, true),
+            ("float_col", floats, true),
+            ("double_col", doubles, true),
+            ("date_string_col", date_strings, true),
+            ("string_col", strings, true),
+            ("timestamp_col", timestamps, true),
+        ])
+        .unwrap();
+
+        for name in files() {
+            let path = arrow_test_data(name);
+            // Batch size 8 first, then 3, matching the original check order.
+            for batch_size in [8, 3] {
+                assert_eq!(read_file(&path, batch_size, false), expected);
+            }
+        }
+    }
+
+    /// Decodes `avro/alltypes_dictionary.avro` at batch sizes 8 and 3 and checks
+    /// both results against the expected two-row batch.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_alltypes_dictionary() {
+        let file = "avro/alltypes_dictionary.avro";
+        // id, tinyint_col, smallint_col and int_col all hold the values 0 and 1.
+        let zero_one = || -> ArrayRef { Arc::new(Int32Array::from(vec![0, 1])) };
+        let bools: ArrayRef = Arc::new(BooleanArray::from(vec![Some(true), Some(false)]));
+        let bigints: ArrayRef = Arc::new(Int64Array::from(vec![0, 10]));
+        let floats: ArrayRef = Arc::new(Float32Array::from(vec![0.0, 1.1]));
+        let doubles: ArrayRef = Arc::new(Float64Array::from(vec![0.0, 10.1]));
+        let dates: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"01/01/09", b"01/01/09"]));
+        let strings: ArrayRef = Arc::new(BinaryArray::from_iter_values([b"0", b"1"]));
+        let timestamps: ArrayRef = Arc::new(
+            TimestampMicrosecondArray::from_iter_values([
+                1230768000000000, // 2009-01-01T00:00:00.000
+                1230768060000000, // 2009-01-01T00:01:00.000
+            ])
+            .with_timezone("+00:00"),
+        );
+        // Every column is declared nullable in the expected schema.
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("id", zero_one(), true),
+            ("bool_col", bools, true),
+            ("tinyint_col", zero_one(), true),
+            ("smallint_col", zero_one(), true),
+            ("int_col", zero_one(), true),
+            ("bigint_col", bigints, true),
+            ("float_col", floats, true),
+            ("double_col", doubles, true),
+            ("date_string_col", dates, true),
+            ("string_col", strings, true),
+            ("timestamp_col", timestamps, true),
+        ])
+        .unwrap();
+        let file_path = arrow_test_data(file);
+        // Batch size 8 is checked first, then batch size 3.
+        assert_eq!(
+            read_file(&file_path, 8, false),
+            expected,
+            "Decoded RecordBatch does not match for file {file}"
+        );
+        assert_eq!(
+            read_file(&file_path, 3, false),
+            expected,
+            "Decoded RecordBatch (batch size 3) does not match for file {file}"
+        );
+    }
+
+    /// Decodes `avro/alltypes_nulls_plain.avro` at batch sizes 8 and 3 and checks
+    /// that both results equal a single row whose seven columns are all null.
+    #[test]
+    fn test_alltypes_nulls_plain() {
+        let file = "avro/alltypes_nulls_plain.avro";
+        // One null value per column; the element type fixes each column's Arrow type.
+        let strings: ArrayRef = Arc::new(StringArray::from(vec![None::<&str>]));
+        let ints: ArrayRef = Arc::new(Int32Array::from(vec![None]));
+        let bools: ArrayRef = Arc::new(BooleanArray::from(vec![None]));
+        let bigints: ArrayRef = Arc::new(Int64Array::from(vec![None]));
+        let floats: ArrayRef = Arc::new(Float32Array::from(vec![None]));
+        let doubles: ArrayRef = Arc::new(Float64Array::from(vec![None]));
+        let bytes: ArrayRef = Arc::new(BinaryArray::from(vec![None::<&[u8]>]));
+        // Every column is declared nullable in the expected schema.
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("string_col", strings, true),
+            ("int_col", ints, true),
+            ("bool_col", bools, true),
+            ("bigint_col", bigints, true),
+            ("float_col", floats, true),
+            ("double_col", doubles, true),
+            ("bytes_col", bytes, true),
+        ])
+        .unwrap();
+        let file_path = arrow_test_data(file);
+        // Batch size 8 is checked first, then batch size 3.
+        assert_eq!(
+            read_file(&file_path, 8, false),
+            expected,
+            "Decoded RecordBatch does not match for file {file}"
+        );
+        assert_eq!(
+            read_file(&file_path, 3, false),
+            expected,
+            "Decoded RecordBatch (batch size 3) does not match for file {file}"
+        );
+    }
+
+    /// Decodes `avro/binary.avro` and checks that its nullable `foo` column holds
+    /// the twelve single-byte values 0x00 through 0x0b, in order.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_binary() {
+        let path = arrow_test_data("avro/binary.avro");
+        let decoded = read_file(&path, 8, false);
+        // One row per byte value: [0x00], [0x01], ..., [0x0b].
+        let foo: Arc<dyn Array> =
+            Arc::new(BinaryArray::from_iter_values((0u8..12).map(|byte| [byte])));
+        let expected = RecordBatch::try_from_iter_with_nullable([("foo", foo, true)]).unwrap();
+        assert_eq!(decoded, expected);
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for these files
+    #[cfg(feature = "snappy")]
+    fn test_decimal() {
+        // Each entry: (fixture path, expected Arrow type, field metadata before "precision" and
+        // "scale" are added in the loop). With `small_decimals` enabled, Decimal32/Decimal64 are
+        // expected where their precision allows; otherwise those cases resolve to Decimal128.
+        #[cfg(feature = "small_decimals")]
+        let files: [(&str, DataType, HashMap<String, String>); 8] = [
+            (
+                "avro/fixed_length_decimal.avro",
+                DataType::Decimal128(25, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/fixed_length_decimal_legacy.avro",
+                DataType::Decimal64(13, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/int32_decimal.avro",
+                DataType::Decimal32(4, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/int64_decimal.avro",
+                DataType::Decimal64(10, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "test/data/int256_decimal.avro",
+                DataType::Decimal256(76, 10),
+                HashMap::new(),
+            ),
+            (
+                "test/data/fixed256_decimal.avro",
+                DataType::Decimal256(76, 10),
+                HashMap::from([("avro.name".to_string(), "Decimal256Fixed".to_string())]),
+            ),
+            (
+                "test/data/fixed_length_decimal_legacy_32.avro",
+                DataType::Decimal32(9, 2),
+                HashMap::from([("avro.name".to_string(), "Decimal32FixedLegacy".to_string())]),
+            ),
+            (
+                "test/data/int128_decimal.avro",
+                DataType::Decimal128(38, 2),
+                HashMap::new(),
+            ),
+        ];
+        #[cfg(not(feature = "small_decimals"))]
+        let files: [(&str, DataType, HashMap<String, String>); 8] = [
+            (
+                "avro/fixed_length_decimal.avro",
+                DataType::Decimal128(25, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/fixed_length_decimal_legacy.avro",
+                DataType::Decimal128(13, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/int32_decimal.avro",
+                DataType::Decimal128(4, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "avro/int64_decimal.avro",
+                DataType::Decimal128(10, 2),
+                HashMap::from([
+                    (
+                        "avro.namespace".to_string(),
+                        "topLevelRecord.value".to_string(),
+                    ),
+                    ("avro.name".to_string(), "fixed".to_string()),
+                ]),
+            ),
+            (
+                "test/data/int256_decimal.avro",
+                DataType::Decimal256(76, 10),
+                HashMap::new(),
+            ),
+            (
+                "test/data/fixed256_decimal.avro",
+                DataType::Decimal256(76, 10),
+                HashMap::from([("avro.name".to_string(), "Decimal256Fixed".to_string())]),
+            ),
+            (
+                "test/data/fixed_length_decimal_legacy_32.avro",
+                DataType::Decimal128(9, 2),
+                HashMap::from([("avro.name".to_string(), "Decimal32FixedLegacy".to_string())]),
+            ),
+            (
+                "test/data/int128_decimal.avro",
+                DataType::Decimal128(38, 2),
+                HashMap::new(),
+            ),
+        ];
+        for (file, expected_dt, mut metadata) in files {
+            let (precision, scale) = match expected_dt {
+                DataType::Decimal32(p, s)
+                | DataType::Decimal64(p, s)
+                | DataType::Decimal128(p, s)
+                | DataType::Decimal256(p, s) => (p, s),
+                _ => unreachable!("Unexpected decimal type in test inputs"),
+            };
+            assert!(scale >= 0, "test data uses non-negative scales only");
+            let scale_u32 = scale as u32; // lossless: scale was asserted non-negative above
+            let file_path: String = if file.starts_with("avro/") {
+                arrow_test_data(file) // "avro/..." fixtures are resolved by arrow_test_data
+            } else {
+                std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) // others are crate-relative
+                    .join(file)
+                    .to_string_lossy()
+                    .into_owned()
+            };
+            let pow10: i128 = 10i128.pow(scale_u32); // 10^scale, applied to 1..=24 below
+            let values_i128: Vec<i128> = (1..=24).map(|n| (n as i128) * pow10).collect();
+            let build_expected = |dt: &DataType, values: &[i128]| -> ArrayRef {
+                match *dt {
+                    #[cfg(feature = "small_decimals")]
+                    DataType::Decimal32(p, s) => {
+                        let it = values.iter().map(|&v| v as i32); // `as` truncates silently
+                        Arc::new(
+                            Decimal32Array::from_iter_values(it)
+                                .with_precision_and_scale(p, s)
+                                .unwrap(),
+                        )
+                    }
+                    #[cfg(feature = "small_decimals")]
+                    DataType::Decimal64(p, s) => {
+                        let it = values.iter().map(|&v| v as i64); // `as` truncates silently
+                        Arc::new(
+                            Decimal64Array::from_iter_values(it)
+                                .with_precision_and_scale(p, s)
+                                .unwrap(),
+                        )
+                    }
+                    DataType::Decimal128(p, s) => {
+                        let it = values.iter().copied();
+                        Arc::new(
+                            Decimal128Array::from_iter_values(it)
+                                .with_precision_and_scale(p, s)
+                                .unwrap(),
+                        )
+                    }
+                    DataType::Decimal256(p, s) => {
+                        let it = values.iter().map(|&v| i256::from_i128(v));
+                        Arc::new(
+                            Decimal256Array::from_iter_values(it)
+                                .with_precision_and_scale(p, s)
+                                .unwrap(),
+                        )
+                    }
+                    _ => unreachable!("Unexpected decimal type in test"),
+                }
+            };
+            let actual_batch = read_file(&file_path, 8, false); // nullable flag is read from here
+            let actual_nullable = actual_batch.schema().field(0).is_nullable();
+            let expected_array = build_expected(&expected_dt, &values_i128);
+            metadata.insert("precision".to_string(), precision.to_string());
+            metadata.insert("scale".to_string(), scale.to_string()); // stored as strings
+            let field =
+                Field::new("value", expected_dt.clone(), actual_nullable).with_metadata(metadata);
+            let expected_schema = Arc::new(Schema::new(vec![field]));
+            let expected_batch =
+                RecordBatch::try_new(expected_schema.clone(), vec![expected_array]).unwrap();
+            assert_eq!(
+                actual_batch, expected_batch,
+                "Decoded RecordBatch does not match for {file}"
+            );
+            let actual_batch_small = read_file(&file_path, 3, false); // same check, batch size 3
+            assert_eq!(
+                actual_batch_small, expected_batch,
+                "Decoded RecordBatch does not match for {file} with batch size 3"
+            );
+        }
+    }
+
+    #[test]
+    fn test_read_duration_logical_types_feature_toggle() -> Result<(), ArrowError> {
+        let file_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test/data/duration_logical_types.avro")
+            .to_string_lossy()
+            .into_owned();
+
+        let actual_batch = read_file(&file_path, 4, false); // crate-local fixture, batch size 4
+        // Exactly one of the cfg-gated blocks below is compiled; it yields the expected batch.
+        let expected_batch = {
+            #[cfg(feature = "avro_custom_types")] // expect Arrow Duration columns
+            {
+                let schema = Arc::new(Schema::new(vec![
+                    Field::new(
+                        "duration_time_nanos",
+                        DataType::Duration(TimeUnit::Nanosecond),
+                        false,
+                    ),
+                    Field::new(
+                        "duration_time_micros",
+                        DataType::Duration(TimeUnit::Microsecond),
+                        false,
+                    ),
+                    Field::new(
+                        "duration_time_millis",
+                        DataType::Duration(TimeUnit::Millisecond),
+                        false,
+                    ),
+                    Field::new(
+                        "duration_time_seconds",
+                        DataType::Duration(TimeUnit::Second),
+                        false,
+                    ),
+                ]));
+
+                let nanos = Arc::new(PrimitiveArray::<DurationNanosecondType>::from(vec![
+                    10, 20, 30, 40,
+                ])) as ArrayRef;
+                let micros = Arc::new(PrimitiveArray::<DurationMicrosecondType>::from(vec![
+                    100, 200, 300, 400,
+                ])) as ArrayRef;
+                let millis = Arc::new(PrimitiveArray::<DurationMillisecondType>::from(vec![
+                    1000, 2000, 3000, 4000,
+                ])) as ArrayRef;
+                let seconds = Arc::new(PrimitiveArray::<DurationSecondType>::from(vec![1, 2, 3, 4]))
+                    as ArrayRef;
+
+                RecordBatch::try_new(schema, vec![nanos, micros, millis, seconds])?
+            }
+            #[cfg(not(feature = "avro_custom_types"))] // expect Int64 + "logicalType" metadata
+            {
+                let schema = Arc::new(Schema::new(vec![
+                    Field::new("duration_time_nanos", DataType::Int64, false).with_metadata(
+                        [(
+                            "logicalType".to_string(),
+                            "arrow.duration-nanos".to_string(),
+                        )]
+                        .into(),
+                    ),
+                    Field::new("duration_time_micros", DataType::Int64, false).with_metadata(
+                        [(
+                            "logicalType".to_string(),
+                            "arrow.duration-micros".to_string(),
+                        )]
+                        .into(),
+                    ),
+                    Field::new("duration_time_millis", DataType::Int64, false).with_metadata(
+                        [(
+                            "logicalType".to_string(),
+                            "arrow.duration-millis".to_string(),
+                        )]
+                        .into(),
+                    ),
+                    Field::new("duration_time_seconds", DataType::Int64, false).with_metadata(
+                        [(
+                            "logicalType".to_string(),
+                            "arrow.duration-seconds".to_string(),
+                        )]
+                        .into(),
+                    ),
+                ]));
+                // Same row values as in the Duration case, typed as plain Int64.
+                let nanos =
+                    Arc::new(PrimitiveArray::<Int64Type>::from(vec![10, 20, 30, 40])) as ArrayRef;
+                let micros = Arc::new(PrimitiveArray::<Int64Type>::from(vec![100, 200, 300, 400]))
+                    as ArrayRef;
+                let millis = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
+                    1000, 2000, 3000, 4000,
+                ])) as ArrayRef;
+                let seconds =
+                    Arc::new(PrimitiveArray::<Int64Type>::from(vec![1, 2, 3, 4])) as ArrayRef;
+
+                RecordBatch::try_new(schema, vec![nanos, micros, millis, seconds])?
+            }
+        };
+
+        assert_eq!(actual_batch, expected_batch);
+
+        Ok(())
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_dict_pages_offset_zero() {
+        let path = arrow_test_data("avro/dict-page-offset-zero.avro");
+        let decoded = read_file(&path, 32, false);
+
+        // The expected column repeats Some(1552) once per decoded row, so this pins the
+        // values and the schema but not the number of rows.
+        let row_count = decoded.num_rows();
+        let l_partkey: Arc<dyn Array> = Arc::new(Int32Array::from(vec![Some(1552); row_count]));
+
+        let expected =
+            RecordBatch::try_from_iter_with_nullable([("l_partkey", l_partkey, true)]).unwrap();
+        assert_eq!(decoded, expected);
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_list_columns() {
+        let file = arrow_test_data("avro/list_columns.avro"); // 3 rows, 2 list columns
+        let mut int64_list_builder = ListBuilder::new(Int64Builder::new());
+        {
+            {
+                let values = int64_list_builder.values();
+                values.append_value(1);
+                values.append_value(2);
+                values.append_value(3);
+            }
+            int64_list_builder.append(true); // row 0: [1, 2, 3]
+        }
+        {
+            {
+                let values = int64_list_builder.values();
+                values.append_null();
+                values.append_value(1);
+            }
+            int64_list_builder.append(true); // row 1: [null, 1]
+        }
+        {
+            {
+                let values = int64_list_builder.values();
+                values.append_value(4);
+            }
+            int64_list_builder.append(true); // row 2: [4]
+        }
+        let int64_list = int64_list_builder.finish();
+        let mut utf8_list_builder = ListBuilder::new(StringBuilder::new());
+        {
+            {
+                let values = utf8_list_builder.values();
+                values.append_value("abc");
+                values.append_value("efg");
+                values.append_value("hij");
+            }
+            utf8_list_builder.append(true); // row 0: ["abc", "efg", "hij"]
+        }
+        {
+            utf8_list_builder.append(false); // row 1: null list
+        }
+        {
+            {
+                let values = utf8_list_builder.values();
+                values.append_value("efg");
+                values.append_null();
+                values.append_value("hij");
+                values.append_value("xyz");
+            }
+            utf8_list_builder.append(true); // row 2: ["efg", null, "hij", "xyz"]
+        }
+        let utf8_list = utf8_list_builder.finish();
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("int64_list", Arc::new(int64_list) as Arc<dyn Array>, true),
+            ("utf8_list", Arc::new(utf8_list) as Arc<dyn Array>, true),
+        ])
+        .unwrap();
+        let batch = read_file(&file, 8, false);
+        assert_eq!(batch, expected);
+    }
+
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_nested_lists() {
+        use arrow_data::ArrayDataBuilder;
+        let file = arrow_test_data("avro/nested_lists.snappy.avro");
+        let inner_values = StringArray::from(vec![ // leaf strings shared by all inner lists
+            Some("a"),
+            Some("b"),
+            Some("c"),
+            Some("d"),
+            Some("a"),
+            Some("b"),
+            Some("c"),
+            Some("d"),
+            Some("e"),
+            Some("a"),
+            Some("b"),
+            Some("c"),
+            Some("d"),
+            Some("e"),
+            Some("f"),
+        ]);
+        let inner_offsets = Buffer::from_slice_ref([0, 2, 3, 3, 4, 6, 8, 8, 9, 11, 13, 14, 14, 15]);
+        let inner_validity = [ // false marks a null inner list
+            true, true, false, true, true, true, false, true, true, true, true, false, true,
+        ];
+        let inner_null_buffer = Buffer::from_iter(inner_validity.iter().copied());
+        let inner_field = Field::new("item", DataType::Utf8, true);
+        let inner_list_data = ArrayDataBuilder::new(DataType::List(Arc::new(inner_field)))
+            .len(13) // 13 inner lists (14 offsets)
+            .add_buffer(inner_offsets)
+            .add_child_data(inner_values.to_data())
+            .null_bit_buffer(Some(inner_null_buffer))
+            .build()
+            .unwrap();
+        let inner_list_array = ListArray::from(inner_list_data);
+        let middle_offsets = Buffer::from_slice_ref([0, 2, 4, 6, 8, 11, 13]);
+        let middle_validity = [true; 6]; // all 6 middle lists are valid
+        let middle_null_buffer = Buffer::from_iter(middle_validity.iter().copied());
+        let middle_field = Field::new("item", inner_list_array.data_type().clone(), true);
+        let middle_list_data = ArrayDataBuilder::new(DataType::List(Arc::new(middle_field)))
+            .len(6) // 6 middle lists (7 offsets)
+            .add_buffer(middle_offsets)
+            .add_child_data(inner_list_array.to_data())
+            .null_bit_buffer(Some(middle_null_buffer))
+            .build()
+            .unwrap();
+        let middle_list_array = ListArray::from(middle_list_data);
+        let outer_offsets = Buffer::from_slice_ref([0, 2, 4, 6]);
+        let outer_null_buffer = Buffer::from_slice_ref([0b111]); // all 3 rows valid
+        let outer_field = Field::new("item", middle_list_array.data_type().clone(), true);
+        let outer_list_data = ArrayDataBuilder::new(DataType::List(Arc::new(outer_field)))
+            .len(3) // 3 top-level rows
+            .add_buffer(outer_offsets)
+            .add_child_data(middle_list_array.to_data())
+            .null_bit_buffer(Some(outer_null_buffer))
+            .build()
+            .unwrap();
+        let a_expected = ListArray::from(outer_list_data);
+        let b_expected = Int32Array::from(vec![1, 1, 1]);
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("a", Arc::new(a_expected) as Arc<dyn Array>, true),
+            ("b", Arc::new(b_expected) as Arc<dyn Array>, true),
+        ])
+        .unwrap();
+        let left = read_file(&file, 8, false);
+        assert_eq!(left, expected, "Mismatch for batch size=8");
+        let left_small = read_file(&file, 3, false); // same expectation at batch size 3
+        assert_eq!(left_small, expected, "Mismatch for batch size=3");
+    }
+
+    #[test]
+    fn test_simple() {
+        let tests = [
+            ("avro/simple_enum.avro", 4, build_expected_enum(), 2),
+            ("avro/simple_fixed.avro", 2, build_expected_fixed(), 1),
+        ];
+        // Each tuple above: (fixture, batch size, expected batch, alternate batch size).
+        fn build_expected_enum() -> RecordBatch {
+            // Build the Int32-keyed Utf8 DictionaryArrays for f1, f2, f3 (f3 has one null key)
+            let keys_f1 = Int32Array::from(vec![0, 1, 2, 3]);
+            let vals_f1 = StringArray::from(vec!["a", "b", "c", "d"]);
+            let f1_dict =
+                DictionaryArray::<Int32Type>::try_new(keys_f1, Arc::new(vals_f1)).unwrap();
+            let keys_f2 = Int32Array::from(vec![2, 3, 0, 1]);
+            let vals_f2 = StringArray::from(vec!["e", "f", "g", "h"]);
+            let f2_dict =
+                DictionaryArray::<Int32Type>::try_new(keys_f2, Arc::new(vals_f2)).unwrap();
+            let keys_f3 = Int32Array::from(vec![Some(1), Some(2), None, Some(0)]);
+            let vals_f3 = StringArray::from(vec!["i", "j", "k"]);
+            let f3_dict =
+                DictionaryArray::<Int32Type>::try_new(keys_f3, Arc::new(vals_f3)).unwrap();
+            let dict_type =
+                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
+            let mut md_f1 = HashMap::new();
+            md_f1.insert(
+                AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                r#"["a","b","c","d"]"#.to_string(),
+            );
+            md_f1.insert(AVRO_NAME_METADATA_KEY.to_string(), "enum1".to_string());
+            md_f1.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns1".to_string());
+            let f1_field = Field::new("f1", dict_type.clone(), false).with_metadata(md_f1);
+            let mut md_f2 = HashMap::new();
+            md_f2.insert(
+                AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                r#"["e","f","g","h"]"#.to_string(),
+            );
+            md_f2.insert(AVRO_NAME_METADATA_KEY.to_string(), "enum2".to_string());
+            md_f2.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns2".to_string());
+            let f2_field = Field::new("f2", dict_type.clone(), false).with_metadata(md_f2);
+            let mut md_f3 = HashMap::new();
+            md_f3.insert(
+                AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                r#"["i","j","k"]"#.to_string(),
+            );
+            md_f3.insert(AVRO_NAME_METADATA_KEY.to_string(), "enum3".to_string());
+            md_f3.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns1".to_string());
+            let f3_field = Field::new("f3", dict_type.clone(), true).with_metadata(md_f3);
+            let expected_schema = Arc::new(Schema::new(vec![f1_field, f2_field, f3_field]));
+            RecordBatch::try_new(
+                expected_schema,
+                vec![
+                    Arc::new(f1_dict) as Arc<dyn Array>,
+                    Arc::new(f2_dict) as Arc<dyn Array>,
+                    Arc::new(f3_dict) as Arc<dyn Array>,
+                ],
+            )
+            .unwrap()
+        }
+
+        fn build_expected_fixed() -> RecordBatch {
+            let f1 =
+                FixedSizeBinaryArray::try_from_iter(vec![b"abcde", b"12345"].into_iter()).unwrap();
+            let f2 =
+                FixedSizeBinaryArray::try_from_iter(vec![b"fghijklmno", b"1234567890"].into_iter())
+                    .unwrap();
+            let f3 = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                vec![Some(b"ABCDEF" as &[u8]), None].into_iter(),
+                6,
+            )
+            .unwrap();
+
+            // Add Avro named-type metadata (name and namespace) for the fixed fields
+            let mut md_f1 = HashMap::new();
+            md_f1.insert(
+                crate::schema::AVRO_NAME_METADATA_KEY.to_string(),
+                "fixed1".to_string(),
+            );
+            md_f1.insert(
+                crate::schema::AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "ns1".to_string(),
+            );
+
+            let mut md_f2 = HashMap::new();
+            md_f2.insert(
+                crate::schema::AVRO_NAME_METADATA_KEY.to_string(),
+                "fixed2".to_string(),
+            );
+            md_f2.insert(
+                crate::schema::AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "ns2".to_string(),
+            );
+
+            let mut md_f3 = HashMap::new();
+            md_f3.insert(
+                crate::schema::AVRO_NAME_METADATA_KEY.to_string(),
+                "fixed3".to_string(),
+            );
+            md_f3.insert(
+                crate::schema::AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "ns1".to_string(),
+            );
+
+            let expected_schema = Arc::new(Schema::new(vec![
+                Field::new("f1", DataType::FixedSizeBinary(5), false).with_metadata(md_f1),
+                Field::new("f2", DataType::FixedSizeBinary(10), false).with_metadata(md_f2),
+                Field::new("f3", DataType::FixedSizeBinary(6), true).with_metadata(md_f3),
+            ]));
+
+            RecordBatch::try_new(
+                expected_schema,
+                vec![
+                    Arc::new(f1) as Arc<dyn Array>,
+                    Arc::new(f2) as Arc<dyn Array>,
+                    Arc::new(f3) as Arc<dyn Array>,
+                ],
+            )
+            .unwrap()
+        }
+        for (file_name, batch_size, expected, alt_batch_size) in tests {
+            let file = arrow_test_data(file_name); // decoded once per batch size below
+            for size in [batch_size, alt_batch_size] {
+                let actual = read_file(&file, size, false);
+                assert_eq!(actual, expected, "{file_name} mismatch, batch size={size}");
+            }
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_single_nan() {
+        use arrow_array::Float64Array;
+
+        // Expected result: one nullable Float64 column, "mycol", whose single row is null.
+        let mycol = Field::new("mycol", DataType::Float64, true);
+        let schema = Arc::new(Schema::new(vec![mycol]));
+        let null_row = Float64Array::from(vec![None]);
+        let expected = RecordBatch::try_new(schema, vec![Arc::new(null_row)]).unwrap();
+
+        // Decode with batch sizes 1 and 2; both must produce the same batch.
+        let path = arrow_test_data("avro/single_nan.avro");
+        for batch_size in [1, 2] {
+            assert_eq!(read_file(&path, batch_size, false), expected);
+        }
+    }
+
+    #[test]
+    fn test_duration_uuid() {
+        let batch = read_file("test/data/duration_uuid.avro", 4, false); // cwd-relative path
+        let schema = batch.schema();
+        let fields = schema.fields();
+        assert_eq!(fields.len(), 2); // schema checks: names and data types only
+        assert_eq!(fields[0].name(), "duration_field");
+        assert_eq!(
+            fields[0].data_type(),
+            &DataType::Interval(IntervalUnit::MonthDayNano)
+        );
+        assert_eq!(fields[1].name(), "uuid_field");
+        assert_eq!(fields[1].data_type(), &DataType::FixedSizeBinary(16));
+        assert_eq!(batch.num_rows(), 4);
+        assert_eq!(batch.num_columns(), 2);
+        let duration_array = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<IntervalMonthDayNanoArray>()
+            .unwrap();
+        let expected_duration_array: IntervalMonthDayNanoArray = [
+            Some(IntervalMonthDayNanoType::make_value(1, 15, 500_000_000)), // months, days, nanos
+            Some(IntervalMonthDayNanoType::make_value(0, 5, 2_500_000_000)),
+            Some(IntervalMonthDayNanoType::make_value(2, 0, 0)),
+            Some(IntervalMonthDayNanoType::make_value(12, 31, 999_000_000)),
+        ]
+        .iter()
+        .copied()
+        .collect();
+        assert_eq!(&expected_duration_array, duration_array);
+        let uuid_array = batch
+            .column(1)
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        let expected_uuid_array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+            [
+                Some([
+                    0xfe, 0x7b, 0xc3, 0x0b, 0x4c, 0xe8, 0x4c, 0x5e, 0xb6, 0x7c, 0x22, 0x34, 0xa2,
+                    0xd3, 0x8e, 0x66,
+                ]),
+                Some([
+                    0xb3, 0x3f, 0x2a, 0xd7, 0x97, 0xb4, 0x4d, 0xe1, 0x8b, 0xfe, 0x94, 0x94, 0x1d,
+                    0x60, 0x15, 0x6e,
+                ]),
+                Some([
+                    0x5f, 0x74, 0x92, 0x64, 0x07, 0x4b, 0x40, 0x05, 0x84, 0xbf, 0x11, 0x5e, 0xa8,
+                    0x4e, 0xd2, 0x0a,
+                ]),
+                Some([
+                    0x08, 0x26, 0xcc, 0x06, 0xd2, 0xe3, 0x45, 0x99, 0xb4, 0xad, 0xaf, 0x5f, 0xa6,
+                    0x90, 0x5c, 0xdb,
+                ]),
+            ]
+            .into_iter(),
+            16, // byte width of each value
+        )
+        .unwrap();
+        assert_eq!(&expected_uuid_array, uuid_array);
+    }
+
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_datapage_v2() {
+        let file = arrow_test_data("avro/datapage_v2.snappy.avro"); // 5 rows, columns a-e
+        let batch = read_file(&file, 8, false);
+        let a = StringArray::from(vec![
+            Some("abc"),
+            Some("abc"),
+            Some("abc"),
+            None,
+            Some("abc"),
+        ]);
+        let b = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), Some(5)]);
+        let c = Float64Array::from(vec![Some(2.0), Some(3.0), Some(4.0), Some(5.0), Some(2.0)]);
+        let d = BooleanArray::from(vec![
+            Some(true),
+            Some(true),
+            Some(true),
+            Some(false),
+            Some(true),
+        ]);
+        let e_values = Int32Array::from(vec![ // flattened child values of list column "e"
+            Some(1),
+            Some(2),
+            Some(3),
+            Some(1),
+            Some(2),
+            Some(3),
+            Some(1),
+            Some(2),
+        ]);
+        let e_offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0i32, 3, 3, 3, 6, 8]));
+        let e_validity = Some(NullBuffer::from(vec![true, false, false, true, true]));
+        let field_e = Arc::new(Field::new("item", DataType::Int32, true)); // nullable child
+        let e = ListArray::new(field_e, e_offsets, Arc::new(e_values), e_validity);
+        let expected = RecordBatch::try_from_iter_with_nullable([
+            ("a", Arc::new(a) as Arc<dyn Array>, true),
+            ("b", Arc::new(b) as Arc<dyn Array>, true),
+            ("c", Arc::new(c) as Arc<dyn Array>, true),
+            ("d", Arc::new(d) as Arc<dyn Array>, true),
+            ("e", Arc::new(e) as Arc<dyn Array>, true), // [1,2,3], null, null, [1,2,3], [1,2]
+        ])
+        .unwrap();
+        assert_eq!(batch, expected);
+    }
+
+    /// Decodes `avro/nested_records.avro` and compares it with a hand-built
+    /// expected `RecordBatch` of four columns (`f1`..`f4`), each with two rows.
+    ///
+    /// The expected schema carries Avro name/namespace metadata on the nested
+    /// record fields and on the list item fields. The file is decoded twice
+    /// (batch sizes 8 and 3, per the assertion messages) and both results must
+    /// equal the expected batch.
+    #[test]
+    fn test_nested_records() {
+        // f1: struct<f1_1: Utf8, f1_2: Int32, f1_3: struct<f1_3_1: Float64>>.
+        let f1_f1_1 = StringArray::from(vec!["aaa", "bbb"]);
+        let f1_f1_2 = Int32Array::from(vec![10, 20]);
+        // PI rounded to two decimal places.
+        let rounded_pi = (std::f64::consts::PI * 100.0).round() / 100.0;
+        let f1_f1_3_1 = Float64Array::from(vec![rounded_pi, rounded_pi]);
+        let f1_f1_3 = StructArray::from(vec![(
+            Arc::new(Field::new("f1_3_1", DataType::Float64, false)),
+            Arc::new(f1_f1_3_1) as Arc<dyn Array>,
+        )]);
+        // Add Avro named-type metadata to nested field f1_3 (ns3.record3)
+        let mut f1_3_md: HashMap<String, String> = HashMap::new();
+        f1_3_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns3".to_string());
+        f1_3_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "record3".to_string());
+        let f1_expected = StructArray::from(vec![
+            (
+                Arc::new(Field::new("f1_1", DataType::Utf8, false)),
+                Arc::new(f1_f1_1) as Arc<dyn Array>,
+            ),
+            (
+                Arc::new(Field::new("f1_2", DataType::Int32, false)),
+                Arc::new(f1_f1_2) as Arc<dyn Array>,
+            ),
+            (
+                Arc::new(
+                    Field::new(
+                        "f1_3",
+                        DataType::Struct(Fields::from(vec![Field::new(
+                            "f1_3_1",
+                            DataType::Float64,
+                            false,
+                        )])),
+                        false,
+                    )
+                    .with_metadata(f1_3_md),
+                ),
+                Arc::new(f1_f1_3) as Arc<dyn Array>,
+            ),
+        ]);
+        // f2: list of struct<f2_1: Boolean, f2_2: Float32>.
+        // Row 0 holds two items, row 1 holds one item.
+        let f2_fields = [
+            Field::new("f2_1", DataType::Boolean, false),
+            Field::new("f2_2", DataType::Float32, false),
+        ];
+        let f2_struct_builder = StructBuilder::new(
+            f2_fields
+                .iter()
+                .map(|f| Arc::new(f.clone()))
+                .collect::<Vec<Arc<Field>>>(),
+            vec![
+                Box::new(BooleanBuilder::new()) as Box<dyn arrow_array::builder::ArrayBuilder>,
+                Box::new(Float32Builder::new()) as Box<dyn arrow_array::builder::ArrayBuilder>,
+            ],
+        );
+        let mut f2_list_builder = ListBuilder::new(f2_struct_builder);
+        {
+            // Row 0: [{true, 1.2}, {true, 2.2}]
+            let struct_builder = f2_list_builder.values();
+            struct_builder.append(true);
+            {
+                let b = struct_builder.field_builder::<BooleanBuilder>(0).unwrap();
+                b.append_value(true);
+            }
+            {
+                let b = struct_builder.field_builder::<Float32Builder>(1).unwrap();
+                b.append_value(1.2_f32);
+            }
+            struct_builder.append(true);
+            {
+                let b = struct_builder.field_builder::<BooleanBuilder>(0).unwrap();
+                b.append_value(true);
+            }
+            {
+                let b = struct_builder.field_builder::<Float32Builder>(1).unwrap();
+                b.append_value(2.2_f32);
+            }
+            f2_list_builder.append(true);
+        }
+        {
+            // Row 1: [{false, 10.2}]
+            let struct_builder = f2_list_builder.values();
+            struct_builder.append(true);
+            {
+                let b = struct_builder.field_builder::<BooleanBuilder>(0).unwrap();
+                b.append_value(false);
+            }
+            {
+                let b = struct_builder.field_builder::<Float32Builder>(1).unwrap();
+                b.append_value(10.2_f32);
+            }
+            f2_list_builder.append(true);
+        }
+
+        // The builder's output is re-typed below so that the list item field is
+        // non-nullable and carries the Avro metadata; the buffers are reused as-is.
+        let list_array_with_nullable_items = f2_list_builder.finish();
+        // Add Avro named-type metadata to f2's list item (ns4.record4)
+        let mut f2_item_md: HashMap<String, String> = HashMap::new();
+        f2_item_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "record4".to_string());
+        f2_item_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns4".to_string());
+        let item_field = Arc::new(
+            Field::new(
+                "item",
+                list_array_with_nullable_items.values().data_type().clone(),
+                false, // items are non-nullable for f2
+            )
+            .with_metadata(f2_item_md),
+        );
+        let list_data_type = DataType::List(item_field);
+        let f2_array_data = list_array_with_nullable_items
+            .to_data()
+            .into_builder()
+            .data_type(list_data_type)
+            .build()
+            .unwrap();
+        let f2_expected = ListArray::from(f2_array_data);
+        // f3: struct<f3_1: Utf8>; row 0 is valid ("xyz"), row 1 is a null struct.
+        let mut f3_struct_builder = StructBuilder::new(
+            vec![Arc::new(Field::new("f3_1", DataType::Utf8, false))],
+            vec![Box::new(StringBuilder::new()) as Box<dyn ArrayBuilder>],
+        );
+        f3_struct_builder.append(true);
+        {
+            let b = f3_struct_builder.field_builder::<StringBuilder>(0).unwrap();
+            b.append_value("xyz");
+        }
+        f3_struct_builder.append(false);
+        {
+            let b = f3_struct_builder.field_builder::<StringBuilder>(0).unwrap();
+            b.append_null();
+        }
+        let f3_expected = f3_struct_builder.finish();
+        // f4: list of struct<f4_1: Int64>; each row holds one valid and one null item.
+        let f4_fields = [Field::new("f4_1", DataType::Int64, false)];
+        let f4_struct_builder = StructBuilder::new(
+            f4_fields
+                .iter()
+                .map(|f| Arc::new(f.clone()))
+                .collect::<Vec<Arc<Field>>>(),
+            vec![Box::new(Int64Builder::new()) as Box<dyn arrow_array::builder::ArrayBuilder>],
+        );
+        let mut f4_list_builder = ListBuilder::new(f4_struct_builder);
+        {
+            // Row 0: [{200}, null]
+            let struct_builder = f4_list_builder.values();
+            struct_builder.append(true);
+            {
+                let b = struct_builder.field_builder::<Int64Builder>(0).unwrap();
+                b.append_value(200);
+            }
+            struct_builder.append(false);
+            {
+                let b = struct_builder.field_builder::<Int64Builder>(0).unwrap();
+                b.append_null();
+            }
+            f4_list_builder.append(true);
+        }
+        {
+            // Row 1: [null, {300}]
+            let struct_builder = f4_list_builder.values();
+            struct_builder.append(false);
+            {
+                let b = struct_builder.field_builder::<Int64Builder>(0).unwrap();
+                b.append_null();
+            }
+            struct_builder.append(true);
+            {
+                let b = struct_builder.field_builder::<Int64Builder>(0).unwrap();
+                b.append_value(300);
+            }
+            f4_list_builder.append(true);
+        }
+        let f4_expected = f4_list_builder.finish();
+        // Add Avro named-type metadata to f4's list item (ns6.record6), item is nullable
+        let mut f4_item_md: HashMap<String, String> = HashMap::new();
+        f4_item_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns6".to_string());
+        f4_item_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "record6".to_string());
+        let f4_item_field = Arc::new(
+            Field::new("item", f4_expected.values().data_type().clone(), true)
+                .with_metadata(f4_item_md),
+        );
+        let f4_list_data_type = DataType::List(f4_item_field);
+        // Rebuild the array with the metadata-bearing list type (shadows the builder output).
+        let f4_array_data = f4_expected
+            .to_data()
+            .into_builder()
+            .data_type(f4_list_data_type)
+            .build()
+            .unwrap();
+        let f4_expected = ListArray::from(f4_array_data);
+        // Build Schema with Avro named-type metadata on the top-level f1 and f3 fields
+        let mut f1_md: HashMap<String, String> = HashMap::new();
+        f1_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "record2".to_string());
+        f1_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns2".to_string());
+        let mut f3_md: HashMap<String, String> = HashMap::new();
+        f3_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), "ns5".to_string());
+        f3_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "record5".to_string());
+        // Only f3 is declared nullable at the top level.
+        let expected_schema = Schema::new(vec![
+            Field::new("f1", f1_expected.data_type().clone(), false).with_metadata(f1_md),
+            Field::new("f2", f2_expected.data_type().clone(), false),
+            Field::new("f3", f3_expected.data_type().clone(), true).with_metadata(f3_md),
+            Field::new("f4", f4_expected.data_type().clone(), false),
+        ]);
+        let expected = RecordBatch::try_new(
+            Arc::new(expected_schema),
+            vec![
+                Arc::new(f1_expected) as Arc<dyn Array>,
+                Arc::new(f2_expected) as Arc<dyn Array>,
+                Arc::new(f3_expected) as Arc<dyn Array>,
+                Arc::new(f4_expected) as Arc<dyn Array>,
+            ],
+        )
+        .unwrap();
+        // Decode the fixture at two batch sizes; both results must equal `expected`.
+        let file = arrow_test_data("avro/nested_records.avro");
+        let batch_large = read_file(&file, 8, false);
+        assert_eq!(
+            batch_large, expected,
+            "Decoded RecordBatch does not match expected data for nested records (batch size 8)"
+        );
+        let batch_small = read_file(&file, 3, false);
+        assert_eq!(
+            batch_small, expected,
+            "Decoded RecordBatch does not match expected data for nested records (batch size 3)"
+        );
+    }
+
+    /// Decodes `avro/repeated_no_annotation.avro` and compares it with a
+    /// hand-assembled expected `RecordBatch` of six rows and two columns:
+    /// `id` (Int32) and `phoneNumbers` (a struct wrapping a list of phone structs).
+    ///
+    /// The expected arrays are built directly from `ArrayData` so that offsets,
+    /// validity bitmaps and field metadata are spelled out explicitly. The file
+    /// is decoded at batch sizes 8 and 3 and both results must match.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_repeated_no_annotation() {
+        use arrow_data::ArrayDataBuilder;
+        let file = arrow_test_data("avro/repeated_no_annotation.avro");
+        let batch_large = read_file(&file, 8, false);
+        // id column
+        let id_array = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
+        // Build the inner Struct<number:int64, kind:utf8>
+        // Five phone entries in total, shared across the list rows via the offsets below.
+        let number_array = Int64Array::from(vec![
+            Some(5555555555),
+            Some(1111111111),
+            Some(1111111111),
+            Some(2222222222),
+            Some(3333333333),
+        ]);
+        let kind_array =
+            StringArray::from(vec![None, Some("home"), Some("home"), None, Some("mobile")]);
+        let phone_fields = Fields::from(vec![
+            Field::new("number", DataType::Int64, true),
+            Field::new("kind", DataType::Utf8, true),
+        ]);
+        let phone_struct_data = ArrayDataBuilder::new(DataType::Struct(phone_fields))
+            .len(5)
+            .child_data(vec![number_array.into_data(), kind_array.into_data()])
+            .build()
+            .unwrap();
+        let phone_struct_array = StructArray::from(phone_struct_data);
+        // Build List<item: Struct<...>> with Avro named-type metadata on the *element* field
+        // Offsets give per-row item counts of 0, 0, 0, 1, 1, 3; the validity marks rows 0 and 1 as null.
+        let phone_list_offsets = Buffer::from_slice_ref([0i32, 0, 0, 0, 1, 2, 5]);
+        let phone_list_validity = Buffer::from_iter([false, false, true, true, true, true]);
+        // The Avro schema names this inner record "phone" in namespace "topLevelRecord.phoneNumbers"
+        let mut phone_item_md = HashMap::new();
+        phone_item_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "phone".to_string());
+        phone_item_md.insert(
+            AVRO_NAMESPACE_METADATA_KEY.to_string(),
+            "topLevelRecord.phoneNumbers".to_string(),
+        );
+        let phone_item_field = Field::new("item", phone_struct_array.data_type().clone(), true)
+            .with_metadata(phone_item_md);
+        let phone_list_data = ArrayDataBuilder::new(DataType::List(Arc::new(phone_item_field)))
+            .len(6)
+            .add_buffer(phone_list_offsets)
+            .null_bit_buffer(Some(phone_list_validity))
+            .child_data(vec![phone_struct_array.into_data()])
+            .build()
+            .unwrap();
+        let phone_list_array = ListArray::from(phone_list_data);
+        // Wrap in Struct { phone: List<...> }
+        // The wrapping struct uses the same validity pattern as the list (rows 0 and 1 null).
+        let phone_numbers_validity = Buffer::from_iter([false, false, true, true, true, true]);
+        let phone_numbers_field = Field::new("phone", phone_list_array.data_type().clone(), true);
+        let phone_numbers_struct_data =
+            ArrayDataBuilder::new(DataType::Struct(Fields::from(vec![phone_numbers_field])))
+                .len(6)
+                .null_bit_buffer(Some(phone_numbers_validity))
+                .child_data(vec![phone_list_array.into_data()])
+                .build()
+                .unwrap();
+        let phone_numbers_struct_array = StructArray::from(phone_numbers_struct_data);
+        // Build the expected Schema, annotating the top-level "phoneNumbers" field with Avro name/namespace
+        let mut phone_numbers_md = HashMap::new();
+        phone_numbers_md.insert(
+            AVRO_NAME_METADATA_KEY.to_string(),
+            "phoneNumbers".to_string(),
+        );
+        phone_numbers_md.insert(
+            AVRO_NAMESPACE_METADATA_KEY.to_string(),
+            "topLevelRecord".to_string(),
+        );
+        // Both top-level fields are declared nullable in the expected schema.
+        let id_field = Field::new("id", DataType::Int32, true);
+        let phone_numbers_schema_field = Field::new(
+            "phoneNumbers",
+            phone_numbers_struct_array.data_type().clone(),
+            true,
+        )
+        .with_metadata(phone_numbers_md);
+        let expected_schema = Schema::new(vec![id_field, phone_numbers_schema_field]);
+        // Final expected RecordBatch (arrays already carry matching list-element metadata)
+        let expected = RecordBatch::try_new(
+            Arc::new(expected_schema),
+            vec![
+                Arc::new(id_array) as _,
+                Arc::new(phone_numbers_struct_array) as _,
+            ],
+        )
+        .unwrap();
+        assert_eq!(batch_large, expected, "Mismatch for batch_size=8");
+        // Decode again with a smaller batch size; the result must be unchanged.
+        let batch_small = read_file(&file, 3, false);
+        assert_eq!(batch_small, expected, "Mismatch for batch_size=3");
+    }
+
+    /// Decodes `avro/nonnullable.impala.avro` and compares it with a hand-built
+    /// expected single-row `RecordBatch` of six columns: `ID`, `Int_Array`,
+    /// `int_array_array`, `Int_Map`, `int_map_array` and `nested_Struct`.
+    ///
+    /// The nested builders are configured with explicit fields so that the
+    /// Avro name/namespace metadata ends up on the expected nested types. The
+    /// file is decoded at batch sizes 8 and 3 and both results must match.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_nonnullable_impala() {
+        let file = arrow_test_data("avro/nonnullable.impala.avro");
+        let id = Int64Array::from(vec![Some(8)]);
+        // Int_Array: one row, [-1]
+        let mut int_array_builder = ListBuilder::new(Int32Builder::new());
+        {
+            let vb = int_array_builder.values();
+            vb.append_value(-1);
+        }
+        int_array_builder.append(true); // finalize one sub-list
+        let int_array = int_array_builder.finish();
+        // int_array_array: one row, [[-1, -2], []]
+        let mut iaa_builder = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
+        {
+            let inner_list_builder = iaa_builder.values();
+            {
+                let vb = inner_list_builder.values();
+                vb.append_value(-1);
+                vb.append_value(-2);
+            }
+            // The first append closes [-1, -2]; the second appends an empty inner list.
+            inner_list_builder.append(true);
+            inner_list_builder.append(true);
+        }
+        iaa_builder.append(true);
+        let int_array_array = iaa_builder.finish();
+        // Int_Map: one row, {"k1": -1}
+        let field_names = MapFieldNames {
+            entry: "entries".to_string(),
+            key: "key".to_string(),
+            value: "value".to_string(),
+        };
+        let mut int_map_builder =
+            MapBuilder::new(Some(field_names), StringBuilder::new(), Int32Builder::new());
+        {
+            let (keys, vals) = int_map_builder.entries();
+            keys.append_value("k1");
+            vals.append_value(-1);
+        }
+        int_map_builder.append(true).unwrap(); // finalize map for row 0
+        let int_map = int_map_builder.finish();
+        // int_map_array: one row, [{}, {"k1": 1}, {}, {}]
+        let field_names2 = MapFieldNames {
+            entry: "entries".to_string(),
+            key: "key".to_string(),
+            value: "value".to_string(),
+        };
+        let mut ima_builder = ListBuilder::new(MapBuilder::new(
+            Some(field_names2),
+            StringBuilder::new(),
+            Int32Builder::new(),
+        ));
+        {
+            let map_builder = ima_builder.values();
+            // An empty map first, then the map holding the single "k1" entry.
+            map_builder.append(true).unwrap();
+            {
+                let (keys, vals) = map_builder.entries();
+                keys.append_value("k1");
+                vals.append_value(1);
+            }
+            map_builder.append(true).unwrap();
+            // Two more empty maps.
+            map_builder.append(true).unwrap();
+            map_builder.append(true).unwrap();
+        }
+        ima_builder.append(true);
+        let int_map_array_ = ima_builder.finish();
+        // Helper metadata maps
+        let meta_nested_struct: HashMap<String, String> = [
+            ("avro.name", "nested_Struct"),
+            ("avro.namespace", "topLevelRecord"),
+        ]
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.to_string()))
+        .collect();
+        let meta_c: HashMap<String, String> = [
+            ("avro.name", "c"),
+            ("avro.namespace", "topLevelRecord.nested_Struct"),
+        ]
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.to_string()))
+        .collect();
+        let meta_d_item_struct: HashMap<String, String> = [
+            ("avro.name", "D"),
+            ("avro.namespace", "topLevelRecord.nested_Struct.c"),
+        ]
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.to_string()))
+        .collect();
+        let meta_g_value: HashMap<String, String> = [
+            ("avro.name", "G"),
+            ("avro.namespace", "topLevelRecord.nested_Struct"),
+        ]
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.to_string()))
+        .collect();
+        let meta_h: HashMap<String, String> = [
+            ("avro.name", "h"),
+            ("avro.namespace", "topLevelRecord.nested_Struct.G"),
+        ]
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.to_string()))
+        .collect();
+        // Types used multiple times below
+        // c.D item: Struct<e: Int32, f: Utf8> carrying the "D" metadata.
+        let ef_struct_field = Arc::new(
+            Field::new(
+                "item",
+                DataType::Struct(
+                    vec![
+                        Field::new("e", DataType::Int32, true),
+                        Field::new("f", DataType::Utf8, true),
+                    ]
+                    .into(),
+                ),
+                true,
+            )
+            .with_metadata(meta_d_item_struct.clone()),
+        );
+        // c.D : List<List<Struct<e, f>>>
+        let d_inner_list_field = Arc::new(Field::new(
+            "item",
+            DataType::List(ef_struct_field.clone()),
+            true,
+        ));
+        let d_field = Field::new("D", DataType::List(d_inner_list_field.clone()), true);
+        // G.value.h.i : List<Float64>
+        let i_list_field = Arc::new(Field::new("item", DataType::Float64, true));
+        let i_field = Field::new("i", DataType::List(i_list_field.clone()), true);
+        // G.value.h : Struct<{ i: List<Float64> }> with metadata (h)
+        let h_field = Field::new("h", DataType::Struct(vec![i_field.clone()].into()), true)
+            .with_metadata(meta_h.clone());
+        // G.value : Struct<{ h: ... }> with metadata (G)
+        let g_value_struct_field = Field::new(
+            "value",
+            DataType::Struct(vec![h_field.clone()].into()),
+            true,
+        )
+        .with_metadata(meta_g_value.clone());
+        // entries struct for Map G
+        let entries_struct_field = Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new("key", DataType::Utf8, false),
+                    g_value_struct_field.clone(),
+                ]
+                .into(),
+            ),
+            false,
+        );
+        // Top-level nested_Struct fields (include metadata on "c")
+        let a_field = Arc::new(Field::new("a", DataType::Int32, true));
+        let b_field = Arc::new(Field::new(
+            "B",
+            DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
+            true,
+        ));
+        let c_field = Arc::new(
+            Field::new("c", DataType::Struct(vec![d_field.clone()].into()), true)
+                .with_metadata(meta_c.clone()),
+        );
+        let g_field = Arc::new(Field::new(
+            "G",
+            DataType::Map(Arc::new(entries_struct_field.clone()), false),
+            true,
+        ));
+        // Now create builders that match these exact field types (so nested types carry metadata)
+        // The child builders are listed in the same order as the fields: a, B, c, G.
+        let mut nested_sb = StructBuilder::new(
+            vec![
+                a_field.clone(),
+                b_field.clone(),
+                c_field.clone(),
+                g_field.clone(),
+            ],
+            vec![
+                Box::new(Int32Builder::new()),
+                Box::new(ListBuilder::new(Int32Builder::new())),
+                {
+                    // builder for "c" with correctly typed "D" including metadata on inner list item
+                    Box::new(StructBuilder::new(
+                        vec![Arc::new(d_field.clone())],
+                        vec![Box::new({
+                            let ef_struct_builder = StructBuilder::new(
+                                vec![
+                                    Arc::new(Field::new("e", DataType::Int32, true)),
+                                    Arc::new(Field::new("f", DataType::Utf8, true)),
+                                ],
+                                vec![
+                                    Box::new(Int32Builder::new()),
+                                    Box::new(StringBuilder::new()),
+                                ],
+                            );
+                            // Inner list that holds Struct<e,f> with Avro named-type metadata ("D")
+                            let list_of_ef = ListBuilder::new(ef_struct_builder)
+                                .with_field(ef_struct_field.clone());
+                            // Outer list for "D"
+                            ListBuilder::new(list_of_ef)
+                        })],
+                    ))
+                },
+                {
+                    // builder for "G": Map<Utf8, Struct<h: Struct<i: List<Float64>>>>
+                    let map_field_names = MapFieldNames {
+                        entry: "entries".to_string(),
+                        key: "key".to_string(),
+                        value: "value".to_string(),
+                    };
+                    let i_list_builder = ListBuilder::new(Float64Builder::new());
+                    let h_struct_builder = StructBuilder::new(
+                        vec![Arc::new(Field::new(
+                            "i",
+                            DataType::List(i_list_field.clone()),
+                            true,
+                        ))],
+                        vec![Box::new(i_list_builder)],
+                    );
+                    let g_value_builder = StructBuilder::new(
+                        vec![Arc::new(
+                            Field::new("h", DataType::Struct(vec![i_field.clone()].into()), true)
+                                .with_metadata(meta_h.clone()),
+                        )],
+                        vec![Box::new(h_struct_builder)],
+                    );
+                    // Use with_values_field to attach metadata to "value" field in the map's entries
+                    let map_builder = MapBuilder::new(
+                        Some(map_field_names),
+                        StringBuilder::new(),
+                        g_value_builder,
+                    )
+                    .with_values_field(Arc::new(
+                        Field::new(
+                            "value",
+                            DataType::Struct(vec![h_field.clone()].into()),
+                            true,
+                        )
+                        .with_metadata(meta_g_value.clone()),
+                    ));
+
+                    Box::new(map_builder)
+                },
+            ],
+        );
+        // Populate the single nested_Struct row.
+        nested_sb.append(true);
+        {
+            // a = -1
+            let a_builder = nested_sb.field_builder::<Int32Builder>(0).unwrap();
+            a_builder.append_value(-1);
+        }
+        {
+            // B = [-1]
+            let b_builder = nested_sb
+                .field_builder::<ListBuilder<Int32Builder>>(1)
+                .unwrap();
+            {
+                let vb = b_builder.values();
+                vb.append_value(-1);
+            }
+            b_builder.append(true);
+        }
+        {
+            // c.D = [[{e: -1, f: "nonnullable"}]]
+            let c_struct_builder = nested_sb.field_builder::<StructBuilder>(2).unwrap();
+            c_struct_builder.append(true);
+            let d_list_builder = c_struct_builder
+                .field_builder::<ListBuilder<ListBuilder<StructBuilder>>>(0)
+                .unwrap();
+            {
+                let sub_list_builder = d_list_builder.values();
+                {
+                    let ef_struct = sub_list_builder.values();
+                    ef_struct.append(true);
+                    {
+                        let e_b = ef_struct.field_builder::<Int32Builder>(0).unwrap();
+                        e_b.append_value(-1);
+                        let f_b = ef_struct.field_builder::<StringBuilder>(1).unwrap();
+                        f_b.append_value("nonnullable");
+                    }
+                    sub_list_builder.append(true);
+                }
+                d_list_builder.append(true);
+            }
+        }
+        {
+            // G = {} (a valid map with no entries)
+            let g_map_builder = nested_sb
+                .field_builder::<MapBuilder<StringBuilder, StructBuilder>>(3)
+                .unwrap();
+            g_map_builder.append(true).unwrap();
+        }
+        let nested_struct = nested_sb.finish();
+        // Every top-level field is declared nullable; only nested_Struct carries metadata.
+        let schema = Arc::new(arrow_schema::Schema::new(vec![
+            Field::new("ID", id.data_type().clone(), true),
+            Field::new("Int_Array", int_array.data_type().clone(), true),
+            Field::new("int_array_array", int_array_array.data_type().clone(), true),
+            Field::new("Int_Map", int_map.data_type().clone(), true),
+            Field::new("int_map_array", int_map_array_.data_type().clone(), true),
+            Field::new("nested_Struct", nested_struct.data_type().clone(), true)
+                .with_metadata(meta_nested_struct.clone()),
+        ]));
+        let expected = RecordBatch::try_new(
+            schema,
+            vec![
+                Arc::new(id) as Arc<dyn Array>,
+                Arc::new(int_array),
+                Arc::new(int_array_array),
+                Arc::new(int_map),
+                Arc::new(int_map_array_),
+                Arc::new(nested_struct),
+            ],
+        )
+        .unwrap();
+        // Decode the fixture at two batch sizes; both results must equal `expected`.
+        let batch_large = read_file(&file, 8, false);
+        assert_eq!(batch_large, expected, "Mismatch for batch_size=8");
+        let batch_small = read_file(&file, 3, false);
+        assert_eq!(batch_small, expected, "Mismatch for batch_size=3");
+    }
+
+    /// Reading `avro/nonnullable.impala.avro` through `read_file_strict` must
+    /// fail, and the error text must mention the disallowed `['T','null']`
+    /// union form.
+    #[test]
+    fn test_nonnullable_impala_strict() {
+        let path = arrow_test_data("avro/nonnullable.impala.avro");
+        // The read is expected to fail; keep only the rendered error message.
+        let message = read_file_strict(&path, 8, false).unwrap_err().to_string();
+        let expected_fragment =
+            "Found Avro union of the form ['T','null'], which is disallowed in strict_mode";
+        assert!(message.contains(expected_fragment));
+    }
+
+    /// Spot-checks the decoded contents of `avro/nullable.impala.avro`.
+    ///
+    /// The file is read twice (batch sizes 3 and 8) and the results must be
+    /// equal. The test then checks the row count, every value of column 0,
+    /// the first list of column 1, and selected entries of field `A` inside
+    /// the struct in column 5.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_nullable_impala() {
+        let path = arrow_test_data("avro/nullable.impala.avro");
+        let batch = read_file(&path, 3, false);
+        let batch_other_size = read_file(&path, 8, false);
+        assert_eq!(batch, batch_other_size);
+        assert_eq!(batch.num_rows(), 7);
+
+        // Column 0: ids 1 through 7, in order.
+        let ids = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .expect("id column should be an Int64Array");
+        let expected_ids = [1, 2, 3, 4, 5, 6, 7];
+        for (i, expected_id) in expected_ids.iter().copied().enumerate() {
+            assert_eq!(ids.value(i), expected_id, "Mismatch in id at row {i}",);
+        }
+
+        // Column 1: the first list must be [1, 2, 3].
+        let int_lists = batch
+            .column(1)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .expect("int_array column should be a ListArray");
+        let list_offsets = int_lists.value_offsets();
+        let (first, last) = (list_offsets[0] as usize, list_offsets[1] as usize);
+        let int_values = int_lists
+            .values()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Values of int_array should be an Int32Array");
+        let mut row0: Vec<Option<i32>> = Vec::new();
+        for idx in first..last {
+            row0.push(Some(int_values.value(idx)));
+        }
+        assert_eq!(
+            row0,
+            vec![Some(1), Some(2), Some(3)],
+            "Mismatch in int_array row 0"
+        );
+
+        // Column 5: field "A" of the nested struct.
+        let a_values = batch
+            .column(5)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .expect("nested_struct column should be a StructArray")
+            .column_by_name("A")
+            .expect("Field A should exist in nested_struct")
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Field A should be an Int32Array");
+        assert_eq!(a_values.value(0), 1, "Mismatch in nested_struct.A at row 0");
+        assert!(
+            a_values.is_null(1),
+            "Expected null in nested_struct.A at row 1"
+        );
+        assert!(
+            a_values.is_null(3),
+            "Expected null in nested_struct.A at row 3"
+        );
+        assert_eq!(a_values.value(6), 7, "Mismatch in nested_struct.A at row 6");
+    }
+
+    /// Reading `avro/nullable.impala.avro` through `read_file_strict` must
+    /// fail, and the error text must mention the disallowed `['T','null']`
+    /// union form.
+    #[test]
+    fn test_nullable_impala_strict() {
+        let path = arrow_test_data("avro/nullable.impala.avro");
+        // The read is expected to fail; keep only the rendered error message.
+        let message = read_file_strict(&path, 8, false).unwrap_err().to_string();
+        let expected_fragment =
+            "Found Avro union of the form ['T','null'], which is disallowed in strict_mode";
+        assert!(message.contains(expected_fragment));
+    }
+
+    /// Checks that a named Avro record referenced by name in later fields
+    /// decodes to the same Arrow type as its defining occurrence, and that the
+    /// decoded values of all three columns are as expected.
+    #[test]
+    fn test_nested_record_type_reuse() {
+        // The .avro file has the following schema:
+        // {
+        // "type" : "record",
+        // "name" : "Record",
+        // "fields" : [ {
+        //     "name" : "nested",
+        //     "type" : {
+        //     "type" : "record",
+        //     "name" : "Nested",
+        //     "fields" : [ {
+        //         "name" : "nested_int",
+        //         "type" : "int"
+        //     } ]
+        //     }
+        // }, {
+        //     "name" : "nestedRecord",
+        //     "type" : "Nested"
+        // }, {
+        //     "name" : "nestedArray",
+        //     "type" : {
+        //     "type" : "array",
+        //     "items" : "Nested"
+        //     }
+        // } ]
+        // }
+
+        /// Returns the `nested_int` child of a decoded `Nested` struct array.
+        fn nested_int_values(record: &StructArray) -> &Int32Array {
+            record
+                .column_by_name("nested_int")
+                .unwrap()
+                .as_any()
+                .downcast_ref::<Int32Array>()
+                .unwrap()
+        }
+
+        let batch = read_file("test/data/nested_record_reuse.avro", 8, false);
+        let schema = batch.schema();
+        let fields = schema.fields();
+
+        // Schema shape: three fields with the expected names and type kinds.
+        assert_eq!(fields.len(), 3);
+        assert_eq!(fields[0].name(), "nested");
+        assert_eq!(fields[1].name(), "nestedRecord");
+        assert_eq!(fields[2].name(), "nestedArray");
+        let nested_type = fields[0].data_type();
+        let nested_record_type = fields[1].data_type();
+        let nested_array_type = fields[2].data_type();
+        assert!(matches!(nested_type, DataType::Struct(_)));
+        assert!(matches!(nested_record_type, DataType::Struct(_)));
+        assert!(matches!(nested_array_type, DataType::List(_)));
+
+        // The nested record type has a single Int32 field named "nested_int".
+        if let DataType::Struct(members) = nested_type {
+            assert_eq!(members.len(), 1);
+            assert_eq!(members[0].name(), "nested_int");
+            assert_eq!(members[0].data_type(), &DataType::Int32);
+        }
+
+        // The by-name references resolve to the same type as the definition.
+        assert_eq!(nested_type, nested_record_type);
+        if let DataType::List(item_field) = nested_array_type {
+            assert_eq!(item_field.data_type(), nested_type);
+        }
+
+        // Data shape: two rows, three columns.
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 3);
+
+        // Column 0 (nested)
+        let nested = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        let nested_ints = nested_int_values(nested);
+        assert_eq!(nested_ints.value(0), 42);
+        assert_eq!(nested_ints.value(1), 99);
+
+        // Column 1 (nestedRecord)
+        let nested_record = batch
+            .column(1)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        let nested_record_ints = nested_int_values(nested_record);
+        assert_eq!(nested_record_ints.value(0), 100);
+        assert_eq!(nested_record_ints.value(1), 200);
+
+        // Column 2 (nestedArray): only the first row's list is inspected.
+        let nested_lists = batch
+            .column(2)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+        assert_eq!(nested_lists.len(), 2);
+        let first_list = nested_lists.value(0);
+        let first_list_structs = first_list
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        let first_list_ints = nested_int_values(first_list_structs);
+        assert_eq!(first_list_ints.len(), 3);
+        assert_eq!(first_list_ints.value(0), 1);
+        assert_eq!(first_list_ints.value(1), 2);
+        assert_eq!(first_list_ints.value(2), 3);
+    }
+
+    #[test]
+    fn test_enum_type_reuse() {
+        // Checks that a named enum defined once is decoded to the same Arrow type
+        // everywhere it is referenced: by name from a second field and as the item
+        // type of an array. The .avro file has the following schema:
+        // {
+        //     "type" : "record",
+        //     "name" : "Record",
+        //     "fields" : [ {
+        //       "name" : "status",
+        //       "type" : {
+        //         "type" : "enum",
+        //         "name" : "Status",
+        //         "symbols" : [ "ACTIVE", "INACTIVE", "PENDING" ]
+        //       }
+        //     }, {
+        //       "name" : "backupStatus",
+        //       "type" : "Status"
+        //     }, {
+        //       "name" : "statusHistory",
+        //       "type" : {
+        //         "type" : "array",
+        //         "items" : "Status"
+        //       }
+        //     } ]
+        //   }
+        let batch = read_file("test/data/enum_reuse.avro", 8, false);
+        let schema = batch.schema();
+        let fields = schema.fields();
+
+        // Verify schema structure: three fields, named as in the Avro schema and
+        // in declaration order.
+        assert_eq!(fields.len(), 3);
+        let expected_names = ["status", "backupStatus", "statusHistory"];
+        for (field, expected) in fields.iter().zip(expected_names) {
+            assert_eq!(field.name(), expected);
+        }
+
+        // Both enum fields must decode to dictionaries and the array field to a
+        // list.
+        let status_type = fields[0].data_type();
+        assert!(matches!(status_type, DataType::Dictionary(_, _)));
+        assert!(matches!(fields[1].data_type(), DataType::Dictionary(_, _)));
+        assert!(matches!(fields[2].data_type(), DataType::List(_)));
+
+        // The `matches!` assertion above guarantees this `if let` is taken, so the
+        // key/value checks cannot be skipped silently.
+        if let DataType::Dictionary(key_type, value_type) = status_type {
+            assert_eq!(key_type.as_ref(), &DataType::Int32);
+            assert_eq!(value_type.as_ref(), &DataType::Utf8);
+        }
+
+        // Validate that the enum type is reused: the by-name reference and the
+        // array items must have exactly the data type of the defining field.
+        assert_eq!(fields[1].data_type(), status_type);
+        if let DataType::List(item_field) = fields[2].data_type() {
+            assert_eq!(item_field.data_type(), status_type);
+        }
+
+        // Validate data - should have 2 rows
+        assert_eq!(batch.num_rows(), 2);
+        assert_eq!(batch.num_columns(), 3);
+
+        // Column 0 (`status`): split into the dictionary itself and the string
+        // values that its keys index into.
+        let status_col = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        let status_symbols = status_col
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+
+        // First row should be "ACTIVE", second row should be "PENDING". Each key is
+        // resolved through the dictionary values before comparing.
+        for (row, expected) in ["ACTIVE", "PENDING"].iter().enumerate() {
+            let key = status_col.key(row).unwrap() as usize;
+            assert_eq!(status_symbols.value(key), *expected);
+        }
+
+        // Column 1 (`backupStatus`): the same enum, referenced by name, split the
+        // same way.
+        let backup_col = batch
+            .column(1)
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        let backup_symbols = backup_col
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+
+        // First row should be "INACTIVE", second row should be "ACTIVE"
+        for (row, expected) in ["INACTIVE", "ACTIVE"].iter().enumerate() {
+            let key = backup_col.key(row).unwrap() as usize;
+            assert_eq!(backup_symbols.value(key), *expected);
+        }
+
+        // Column 2 (`statusHistory`): a list column, so each row holds an array of
+        // enum values.
+        let history_col = batch
+            .column(2)
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+        assert_eq!(history_col.len(), 2);
+
+        // Validate first row's array data; its items are downcast to a dictionary
+        // exactly like the two scalar enum columns.
+        let first_history = history_col.value(0);
+        let first_history_dict = first_history
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        let first_history_symbols = first_history_dict
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+
+        // First row: ["PENDING", "ACTIVE", "INACTIVE"]
+        assert_eq!(first_history_dict.len(), 3);
+        for (row, expected) in ["PENDING", "ACTIVE", "INACTIVE"].iter().enumerate() {
+            let key = first_history_dict.key(row).unwrap() as usize;
+            assert_eq!(first_history_symbols.value(key), *expected);
+        }
+    }
+
+    #[test]
+    fn comprehensive_e2e_test() {
+        let path = "test/data/comprehensive_e2e.avro";
+        let batch = read_file(path, 1024, false);
+        let schema = batch.schema();
+
+        /// Type id of the union member named `want`; panics if there is none.
+        #[inline]
+        fn tid_by_name(fields: &UnionFields, want: &str) -> i8 {
+            fields
+                .iter()
+                .find(|(_, member)| member.name() == want)
+                .map(|(type_id, _)| type_id)
+                .unwrap_or_else(|| panic!("union child '{want}' not found"))
+        }
+
+        /// Type id of the first union member whose data type satisfies `pred`.
+        #[inline]
+        fn tid_by_dt(fields: &UnionFields, pred: impl Fn(&DataType) -> bool) -> i8 {
+            fields
+                .iter()
+                .find(|(_, member)| pred(member.data_type()))
+                .map(|(type_id, _)| type_id)
+                .unwrap_or_else(|| panic!("no union child matches predicate"))
+        }
+
+        /// Assembles a dense `UnionArray` over `fields` from explicit `type_ids` and
+        /// `offsets`.
+        ///
+        /// `provide` is called once per union member, in `fields` order, to obtain
+        /// that member's child array. A member for which it returns `None` gets a
+        /// zero-length placeholder child, so callers only spell out the children
+        /// that actually hold values.
+        ///
+        /// Panics if `UnionArray::try_new` rejects the pieces, or if a defaulted
+        /// member has a type that `empty_child_for` does not handle.
+        fn mk_dense_union(
+            fields: &UnionFields,
+            type_ids: Vec<i8>,
+            offsets: Vec<i32>,
+            provide: impl Fn(&Field) -> Option<ArrayRef>,
+        ) -> ArrayRef {
+            /// Builds a zero-length array for the data type `dt`.
+            ///
+            /// Nested types recurse through this same function, so a type handled
+            /// here is handled at every nesting depth (struct member, list item,
+            /// map value, union child).
+            fn empty_child_for(dt: &DataType) -> Arc<dyn Array> {
+                match dt {
+                    DataType::Null => Arc::new(NullArray::new(0)),
+                    DataType::Boolean => Arc::new(BooleanArray::from(Vec::<bool>::new())),
+                    DataType::Int32 => Arc::new(Int32Array::from(Vec::<i32>::new())),
+                    DataType::Int64 => Arc::new(Int64Array::from(Vec::<i64>::new())),
+                    DataType::Float32 => Arc::new(Float32Array::from(Vec::<f32>::new())),
+                    DataType::Float64 => Arc::new(Float64Array::from(Vec::<f64>::new())),
+                    DataType::Binary => Arc::new(BinaryArray::from(Vec::<&[u8]>::new())),
+                    DataType::Utf8 => Arc::new(StringArray::from(Vec::<&str>::new())),
+                    DataType::Date32 => Arc::new(Date32Array::from(Vec::<i32>::new())),
+                    DataType::Time32(arrow_schema::TimeUnit::Millisecond) => {
+                        Arc::new(Time32MillisecondArray::from(Vec::<i32>::new()))
+                    }
+                    DataType::Time64(arrow_schema::TimeUnit::Microsecond) => {
+                        Arc::new(Time64MicrosecondArray::from(Vec::<i64>::new()))
+                    }
+                    DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, tz) => {
+                        let a = TimestampMillisecondArray::from(Vec::<i64>::new());
+                        Arc::new(if let Some(tz) = tz {
+                            a.with_timezone(tz.clone())
+                        } else {
+                            a
+                        })
+                    }
+                    DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, tz) => {
+                        let a = TimestampMicrosecondArray::from(Vec::<i64>::new());
+                        Arc::new(if let Some(tz) = tz {
+                            a.with_timezone(tz.clone())
+                        } else {
+                            a
+                        })
+                    }
+                    DataType::Interval(IntervalUnit::MonthDayNano) => Arc::new(
+                        IntervalMonthDayNanoArray::from(Vec::<IntervalMonthDayNano>::new()),
+                    ),
+                    DataType::FixedSizeBinary(sz) => Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                            std::iter::empty::<Option<Vec<u8>>>(),
+                            *sz,
+                        )
+                        .unwrap(),
+                    ),
+                    // Always Int32 keys over Utf8 values; `dt`'s own types are not consulted.
+                    DataType::Dictionary(_, _) => {
+                        let keys = Int32Array::from(Vec::<i32>::new());
+                        let values = Arc::new(StringArray::from(Vec::<&str>::new()));
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                    }
+                    DataType::Struct(fields) => {
+                        let children: Vec<ArrayRef> = fields
+                            .iter()
+                            .map(|f| empty_child_for(f.data_type()) as ArrayRef)
+                            .collect();
+                        Arc::new(StructArray::new(fields.clone(), children, None))
+                    }
+                    DataType::List(field) => {
+                        // A lone `0` offset describes a list array with no rows.
+                        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                        Arc::new(
+                            ListArray::try_new(
+                                field.clone(),
+                                offsets,
+                                empty_child_for(field.data_type()),
+                                None,
+                            )
+                            .unwrap(),
+                        )
+                    }
+                    DataType::Union(uf, mode) => {
+                        let children: Vec<ArrayRef> = uf
+                            .iter()
+                            .map(|(_, f)| empty_child_for(f.data_type()))
+                            .collect();
+                        // Only a dense union carries an offsets buffer.
+                        let offsets = match mode {
+                            UnionMode::Dense => Some(ScalarBuffer::<i32>::from(Vec::<i32>::new())),
+                            UnionMode::Sparse => None,
+                        };
+                        Arc::new(
+                            UnionArray::try_new(
+                                uf.clone(),
+                                ScalarBuffer::<i8>::from(Vec::<i8>::new()),
+                                offsets,
+                                children,
+                            )
+                            .unwrap(),
+                        )
+                    }
+                    DataType::Map(entry_field, is_sorted) => {
+                        let (key_field, val_field) = match entry_field.data_type() {
+                            DataType::Struct(fs) => (fs[0].clone(), fs[1].clone()),
+                            other => panic!("unexpected map entries type: {other:?}"),
+                        };
+                        // Keys are always built as an (empty) string array.
+                        let keys = StringArray::from(Vec::<&str>::new());
+                        let vals = empty_child_for(val_field.data_type());
+                        let entries = StructArray::new(
+                            Fields::from(vec![
+                                key_field.as_ref().clone(),
+                                val_field.as_ref().clone(),
+                            ]),
+                            vec![Arc::new(keys) as ArrayRef, vals],
+                            None,
+                        );
+                        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                        Arc::new(MapArray::new(
+                            entry_field.clone(),
+                            offsets,
+                            entries,
+                            None,
+                            *is_sorted,
+                        ))
+                    }
+                    other => panic!("empty_child_for: unhandled type {other:?}"),
+                }
+            }
+            // One child per union member, in `fields` order.
+            let children: Vec<ArrayRef> = fields
+                .iter()
+                .map(|(_, f)| provide(f).unwrap_or_else(|| empty_child_for(f.data_type())))
+                .collect();
+            Arc::new(
+                UnionArray::try_new(
+                    fields.clone(),
+                    ScalarBuffer::<i8>::from(type_ids),
+                    Some(ScalarBuffer::<i32>::from(offsets)),
+                    children,
+                )
+                .unwrap(),
+            ) as ArrayRef
+        }
+
+        /// Decodes a UUID in hex form (hyphens optional) into its 16 raw bytes.
+        ///
+        /// Panics on a non-hex character, or when the input does not hold exactly
+        /// 32 hex digits.
+        #[inline]
+        fn uuid16_from_str(s: &str) -> [u8; 16] {
+            let nibbles: Vec<u8> = s
+                .chars()
+                .filter(|&ch| ch != '-')
+                .map(|ch| ch.to_digit(16).expect("invalid hex digit in UUID") as u8)
+                .collect();
+            // Validate the digit count before writing anything: extra digits must not
+            // index past `out`, and a dangling odd digit must not be dropped silently.
+            assert_eq!(nibbles.len(), 32, "UUID must decode to 16 bytes");
+            let mut out = [0u8; 16];
+            for (byte, pair) in out.iter_mut().zip(nibbles.chunks_exact(2)) {
+                // The first digit of each pair is the high nibble.
+                *byte = (pair[0] << 4) | pair[1];
+            }
+            out
+        }
+        let date_a: i32 = 19_000; // 2022-01-08
+        let time_ms_a: i32 = 12 * 3_600_000 + 34 * 60_000 + 56_000 + 789;
+        let time_us_eod: i64 = 86_400_000_000 - 1;
+        let ts_ms_2024_01_01: i64 = 1_704_067_200_000; // 2024-01-01T00:00:00Z
+        let ts_us_2024_01_01: i64 = ts_ms_2024_01_01 * 1_000;
+        let dur_small = IntervalMonthDayNanoType::make_value(1, 2, 3_000_000_000);
+        let dur_zero = IntervalMonthDayNanoType::make_value(0, 0, 0);
+        let dur_large =
+            IntervalMonthDayNanoType::make_value(12, 31, ((86_400_000 - 1) as i64) * 1_000_000);
+        let dur_2years = IntervalMonthDayNanoType::make_value(24, 0, 0);
+        let uuid1 = uuid16_from_str("fe7bc30b-4ce8-4c5e-b67c-2234a2d38e66");
+        let uuid2 = uuid16_from_str("0826cc06-d2e3-4599-b4ad-af5fa6905cdb");
+
+        /// Appends `arr` and a field for it that mirrors `name`'s nullability and metadata.
+        #[inline]
+        fn push_like(
+            reader_schema: &arrow_schema::Schema,
+            name: &str,
+            arr: ArrayRef,
+            fields: &mut Vec<FieldRef>,
+            cols: &mut Vec<ArrayRef>,
+        ) {
+            let template = match reader_schema.field_with_name(name) {
+                Ok(found) => found,
+                Err(_) => panic!("source schema missing field '{name}'"),
+            };
+            // `Field::new` starts with empty metadata, so copying an empty map changes nothing.
+            let field = Field::new(name, arr.data_type().clone(), template.is_nullable())
+                .with_metadata(template.metadata().clone());
+            fields.push(Arc::new(field));
+            cols.push(arr);
+        }
+
+        let mut fields: Vec<FieldRef> = Vec::new();
+        let mut columns: Vec<ArrayRef> = Vec::new();
+        push_like(
+            schema.as_ref(),
+            "id",
+            Arc::new(Int64Array::from(vec![1, 2, 3, 4])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "flag",
+            Arc::new(BooleanArray::from(vec![true, false, true, false])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "ratio_f32",
+            Arc::new(Float32Array::from(vec![1.25f32, -0.0, 3.5, 9.75])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "ratio_f64",
+            Arc::new(Float64Array::from(vec![2.5f64, -1.0, 7.0, -2.25])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "count_i32",
+            Arc::new(Int32Array::from(vec![7, -1, 0, 123])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "count_i64",
+            Arc::new(Int64Array::from(vec![
+                7_000_000_000i64,
+                -2,
+                0,
+                -9_876_543_210i64,
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "opt_i32_nullfirst",
+            Arc::new(Int32Array::from(vec![None, Some(42), None, Some(0)])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "opt_str_nullsecond",
+            Arc::new(StringArray::from(vec![
+                Some("alpha"),
+                None,
+                Some("s3"),
+                Some(""),
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        {
+            let uf = match schema
+                .field_with_name("tri_union_prim")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("tri_union_prim should be dense union, got {other:?}"),
+            };
+            let tid_i = tid_by_name(&uf, "int");
+            let tid_s = tid_by_name(&uf, "string");
+            let tid_b = tid_by_name(&uf, "boolean");
+            let tids = vec![tid_i, tid_s, tid_b, tid_s];
+            let offs = vec![0, 0, 0, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Int32 => Some(Arc::new(Int32Array::from(vec![0])) as ArrayRef),
+                DataType::Utf8 => Some(Arc::new(StringArray::from(vec!["hi", ""])) as ArrayRef),
+                DataType::Boolean => Some(Arc::new(BooleanArray::from(vec![true])) as ArrayRef),
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "tri_union_prim",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+
+        push_like(
+            schema.as_ref(),
+            "str_utf8",
+            Arc::new(StringArray::from(vec!["hello", "", "world", "✓ unicode"])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "raw_bytes",
+            Arc::new(BinaryArray::from(vec![
+                b"\x00\x01".as_ref(),
+                b"".as_ref(),
+                b"\xFF\x00".as_ref(),
+                b"\x10\x20\x30\x40".as_ref(),
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        {
+            let it = [
+                Some(*b"0123456789ABCDEF"),
+                Some([0u8; 16]),
+                Some(*b"ABCDEFGHIJKLMNOP"),
+                Some([0xAA; 16]),
+            ]
+            .into_iter();
+            let arr =
+                Arc::new(FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap())
+                    as ArrayRef;
+            push_like(
+                schema.as_ref(),
+                "fx16_plain",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            #[cfg(feature = "small_decimals")]
+            let dec10_2 = Arc::new(
+                Decimal64Array::from_iter_values([123456i64, -1, 0, 9_999_999_999i64])
+                    .with_precision_and_scale(10, 2)
+                    .unwrap(),
+            ) as ArrayRef;
+            #[cfg(not(feature = "small_decimals"))]
+            let dec10_2 = Arc::new(
+                Decimal128Array::from_iter_values([123456i128, -1, 0, 9_999_999_999i128])
+                    .with_precision_and_scale(10, 2)
+                    .unwrap(),
+            ) as ArrayRef;
+            push_like(
+                schema.as_ref(),
+                "dec_bytes_s10_2",
+                dec10_2,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            #[cfg(feature = "small_decimals")]
+            let dec20_4 = Arc::new(
+                Decimal128Array::from_iter_values([1_234_567_891_234i128, -420_000i128, 0, -1i128])
+                    .with_precision_and_scale(20, 4)
+                    .unwrap(),
+            ) as ArrayRef;
+            #[cfg(not(feature = "small_decimals"))]
+            let dec20_4 = Arc::new(
+                Decimal128Array::from_iter_values([1_234_567_891_234i128, -420_000i128, 0, -1i128])
+                    .with_precision_and_scale(20, 4)
+                    .unwrap(),
+            ) as ArrayRef;
+            push_like(
+                schema.as_ref(),
+                "dec_fix_s20_4",
+                dec20_4,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let it = [Some(uuid1), Some(uuid2), Some(uuid1), Some(uuid2)].into_iter();
+            let arr =
+                Arc::new(FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap())
+                    as ArrayRef;
+            push_like(schema.as_ref(), "uuid_str", arr, &mut fields, &mut columns);
+        }
+        push_like(
+            schema.as_ref(),
+            "d_date",
+            Arc::new(Date32Array::from(vec![date_a, 0, 1, 365])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "t_millis",
+            Arc::new(Time32MillisecondArray::from(vec![
+                time_ms_a,
+                0,
+                1,
+                86_400_000 - 1,
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "t_micros",
+            Arc::new(Time64MicrosecondArray::from(vec![
+                time_us_eod,
+                0,
+                1,
+                1_000_000,
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        {
+            let a = TimestampMillisecondArray::from(vec![
+                ts_ms_2024_01_01,
+                -1,
+                ts_ms_2024_01_01 + 123,
+                0,
+            ])
+            .with_timezone("+00:00");
+            push_like(
+                schema.as_ref(),
+                "ts_millis_utc",
+                Arc::new(a) as ArrayRef,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let a = TimestampMicrosecondArray::from(vec![
+                ts_us_2024_01_01,
+                1,
+                ts_us_2024_01_01 + 456,
+                0,
+            ])
+            .with_timezone("+00:00");
+            push_like(
+                schema.as_ref(),
+                "ts_micros_utc",
+                Arc::new(a) as ArrayRef,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        push_like(
+            schema.as_ref(),
+            "ts_millis_local",
+            Arc::new(TimestampMillisecondArray::from(vec![
+                ts_ms_2024_01_01 + 86_400_000,
+                0,
+                ts_ms_2024_01_01 + 789,
+                123_456_789,
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        push_like(
+            schema.as_ref(),
+            "ts_micros_local",
+            Arc::new(TimestampMicrosecondArray::from(vec![
+                ts_us_2024_01_01 + 123_456,
+                0,
+                ts_us_2024_01_01 + 101_112,
+                987_654_321,
+            ])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        {
+            let v = vec![dur_small, dur_zero, dur_large, dur_2years];
+            push_like(
+                schema.as_ref(),
+                "interval_mdn",
+                Arc::new(IntervalMonthDayNanoArray::from(v)) as ArrayRef,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let keys = Int32Array::from(vec![1, 2, 3, 0]); // NEW, PROCESSING, DONE, UNKNOWN
+            let values = Arc::new(StringArray::from(vec![
+                "UNKNOWN",
+                "NEW",
+                "PROCESSING",
+                "DONE",
+            ])) as ArrayRef;
+            let dict = DictionaryArray::<Int32Type>::try_new(keys, values).unwrap();
+            push_like(
+                schema.as_ref(),
+                "status",
+                Arc::new(dict) as ArrayRef,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let list_field = match schema.field_with_name("arr_union").unwrap().data_type() {
+                DataType::List(f) => f.clone(),
+                other => panic!("arr_union should be List, got {other:?}"),
+            };
+            let uf = match list_field.data_type() {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("arr_union item should be union, got {other:?}"),
+            };
+            let tid_l = tid_by_name(&uf, "long");
+            let tid_s = tid_by_name(&uf, "string");
+            let tid_n = tid_by_name(&uf, "null");
+            let type_ids = vec![
+                tid_l, tid_s, tid_n, tid_l, tid_n, tid_s, tid_l, tid_l, tid_s, tid_n, tid_l,
+            ];
+            let offsets = vec![0, 0, 0, 1, 1, 1, 2, 3, 2, 2, 4];
+            let values = mk_dense_union(&uf, type_ids, offsets, |f| match f.data_type() {
+                DataType::Int64 => {
+                    Some(Arc::new(Int64Array::from(vec![1i64, -3, 0, -1, 0])) as ArrayRef)
+                }
+                DataType::Utf8 => {
+                    Some(Arc::new(StringArray::from(vec!["x", "z", "end"])) as ArrayRef)
+                }
+                DataType::Null => Some(Arc::new(NullArray::new(3)) as ArrayRef),
+                _ => None,
+            });
+            let list_offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 4, 7, 8, 11]));
+            let arr = Arc::new(ListArray::try_new(list_field, list_offsets, values, None).unwrap())
+                as ArrayRef;
+            push_like(schema.as_ref(), "arr_union", arr, &mut fields, &mut columns);
+        }
+        {
+            let (entry_field, entries_fields, uf, is_sorted) =
+                match schema.field_with_name("map_union").unwrap().data_type() {
+                    DataType::Map(entry_field, is_sorted) => {
+                        let fs = match entry_field.data_type() {
+                            DataType::Struct(fs) => fs.clone(),
+                            other => panic!("map entries must be struct, got {other:?}"),
+                        };
+                        let val_f = fs[1].clone();
+                        let uf = match val_f.data_type() {
+                            DataType::Union(f, UnionMode::Dense) => f.clone(),
+                            other => panic!("map value must be union, got {other:?}"),
+                        };
+                        (entry_field.clone(), fs, uf, *is_sorted)
+                    }
+                    other => panic!("map_union should be Map, got {other:?}"),
+                };
+            let keys = StringArray::from(vec!["a", "b", "c", "neg", "pi", "ok"]);
+            let moff = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 4, 4, 6]));
+            let tid_null = tid_by_name(&uf, "null");
+            let tid_d = tid_by_name(&uf, "double");
+            let tid_s = tid_by_name(&uf, "string");
+            let type_ids = vec![tid_d, tid_null, tid_s, tid_d, tid_d, tid_s];
+            let offsets = vec![0, 0, 0, 1, 2, 1];
+            let pi_5dp = (std::f64::consts::PI * 100_000.0).trunc() / 100_000.0;
+            let vals = mk_dense_union(&uf, type_ids, offsets, |f| match f.data_type() {
+                DataType::Float64 => {
+                    Some(Arc::new(Float64Array::from(vec![1.5f64, -0.5, pi_5dp])) as ArrayRef)
+                }
+                DataType::Utf8 => {
+                    Some(Arc::new(StringArray::from(vec!["yes", "true"])) as ArrayRef)
+                }
+                DataType::Null => Some(Arc::new(NullArray::new(2)) as ArrayRef),
+                _ => None,
+            });
+            let entries = StructArray::new(
+                entries_fields.clone(),
+                vec![Arc::new(keys) as ArrayRef, vals],
+                None,
+            );
+            let map =
+                Arc::new(MapArray::new(entry_field, moff, entries, None, is_sorted)) as ArrayRef;
+            push_like(schema.as_ref(), "map_union", map, &mut fields, &mut columns);
+        }
+        {
+            let fs = match schema.field_with_name("address").unwrap().data_type() {
+                DataType::Struct(fs) => fs.clone(),
+                other => panic!("address should be Struct, got {other:?}"),
+            };
+            let street = Arc::new(StringArray::from(vec![
+                "100 Main",
+                "",
+                "42 Galaxy Way",
+                "End Ave",
+            ])) as ArrayRef;
+            let zip = Arc::new(Int32Array::from(vec![12345, 0, 42424, 1])) as ArrayRef;
+            let country = Arc::new(StringArray::from(vec!["US", "CA", "US", "GB"])) as ArrayRef;
+            let arr = Arc::new(StructArray::new(fs, vec![street, zip, country], None)) as ArrayRef;
+            push_like(schema.as_ref(), "address", arr, &mut fields, &mut columns);
+        }
+        {
+            let fs = match schema.field_with_name("maybe_auth").unwrap().data_type() {
+                DataType::Struct(fs) => fs.clone(),
+                other => panic!("maybe_auth should be Struct, got {other:?}"),
+            };
+            let user =
+                Arc::new(StringArray::from(vec!["alice", "bob", "carol", "dave"])) as ArrayRef;
+            let token_values: Vec<Option<&[u8]>> = vec![
+                None,                           // row 1: null
+                Some(b"\x01\x02\x03".as_ref()), // row 2: bytes
+                None,                           // row 3: null
+                Some(b"".as_ref()),             // row 4: empty bytes
+            ];
+            let token = Arc::new(BinaryArray::from(token_values)) as ArrayRef;
+            let arr = Arc::new(StructArray::new(fs, vec![user, token], None)) as ArrayRef;
+            push_like(
+                schema.as_ref(),
+                "maybe_auth",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let uf = match schema
+                .field_with_name("union_enum_record_array_map")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("union_enum_record_array_map should be union, got {other:?}"),
+            };
+            let mut tid_enum: Option<i8> = None;
+            let mut tid_rec_a: Option<i8> = None;
+            let mut tid_array: Option<i8> = None;
+            let mut tid_map: Option<i8> = None;
+            let mut map_entry_field: Option<FieldRef> = None;
+            let mut map_sorted: bool = false;
+            for (tid, f) in uf.iter() {
+                match f.data_type() {
+                    DataType::Dictionary(_, _) => tid_enum = Some(tid),
+                    DataType::Struct(childs)
+                        if childs.len() == 2
+                            && childs[0].name() == "a"
+                            && childs[1].name() == "b" =>
+                    {
+                        tid_rec_a = Some(tid)
+                    }
+                    DataType::List(item) if matches!(item.data_type(), DataType::Int64) => {
+                        tid_array = Some(tid)
+                    }
+                    DataType::Map(ef, is_sorted) => {
+                        tid_map = Some(tid);
+                        map_entry_field = Some(ef.clone());
+                        map_sorted = *is_sorted;
+                    }
+                    _ => {}
+                }
+            }
+            let (tid_enum, tid_rec_a, tid_array, tid_map) = (
+                tid_enum.unwrap(),
+                tid_rec_a.unwrap(),
+                tid_array.unwrap(),
+                tid_map.unwrap(),
+            );
+            let tids = vec![tid_enum, tid_rec_a, tid_array, tid_map];
+            let offs = vec![0, 0, 0, 0];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Dictionary(_, _) => {
+                    let keys = Int32Array::from(vec![0i32]);
+                    let values =
+                        Arc::new(StringArray::from(vec!["RED", "GREEN", "BLUE"])) as ArrayRef;
+                    Some(
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                            as ArrayRef,
+                    )
+                }
+                DataType::Struct(fs)
+                    if fs.len() == 2 && fs[0].name() == "a" && fs[1].name() == "b" =>
+                {
+                    let a = Int32Array::from(vec![7]);
+                    let b = StringArray::from(vec!["rec"]);
+                    Some(Arc::new(StructArray::new(
+                        fs.clone(),
+                        vec![Arc::new(a), Arc::new(b)],
+                        None,
+                    )) as ArrayRef)
+                }
+                DataType::List(field) => {
+                    let values = Int64Array::from(vec![1i64, 2, 3]);
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3]));
+                    Some(Arc::new(
+                        ListArray::try_new(field.clone(), offsets, Arc::new(values), None).unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::Map(_, _) => {
+                    let entry_field = map_entry_field.clone().unwrap();
+                    let (key_field, val_field) = match entry_field.data_type() {
+                        DataType::Struct(fs) => (fs[0].clone(), fs[1].clone()),
+                        _ => unreachable!(),
+                    };
+                    let keys = StringArray::from(vec!["k"]);
+                    let vals = StringArray::from(vec!["v"]);
+                    let entries = StructArray::new(
+                        Fields::from(vec![key_field.as_ref().clone(), val_field.as_ref().clone()]),
+                        vec![Arc::new(keys) as ArrayRef, Arc::new(vals) as ArrayRef],
+                        None,
+                    );
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1]));
+                    Some(Arc::new(MapArray::new(
+                        entry_field.clone(),
+                        offsets,
+                        entries,
+                        None,
+                        map_sorted,
+                    )) as ArrayRef)
+                }
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "union_enum_record_array_map",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let uf = match schema
+                .field_with_name("union_date_or_fixed4")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("union_date_or_fixed4 should be union, got {other:?}"),
+            };
+            let tid_date = tid_by_dt(&uf, |dt| matches!(dt, DataType::Date32));
+            let tid_fx4 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(4)));
+            let tids = vec![tid_date, tid_fx4, tid_date, tid_fx4];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Date32 => Some(Arc::new(Date32Array::from(vec![date_a, 0])) as ArrayRef),
+                DataType::FixedSizeBinary(4) => {
+                    let it = [Some(*b"\x00\x11\x22\x33"), Some(*b"ABCD")].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 4).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "union_date_or_fixed4",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let uf = match schema
+                .field_with_name("union_interval_or_string")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("union_interval_or_string should be union, got {other:?}"),
+            };
+            let tid_dur = tid_by_dt(&uf, |dt| {
+                matches!(dt, DataType::Interval(IntervalUnit::MonthDayNano))
+            });
+            let tid_str = tid_by_dt(&uf, |dt| matches!(dt, DataType::Utf8));
+            let tids = vec![tid_dur, tid_str, tid_dur, tid_str];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Interval(IntervalUnit::MonthDayNano) => Some(Arc::new(
+                    IntervalMonthDayNanoArray::from(vec![dur_small, dur_large]),
+                )
+                    as ArrayRef),
+                DataType::Utf8 => Some(Arc::new(StringArray::from(vec![
+                    "duration-as-text",
+                    "iso-8601-period-P1Y",
+                ])) as ArrayRef),
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "union_interval_or_string",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let uf = match schema
+                .field_with_name("union_uuid_or_fixed10")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("union_uuid_or_fixed10 should be union, got {other:?}"),
+            };
+            let tid_uuid = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(16)));
+            let tid_fx10 = tid_by_dt(&uf, |dt| matches!(dt, DataType::FixedSizeBinary(10)));
+            let tids = vec![tid_uuid, tid_fx10, tid_uuid, tid_fx10];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::FixedSizeBinary(16) => {
+                    let it = [Some(uuid1), Some(uuid2)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::FixedSizeBinary(10) => {
+                    let fx10_a = [0xAAu8; 10];
+                    let fx10_b = [0x00u8, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99];
+                    let it = [Some(fx10_a), Some(fx10_b)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 10).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "union_uuid_or_fixed10",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let list_field = match schema
+                .field_with_name("array_records_with_union")
+                .unwrap()
+                .data_type()
+            {
+                DataType::List(f) => f.clone(),
+                other => panic!("array_records_with_union should be List, got {other:?}"),
+            };
+            let kv_fields = match list_field.data_type() {
+                DataType::Struct(fs) => fs.clone(),
+                other => panic!("array_records_with_union items must be Struct, got {other:?}"),
+            };
+            let val_field = kv_fields
+                .iter()
+                .find(|f| f.name() == "val")
+                .unwrap()
+                .clone();
+            let uf = match val_field.data_type() {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("KV.val should be union, got {other:?}"),
+            };
+            let keys = Arc::new(StringArray::from(vec!["k1", "k2", "k", "k3", "x"])) as ArrayRef;
+            let tid_null = tid_by_name(&uf, "null");
+            let tid_i = tid_by_name(&uf, "int");
+            let tid_l = tid_by_name(&uf, "long");
+            let type_ids = vec![tid_i, tid_null, tid_l, tid_null, tid_i];
+            let offsets = vec![0, 0, 0, 1, 1];
+            let vals = mk_dense_union(&uf, type_ids, offsets, |f| match f.data_type() {
+                DataType::Int32 => Some(Arc::new(Int32Array::from(vec![5, -5])) as ArrayRef),
+                DataType::Int64 => Some(Arc::new(Int64Array::from(vec![99i64])) as ArrayRef),
+                DataType::Null => Some(Arc::new(NullArray::new(2)) as ArrayRef),
+                _ => None,
+            });
+            let values_struct =
+                Arc::new(StructArray::new(kv_fields.clone(), vec![keys, vals], None)) as ArrayRef;
+            let list_offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 4, 5]));
+            let arr = Arc::new(
+                ListArray::try_new(list_field, list_offsets, values_struct, None).unwrap(),
+            ) as ArrayRef;
+            push_like(
+                schema.as_ref(),
+                "array_records_with_union",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        {
+            let uf = match schema
+                .field_with_name("union_map_or_array_int")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Union(f, UnionMode::Dense) => f.clone(),
+                other => panic!("union_map_or_array_int should be union, got {other:?}"),
+            };
+            let tid_map = tid_by_dt(&uf, |dt| matches!(dt, DataType::Map(_, _)));
+            let tid_list = tid_by_dt(&uf, |dt| matches!(dt, DataType::List(_)));
+            let map_child: ArrayRef = {
+                let (entry_field, is_sorted) = match uf
+                    .iter()
+                    .find(|(tid, _)| *tid == tid_map)
+                    .unwrap()
+                    .1
+                    .data_type()
+                {
+                    DataType::Map(ef, is_sorted) => (ef.clone(), *is_sorted),
+                    _ => unreachable!(),
+                };
+                let (key_field, val_field) = match entry_field.data_type() {
+                    DataType::Struct(fs) => (fs[0].clone(), fs[1].clone()),
+                    _ => unreachable!(),
+                };
+                let keys = StringArray::from(vec!["x", "y", "only"]);
+                let vals = Int32Array::from(vec![1, 2, 10]);
+                let entries = StructArray::new(
+                    Fields::from(vec![key_field.as_ref().clone(), val_field.as_ref().clone()]),
+                    vec![Arc::new(keys) as ArrayRef, Arc::new(vals) as ArrayRef],
+                    None,
+                );
+                let moff = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3]));
+                Arc::new(MapArray::new(entry_field, moff, entries, None, is_sorted)) as ArrayRef
+            };
+            let list_child: ArrayRef = {
+                let list_field = match uf
+                    .iter()
+                    .find(|(tid, _)| *tid == tid_list)
+                    .unwrap()
+                    .1
+                    .data_type()
+                {
+                    DataType::List(f) => f.clone(),
+                    _ => unreachable!(),
+                };
+                let values = Int32Array::from(vec![1, 2, 3, 0]);
+                let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 4]));
+                Arc::new(ListArray::try_new(list_field, offsets, Arc::new(values), None).unwrap())
+                    as ArrayRef
+            };
+            let tids = vec![tid_map, tid_list, tid_map, tid_list];
+            let offs = vec![0, 0, 1, 1];
+            let arr = mk_dense_union(&uf, tids, offs, |f| match f.data_type() {
+                DataType::Map(_, _) => Some(map_child.clone()),
+                DataType::List(_) => Some(list_child.clone()),
+                _ => None,
+            });
+            push_like(
+                schema.as_ref(),
+                "union_map_or_array_int",
+                arr,
+                &mut fields,
+                &mut columns,
+            );
+        }
+        push_like(
+            schema.as_ref(),
+            "renamed_with_default",
+            Arc::new(Int32Array::from(vec![100, 42, 7, 42])) as ArrayRef,
+            &mut fields,
+            &mut columns,
+        );
+        {
+            let fs = match schema.field_with_name("person").unwrap().data_type() {
+                DataType::Struct(fs) => fs.clone(),
+                other => panic!("person should be Struct, got {other:?}"),
+            };
+            let name =
+                Arc::new(StringArray::from(vec!["Alice", "Bob", "Carol", "Dave"])) as ArrayRef;
+            let age = Arc::new(Int32Array::from(vec![30, 0, 25, 41])) as ArrayRef;
+            let arr = Arc::new(StructArray::new(fs, vec![name, age], None)) as ArrayRef;
+            push_like(schema.as_ref(), "person", arr, &mut fields, &mut columns);
+        }
+        let expected =
+            RecordBatch::try_new(Arc::new(Schema::new(Fields::from(fields))), columns).unwrap();
+        assert_eq!(
+            expected, batch,
+            "entire RecordBatch mismatch (schema, all columns, all rows)"
+        );
+    }
+    #[test]
+    fn comprehensive_e2e_resolution_test() {
+        use serde_json::Value;
+        use std::collections::HashMap;
+
+        // Build a reader schema that stresses Avro schema‑resolution
+        //
+        // Changes relative to writer schema:
+        // * Rename fields using writer aliases:    id -> identifier, renamed_with_default -> old_count
+        // * Promote numeric types:                 count_i32 (int) -> long, ratio_f32 (float) -> double
+        // * Reorder many union branches (reverse), incl. nested unions
+        // * Reorder array/map union item/value branches
+        // * Rename nested Address field:           street -> street_name (uses alias in writer)
+        // * Change Person type name/namespace:     com.example.Person (matches writer alias)
+        // * Reverse top‑level field order
+        //
+        // Reader‑side aliases are added wherever names change (per Avro spec).
+        fn make_comprehensive_reader_schema(path: &str) -> AvroSchema {
+            // Replaces the schema stored under `f["type"]` with the type name `new_ty`.
+            //
+            // A plain type (JSON string or object) is overwritten wholesale. For a
+            // union (JSON array) only the first branch that is an object, or a string
+            // other than "null", is replaced; all other branches stay as they are.
+            // A missing "type" key or any other JSON kind leaves `f` untouched.
+            fn set_type_string(f: &mut Value, new_ty: &str) {
+                let Some(ty) = f.get_mut("type") else {
+                    return;
+                };
+                // A union branch qualifies unless it is the "null" marker (or some
+                // JSON kind that is neither a string nor an object).
+                let replaceable = |branch: &Value| match branch {
+                    Value::String(s) => s != "null",
+                    Value::Object(_) => true,
+                    _ => false,
+                };
+                match ty {
+                    Value::Array(branches) => {
+                        if let Some(slot) = branches.iter_mut().find(|b| replaceable(&**b)) {
+                            *slot = Value::String(new_ty.to_string());
+                        }
+                    }
+                    Value::String(_) | Value::Object(_) => {
+                        *ty = Value::String(new_ty.to_string());
+                    }
+                    _ => {}
+                }
+            }
+            // Reverses, in place, the branch order of a field whose "type" is a union
+            // (a JSON array). Fields with any other "type" shape are left unchanged.
+            fn reverse_union_array(f: &mut Value) {
+                if let Some(Value::Array(branches)) = f.get_mut("type") {
+                    branches.reverse();
+                }
+            }
+            // Reverses, in place, the union found under `f["type"]["items"]`.
+            // Does nothing unless "type" is a JSON object whose "items" is an array.
+            fn reverse_items_union(f: &mut Value) {
+                let items = f
+                    .get_mut("type")
+                    .and_then(Value::as_object_mut)
+                    .and_then(|array_ty| array_ty.get_mut("items"));
+                if let Some(Value::Array(branches)) = items {
+                    branches.reverse();
+                }
+            }
+            // Reverses, in place, the union found under `f["type"]["values"]`.
+            // Does nothing unless "type" is a JSON object whose "values" is an array.
+            fn reverse_map_values_union(f: &mut Value) {
+                let Some(map_ty) = f.get_mut("type").and_then(Value::as_object_mut) else {
+                    return;
+                };
+                if let Some(Value::Array(branches)) = map_ty.get_mut("values") {
+                    branches.reverse();
+                }
+            }
+            // Within the record schema stored under `f["type"]`, reverses the union of
+            // every nested field named `field_name`.
+            //
+            // Nested fields whose "type" is not a JSON array are skipped, and nothing
+            // happens when `f["type"]` is not an object carrying a "fields" array.
+            fn reverse_nested_union_in_record(f: &mut Value, field_name: &str) {
+                let nested_fields = f
+                    .get_mut("type")
+                    .and_then(Value::as_object_mut)
+                    .and_then(|record| record.get_mut("fields"))
+                    .and_then(Value::as_array_mut);
+                let Some(nested_fields) = nested_fields else {
+                    return;
+                };
+                for nested in nested_fields.iter_mut() {
+                    if nested.get("name").and_then(Value::as_str) != Some(field_name) {
+                        continue;
+                    }
+                    if let Some(Value::Array(branches)) = nested.get_mut("type") {
+                        branches.reverse();
+                    }
+                }
+            }
+            // Within the record schema stored under `f["type"]`, renames every nested
+            // field called `old` to `new` and sets its "aliases" to `[old]`, so the
+            // renamed field still names the original one as an alias.
+            //
+            // Nothing happens when `f["type"]` is not an object with a "fields" array.
+            fn rename_nested_field_with_alias(f: &mut Value, old: &str, new: &str) {
+                let Some(record) = f.get_mut("type").and_then(Value::as_object_mut) else {
+                    return;
+                };
+                let Some(Value::Array(nested_fields)) = record.get_mut("fields") else {
+                    return;
+                };
+                nested_fields
+                    .iter_mut()
+                    .filter(|nested| nested.get("name").and_then(Value::as_str) == Some(old))
+                    .for_each(|nested| {
+                        nested["name"] = Value::String(new.to_string());
+                        nested["aliases"] = Value::Array(vec![Value::String(old.to_string())]);
+                    });
+            }
+            // Start from the writer schema JSON obtained for `path` and rewrite it
+            // field by field into the reader schema.
+            let mut root = load_writer_schema_json(path);
+            assert_eq!(root["type"], "record", "writer schema must be a record");
+            let fields = root
+                .get_mut("fields")
+                .and_then(Value::as_array_mut)
+                .expect("record has fields");
+            // Gives a top-level field its reader-side name and records the name it
+            // had in the writer schema as the sole alias.
+            let rename_top_level = |field: &mut Value, reader_name: &str, writer_name: &str| {
+                field["name"] = Value::String(reader_name.into());
+                field["aliases"] = Value::Array(vec![Value::String(writer_name.into())]);
+            };
+            for field in fields.iter_mut() {
+                // Entries without a string "name" are passed through unchanged.
+                let Some(writer_name) = field.get("name").and_then(Value::as_str) else {
+                    continue;
+                };
+                match writer_name {
+                    // Field aliasing (reader-side aliases added)
+                    "id" => rename_top_level(field, "identifier", "id"),
+                    "renamed_with_default" => {
+                        rename_top_level(field, "old_count", "renamed_with_default")
+                    }
+                    // Promotions
+                    "count_i32" => set_type_string(field, "long"),
+                    "ratio_f32" => set_type_string(field, "double"),
+                    // Union reorder (exercise resolution)
+                    "opt_str_nullsecond"
+                    | "union_enum_record_array_map"
+                    | "union_date_or_fixed4"
+                    | "union_interval_or_string"
+                    | "union_uuid_or_fixed10"
+                    | "union_map_or_array_int" => reverse_union_array(field),
+                    "maybe_auth" => reverse_nested_union_in_record(field, "token"),
+                    // Array/Map unions
+                    "arr_union" => reverse_items_union(field),
+                    "map_union" => reverse_map_values_union(field),
+                    // Nested rename using reader-side alias
+                    "address" => rename_nested_field_with_alias(field, "street", "street_name"),
+                    // Type-name alias for nested record
+                    "person" => {
+                        if let Some(Value::Object(record)) = field.get_mut("type") {
+                            let aliases: Vec<Value> = ["PersonV2", "com.example.v2.PersonV2"]
+                                .into_iter()
+                                .map(|alias| Value::String(alias.into()))
+                                .collect();
+                            record.insert("name".to_string(), Value::String("Person".into()));
+                            record.insert(
+                                "namespace".to_string(),
+                                Value::String("com.example".into()),
+                            );
+                            record.insert("aliases".into(), Value::Array(aliases));
+                        }
+                    }
+                    _ => {}
+                }
+            }
+            // Finally flip the top-level field order as well.
+            fields.reverse();
+            AvroSchema::new(root.to_string())
+        }
+
+        let path = "test/data/comprehensive_e2e.avro";
+        let reader_schema = make_comprehensive_reader_schema(path);
+        let batch = read_alltypes_with_reader_schema(path, reader_schema.clone());
+
+        const UUID_EXT_KEY: &str = "ARROW:extension:name";
+        const UUID_LOGICAL_KEY: &str = "logicalType";
+
+        let uuid_md_top: Option<HashMap<String, String>> = batch
+            .schema()
+            .field_with_name("uuid_str")
+            .ok()
+            .and_then(|f| {
+                let md = f.metadata();
+                let has_ext = md.get(UUID_EXT_KEY).is_some();
+                let is_uuid_logical = md
+                    .get(UUID_LOGICAL_KEY)
+                    .map(|v| v.trim_matches('"') == "uuid")
+                    .unwrap_or(false);
+                if has_ext || is_uuid_logical {
+                    Some(md.clone())
+                } else {
+                    None
+                }
+            });
+
+        let uuid_md_union: Option<HashMap<String, String>> = batch
+            .schema()
+            .field_with_name("union_uuid_or_fixed10")
+            .ok()
+            .and_then(|f| match f.data_type() {
+                DataType::Union(uf, _) => uf
+                    .iter()
+                    .find(|(_, child)| child.name() == "uuid")
+                    .and_then(|(_, child)| {
+                        let md = child.metadata();
+                        let has_ext = md.get(UUID_EXT_KEY).is_some();
+                        let is_uuid_logical = md
+                            .get(UUID_LOGICAL_KEY)
+                            .map(|v| v.trim_matches('"') == "uuid")
+                            .unwrap_or(false);
+                        if has_ext || is_uuid_logical {
+                            Some(md.clone())
+                        } else {
+                            None
+                        }
+                    }),
+                _ => None,
+            });
+
+        let add_uuid_ext_top = |f: Field| -> Field {
+            if let Some(md) = &uuid_md_top {
+                f.with_metadata(md.clone())
+            } else {
+                f
+            }
+        };
+        let add_uuid_ext_union = |f: Field| -> Field {
+            if let Some(md) = &uuid_md_union {
+                f.with_metadata(md.clone())
+            } else {
+                f
+            }
+        };
+
+        // Decodes a textual UUID (hex digits, with or without `-` separators) into
+        // its 16 raw bytes; each pair of digits forms one byte, high nibble first.
+        //
+        // Panics if `s` contains a character that is neither `-` nor a hex digit, or
+        // if it does not hold exactly 32 hex digits. Counting digits (rather than
+        // completed bytes) also rejects a dangling trailing nibble and keeps
+        // over-long input from indexing past the end of the buffer.
+        #[inline]
+        fn uuid16_from_str(s: &str) -> [u8; 16] {
+            const HEX_DIGITS: usize = 32;
+            let mut out = [0u8; 16];
+            let mut seen = 0usize;
+            for ch in s.chars().filter(|&c| c != '-') {
+                let nibble = ch.to_digit(16).expect("invalid hex digit in UUID") as u8;
+                assert!(seen < HEX_DIGITS, "UUID must decode to 16 bytes");
+                // Even positions carry the high nibble of a byte, odd positions the low one.
+                let shift = if seen % 2 == 0 { 4 } else { 0 };
+                out[seen / 2] |= nibble << shift;
+                seen += 1;
+            }
+            assert_eq!(seen, HEX_DIGITS, "UUID must decode to 16 bytes");
+            out
+        }
+
+        fn mk_dense_union(
+            fields: &UnionFields,
+            type_ids: Vec<i8>,
+            offsets: Vec<i32>,
+            provide: impl Fn(&Field) -> Option<ArrayRef>,
+        ) -> ArrayRef {
+            fn empty_child_for(dt: &DataType) -> Arc<dyn Array> {
+                match dt {
+                    DataType::Null => Arc::new(NullArray::new(0)),
+                    DataType::Boolean => Arc::new(BooleanArray::from(Vec::<bool>::new())),
+                    DataType::Int32 => Arc::new(Int32Array::from(Vec::<i32>::new())),
+                    DataType::Int64 => Arc::new(Int64Array::from(Vec::<i64>::new())),
+                    DataType::Float32 => Arc::new(Float32Array::from(Vec::<f32>::new())),
+                    DataType::Float64 => Arc::new(Float64Array::from(Vec::<f64>::new())),
+                    DataType::Binary => Arc::new(BinaryArray::from(Vec::<&[u8]>::new())),
+                    DataType::Utf8 => Arc::new(StringArray::from(Vec::<&str>::new())),
+                    DataType::Date32 => Arc::new(Date32Array::from(Vec::<i32>::new())),
+                    DataType::Time32(arrow_schema::TimeUnit::Millisecond) => {
+                        Arc::new(Time32MillisecondArray::from(Vec::<i32>::new()))
+                    }
+                    DataType::Time64(arrow_schema::TimeUnit::Microsecond) => {
+                        Arc::new(Time64MicrosecondArray::from(Vec::<i64>::new()))
+                    }
+                    DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, tz) => {
+                        let a = TimestampMillisecondArray::from(Vec::<i64>::new());
+                        Arc::new(if let Some(tz) = tz {
+                            a.with_timezone(tz.clone())
+                        } else {
+                            a
+                        })
+                    }
+                    DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, tz) => {
+                        let a = TimestampMicrosecondArray::from(Vec::<i64>::new());
+                        Arc::new(if let Some(tz) = tz {
+                            a.with_timezone(tz.clone())
+                        } else {
+                            a
+                        })
+                    }
+                    DataType::Interval(IntervalUnit::MonthDayNano) => Arc::new(
+                        IntervalMonthDayNanoArray::from(Vec::<IntervalMonthDayNano>::new()),
+                    ),
+                    DataType::FixedSizeBinary(sz) => Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+                            std::iter::empty::<Option<Vec<u8>>>(),
+                            *sz,
+                        )
+                        .unwrap(),
+                    ),
+                    DataType::Dictionary(_, _) => {
+                        let keys = Int32Array::from(Vec::<i32>::new());
+                        let values = Arc::new(StringArray::from(Vec::<&str>::new()));
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                    }
+                    DataType::Struct(fields) => {
+                        let children: Vec<ArrayRef> = fields
+                            .iter()
+                            .map(|f| empty_child_for(f.data_type()) as ArrayRef)
+                            .collect();
+                        Arc::new(StructArray::new(fields.clone(), children, None))
+                    }
+                    DataType::List(field) => {
+                        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                        Arc::new(
+                            ListArray::try_new(
+                                field.clone(),
+                                offsets,
+                                empty_child_for(field.data_type()),
+                                None,
+                            )
+                            .unwrap(),
+                        )
+                    }
+                    DataType::Map(entry_field, is_sorted) => {
+                        let (key_field, val_field) = match entry_field.data_type() {
+                            DataType::Struct(fs) => (fs[0].clone(), fs[1].clone()),
+                            other => panic!("unexpected map entries type: {other:?}"),
+                        };
+                        let keys = StringArray::from(Vec::<&str>::new());
+                        let vals: ArrayRef = match val_field.data_type() {
+                            DataType::Null => Arc::new(NullArray::new(0)) as ArrayRef,
+                            DataType::Boolean => {
+                                Arc::new(BooleanArray::from(Vec::<bool>::new())) as ArrayRef
+                            }
+                            DataType::Int32 => {
+                                Arc::new(Int32Array::from(Vec::<i32>::new())) as ArrayRef
+                            }
+                            DataType::Int64 => {
+                                Arc::new(Int64Array::from(Vec::<i64>::new())) as ArrayRef
+                            }
+                            DataType::Float32 => {
+                                Arc::new(Float32Array::from(Vec::<f32>::new())) as ArrayRef
+                            }
+                            DataType::Float64 => {
+                                Arc::new(Float64Array::from(Vec::<f64>::new())) as ArrayRef
+                            }
+                            DataType::Utf8 => {
+                                Arc::new(StringArray::from(Vec::<&str>::new())) as ArrayRef
+                            }
+                            DataType::Binary => {
+                                Arc::new(BinaryArray::from(Vec::<&[u8]>::new())) as ArrayRef
+                            }
+                            DataType::Union(uf, _) => {
+                                let children: Vec<ArrayRef> = uf
+                                    .iter()
+                                    .map(|(_, f)| empty_child_for(f.data_type()))
+                                    .collect();
+                                Arc::new(
+                                    UnionArray::try_new(
+                                        uf.clone(),
+                                        ScalarBuffer::<i8>::from(Vec::<i8>::new()),
+                                        Some(ScalarBuffer::<i32>::from(Vec::<i32>::new())),
+                                        children,
+                                    )
+                                    .unwrap(),
+                                ) as ArrayRef
+                            }
+                            other => panic!("unsupported map value type: {other:?}"),
+                        };
+                        let entries = StructArray::new(
+                            Fields::from(vec![
+                                key_field.as_ref().clone(),
+                                val_field.as_ref().clone(),
+                            ]),
+                            vec![Arc::new(keys) as ArrayRef, vals],
+                            None,
+                        );
+                        let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0]));
+                        Arc::new(MapArray::new(
+                            entry_field.clone(),
+                            offsets,
+                            entries,
+                            None,
+                            *is_sorted,
+                        ))
+                    }
+                    other => panic!("empty_child_for: unhandled type {other:?}"),
+                }
+            }
+            let children: Vec<ArrayRef> = fields
+                .iter()
+                .map(|(_, f)| provide(f).unwrap_or_else(|| empty_child_for(f.data_type())))
+                .collect();
+            Arc::new(
+                UnionArray::try_new(
+                    fields.clone(),
+                    ScalarBuffer::<i8>::from(type_ids),
+                    Some(ScalarBuffer::<i32>::from(offsets)),
+                    children,
+                )
+                .unwrap(),
+            ) as ArrayRef
+        }
+        let date_a: i32 = 19_000; // 2022-01-08
+        let time_ms_a: i32 = 12 * 3_600_000 + 34 * 60_000 + 56_000 + 789;
+        let time_us_eod: i64 = 86_400_000_000 - 1;
+        let ts_ms_2024_01_01: i64 = 1_704_067_200_000; // 2024-01-01T00:00:00Z
+        let ts_us_2024_01_01: i64 = ts_ms_2024_01_01 * 1_000;
+        let dur_small = IntervalMonthDayNanoType::make_value(1, 2, 3_000_000_000);
+        let dur_zero = IntervalMonthDayNanoType::make_value(0, 0, 0);
+        let dur_large =
+            IntervalMonthDayNanoType::make_value(12, 31, ((86_400_000 - 1) as i64) * 1_000_000);
+        let dur_2years = IntervalMonthDayNanoType::make_value(24, 0, 0);
+        let uuid1 = uuid16_from_str("fe7bc30b-4ce8-4c5e-b67c-2234a2d38e66");
+        let uuid2 = uuid16_from_str("0826cc06-d2e3-4599-b4ad-af5fa6905cdb");
+        let item_name = Field::LIST_FIELD_DEFAULT_NAME;
+        let uf_tri = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new("int", DataType::Int32, false),
+                Field::new("string", DataType::Utf8, false),
+                Field::new("boolean", DataType::Boolean, false),
+            ],
+        )
+        .unwrap();
+        let uf_arr_items = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new("null", DataType::Null, false),
+                Field::new("string", DataType::Utf8, false),
+                Field::new("long", DataType::Int64, false),
+            ],
+        )
+        .unwrap();
+        let arr_items_field = Arc::new(Field::new(
+            item_name,
+            DataType::Union(uf_arr_items.clone(), UnionMode::Dense),
+            true,
+        ));
+        let uf_map_vals = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new("string", DataType::Utf8, false),
+                Field::new("double", DataType::Float64, false),
+                Field::new("null", DataType::Null, false),
+            ],
+        )
+        .unwrap();
+        let map_entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                Field::new("key", DataType::Utf8, false),
+                Field::new(
+                    "value",
+                    DataType::Union(uf_map_vals.clone(), UnionMode::Dense),
+                    true,
+                ),
+            ])),
+            false,
+        ));
+        // Enum metadata for Color (now includes name/namespace)
+        let mut enum_md_color = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(
+                crate::schema::AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                serde_json::to_string(&vec!["RED", "GREEN", "BLUE"]).unwrap(),
+            );
+            m
+        };
+        enum_md_color.insert(AVRO_NAME_METADATA_KEY.to_string(), "Color".to_string());
+        enum_md_color.insert(
+            AVRO_NAMESPACE_METADATA_KEY.to_string(),
+            "org.apache.arrow.avrotests.v1.types".to_string(),
+        );
+        let union_rec_a_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, false),
+        ]);
+        let union_rec_b_fields = Fields::from(vec![
+            Field::new("x", DataType::Int64, false),
+            Field::new("y", DataType::Binary, false),
+        ]);
+        let union_map_entries = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                Field::new("key", DataType::Utf8, false),
+                Field::new("value", DataType::Utf8, false),
+            ])),
+            false,
+        ));
+        let rec_a_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "RecA".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1.types".to_string(),
+            );
+            m
+        };
+        let rec_b_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "RecB".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1.types".to_string(),
+            );
+            m
+        };
+        let uf_union_big = UnionFields::try_new(
+            vec![0, 1, 2, 3, 4],
+            vec![
+                Field::new(
+                    "map",
+                    DataType::Map(union_map_entries.clone(), false),
+                    false,
+                ),
+                Field::new(
+                    "array",
+                    DataType::List(Arc::new(Field::new(item_name, DataType::Int64, false))),
+                    false,
+                ),
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.types.RecB",
+                    DataType::Struct(union_rec_b_fields.clone()),
+                    false,
+                )
+                .with_metadata(rec_b_md.clone()),
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.types.RecA",
+                    DataType::Struct(union_rec_a_fields.clone()),
+                    false,
+                )
+                .with_metadata(rec_a_md.clone()),
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.types.Color",
+                    DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                    false,
+                )
+                .with_metadata(enum_md_color.clone()),
+            ],
+        )
+        .unwrap();
+        let fx4_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx4".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1".to_string(),
+            );
+            m
+        };
+        let uf_date_fixed4 = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.Fx4",
+                    DataType::FixedSizeBinary(4),
+                    false,
+                )
+                .with_metadata(fx4_md.clone()),
+                Field::new("date", DataType::Date32, false),
+            ],
+        )
+        .unwrap();
+        let dur12u_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Dur12U".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1".to_string(),
+            );
+            m
+        };
+        let uf_dur_or_str = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new("string", DataType::Utf8, false),
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.Dur12U",
+                    DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano),
+                    false,
+                )
+                .with_metadata(dur12u_md.clone()),
+            ],
+        )
+        .unwrap();
+        let fx10_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx10".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1".to_string(),
+            );
+            m
+        };
+        let uf_uuid_or_fx10 = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new(
+                    "org.apache.arrow.avrotests.v1.Fx10",
+                    DataType::FixedSizeBinary(10),
+                    false,
+                )
+                .with_metadata(fx10_md.clone()),
+                add_uuid_ext_union(Field::new("uuid", DataType::FixedSizeBinary(16), false)),
+            ],
+        )
+        .unwrap();
+        let uf_kv_val = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new("null", DataType::Null, false),
+                Field::new("int", DataType::Int32, false),
+                Field::new("long", DataType::Int64, false),
+            ],
+        )
+        .unwrap();
+        let kv_fields = Fields::from(vec![
+            Field::new("key", DataType::Utf8, false),
+            Field::new(
+                "val",
+                DataType::Union(uf_kv_val.clone(), UnionMode::Dense),
+                true,
+            ),
+        ]);
+        let kv_item_field = Arc::new(Field::new(
+            item_name,
+            DataType::Struct(kv_fields.clone()),
+            false,
+        ));
+        let map_int_entries = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                Field::new("key", DataType::Utf8, false),
+                Field::new("value", DataType::Int32, false),
+            ])),
+            false,
+        ));
+        let uf_map_or_array = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new(
+                    "array",
+                    DataType::List(Arc::new(Field::new(item_name, DataType::Int32, false))),
+                    false,
+                ),
+                Field::new("map", DataType::Map(map_int_entries.clone(), false), false),
+            ],
+        )
+        .unwrap();
+        let mut enum_md_status = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(
+                crate::schema::AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+                serde_json::to_string(&vec!["UNKNOWN", "NEW", "PROCESSING", "DONE"]).unwrap(),
+            );
+            m
+        };
+        enum_md_status.insert(AVRO_NAME_METADATA_KEY.to_string(), "Status".to_string());
+        enum_md_status.insert(
+            AVRO_NAMESPACE_METADATA_KEY.to_string(),
+            "org.apache.arrow.avrotests.v1.types".to_string(),
+        );
+        let mut dec20_md = HashMap::<String, String>::new();
+        dec20_md.insert("precision".to_string(), "20".to_string());
+        dec20_md.insert("scale".to_string(), "4".to_string());
+        dec20_md.insert(AVRO_NAME_METADATA_KEY.to_string(), "DecFix20".to_string());
+        dec20_md.insert(
+            AVRO_NAMESPACE_METADATA_KEY.to_string(),
+            "org.apache.arrow.avrotests.v1.types".to_string(),
+        );
+        let mut dec10_md = HashMap::<String, String>::new();
+        dec10_md.insert("precision".to_string(), "10".to_string());
+        dec10_md.insert("scale".to_string(), "2".to_string());
+        let fx16_top_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx16".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1.types".to_string(),
+            );
+            m
+        };
+        let dur12_top_md = {
+            let mut m = HashMap::<String, String>::new();
+            m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Dur12".to_string());
+            m.insert(
+                AVRO_NAMESPACE_METADATA_KEY.to_string(),
+                "org.apache.arrow.avrotests.v1.types".to_string(),
+            );
+            m
+        };
+        #[cfg(feature = "small_decimals")]
+        let dec20_dt = DataType::Decimal128(20, 4); // NOTE(review): same type in both cfg arms
+        #[cfg(not(feature = "small_decimals"))]
+        let dec20_dt = DataType::Decimal128(20, 4); // so this cfg pair looks redundant - confirm
+        #[cfg(feature = "small_decimals")]
+        let dec10_dt = DataType::Decimal64(10, 2); // narrower decimal only with the feature on
+        #[cfg(not(feature = "small_decimals"))]
+        let dec10_dt = DataType::Decimal128(10, 2);
+        let fields: Vec<FieldRef> = vec![
+            Arc::new(Field::new(
+                "person",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("name", DataType::Utf8, false),
+                    Field::new("age", DataType::Int32, false),
+                ])),
+                false,
+            )),
+            Arc::new(Field::new("old_count", DataType::Int32, false)),
+            Arc::new(Field::new(
+                "union_map_or_array_int",
+                DataType::Union(uf_map_or_array.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new(
+                "array_records_with_union",
+                DataType::List(kv_item_field.clone()),
+                false,
+            )),
+            Arc::new(Field::new(
+                "union_uuid_or_fixed10",
+                DataType::Union(uf_uuid_or_fx10.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new(
+                "union_interval_or_string",
+                DataType::Union(uf_dur_or_str.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new(
+                "union_date_or_fixed4",
+                DataType::Union(uf_date_fixed4.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new(
+                "union_enum_record_array_map",
+                DataType::Union(uf_union_big.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new(
+                "maybe_auth",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("user", DataType::Utf8, false),
+                    Field::new("token", DataType::Binary, true), // [bytes,null] -> nullable bytes
+                ])),
+                false,
+            )),
+            Arc::new(Field::new(
+                "address",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("street_name", DataType::Utf8, false),
+                    Field::new("zip", DataType::Int32, false),
+                    Field::new("country", DataType::Utf8, false),
+                ])),
+                false,
+            )),
+            Arc::new(Field::new(
+                "map_union",
+                DataType::Map(map_entries_field.clone(), false),
+                false,
+            )),
+            Arc::new(Field::new(
+                "arr_union",
+                DataType::List(arr_items_field.clone()),
+                false,
+            )),
+            Arc::new(
+                Field::new(
+                    "status",
+                    DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                    false,
+                )
+                .with_metadata(enum_md_status.clone()),
+            ),
+            Arc::new(
+                Field::new(
+                    "interval_mdn",
+                    DataType::Interval(IntervalUnit::MonthDayNano),
+                    false,
+                )
+                .with_metadata(dur12_top_md.clone()),
+            ),
+            Arc::new(Field::new(
+                "ts_micros_local",
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None),
+                false,
+            )),
+            Arc::new(Field::new(
+                "ts_millis_local",
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, None),
+                false,
+            )),
+            Arc::new(Field::new(
+                "ts_micros_utc",
+                DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some("+00:00".into())),
+                false,
+            )),
+            Arc::new(Field::new(
+                "ts_millis_utc",
+                DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, Some("+00:00".into())),
+                false,
+            )),
+            Arc::new(Field::new(
+                "t_micros",
+                DataType::Time64(arrow_schema::TimeUnit::Microsecond),
+                false,
+            )),
+            Arc::new(Field::new(
+                "t_millis",
+                DataType::Time32(arrow_schema::TimeUnit::Millisecond),
+                false,
+            )),
+            Arc::new(Field::new("d_date", DataType::Date32, false)),
+            Arc::new(add_uuid_ext_top(Field::new(
+                "uuid_str",
+                DataType::FixedSizeBinary(16),
+                false,
+            ))),
+            Arc::new(Field::new("dec_fix_s20_4", dec20_dt, false).with_metadata(dec20_md.clone())),
+            Arc::new(
+                Field::new("dec_bytes_s10_2", dec10_dt, false).with_metadata(dec10_md.clone()),
+            ),
+            Arc::new(
+                Field::new("fx16_plain", DataType::FixedSizeBinary(16), false)
+                    .with_metadata(fx16_top_md.clone()),
+            ),
+            Arc::new(Field::new("raw_bytes", DataType::Binary, false)),
+            Arc::new(Field::new("str_utf8", DataType::Utf8, false)),
+            Arc::new(Field::new(
+                "tri_union_prim",
+                DataType::Union(uf_tri.clone(), UnionMode::Dense),
+                false,
+            )),
+            Arc::new(Field::new("opt_str_nullsecond", DataType::Utf8, true)),
+            Arc::new(Field::new("opt_i32_nullfirst", DataType::Int32, true)),
+            Arc::new(Field::new("count_i64", DataType::Int64, false)),
+            Arc::new(Field::new("count_i32", DataType::Int64, false)),
+            Arc::new(Field::new("ratio_f64", DataType::Float64, false)),
+            Arc::new(Field::new("ratio_f32", DataType::Float64, false)),
+            Arc::new(Field::new("flag", DataType::Boolean, false)),
+            Arc::new(Field::new("identifier", DataType::Int64, false)),
+        ];
+        let expected_schema = Arc::new(arrow_schema::Schema::new(Fields::from(fields)));
+        let mut cols: Vec<ArrayRef> = vec![
+            Arc::new(StructArray::new(
+                match expected_schema
+                    .field_with_name("person")
+                    .unwrap()
+                    .data_type()
+                {
+                    DataType::Struct(fs) => fs.clone(),
+                    _ => unreachable!(),
+                },
+                vec![
+                    Arc::new(StringArray::from(vec!["Alice", "Bob", "Carol", "Dave"])) as ArrayRef,
+                    Arc::new(Int32Array::from(vec![30, 0, 25, 41])) as ArrayRef,
+                ],
+                None,
+            )) as ArrayRef,
+            Arc::new(Int32Array::from(vec![100, 42, 7, 42])) as ArrayRef,
+        ];
+        { // values for field `union_map_or_array_int`
+            let map_child: ArrayRef = {
+                let keys = StringArray::from(vec!["x", "y", "only"]); // maps: {x, y} and {only}
+                let vals = Int32Array::from(vec![1, 2, 10]);
+                let entries = StructArray::new(
+                    Fields::from(vec![
+                        Field::new("key", DataType::Utf8, false),
+                        Field::new("value", DataType::Int32, false),
+                    ]),
+                    vec![Arc::new(keys) as ArrayRef, Arc::new(vals) as ArrayRef],
+                    None,
+                );
+                let moff = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3]));
+                Arc::new(MapArray::new(
+                    map_int_entries.clone(),
+                    moff,
+                    entries,
+                    None,
+                    false,
+                )) as ArrayRef
+            };
+            let list_child: ArrayRef = {
+                let values = Int32Array::from(vec![1, 2, 3, 0]); // lists: [1, 2, 3] and [0]
+                let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 4]));
+                Arc::new(
+                    ListArray::try_new(
+                        Arc::new(Field::new(item_name, DataType::Int32, false)),
+                        offsets,
+                        Arc::new(values),
+                        None,
+                    )
+                    .unwrap(),
+                ) as ArrayRef
+            };
+            let tids = vec![1, 0, 1, 0]; // rows: map, array, map, array (array = 0, map = 1)
+            let offs = vec![0, 0, 1, 1]; // index of each row inside its own child
+            let arr = mk_dense_union(&uf_map_or_array, tids, offs, |f| match f.name().as_str() {
+                "array" => Some(list_child.clone()),
+                "map" => Some(map_child.clone()),
+                _ => None,
+            });
+            cols.push(arr);
+        }
+        { // values for field `array_records_with_union`
+            let keys = Arc::new(StringArray::from(vec!["k1", "k2", "k", "k3", "x"])) as ArrayRef;
+            let type_ids = vec![1, 0, 2, 0, 1]; // int, null, long, null, int
+            let offsets = vec![0, 0, 0, 1, 1]; // position within the selected child
+            let vals = mk_dense_union(&uf_kv_val, type_ids, offsets, |f| match f.data_type() {
+                DataType::Int32 => Some(Arc::new(Int32Array::from(vec![5, -5])) as ArrayRef),
+                DataType::Int64 => Some(Arc::new(Int64Array::from(vec![99i64])) as ArrayRef),
+                DataType::Null => Some(Arc::new(NullArray::new(2)) as ArrayRef),
+                _ => None,
+            });
+            let values_struct =
+                Arc::new(StructArray::new(kv_fields.clone(), vec![keys, vals], None));
+            let list_offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 2, 3, 4, 5]));
+            let arr = Arc::new(
+                ListArray::try_new(kv_item_field.clone(), list_offsets, values_struct, None)
+                    .unwrap(),
+            ) as ArrayRef;
+            cols.push(arr); // four lists holding 2, 1, 1 and 1 key/val records
+        }
+        { // values for field `union_uuid_or_fixed10`
+            let type_ids = vec![1, 0, 1, 0]; // [uuid, fixed10, uuid, fixed10] but uf order = [fixed10, uuid]
+            let offs = vec![0, 0, 1, 1]; // index into the child selected by type_ids
+            let arr = mk_dense_union(&uf_uuid_or_fx10, type_ids, offs, |f| match f.data_type() {
+                DataType::FixedSizeBinary(16) => {
+                    let it = [Some(uuid1), Some(uuid2)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::FixedSizeBinary(10) => {
+                    let fx10_a = [0xAAu8; 10];
+                    let fx10_b = [0x00u8, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99];
+                    let it = [Some(fx10_a), Some(fx10_b)].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 10).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            cols.push(arr);
+        }
+        { // values for field `union_interval_or_string`
+            let type_ids = vec![1, 0, 1, 0]; // [duration, string, duration, string] but uf order = [string, duration]
+            let offs = vec![0, 0, 1, 1]; // index into the child selected by type_ids
+            let arr = mk_dense_union(&uf_dur_or_str, type_ids, offs, |f| match f.data_type() {
+                DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano) => Some(Arc::new(
+                    IntervalMonthDayNanoArray::from(vec![dur_small, dur_large]),
+                )
+                    as ArrayRef),
+                DataType::Utf8 => Some(Arc::new(StringArray::from(vec![
+                    "duration-as-text",
+                    "iso-8601-period-P1Y",
+                ])) as ArrayRef),
+                _ => None,
+            });
+            cols.push(arr);
+        }
+        { // values for field `union_date_or_fixed4`
+            let type_ids = vec![1, 0, 1, 0]; // [date, fixed, date, fixed] but uf order = [fixed, date]
+            let offs = vec![0, 0, 1, 1]; // index into the child selected by type_ids
+            let arr = mk_dense_union(&uf_date_fixed4, type_ids, offs, |f| match f.data_type() {
+                DataType::Date32 => Some(Arc::new(Date32Array::from(vec![date_a, 0])) as ArrayRef),
+                DataType::FixedSizeBinary(4) => {
+                    let it = [Some(*b"\x00\x11\x22\x33"), Some(*b"ABCD")].into_iter();
+                    Some(Arc::new(
+                        FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 4).unwrap(),
+                    ) as ArrayRef)
+                }
+                _ => None,
+            });
+            cols.push(arr);
+        }
+        { // values for field `union_enum_record_array_map`
+            let tids = vec![4, 3, 1, 0]; // uf order = [map(0), array(1), RecB(2), RecA(3), enum(4)]
+            let offs = vec![0, 0, 0, 0]; // every selected child holds exactly one row
+            let arr = mk_dense_union(&uf_union_big, tids, offs, |f| match f.data_type() {
+                DataType::Dictionary(_, _) => {
+                    let keys = Int32Array::from(vec![0i32]); // dictionary key 0 -> "RED"
+                    let values =
+                        Arc::new(StringArray::from(vec!["RED", "GREEN", "BLUE"])) as ArrayRef;
+                    Some(
+                        Arc::new(DictionaryArray::<Int32Type>::try_new(keys, values).unwrap())
+                            as ArrayRef,
+                    )
+                }
+                DataType::Struct(fs) if fs == &union_rec_a_fields => {
+                    let a = Int32Array::from(vec![7]);
+                    let b = StringArray::from(vec!["rec"]);
+                    Some(Arc::new(StructArray::new(
+                        fs.clone(),
+                        vec![Arc::new(a) as ArrayRef, Arc::new(b) as ArrayRef],
+                        None,
+                    )) as ArrayRef)
+                }
+                DataType::List(_) => {
+                    let values = Int64Array::from(vec![1i64, 2, 3]);
+                    let offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3]));
+                    Some(Arc::new(
+                        ListArray::try_new(
+                            Arc::new(Field::new(item_name, DataType::Int64, false)),
+                            offsets,
+                            Arc::new(values),
+                            None,
+                        )
+                        .unwrap(),
+                    ) as ArrayRef)
+                }
+                DataType::Map(_, _) => {
+                    let keys = StringArray::from(vec!["k"]);
+                    let vals = StringArray::from(vec!["v"]);
+                    let entries = StructArray::new(
+                        Fields::from(vec![
+                            Field::new("key", DataType::Utf8, false),
+                            Field::new("value", DataType::Utf8, false),
+                        ]),
+                        vec![Arc::new(keys) as ArrayRef, Arc::new(vals) as ArrayRef],
+                        None,
+                    );
+                    let moff = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 1]));
+                    Some(Arc::new(MapArray::new(
+                        union_map_entries.clone(),
+                        moff,
+                        entries,
+                        None,
+                        false,
+                    )) as ArrayRef)
+                }
+                _ => None, // RecB is never selected; mk_dense_union supplies an empty child
+            });
+            cols.push(arr);
+        }
+        { // values for field `maybe_auth`
+            let fs = match expected_schema
+                .field_with_name("maybe_auth")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Struct(fs) => fs.clone(),
+                _ => unreachable!(),
+            };
+            let user =
+                Arc::new(StringArray::from(vec!["alice", "bob", "carol", "dave"])) as ArrayRef;
+            let token_values: Vec<Option<&[u8]>> = vec![ // None = null; b"" = present but empty
+                None,
+                Some(b"\x01\x02\x03".as_ref()),
+                None,
+                Some(b"".as_ref()),
+            ];
+            let token = Arc::new(BinaryArray::from(token_values)) as ArrayRef;
+            cols.push(Arc::new(StructArray::new(fs, vec![user, token], None)) as ArrayRef);
+        }
+        { // values for field `address`; struct fields are reused from expected_schema
+            let fs = match expected_schema
+                .field_with_name("address")
+                .unwrap()
+                .data_type()
+            {
+                DataType::Struct(fs) => fs.clone(),
+                _ => unreachable!(),
+            };
+            let street = Arc::new(StringArray::from(vec![
+                "100 Main",
+                "",
+                "42 Galaxy Way",
+                "End Ave",
+            ])) as ArrayRef;
+            let zip = Arc::new(Int32Array::from(vec![12345, 0, 42424, 1])) as ArrayRef;
+            let country = Arc::new(StringArray::from(vec!["US", "CA", "US", "GB"])) as ArrayRef;
+            cols.push(Arc::new(StructArray::new(fs, vec![street, zip, country], None)) as ArrayRef);
+        }
+        {
+            let keys = StringArray::from(vec!["a", "b", "c", "neg", "pi", "ok"]);
+            let moff = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 3, 4, 4, 6]));
+            let tid_s = 0; // string
+            let tid_d = 1; // double
+            let tid_n = 2; // null
+            let type_ids = vec![tid_d, tid_n, tid_s, tid_d, tid_d, tid_s];
+            let offsets = vec![0, 0, 0, 1, 2, 1];
+            let pi_5dp = (std::f64::consts::PI * 100_000.0).trunc() / 100_000.0;
+            let vals = mk_dense_union(&uf_map_vals, type_ids, offsets, |f| match f.data_type() {
+                DataType::Float64 => {
+                    Some(Arc::new(Float64Array::from(vec![1.5f64, -0.5, pi_5dp])) as ArrayRef)
+                }
+                DataType::Utf8 => {
+                    Some(Arc::new(StringArray::from(vec!["yes", "true"])) as ArrayRef)
+                }
+                DataType::Null => Some(Arc::new(NullArray::new(1)) as ArrayRef),
+                _ => None,
+            });
+            let entries = StructArray::new(
+                Fields::from(vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new(
+                        "value",
+                        DataType::Union(uf_map_vals.clone(), UnionMode::Dense),
+                        true,
+                    ),
+                ]),
+                vec![Arc::new(keys) as ArrayRef, vals],
+                None,
+            );
+            let map = Arc::new(MapArray::new(
+                map_entries_field.clone(),
+                moff,
+                entries,
+                None,
+                false,
+            )) as ArrayRef;
+            cols.push(map);
+        }
+        {
+            let type_ids = vec![
+                2, 1, 0, 2, 0, 1, 2, 2, 1, 0,
+                2, // long,string,null,long,null,string,long,long,string,null,long
+            ];
+            let offsets = vec![0, 0, 0, 1, 1, 1, 2, 3, 2, 2, 4];
+            let values =
+                mk_dense_union(&uf_arr_items, type_ids, offsets, |f| match f.data_type() {
+                    DataType::Int64 => {
+                        Some(Arc::new(Int64Array::from(vec![1i64, -3, 0, -1, 0])) as ArrayRef)
+                    }
+                    DataType::Utf8 => {
+                        Some(Arc::new(StringArray::from(vec!["x", "z", "end"])) as ArrayRef)
+                    }
+                    DataType::Null => Some(Arc::new(NullArray::new(3)) as ArrayRef),
+                    _ => None,
+                });
+            let list_offsets = OffsetBuffer::new(ScalarBuffer::<i32>::from(vec![0, 4, 7, 8, 11]));
+            let arr = Arc::new(
+                ListArray::try_new(arr_items_field.clone(), list_offsets, values, None).unwrap(),
+            ) as ArrayRef;
+            cols.push(arr);
+        }
+        {
+            let keys = Int32Array::from(vec![1, 2, 3, 0]); // NEW, PROCESSING, DONE, UNKNOWN
+            let values = Arc::new(StringArray::from(vec![
+                "UNKNOWN",
+                "NEW",
+                "PROCESSING",
+                "DONE",
+            ])) as ArrayRef;
+            let dict = DictionaryArray::<Int32Type>::try_new(keys, values).unwrap();
+            cols.push(Arc::new(dict) as ArrayRef);
+        }
+        cols.push(Arc::new(IntervalMonthDayNanoArray::from(vec![
+            dur_small, dur_zero, dur_large, dur_2years,
+        ])) as ArrayRef);
+        cols.push(Arc::new(TimestampMicrosecondArray::from(vec![
+            ts_us_2024_01_01 + 123_456,
+            0,
+            ts_us_2024_01_01 + 101_112,
+            987_654_321,
+        ])) as ArrayRef);
+        cols.push(Arc::new(TimestampMillisecondArray::from(vec![
+            ts_ms_2024_01_01 + 86_400_000,
+            0,
+            ts_ms_2024_01_01 + 789,
+            123_456_789,
+        ])) as ArrayRef);
+        {
+            let a = TimestampMicrosecondArray::from(vec![
+                ts_us_2024_01_01,
+                1,
+                ts_us_2024_01_01 + 456,
+                0,
+            ])
+            .with_timezone("+00:00");
+            cols.push(Arc::new(a) as ArrayRef);
+        }
+        {
+            let a = TimestampMillisecondArray::from(vec![
+                ts_ms_2024_01_01,
+                -1,
+                ts_ms_2024_01_01 + 123,
+                0,
+            ])
+            .with_timezone("+00:00");
+            cols.push(Arc::new(a) as ArrayRef);
+        }
+        cols.push(Arc::new(Time64MicrosecondArray::from(vec![
+            time_us_eod,
+            0,
+            1,
+            1_000_000,
+        ])) as ArrayRef);
+        cols.push(Arc::new(Time32MillisecondArray::from(vec![
+            time_ms_a,
+            0,
+            1,
+            86_400_000 - 1,
+        ])) as ArrayRef);
+        cols.push(Arc::new(Date32Array::from(vec![date_a, 0, 1, 365])) as ArrayRef);
+        {
+            let it = [Some(uuid1), Some(uuid2), Some(uuid1), Some(uuid2)].into_iter();
+            cols.push(Arc::new(
+                FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap(),
+            ) as ArrayRef);
+        }
+        {
+            #[cfg(feature = "small_decimals")]
+            let arr = Arc::new(
+                Decimal128Array::from_iter_values([1_234_567_891_234i128, -420_000i128, 0, -1i128])
+                    .with_precision_and_scale(20, 4)
+                    .unwrap(),
+            ) as ArrayRef;
+            #[cfg(not(feature = "small_decimals"))]
+            let arr = Arc::new(
+                Decimal128Array::from_iter_values([1_234_567_891_234i128, -420_000i128, 0, -1i128])
+                    .with_precision_and_scale(20, 4)
+                    .unwrap(),
+            ) as ArrayRef;
+            cols.push(arr);
+        }
+        {
+            #[cfg(feature = "small_decimals")]
+            let arr = Arc::new(
+                Decimal64Array::from_iter_values([123456i64, -1, 0, 9_999_999_999i64])
+                    .with_precision_and_scale(10, 2)
+                    .unwrap(),
+            ) as ArrayRef;
+            #[cfg(not(feature = "small_decimals"))]
+            let arr = Arc::new(
+                Decimal128Array::from_iter_values([123456i128, -1, 0, 9_999_999_999i128])
+                    .with_precision_and_scale(10, 2)
+                    .unwrap(),
+            ) as ArrayRef;
+            cols.push(arr);
+        }
+        {
+            let it = [
+                Some(*b"0123456789ABCDEF"),
+                Some([0u8; 16]),
+                Some(*b"ABCDEFGHIJKLMNOP"),
+                Some([0xAA; 16]),
+            ]
+            .into_iter();
+            cols.push(Arc::new(
+                FixedSizeBinaryArray::try_from_sparse_iter_with_size(it, 16).unwrap(),
+            ) as ArrayRef);
+        }
+        cols.push(Arc::new(BinaryArray::from(vec![
+            b"\x00\x01".as_ref(),
+            b"".as_ref(),
+            b"\xFF\x00".as_ref(),
+            b"\x10\x20\x30\x40".as_ref(),
+        ])) as ArrayRef);
+        cols.push(Arc::new(StringArray::from(vec!["hello", "", "world", "✓ unicode"])) as ArrayRef);
+        {
+            let tids = vec![0, 1, 2, 1];
+            let offs = vec![0, 0, 0, 1];
+            let arr = mk_dense_union(&uf_tri, tids, offs, |f| match f.data_type() {
+                DataType::Int32 => Some(Arc::new(Int32Array::from(vec![0])) as ArrayRef),
+                DataType::Utf8 => Some(Arc::new(StringArray::from(vec!["hi", ""])) as ArrayRef),
+                DataType::Boolean => Some(Arc::new(BooleanArray::from(vec![true])) as ArrayRef),
+                _ => None,
+            });
+            cols.push(arr);
         }
+        cols.push(Arc::new(StringArray::from(vec![
+            Some("alpha"),
+            None,
+            Some("s3"),
+            Some(""),
+        ])) as ArrayRef);
+        cols.push(Arc::new(Int32Array::from(vec![None, Some(42), None, Some(0)])) as ArrayRef);
+        cols.push(Arc::new(Int64Array::from(vec![
+            7_000_000_000i64,
+            -2,
+            0,
+            -9_876_543_210i64,
+        ])) as ArrayRef);
+        cols.push(Arc::new(Int64Array::from(vec![7i64, -1, 0, 123])) as ArrayRef);
+        cols.push(Arc::new(Float64Array::from(vec![2.5f64, -1.0, 7.0, -2.25])) as ArrayRef);
+        cols.push(Arc::new(Float64Array::from(vec![1.25f64, -0.0, 3.5, 9.75])) as ArrayRef);
+        cols.push(Arc::new(BooleanArray::from(vec![true, false, true, false])) as ArrayRef);
+        cols.push(Arc::new(Int64Array::from(vec![1, 2, 3, 4])) as ArrayRef);
+        let expected = RecordBatch::try_new(expected_schema, cols).unwrap();
+        assert_eq!(
+            expected, batch,
+            "entire RecordBatch mismatch (schema, all columns, all rows)"
+        );
     }
 }
diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs
index 3466b064455f..648baa60c723 100644
--- a/arrow-avro/src/reader/record.rs
+++ b/arrow-avro/src/reader/record.rs
@@ -15,92 +15,190 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::codec::{AvroDataType, Codec, Nullability};
-use crate::reader::block::{Block, BlockDecoder};
+//! Avro Decoder for Arrow types.
+
+use crate::codec::{
+    AvroDataType, AvroField, AvroLiteral, Codec, Promotion, ResolutionInfo, ResolvedRecord,
+    ResolvedUnion,
+};
 use crate::reader::cursor::AvroCursor;
-use crate::reader::header::Header;
-use crate::reader::ReadOptions;
-use crate::schema::*;
+use crate::schema::Nullability;
+#[cfg(feature = "small_decimals")]
+use arrow_array::builder::{Decimal32Builder, Decimal64Builder};
+use arrow_array::builder::{Decimal128Builder, Decimal256Builder, IntervalMonthDayNanoBuilder};
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::*;
 use arrow_schema::{
-    ArrowError, DataType, Field as ArrowField, FieldRef, Fields, Schema as ArrowSchema, SchemaRef,
+    ArrowError, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, DataType, Field as ArrowField,
+    FieldRef, Fields, Schema as ArrowSchema, SchemaRef, UnionFields, UnionMode,
 };
+#[cfg(feature = "small_decimals")]
+use arrow_schema::{DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION};
+#[cfg(feature = "avro_custom_types")]
+use arrow_select::take::{TakeOptions, take};
 use std::cmp::Ordering;
-use std::collections::HashMap;
-use std::io::Read;
 use std::sync::Arc;
+use strum_macros::AsRefStr;
+use uuid::Uuid;
+
+const DEFAULT_CAPACITY: usize = 1024;
+
+/// Decode strategy for a reader-side `["null", T]` type, chosen by how the writer encoded it.
+#[derive(Clone, Copy, Debug)]
+enum NullablePlan {
+    /// The writer also wrote a union, so each value is preceded by a branch tag to read.
+    ReadTag,
+    /// The writer wrote a bare (non-union) value that resolves to the non-null branch of
+    /// the reader union: there is no branch tag to read, but `promotion` is still applied.
+    FromSingle { promotion: Promotion },
+}
+
+/// Decodes one decimal: `read_decimal_bytes_be::<$N>` yields the bytes, appended as `$Int`.
+macro_rules! decode_decimal {
+    ($size:expr, $buf:expr, $builder:expr, $N:expr, $Int:ty) => {{
+        let bytes = read_decimal_bytes_be::<{ $N }>($buf, $size)?; // `?` propagates read errors
+        $builder.append_value(<$Int>::from_be_bytes(bytes)); // big-endian -> native integer
+    }};
+}
+
+/// Finishes `$builder` into an `$ArrayTy` using `$nulls`, `$precision` and `$scale` (0 if `None`).
+macro_rules! flush_decimal {
+    ($builder:expr, $precision:expr, $scale:expr, $nulls:expr, $ArrayTy:ty) => {{
+        let (_, vals, _) = $builder.finish().into_parts(); // keep only the values buffer
+        let dec = <$ArrayTy>::try_new(vals, $nulls)?
+            .with_precision_and_scale(*$precision as u8, $scale.unwrap_or(0) as i8)
+            .map_err(|e| ArrowError::ParseError(e.to_string()))?; // reported as `ParseError`
+        Arc::new(dec) as ArrayRef
+    }};
+}
+
+/// Appends a default decimal to `$builder` from `AvroLiteral::Bytes` via `sign_cast_to::<$N>` and
+/// `<$Int>::from_be_bytes`; any other literal yields `InvalidArgumentError` mentioning `$name`.
+macro_rules! append_decimal_default {
+    ($lit:expr, $builder:expr, $N:literal, $Int:ty, $name:literal) => {{
+        match $lit {
+            AvroLiteral::Bytes(b) => {
+                let ext = sign_cast_to::<$N>(b)?; // NOTE(review): presumably sign-extends; confirm
+                let val = <$Int>::from_be_bytes(ext);
+                $builder.append_value(val);
+                Ok(())
+            }
+            _ => Err(ArrowError::InvalidArgumentError(
+                concat!(
+                    "Default for ",
+                    $name,
+                    " must be bytes (two's-complement big-endian)"
+                )
+                .to_string(),
+            )),
+        }
+    }};
+}
 
 /// Decodes avro encoded data into [`RecordBatch`]
-pub struct RecordDecoder {
+#[derive(Debug)]
+pub(crate) struct RecordDecoder {
     schema: SchemaRef,
     fields: Vec<Decoder>,
-    use_utf8view: bool,
+    projector: Option<Projector>,
 }
 
 impl RecordDecoder {
-    /// Create a new [`RecordDecoder`] from the provided [`AvroDataType`] with default options
-    pub fn try_new(data_type: &AvroDataType) -> Result<Self, ArrowError> {
-        Self::try_new_with_options(data_type, ReadOptions::default())
-    }
-
-    /// Create a new [`RecordDecoder`] from the provided [`AvroDataType`] with additional options
+    /// Creates a new [`RecordDecoder`] from the provided record-typed [`AvroDataType`].
     ///
     /// This method allows you to customize how the Avro data is decoded into Arrow arrays.
     ///
-    /// # Parameters
-    /// * `data_type` - The Avro data type to decode
-    /// * `options` - Configuration options for decoding
-    pub fn try_new_with_options(
-        data_type: &AvroDataType,
-        options: ReadOptions,
-    ) -> Result<Self, ArrowError> {
-        match Decoder::try_new(data_type)? {
-            Decoder::Record(fields, encodings) => Ok(Self {
-                schema: Arc::new(ArrowSchema::new(fields)),
-                fields: encodings,
-                use_utf8view: options.use_utf8view(),
-            }),
-            encoding => Err(ArrowError::ParseError(format!(
-                "Expected record got {encoding:?}"
+    /// # Arguments
+    /// * `data_type` - The Avro data type to decode; its codec must be `Codec::Struct`.
+    ///   String encoding (`Utf8` vs `Utf8View`) follows the codec of each field, not a flag.
+    ///
+    /// # Errors
+    /// Fails if the codec is not `Codec::Struct` or a field decoder or projector cannot be built.
+    pub(crate) fn try_new_with_options(data_type: &AvroDataType) -> Result<Self, ArrowError> {
+        match data_type.codec() {
+            Codec::Struct(reader_fields) => {
+                // One Arrow field and one decoder per reader field, in schema order
+                let mut arrow_fields = Vec::with_capacity(reader_fields.len());
+                let mut encodings = Vec::with_capacity(reader_fields.len());
+                for avro_field in reader_fields.iter() {
+                    arrow_fields.push(avro_field.field());
+                    encodings.push(Decoder::try_new(avro_field.data_type())?);
+                }
+                let projector = match data_type.resolution.as_ref() {
+                    Some(ResolutionInfo::Record(rec)) => {
+                        Some(ProjectorBuilder::try_new(rec, reader_fields).build()?)
+                    }
+                    _ => None,
+                };
+                Ok(Self {
+                    schema: Arc::new(ArrowSchema::new(arrow_fields)),
+                    fields: encodings,
+                    projector,
+                })
+            }
+            other => Err(ArrowError::ParseError(format!(
+                "Expected record got {other:?}"
             ))),
         }
     }
 
-    pub fn schema(&self) -> &SchemaRef {
+    /// Returns the Arrow schema of the [`RecordBatch`] produced by [`Self::flush`].
+    pub(crate) fn schema(&self) -> &SchemaRef {
         &self.schema
     }
 
     /// Decode `count` records from `buf`
-    pub fn decode(&mut self, buf: &[u8], count: usize) -> Result<usize, ArrowError> {
+    pub(crate) fn decode(&mut self, buf: &[u8], count: usize) -> Result<usize, ArrowError> {
         let mut cursor = AvroCursor::new(buf);
-        for _ in 0..count {
-            for field in &mut self.fields {
-                field.decode(&mut cursor)?;
+        match self.projector.as_mut() {
+            Some(proj) => { // resolved record: the projector decodes each record into `fields`
+                for _ in 0..count {
+                    proj.project_record(&mut cursor, &mut self.fields)?;
+                }
+            }
+            None => { // no projector: decode every field of each record in declaration order
+                for _ in 0..count {
+                    for field in &mut self.fields {
+                        field.decode(&mut cursor)?;
+                    }
+                }
             }
         }
         Ok(cursor.position())
     }
 
     /// Flush the decoded records into a [`RecordBatch`]
-    pub fn flush(&mut self) -> Result<RecordBatch, ArrowError> {
+    pub(crate) fn flush(&mut self) -> Result<RecordBatch, ArrowError> {
         let arrays = self
             .fields
             .iter_mut()
             .map(|x| x.flush(None))
             .collect::<Result<Vec<_>, _>>()?;
-
         RecordBatch::try_new(self.schema.clone(), arrays)
     }
 }
 
 #[derive(Debug)]
+struct EnumResolution { // copied from `ResolutionInfo::EnumMapping` when present
+    mapping: Arc<[i32]>, // NOTE(review): presumably writer index -> reader index; confirm
+    default_index: i32, // NOTE(review): presumably the reader index for unmapped symbols; confirm
+}
+
+#[derive(Debug, AsRefStr)]
 enum Decoder {
     Null(usize),
     Boolean(BooleanBufferBuilder),
     Int32(Vec<i32>),
     Int64(Vec<i64>),
+    #[cfg(feature = "avro_custom_types")]
+    DurationSecond(Vec<i64>),
+    #[cfg(feature = "avro_custom_types")]
+    DurationMillisecond(Vec<i64>),
+    #[cfg(feature = "avro_custom_types")]
+    DurationMicrosecond(Vec<i64>),
+    #[cfg(feature = "avro_custom_types")]
+    DurationNanosecond(Vec<i64>),
     Float32(Vec<f32>),
     Float64(Vec<f64>),
     Date32(Vec<i32>),
@@ -108,13 +206,22 @@ enum Decoder {
     TimeMicros(Vec<i64>),
     TimestampMillis(bool, Vec<i64>),
     TimestampMicros(bool, Vec<i64>),
+    TimestampNanos(bool, Vec<i64>),
+    Int32ToInt64(Vec<i64>),
+    Int32ToFloat32(Vec<f32>),
+    Int32ToFloat64(Vec<f64>),
+    Int64ToFloat32(Vec<f32>),
+    Int64ToFloat64(Vec<f64>),
+    Float32ToFloat64(Vec<f64>),
+    BytesToString(OffsetBufferBuilder<i32>, Vec<u8>),
+    StringToBytes(OffsetBufferBuilder<i32>, Vec<u8>),
     Binary(OffsetBufferBuilder<i32>, Vec<u8>),
     /// String data encoded as UTF-8 bytes, mapped to Arrow's StringArray
     String(OffsetBufferBuilder<i32>, Vec<u8>),
     /// String data encoded as UTF-8 bytes, but mapped to Arrow's StringViewArray
     StringView(OffsetBufferBuilder<i32>, Vec<u8>),
     Array(FieldRef, OffsetBufferBuilder<i32>, Box<Decoder>),
-    Record(Fields, Vec<Decoder>),
+    Record(Fields, Vec<Decoder>, Option<Projector>),
     Map(
         FieldRef,
         OffsetBufferBuilder<i32>,
@@ -122,44 +229,170 @@ enum Decoder {
         Vec<u8>,
         Box<Decoder>,
     ),
-    Nullable(Nullability, NullBufferBuilder, Box<Decoder>),
+    Fixed(i32, Vec<u8>),
+    Enum(Vec<i32>, Arc<[String]>, Option<EnumResolution>),
+    Duration(IntervalMonthDayNanoBuilder),
+    Uuid(Vec<u8>),
+    #[cfg(feature = "small_decimals")]
+    Decimal32(usize, Option<usize>, Option<usize>, Decimal32Builder),
+    #[cfg(feature = "small_decimals")]
+    Decimal64(usize, Option<usize>, Option<usize>, Decimal64Builder),
+    Decimal128(usize, Option<usize>, Option<usize>, Decimal128Builder),
+    Decimal256(usize, Option<usize>, Option<usize>, Decimal256Builder),
+    #[cfg(feature = "avro_custom_types")]
+    RunEndEncoded(u8, usize, Box<Decoder>),
+    Union(UnionDecoder),
+    Nullable(Nullability, NullBufferBuilder, Box<Decoder>, NullablePlan),
 }
 
 impl Decoder {
     fn try_new(data_type: &AvroDataType) -> Result<Self, ArrowError> {
-        let nyi = |s: &str| Err(ArrowError::NotYetImplemented(s.to_string()));
-
-        let decoder = match data_type.codec() {
-            Codec::Null => Self::Null(0),
-            Codec::Boolean => Self::Boolean(BooleanBufferBuilder::new(DEFAULT_CAPACITY)),
-            Codec::Int32 => Self::Int32(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::Int64 => Self::Int64(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::Float32 => Self::Float32(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::Float64 => Self::Float64(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::Binary => Self::Binary(
+        if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() {
+            if info.writer_is_union && !info.reader_is_union { // writer union, non-union reader
+                let mut clone = data_type.clone();
+                clone.resolution = None; // so the target is built without the Union resolution
+                let target = Box::new(Self::try_new_internal(&clone)?);
+                let decoder = Self::Union( // union decoder that feeds the single target decoder
+                    UnionDecoderBuilder::new()
+                        .with_resolved_union(info.clone())
+                        .with_target(target)
+                        .build()?,
+                );
+                return Ok(decoder);
+            }
+        }
+        Self::try_new_internal(data_type) // every other case: build directly from `data_type`
+    }
+
+    fn try_new_internal(data_type: &AvroDataType) -> Result<Self, ArrowError> {
+        // Extract just the Promotion (if any) to simplify pattern matching
+        let promotion = match data_type.resolution.as_ref() {
+            Some(ResolutionInfo::Promotion(p)) => Some(p),
+            _ => None,
+        };
+        let decoder = match (data_type.codec(), promotion) {
+            (Codec::Int64, Some(Promotion::IntToLong)) => {
+                Self::Int32ToInt64(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Float32, Some(Promotion::IntToFloat)) => {
+                Self::Int32ToFloat32(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Float64, Some(Promotion::IntToDouble)) => {
+                Self::Int32ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Float32, Some(Promotion::LongToFloat)) => {
+                Self::Int64ToFloat32(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Float64, Some(Promotion::LongToDouble)) => {
+                Self::Int64ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Float64, Some(Promotion::FloatToDouble)) => {
+                Self::Float32ToFloat64(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Utf8, Some(Promotion::BytesToString))
+            | (Codec::Utf8View, Some(Promotion::BytesToString)) => Self::BytesToString(
+                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+                Vec::with_capacity(DEFAULT_CAPACITY),
+            ),
+            (Codec::Binary, Some(Promotion::StringToBytes)) => Self::StringToBytes(
+                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+                Vec::with_capacity(DEFAULT_CAPACITY),
+            ),
+            (Codec::Null, _) => Self::Null(0),
+            (Codec::Boolean, _) => Self::Boolean(BooleanBufferBuilder::new(DEFAULT_CAPACITY)),
+            (Codec::Int32, _) => Self::Int32(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Int64, _) => Self::Int64(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Float32, _) => Self::Float32(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Float64, _) => Self::Float64(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Binary, _) => Self::Binary(
                 OffsetBufferBuilder::new(DEFAULT_CAPACITY),
                 Vec::with_capacity(DEFAULT_CAPACITY),
             ),
-            Codec::Utf8 => Self::String(
+            (Codec::Utf8, _) => Self::String(
                 OffsetBufferBuilder::new(DEFAULT_CAPACITY),
                 Vec::with_capacity(DEFAULT_CAPACITY),
             ),
-            Codec::Utf8View => Self::StringView(
+            (Codec::Utf8View, _) => Self::StringView(
                 OffsetBufferBuilder::new(DEFAULT_CAPACITY),
                 Vec::with_capacity(DEFAULT_CAPACITY),
             ),
-            Codec::Date32 => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::TimeMillis => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::TimeMicros => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)),
-            Codec::TimestampMillis(is_utc) => {
+            (Codec::Date32, _) => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::TimeMillis, _) => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::TimeMicros, _) => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::TimestampMillis(is_utc), _) => {
                 Self::TimestampMillis(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY))
             }
-            Codec::TimestampMicros(is_utc) => {
+            (Codec::TimestampMicros(is_utc), _) => {
                 Self::TimestampMicros(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY))
             }
-            Codec::Fixed(_) => return nyi("decoding fixed"),
-            Codec::Interval => return nyi("decoding interval"),
-            Codec::List(item) => {
+            (Codec::TimestampNanos(is_utc), _) => {
+                Self::TimestampNanos(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            (Codec::DurationNanos, _) => {
+                Self::DurationNanosecond(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            (Codec::DurationMicros, _) => {
+                Self::DurationMicrosecond(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            (Codec::DurationMillis, _) => {
+                Self::DurationMillisecond(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            (Codec::DurationSeconds, _) => {
+                Self::DurationSecond(Vec::with_capacity(DEFAULT_CAPACITY))
+            }
+            (Codec::Fixed(sz), _) => Self::Fixed(*sz, Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Decimal(precision, scale, size), _) => {
+                let p = *precision;
+                let s = *scale;
+                let prec = p as u8;
+                let scl = s.unwrap_or(0) as i8;
+                #[cfg(feature = "small_decimals")]
+                {
+                    if p <= DECIMAL32_MAX_PRECISION as usize {
+                        let builder = Decimal32Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal32(p, s, *size, builder)
+                    } else if p <= DECIMAL64_MAX_PRECISION as usize {
+                        let builder = Decimal64Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal64(p, s, *size, builder)
+                    } else if p <= DECIMAL128_MAX_PRECISION as usize {
+                        let builder = Decimal128Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal128(p, s, *size, builder)
+                    } else if p <= DECIMAL256_MAX_PRECISION as usize {
+                        let builder = Decimal256Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal256(p, s, *size, builder)
+                    } else {
+                        return Err(ArrowError::ParseError(format!(
+                            "Decimal precision {p} exceeds maximum supported"
+                        )));
+                    }
+                }
+                #[cfg(not(feature = "small_decimals"))]
+                {
+                    if p <= DECIMAL128_MAX_PRECISION as usize {
+                        let builder = Decimal128Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal128(p, s, *size, builder)
+                    } else if p <= DECIMAL256_MAX_PRECISION as usize {
+                        let builder = Decimal256Builder::with_capacity(DEFAULT_CAPACITY)
+                            .with_precision_and_scale(prec, scl)?;
+                        Self::Decimal256(p, s, *size, builder)
+                    } else {
+                        return Err(ArrowError::ParseError(format!(
+                            "Decimal precision {p} exceeds maximum supported"
+                        )));
+                    }
+                }
+            }
+            (Codec::Interval, _) => Self::Duration(IntervalMonthDayNanoBuilder::new()),
+            (Codec::List(item), _) => {
                 let decoder = Self::try_new(item)?;
                 Self::Array(
                     Arc::new(item.field_with_name("item")),
@@ -167,7 +400,17 @@ impl Decoder {
                     Box::new(decoder),
                 )
             }
-            Codec::Struct(fields) => {
+            (Codec::Enum(symbols), _) => {
+                let res = match data_type.resolution.as_ref() {
+                    Some(ResolutionInfo::EnumMapping(mapping)) => Some(EnumResolution {
+                        mapping: mapping.mapping.clone(),
+                        default_index: mapping.default_index,
+                    }),
+                    _ => None,
+                };
+                Self::Enum(Vec::with_capacity(DEFAULT_CAPACITY), symbols.clone(), res)
+            }
+            (Codec::Struct(fields), _) => {
                 let mut arrow_fields = Vec::with_capacity(fields.len());
                 let mut encodings = Vec::with_capacity(fields.len());
                 for avro_field in fields.iter() {
@@ -175,10 +418,16 @@ impl Decoder {
                     arrow_fields.push(avro_field.field());
                     encodings.push(encoding);
                 }
-                Self::Record(arrow_fields.into(), encodings)
+                let projector =
+                    if let Some(ResolutionInfo::Record(rec)) = data_type.resolution.as_ref() {
+                        Some(ProjectorBuilder::try_new(rec, fields).build()?)
+                    } else {
+                        None
+                    };
+                Self::Record(arrow_fields.into(), encodings, projector)
             }
-            Codec::Map(child) => {
-                let val_field = child.field_with_name("value").with_nullable(true);
+            (Codec::Map(child), _) => {
+                let val_field = child.field_with_name("value");
                 let map_field = Arc::new(ArrowField::new(
                     "entries",
                     DataType::Struct(Fields::from(vec![
@@ -196,42 +445,407 @@ impl Decoder {
                     Box::new(val_dec),
                 )
             }
+            (Codec::Uuid, _) => Self::Uuid(Vec::with_capacity(DEFAULT_CAPACITY)),
+            (Codec::Union(encodings, fields, UnionMode::Dense), _) => {
+                let decoders = encodings
+                    .iter()
+                    .map(Self::try_new_internal)
+                    .collect::<Result<Vec<_>, _>>()?;
+                if fields.len() != decoders.len() {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Union has {} fields but {} decoders",
+                        fields.len(),
+                        decoders.len()
+                    )));
+                }
+                // Proactive guard: if a user provides a union with more branches than
+                // a 32-bit Avro index can address, fail fast with a clear message.
+                let branch_count = decoders.len();
+                let max_addr = (i32::MAX as usize) + 1;
+                if branch_count > max_addr {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Union has {branch_count} branches, which exceeds the maximum addressable \
+                         branches by an Avro int tag ({} + 1).",
+                        i32::MAX
+                    )));
+                }
+                let mut builder = UnionDecoderBuilder::new()
+                    .with_fields(fields.clone())
+                    .with_branches(decoders);
+                if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() {
+                    if info.reader_is_union {
+                        builder = builder.with_resolved_union(info.clone());
+                    }
+                }
+                Self::Union(builder.build()?)
+            }
+            (Codec::Union(_, _, _), _) => {
+                return Err(ArrowError::NotYetImplemented(
+                    "Sparse Arrow unions are not yet supported".to_string(),
+                ));
+            }
+            #[cfg(feature = "avro_custom_types")]
+            (Codec::RunEndEncoded(values_dt, width_bits_or_bytes), _) => {
+                let inner = Self::try_new(values_dt)?;
+                let byte_width: u8 = match *width_bits_or_bytes {
+                    2 | 4 | 8 => *width_bits_or_bytes,
+                    16 => 2,
+                    32 => 4,
+                    64 => 8,
+                    other => {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Unsupported run-end width {other} for RunEndEncoded; \
+                             expected 16/32/64 bits or 2/4/8 bytes"
+                        )));
+                    }
+                };
+                Self::RunEndEncoded(byte_width, 0, Box::new(inner))
+            }
         };
-
         Ok(match data_type.nullability() {
-            Some(nullability) => Self::Nullable(
-                nullability,
-                NullBufferBuilder::new(DEFAULT_CAPACITY),
-                Box::new(decoder),
-            ),
+            Some(nullability) => {
+                // Default to reading a union branch tag unless the resolution proves otherwise.
+                let mut plan = NullablePlan::ReadTag;
+                if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() {
+                    if !info.writer_is_union && info.reader_is_union {
+                        if let Some(Some((_reader_idx, promo))) = info.writer_to_reader.first() {
+                            plan = NullablePlan::FromSingle { promotion: *promo };
+                        }
+                    }
+                }
+                Self::Nullable(
+                    nullability,
+                    NullBufferBuilder::new(DEFAULT_CAPACITY),
+                    Box::new(decoder),
+                    plan,
+                )
+            }
             None => decoder,
         })
     }
 
     /// Append a null record
-    fn append_null(&mut self) {
+    ///
+    /// Each arm appends one placeholder entry: a zero value, an empty length, or a nested
+    /// null. `Nullable` updates its child first, so a failing child adds no validity bit.
+    fn append_null(&mut self) -> Result<(), ArrowError> {
         match self {
             Self::Null(count) => *count += 1,
             Self::Boolean(b) => b.append(false),
             Self::Int32(v) | Self::Date32(v) | Self::TimeMillis(v) => v.push(0),
             Self::Int64(v)
+            | Self::Int32ToInt64(v)
             | Self::TimeMicros(v)
             | Self::TimestampMillis(_, v)
-            | Self::TimestampMicros(_, v) => v.push(0),
-            Self::Float32(v) => v.push(0.),
-            Self::Float64(v) => v.push(0.),
-            Self::Binary(offsets, _) | Self::String(offsets, _) | Self::StringView(offsets, _) => {
+            | Self::TimestampMicros(_, v)
+            | Self::TimestampNanos(_, v) => v.push(0),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationSecond(v)
+            | Self::DurationMillisecond(v)
+            | Self::DurationMicrosecond(v)
+            | Self::DurationNanosecond(v) => v.push(0),
+            Self::Float32(v) | Self::Int32ToFloat32(v) | Self::Int64ToFloat32(v) => v.push(0.),
+            Self::Float64(v)
+            | Self::Int32ToFloat64(v)
+            | Self::Int64ToFloat64(v)
+            | Self::Float32ToFloat64(v) => v.push(0.),
+            Self::Binary(offsets, _)
+            | Self::String(offsets, _)
+            | Self::StringView(offsets, _)
+            | Self::BytesToString(offsets, _)
+            | Self::StringToBytes(offsets, _) => {
                 offsets.push_length(0);
             }
-            Self::Array(_, offsets, e) => {
+            Self::Uuid(v) => v.extend([0; 16]),
+            Self::Array(_, offsets, _) => {
                 offsets.push_length(0);
-                e.append_null();
             }
-            Self::Record(_, e) => e.iter_mut().for_each(|e| e.append_null()),
+            Self::Record(_, e, _) => {
+                for encoding in e.iter_mut() {
+                    encoding.append_null()?;
+                }
+            }
             Self::Map(_, _koff, moff, _, _) => {
                 moff.push_length(0);
             }
-            Self::Nullable(_, _, _) => unreachable!("Nulls cannot be nested"),
+            Self::Fixed(sz, accum) => accum.extend(std::iter::repeat_n(0u8, *sz as usize)),
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal32(_, _, _, builder) => builder.append_value(0),
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal64(_, _, _, builder) => builder.append_value(0),
+            Self::Decimal128(_, _, _, builder) => builder.append_value(0),
+            Self::Decimal256(_, _, _, builder) => builder.append_value(i256::ZERO),
+            Self::Enum(indices, _, _) => indices.push(0),
+            Self::Duration(builder) => builder.append_null(),
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(_, len, inner) => {
+                *len += 1;
+                inner.append_null()?;
+            }
+            Self::Union(u) => u.append_null()?,
+            Self::Nullable(_, null_buffer, inner, _) => {
+                // Child first: if it fails, the validity buffer must not have grown.
+                inner.append_null()?;
+                null_buffer.append(false);
+            }
+        }
+        Ok(())
+    }
+
+    /// Append a single default literal into the decoder's buffers.
+    ///
+    /// A literal that does not fit the decoder yields `ArrowError::InvalidArgumentError`.
+    fn append_default(&mut self, lit: &AvroLiteral) -> Result<(), ArrowError> {
+        match self {
+            Self::Nullable(_, nb, inner, _) => {
+                // Update the child first so a failure cannot leave a stray validity bit.
+                let is_valid = !matches!(lit, AvroLiteral::Null);
+                if is_valid {
+                    inner.append_default(lit)?;
+                } else {
+                    inner.append_null()?;
+                }
+                nb.append(is_valid);
+                Ok(())
+            }
+            Self::Null(count) => match lit {
+                AvroLiteral::Null => {
+                    *count += 1;
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Non-null default for null type".to_string(),
+                )),
+            },
+            Self::Boolean(b) => match lit {
+                AvroLiteral::Boolean(v) => {
+                    b.append(*v);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for boolean must be boolean".to_string(),
+                )),
+            },
+            Self::Int32(v) | Self::Date32(v) | Self::TimeMillis(v) => match lit {
+                AvroLiteral::Int(i) => {
+                    v.push(*i);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for int32/date32/time-millis must be int".to_string(),
+                )),
+            },
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationSecond(v)
+            | Self::DurationMillisecond(v)
+            | Self::DurationMicrosecond(v)
+            | Self::DurationNanosecond(v) => match lit {
+                AvroLiteral::Long(i) => {
+                    v.push(*i);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for duration long must be long".to_string(),
+                )),
+            },
+            Self::Int64(v)
+            | Self::Int32ToInt64(v)
+            | Self::TimeMicros(v)
+            | Self::TimestampMillis(_, v)
+            | Self::TimestampMicros(_, v)
+            | Self::TimestampNanos(_, v) => match lit {
+                AvroLiteral::Long(i) => {
+                    v.push(*i);
+                    Ok(())
+                }
+                AvroLiteral::Int(i) => {
+                    v.push(*i as i64);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for long/time-micros/timestamp must be long or int".to_string(),
+                )),
+            },
+            Self::Float32(v) | Self::Int32ToFloat32(v) | Self::Int64ToFloat32(v) => match lit {
+                AvroLiteral::Float(f) => {
+                    v.push(*f);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for float must be float".to_string(),
+                )),
+            },
+            Self::Float64(v)
+            | Self::Int32ToFloat64(v)
+            | Self::Int64ToFloat64(v)
+            | Self::Float32ToFloat64(v) => match lit {
+                AvroLiteral::Double(f) => {
+                    v.push(*f);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for double must be double".to_string(),
+                )),
+            },
+            Self::Binary(offsets, values) | Self::StringToBytes(offsets, values) => match lit {
+                AvroLiteral::Bytes(b) => {
+                    offsets.push_length(b.len());
+                    values.extend_from_slice(b);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for bytes must be bytes".to_string(),
+                )),
+            },
+            Self::BytesToString(offsets, values)
+            | Self::String(offsets, values)
+            | Self::StringView(offsets, values) => match lit {
+                AvroLiteral::String(s) => {
+                    let b = s.as_bytes();
+                    offsets.push_length(b.len());
+                    values.extend_from_slice(b);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for string must be string".to_string(),
+                )),
+            },
+            Self::Uuid(values) => match lit {
+                AvroLiteral::String(s) => {
+                    let uuid = Uuid::try_parse(s).map_err(|e| {
+                        ArrowError::InvalidArgumentError(format!("Invalid UUID default: {s} ({e})"))
+                    })?;
+                    values.extend_from_slice(uuid.as_bytes());
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for uuid must be string".to_string(),
+                )),
+            },
+            Self::Fixed(sz, accum) => match lit {
+                AvroLiteral::Bytes(b) => {
+                    if b.len() != *sz as usize {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Fixed default length {} does not match size {sz}",
+                            b.len(),
+                        )));
+                    }
+                    accum.extend_from_slice(b);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for fixed must be bytes".to_string(),
+                )),
+            },
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal32(_, _, _, builder) => {
+                append_decimal_default!(lit, builder, 4, i32, "decimal32")
+            }
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal64(_, _, _, builder) => {
+                append_decimal_default!(lit, builder, 8, i64, "decimal64")
+            }
+            Self::Decimal128(_, _, _, builder) => {
+                append_decimal_default!(lit, builder, 16, i128, "decimal128")
+            }
+            Self::Decimal256(_, _, _, builder) => {
+                append_decimal_default!(lit, builder, 32, i256, "decimal256")
+            }
+            Self::Duration(builder) => match lit {
+                AvroLiteral::Bytes(b) => {
+                    if b.len() != 12 {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Duration default must be exactly 12 bytes, got {}",
+                            b.len()
+                        )));
+                    }
+                    let months = u32::from_le_bytes([b[0], b[1], b[2], b[3]]);
+                    let days = u32::from_le_bytes([b[4], b[5], b[6], b[7]]);
+                    let millis = u32::from_le_bytes([b[8], b[9], b[10], b[11]]);
+                    let nanos = (millis as i64) * 1_000_000;
+                    let interval = IntervalMonthDayNano::new(months as i32, days as i32, nanos);
+                    builder.append_value(interval);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for duration must be 12-byte little-endian months/days/millis"
+                        .to_string(),
+                )),
+            },
+            Self::Array(_, offsets, inner) => match lit {
+                AvroLiteral::Array(items) => {
+                    offsets.push_length(items.len());
+                    for item in items {
+                        inner.append_default(item)?;
+                    }
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for array must be an array literal".to_string(),
+                )),
+            },
+            Self::Map(_, koff, moff, kdata, valdec) => match lit {
+                AvroLiteral::Map(entries) => {
+                    moff.push_length(entries.len());
+                    for (k, v) in entries {
+                        let kb = k.as_bytes();
+                        koff.push_length(kb.len());
+                        kdata.extend_from_slice(kb);
+                        valdec.append_default(v)?;
+                    }
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for map must be a map/object literal".to_string(),
+                )),
+            },
+            Self::Enum(indices, symbols, _) => match lit {
+                AvroLiteral::Enum(sym) => {
+                    let pos = symbols.iter().position(|s| s == sym).ok_or_else(|| {
+                        ArrowError::InvalidArgumentError(format!(
+                            "Enum default symbol {sym:?} not in reader symbols"
+                        ))
+                    })?;
+                    indices.push(pos as i32);
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for enum must be a symbol".to_string(),
+                )),
+            },
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(_, len, inner) => {
+                *len += 1;
+                inner.append_default(lit)
+            }
+            Self::Union(u) => u.append_default(lit),
+            Self::Record(field_meta, decoders, projector) => match lit {
+                AvroLiteral::Map(entries) => {
+                    for (i, dec) in decoders.iter_mut().enumerate() {
+                        if let Some(sub) = entries.get(field_meta[i].name()) {
+                            dec.append_default(sub)?;
+                        } else if let Some(proj) = projector.as_ref() {
+                            proj.project_default(dec, i)?;
+                        } else {
+                            dec.append_null()?;
+                        }
+                    }
+                    Ok(())
+                }
+                AvroLiteral::Null => {
+                    for (i, dec) in decoders.iter_mut().enumerate() {
+                        if let Some(proj) = projector.as_ref() {
+                            proj.project_default(dec, i)?;
+                        } else {
+                            dec.append_null()?;
+                        }
+                    }
+                    Ok(())
+                }
+                _ => Err(ArrowError::InvalidArgumentError(
+                    "Default for record must be a map/object or null".to_string(),
+                )),
+            },
+        }
+    }
 
@@ -246,25 +860,51 @@ impl Decoder {
             Self::Int64(values)
             | Self::TimeMicros(values)
             | Self::TimestampMillis(_, values)
-            | Self::TimestampMicros(_, values) => values.push(buf.get_long()?),
+            | Self::TimestampMicros(_, values)
+            | Self::TimestampNanos(_, values) => values.push(buf.get_long()?),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationSecond(values)
+            | Self::DurationMillisecond(values)
+            | Self::DurationMicrosecond(values)
+            | Self::DurationNanosecond(values) => values.push(buf.get_long()?),
             Self::Float32(values) => values.push(buf.get_float()?),
             Self::Float64(values) => values.push(buf.get_double()?),
-            Self::Binary(offsets, values)
+            Self::Int32ToInt64(values) => values.push(buf.get_int()? as i64),
+            Self::Int32ToFloat32(values) => values.push(buf.get_int()? as f32),
+            Self::Int32ToFloat64(values) => values.push(buf.get_int()? as f64),
+            Self::Int64ToFloat32(values) => values.push(buf.get_long()? as f32),
+            Self::Int64ToFloat64(values) => values.push(buf.get_long()? as f64),
+            Self::Float32ToFloat64(values) => values.push(buf.get_float()? as f64),
+            Self::StringToBytes(offsets, values)
+            | Self::BytesToString(offsets, values)
+            | Self::Binary(offsets, values)
             | Self::String(offsets, values)
             | Self::StringView(offsets, values) => {
                 let data = buf.get_bytes()?;
                 offsets.push_length(data.len());
                 values.extend_from_slice(data);
             }
+            Self::Uuid(values) => {
+                let s_bytes = buf.get_bytes()?;
+                let s = std::str::from_utf8(s_bytes).map_err(|e| {
+                    ArrowError::ParseError(format!("UUID bytes are not valid UTF-8: {e}"))
+                })?;
+                let uuid = Uuid::try_parse(s)
+                    .map_err(|e| ArrowError::ParseError(format!("Failed to parse uuid: {e}")))?;
+                values.extend_from_slice(uuid.as_bytes());
+            }
             Self::Array(_, off, encoding) => {
                 let total_items = read_blocks(buf, |cursor| encoding.decode(cursor))?;
                 off.push_length(total_items);
             }
-            Self::Record(_, encodings) => {
+            Self::Record(_, encodings, None) => {
                 for encoding in encodings {
                     encoding.decode(buf)?;
                 }
             }
+            Self::Record(_, encodings, Some(proj)) => {
+                proj.project_record(buf, encodings)?;
+            }
             Self::Map(_, koff, moff, kdata, valdec) => {
                 let newly_added = read_blocks(buf, |cur| {
                     let kb = cur.get_bytes()?;
@@ -274,22 +914,150 @@ impl Decoder {
                 })?;
                 moff.push_length(newly_added);
             }
-            Self::Nullable(nullability, nulls, e) => {
-                let is_valid = buf.get_bool()? == matches!(nullability, Nullability::NullFirst);
-                nulls.append(is_valid);
-                match is_valid {
-                    true => e.decode(buf)?,
-                    false => e.append_null(),
+            Self::Fixed(sz, accum) => {
+                let fx = buf.get_fixed(*sz as usize)?;
+                accum.extend_from_slice(fx);
+            }
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal32(_, _, size, builder) => {
+                decode_decimal!(size, buf, builder, 4, i32);
+            }
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal64(_, _, size, builder) => {
+                decode_decimal!(size, buf, builder, 8, i64);
+            }
+            Self::Decimal128(_, _, size, builder) => {
+                decode_decimal!(size, buf, builder, 16, i128);
+            }
+            Self::Decimal256(_, _, size, builder) => {
+                decode_decimal!(size, buf, builder, 32, i256);
+            }
+            Self::Enum(indices, _, None) => {
+                indices.push(buf.get_int()?);
+            }
+            Self::Enum(indices, _, Some(res)) => {
+                let raw = buf.get_int()?;
+                let resolved = usize::try_from(raw)
+                    .ok()
+                    .and_then(|idx| res.mapping.get(idx).copied())
+                    .filter(|&idx| idx >= 0)
+                    .unwrap_or(res.default_index);
+                if resolved >= 0 {
+                    indices.push(resolved);
+                } else {
+                    return Err(ArrowError::ParseError(format!(
+                        "Enum symbol index {raw} not resolvable and no default provided",
+                    )));
+                }
+            }
+            Self::Duration(builder) => {
+                let b = buf.get_fixed(12)?;
+                let months = u32::from_le_bytes(b[0..4].try_into().unwrap());
+                let days = u32::from_le_bytes(b[4..8].try_into().unwrap());
+                let millis = u32::from_le_bytes(b[8..12].try_into().unwrap());
+                let nanos = (millis as i64) * 1_000_000;
+                builder.append_value(IntervalMonthDayNano::new(months as i32, days as i32, nanos));
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(_, len, inner) => {
+                *len += 1;
+                inner.decode(buf)?;
+            }
+            Self::Union(u) => u.decode(buf)?,
+            Self::Nullable(order, nb, encoding, plan) => {
+                match *plan {
+                    NullablePlan::FromSingle { promotion } => {
+                        encoding.decode_with_promotion(buf, promotion)?;
+                        nb.append(true);
+                    }
+                    NullablePlan::ReadTag => {
+                        let branch = buf.read_vlq()?;
+                        let is_not_null = match *order {
+                            Nullability::NullFirst => branch != 0,
+                            Nullability::NullSecond => branch == 0,
+                        };
+                        if is_not_null {
+                            // It is important to decode before appending to null buffer in case of decode error
+                            encoding.decode(buf)?;
+                        } else {
+                            encoding.append_null()?;
+                        }
+                        nb.append(is_not_null);
+                    }
                 }
             }
         }
         Ok(())
     }
 
+    /// Decode one value from `buf` into this decoder, converting it as `promotion` dictates.
+    /// A decoder variant that does not match the promotion yields `ArrowError::ParseError`.
+    fn decode_with_promotion(
+        &mut self,
+        buf: &mut AvroCursor<'_>,
+        promotion: Promotion,
+    ) -> Result<(), ArrowError> {
+        #[cfg(feature = "avro_custom_types")]
+        if let Self::RunEndEncoded(_, len, inner) = self {
+            *len += 1;
+            return inner.decode_with_promotion(buf, promotion);
+        }
+        macro_rules! promote_numeric_to {
+            ($variant:ident, $getter:ident, $to:ty) => {{
+                match self {
+                    Self::$variant(values) => {
+                        values.push(buf.$getter()? as $to);
+                        Ok(())
+                    }
+                    mismatch => Err(ArrowError::ParseError(format!(
+                        "Promotion {promotion} target mismatch: expected {}, got {}",
+                        stringify!($variant),
+                        <Self as ::std::convert::AsRef<str>>::as_ref(mismatch)
+                    ))),
+                }
+            }};
+        }
+        match promotion {
+            Promotion::Direct => self.decode(buf),
+            Promotion::IntToLong => promote_numeric_to!(Int64, get_int, i64),
+            Promotion::IntToFloat => promote_numeric_to!(Float32, get_int, f32),
+            Promotion::IntToDouble => promote_numeric_to!(Float64, get_int, f64),
+            Promotion::LongToFloat => promote_numeric_to!(Float32, get_long, f32),
+            Promotion::LongToDouble => promote_numeric_to!(Float64, get_long, f64),
+            Promotion::FloatToDouble => promote_numeric_to!(Float64, get_float, f64),
+            Promotion::StringToBytes => match self {
+                Self::Binary(offsets, values) | Self::StringToBytes(offsets, values) => {
+                    let payload = buf.get_bytes()?;
+                    offsets.push_length(payload.len());
+                    values.extend_from_slice(payload);
+                    Ok(())
+                }
+                mismatch => Err(ArrowError::ParseError(format!(
+                    "Promotion {promotion} target mismatch: expected bytes (Binary/StringToBytes), got {}",
+                    <Self as AsRef<str>>::as_ref(mismatch)
+                ))),
+            },
+            Promotion::BytesToString => match self {
+                Self::String(offsets, values)
+                | Self::StringView(offsets, values)
+                | Self::BytesToString(offsets, values) => {
+                    let payload = buf.get_bytes()?;
+                    offsets.push_length(payload.len());
+                    values.extend_from_slice(payload);
+                    Ok(())
+                }
+                mismatch => Err(ArrowError::ParseError(format!(
+                    "Promotion {promotion} target mismatch: expected string (String/StringView/BytesToString), got {}",
+                    <Self as AsRef<str>>::as_ref(mismatch)
+                ))),
+            },
+        }
+    }
+
     /// Flush decoded records to an [`ArrayRef`]
     fn flush(&mut self, nulls: Option<NullBuffer>) -> Result<ArrayRef, ArrowError> {
         Ok(match self {
-            Self::Nullable(_, n, e) => e.flush(n.finish())?,
+            Self::Nullable(_, n, e, _) => e.flush(n.finish())?,
             Self::Null(size) => Arc::new(NullArray::new(std::mem::replace(size, 0))),
             Self::Boolean(b) => Arc::new(BooleanArray::new(b.finish(), nulls)),
             Self::Int32(values) => Arc::new(flush_primitive::<Int32Type>(values, nulls)),
@@ -309,23 +1077,51 @@ impl Decoder {
                 flush_primitive::<TimestampMicrosecondType>(values, nulls)
                     .with_timezone_opt(is_utc.then(|| "+00:00")),
             ),
+            Self::TimestampNanos(is_utc, values) => Arc::new(
+                flush_primitive::<TimestampNanosecondType>(values, nulls)
+                    .with_timezone_opt(is_utc.then(|| "+00:00")),
+            ),
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationSecond(values) => {
+                Arc::new(flush_primitive::<DurationSecondType>(values, nulls))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationMillisecond(values) => {
+                Arc::new(flush_primitive::<DurationMillisecondType>(values, nulls))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationMicrosecond(values) => {
+                Arc::new(flush_primitive::<DurationMicrosecondType>(values, nulls))
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Self::DurationNanosecond(values) => {
+                Arc::new(flush_primitive::<DurationNanosecondType>(values, nulls))
+            }
             Self::Float32(values) => Arc::new(flush_primitive::<Float32Type>(values, nulls)),
             Self::Float64(values) => Arc::new(flush_primitive::<Float64Type>(values, nulls)),
-            Self::Binary(offsets, values) => {
+            Self::Int32ToInt64(values) => Arc::new(flush_primitive::<Int64Type>(values, nulls)),
+            Self::Int32ToFloat32(values) | Self::Int64ToFloat32(values) => {
+                Arc::new(flush_primitive::<Float32Type>(values, nulls))
+            }
+            Self::Int32ToFloat64(values)
+            | Self::Int64ToFloat64(values)
+            | Self::Float32ToFloat64(values) => {
+                Arc::new(flush_primitive::<Float64Type>(values, nulls))
+            }
+            Self::StringToBytes(offsets, values) | Self::Binary(offsets, values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values).into();
-                Arc::new(BinaryArray::new(offsets, values, nulls))
+                Arc::new(BinaryArray::try_new(offsets, values, nulls)?)
             }
-            Self::String(offsets, values) => {
+            Self::BytesToString(offsets, values) | Self::String(offsets, values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values).into();
-                Arc::new(StringArray::new(offsets, values, nulls))
+                Arc::new(StringArray::try_new(offsets, values, nulls)?)
             }
             Self::StringView(offsets, values) => {
                 let offsets = flush_offsets(offsets);
                 let values = flush_values(values);
-                let array = StringArray::new(offsets, values.into(), nulls.clone());
-
+                let array = StringArray::try_new(offsets, values.into(), nulls.clone())?;
                 let values: Vec<&str> = (0..array.len())
                     .map(|i| {
                         if array.is_valid(i) {
@@ -335,27 +1131,26 @@ impl Decoder {
                         }
                     })
                     .collect();
-
                 Arc::new(StringViewArray::from(values))
             }
             Self::Array(field, offsets, values) => {
                 let values = values.flush(None)?;
                 let offsets = flush_offsets(offsets);
-                Arc::new(ListArray::new(field.clone(), offsets, values, nulls))
+                Arc::new(ListArray::try_new(field.clone(), offsets, values, nulls)?)
             }
-            Self::Record(fields, encodings) => {
+            Self::Record(fields, encodings, _) => {
                 let arrays = encodings
                     .iter_mut()
                     .map(|x| x.flush(None))
                     .collect::<Result<Vec<_>, _>>()?;
-                Arc::new(StructArray::new(fields.clone(), arrays, nulls))
+                Arc::new(StructArray::try_new(fields.clone(), arrays, nulls)?)
             }
             Self::Map(map_field, k_off, m_off, kdata, valdec) => {
                 let moff = flush_offsets(m_off);
                 let koff = flush_offsets(k_off);
                 let kd = flush_values(kdata).into();
                 let val_arr = valdec.flush(None)?;
-                let key_arr = StringArray::new(koff, kd, None);
+                let key_arr = StringArray::try_new(koff, kd, None)?;
                 if key_arr.len() != val_arr.len() {
                     return Err(ArrowError::InvalidArgumentError(format!(
                         "Map keys length ({}) != map values length ({})",
@@ -372,94 +1167,1000 @@ impl Decoder {
                         )));
                     }
                 }
-                let entries_struct = StructArray::new(
-                    Fields::from(vec![
-                        Arc::new(ArrowField::new("key", DataType::Utf8, false)),
-                        Arc::new(ArrowField::new("value", val_arr.data_type().clone(), true)),
-                    ]),
-                    vec![Arc::new(key_arr), val_arr],
-                    None,
-                );
-                let map_arr = MapArray::new(map_field.clone(), moff, entries_struct, nulls, false);
+                let entries_fields = match map_field.data_type() {
+                    DataType::Struct(fields) => fields.clone(),
+                    other => {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Map entries field must be a Struct, got {other:?}"
+                        )));
+                    }
+                };
+                let entries_struct =
+                    StructArray::try_new(entries_fields, vec![Arc::new(key_arr), val_arr], None)?;
+                let map_arr =
+                    MapArray::try_new(map_field.clone(), moff, entries_struct, nulls, false)?;
                 Arc::new(map_arr)
             }
+            Self::Fixed(sz, accum) => {
+                let b: Buffer = flush_values(accum).into();
+                let arr = FixedSizeBinaryArray::try_new(*sz, b, nulls)
+                    .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+                Arc::new(arr)
+            }
+            Self::Uuid(values) => {
+                let arr = FixedSizeBinaryArray::try_new(16, std::mem::take(values).into(), nulls)
+                    .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+                Arc::new(arr)
+            }
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal32(precision, scale, _, builder) => {
+                flush_decimal!(builder, precision, scale, nulls, Decimal32Array)
+            }
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal64(precision, scale, _, builder) => {
+                flush_decimal!(builder, precision, scale, nulls, Decimal64Array)
+            }
+            Self::Decimal128(precision, scale, _, builder) => {
+                flush_decimal!(builder, precision, scale, nulls, Decimal128Array)
+            }
+            Self::Decimal256(precision, scale, _, builder) => {
+                flush_decimal!(builder, precision, scale, nulls, Decimal256Array)
+            }
+            Self::Enum(indices, symbols, _) => flush_dict(indices, symbols, nulls)?,
+            Self::Duration(builder) => {
+                let (_, vals, _) = builder.finish().into_parts();
+                let vals = IntervalMonthDayNanoArray::try_new(vals, nulls)
+                    .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+                Arc::new(vals)
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(width, len, inner) => {
+                let values = inner.flush(nulls)?;
+                let n = *len;
+                let arr = values.as_ref();
+                let mut run_starts: Vec<usize> = Vec::with_capacity(n);
+                if n > 0 {
+                    run_starts.push(0);
+                    for i in 1..n {
+                        if !values_equal_at(arr, i - 1, i) {
+                            run_starts.push(i);
+                        }
+                    }
+                }
+                if n > (u32::MAX as usize) {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "RunEndEncoded length {n} exceeds maximum supported by UInt32 indices for take",
+                    )));
+                }
+                let run_count = run_starts.len();
+                let take_idx: PrimitiveArray<UInt32Type> =
+                    run_starts.iter().map(|&s| s as u32).collect();
+                let per_run_values = if run_count == 0 {
+                    values.slice(0, 0)
+                } else {
+                    take(arr, &take_idx, Option::from(TakeOptions::default())).map_err(|e| {
+                        ArrowError::ParseError(format!("take() for REE values failed: {e}"))
+                    })?
+                };
+
+                macro_rules! build_run_array {
+                    ($Native:ty, $ArrowTy:ty) => {{
+                        let mut ends: Vec<$Native> = Vec::with_capacity(run_count);
+                        for (idx, &_start) in run_starts.iter().enumerate() {
+                            let end = if idx + 1 < run_count {
+                                run_starts[idx + 1]
+                            } else {
+                                n
+                            };
+                            ends.push(end as $Native);
+                        }
+                        let ends: PrimitiveArray<$ArrowTy> = ends.into_iter().collect();
+                        let run_arr = RunArray::<$ArrowTy>::try_new(&ends, per_run_values.as_ref())
+                            .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+                        Arc::new(run_arr) as ArrayRef
+                    }};
+                }
+                match *width {
+                    2 => {
+                        if n > i16::MAX as usize {
+                            return Err(ArrowError::InvalidArgumentError(format!(
+                                "RunEndEncoded length {n} exceeds i16::MAX for run end width 2"
+                            )));
+                        }
+                        build_run_array!(i16, Int16Type)
+                    }
+                    4 => build_run_array!(i32, Int32Type),
+                    8 => build_run_array!(i64, Int64Type),
+                    other => {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Unsupported run-end width {other} for RunEndEncoded"
+                        )));
+                    }
+                }
+            }
+            Self::Union(u) => u.flush(nulls)?,
         })
     }
 }
 
-fn read_blocks(
-    buf: &mut AvroCursor,
-    decode_entry: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
-) -> Result<usize, ArrowError> {
-    read_blockwise_items(buf, true, decode_entry)
+// Lookup table resolving a writer index to its reader index and promotion (see `UnionReadPlan`).
+#[derive(Debug)]
+struct DispatchLookupTable {
+    // Maps each writer index `w` to the corresponding reader index.
+    //
+    // Semantics:
+    // - `to_reader[w] >= 0`: The value is the reader index that receives the value written
+    //   at writer index `w`; `resolve` returns it together with `promotion[w]`.
+    // - `to_reader[w] == NO_SOURCE` (-1): No reader index matches writer index `w`, so
+    //   `resolve(w)` returns `None`.
+    //
+    // Representation (`i8`):
+    // Reader indices are stored as `i8` to keep the table dense. `from_writer_to_reader`
+    // returns a `SchemaError` for any reader index that exceeds `i8::MAX`.
+    //
+    // Invariants:
+    // - `to_reader.len() == promotion.len()`, with one entry per writer index.
+    // - If `to_reader[w] == NO_SOURCE`, `promotion[w]` is ignored.
+    to_reader: Box<[i8]>,
+    // For each writer index `w`, specifies the `Promotion` to apply to the writer's value.
+    //
+    // `from_writer_to_reader` fills unmapped entries with `Promotion::Direct` as a
+    // placeholder. It is ignored if `to_reader[w] == NO_SOURCE`.
+    promotion: Box<[Promotion]>,
 }
 
-fn read_blockwise_items(
-    buf: &mut AvroCursor,
-    read_size_after_negative: bool,
-    mut decode_fn: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
-) -> Result<usize, ArrowError> {
-    let mut total = 0usize;
-    loop {
-        // Read the block count
-        //  positive = that many items
-        //  negative = that many items + read block size
-        //  See: https://avro.apache.org/docs/1.11.1/specification/#maps
-        let block_count = buf.get_long()?;
-        match block_count.cmp(&0) {
-            Ordering::Equal => break,
-            Ordering::Less => {
-                // If block_count is negative, read the absolute value of count,
-                // then read the block size as a long and discard
-                let count = (-block_count) as usize;
-                if read_size_after_negative {
-                    let _size_in_bytes = buf.get_long()?;
-                }
-                for _ in 0..count {
-                    decode_fn(buf)?;
+// Sentinel stored in `DispatchLookupTable::to_reader` for a writer index that has
+// no matching reader index; `DispatchLookupTable::resolve` returns `None` for it.
+const NO_SOURCE: i8 = -1;
+
+impl DispatchLookupTable {
+    fn from_writer_to_reader(
+        promotion_map: &[Option<(usize, Promotion)>],
+    ) -> Result<Self, ArrowError> {
+        let mut to_reader = Vec::with_capacity(promotion_map.len());
+        let mut promotion = Vec::with_capacity(promotion_map.len());
+        for map in promotion_map {
+            match *map {
+                Some((idx, promo)) => {
+                    let idx_i8 = i8::try_from(idx).map_err(|_| {
+                        ArrowError::SchemaError(format!(
+                            "Reader branch index {idx} exceeds i8 range (max {})",
+                            i8::MAX
+                        ))
+                    })?;
+                    to_reader.push(idx_i8);
+                    promotion.push(promo);
                 }
-                total += count;
-            }
-            Ordering::Greater => {
-                // If block_count is positive, decode that many items
-                let count = block_count as usize;
-                for _i in 0..count {
-                    decode_fn(buf)?;
+                None => {
+                    to_reader.push(NO_SOURCE);
+                    promotion.push(Promotion::Direct);
                 }
-                total += count;
             }
         }
+        Ok(Self {
+            to_reader: to_reader.into_boxed_slice(),
+            promotion: promotion.into_boxed_slice(),
+        })
     }
-    Ok(total)
-}
 
-#[inline]
-fn flush_values<T>(values: &mut Vec<T>) -> Vec<T> {
-    std::mem::replace(values, Vec::with_capacity(DEFAULT_CAPACITY))
+    // Resolve a writer branch index to (reader_idx, promotion)
+    #[inline]
+    fn resolve(&self, writer_index: usize) -> Option<(usize, Promotion)> {
+        let reader_index = *self.to_reader.get(writer_index)?;
+        (reader_index >= 0).then(|| (reader_index as usize, self.promotion[writer_index]))
+    }
 }
 
-#[inline]
-fn flush_offsets(offsets: &mut OffsetBufferBuilder<i32>) -> OffsetBuffer<i32> {
-    std::mem::replace(offsets, OffsetBufferBuilder::new(DEFAULT_CAPACITY)).finish()
+#[derive(Debug)]
+struct UnionDecoder {
+    fields: UnionFields,
+    type_ids: Vec<i8>,
+    offsets: Vec<i32>,
+    branches: Vec<Decoder>,
+    counts: Vec<i32>,
+    reader_type_codes: Vec<i8>,
+    default_emit_idx: usize,
+    null_emit_idx: usize,
+    plan: UnionReadPlan,
 }
 
-#[inline]
-fn flush_primitive<T: ArrowPrimitiveType>(
-    values: &mut Vec<T::Native>,
-    nulls: Option<NullBuffer>,
-) -> PrimitiveArray<T> {
-    PrimitiveArray::new(flush_values(values).into(), nulls)
+impl Default for UnionDecoder {
+    fn default() -> Self {
+        Self {
+            fields: UnionFields::empty(),
+            type_ids: Vec::new(),
+            offsets: Vec::new(),
+            branches: Vec::new(),
+            counts: Vec::new(),
+            reader_type_codes: Vec::new(),
+            default_emit_idx: 0,
+            null_emit_idx: 0,
+            plan: UnionReadPlan::Passthrough,
+        }
+    }
 }
 
-const DEFAULT_CAPACITY: usize = 1024;
+#[derive(Debug)]
+enum UnionReadPlan {
+    ReaderUnion {
+        lookup_table: DispatchLookupTable,
+    },
+    FromSingle {
+        reader_idx: usize,
+        promotion: Promotion,
+    },
+    ToSingle {
+        target: Box<Decoder>,
+        lookup_table: DispatchLookupTable,
+    },
+    Passthrough,
+}
 
-#[cfg(test)]
+impl UnionDecoder {
+    fn try_new(
+        fields: UnionFields,
+        branches: Vec<Decoder>,
+        resolved: Option<ResolvedUnion>,
+    ) -> Result<Self, ArrowError> {
+        let reader_type_codes = fields.iter().map(|(tid, _)| tid).collect::<Vec<i8>>();
+        let null_branch = branches.iter().position(|b| matches!(b, Decoder::Null(_)));
+        let default_emit_idx = 0;
+        let null_emit_idx = null_branch.unwrap_or(default_emit_idx);
+        let branch_len = branches.len().max(reader_type_codes.len());
+        // Guard against impractically large unions that cannot be indexed by an Avro int
+        let max_addr = (i32::MAX as usize) + 1;
+        if branches.len() > max_addr {
+            return Err(ArrowError::SchemaError(format!(
+                "Reader union has {} branches, which exceeds the maximum addressable \
+                 branches by an Avro int tag ({} + 1).",
+                branches.len(),
+                i32::MAX
+            )));
+        }
+        Ok(Self {
+            fields,
+            type_ids: Vec::with_capacity(DEFAULT_CAPACITY),
+            offsets: Vec::with_capacity(DEFAULT_CAPACITY),
+            branches,
+            counts: vec![0; branch_len],
+            reader_type_codes,
+            default_emit_idx,
+            null_emit_idx,
+            plan: Self::plan_from_resolved(resolved)?,
+        })
+    }
+
+    fn try_new_from_writer_union(
+        info: ResolvedUnion,
+        target: Box<Decoder>,
+    ) -> Result<Self, ArrowError> {
+        // This constructor is only for writer-union to single-type resolution
+        debug_assert!(info.writer_is_union && !info.reader_is_union);
+        let lookup_table = DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?;
+        Ok(Self {
+            plan: UnionReadPlan::ToSingle {
+                target,
+                lookup_table,
+            },
+            ..Self::default()
+        })
+    }
+
+    fn plan_from_resolved(resolved: Option<ResolvedUnion>) -> Result<UnionReadPlan, ArrowError> {
+        let Some(info) = resolved else {
+            return Ok(UnionReadPlan::Passthrough);
+        };
+        match (info.writer_is_union, info.reader_is_union) {
+            (true, true) => {
+                let lookup_table =
+                    DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?;
+                Ok(UnionReadPlan::ReaderUnion { lookup_table })
+            }
+            (false, true) => {
+                let Some(&(reader_idx, promotion)) =
+                    info.writer_to_reader.first().and_then(Option::as_ref)
+                else {
+                    return Err(ArrowError::SchemaError(
+                        "Writer type does not match any reader union branch".to_string(),
+                    ));
+                };
+                Ok(UnionReadPlan::FromSingle {
+                    reader_idx,
+                    promotion,
+                })
+            }
+            (true, false) => Err(ArrowError::InvalidArgumentError(
+                "UnionDecoder::try_new cannot build writer-union to single; use UnionDecoderBuilder with a target"
+                    .to_string(),
+            )),
+            // (false, false) is invalid and should never be constructed by the resolver.
+            _ => Err(ArrowError::SchemaError(
+                "ResolvedUnion constructed for non-union sides; resolver should return None"
+                    .to_string(),
+            )),
+        }
+    }
+
+    /// Reads the zero-based union branch index that precedes every union value.
+    /// Fails with `ParseError` for a negative index or one that does not fit into `usize`.
+    #[inline]
+    fn read_tag(buf: &mut AvroCursor<'_>) -> Result<usize, ArrowError> {
+        // Avro 1.11.1 specifies the index as an *int*, older specs as a *long*. Both are
+        // zig-zag varints, so reading a long accepts either form.
+        let raw = buf.get_long()?;
+        match usize::try_from(raw) {
+            Ok(branch) => Ok(branch),
+            // The conversion also fails for negative values; report those distinctly.
+            Err(_) if raw < 0 => Err(ArrowError::ParseError(format!(
+                "Negative union branch index {raw}"
+            ))),
+            Err(_) => Err(ArrowError::ParseError(format!(
+                "Union branch index {raw} does not fit into usize on this platform ({}-bit)",
+                (usize::BITS as usize)
+            ))),
+        }
+    }
+
+    #[inline]
+    fn emit_to(&mut self, reader_idx: usize) -> Result<&mut Decoder, ArrowError> {
+        let branches_len = self.branches.len();
+        let Some(reader_branch) = self.branches.get_mut(reader_idx) else {
+            return Err(ArrowError::ParseError(format!(
+                "Union branch index {reader_idx} out of range ({branches_len} branches)"
+            )));
+        };
+        self.type_ids.push(self.reader_type_codes[reader_idx]);
+        self.offsets.push(self.counts[reader_idx]);
+        self.counts[reader_idx] += 1;
+        Ok(reader_branch)
+    }
+
+    #[inline]
+    fn on_decoder<F>(&mut self, fallback_idx: usize, action: F) -> Result<(), ArrowError>
+    where
+        F: FnOnce(&mut Decoder) -> Result<(), ArrowError>,
+    {
+        if let UnionReadPlan::ToSingle { target, .. } = &mut self.plan {
+            return action(target);
+        }
+        let reader_idx = match &self.plan {
+            UnionReadPlan::FromSingle { reader_idx, .. } => *reader_idx,
+            _ => fallback_idx,
+        };
+        self.emit_to(reader_idx).and_then(action)
+    }
+
+    fn append_null(&mut self) -> Result<(), ArrowError> {
+        self.on_decoder(self.null_emit_idx, |decoder| decoder.append_null())
+    }
+
+    fn append_default(&mut self, lit: &AvroLiteral) -> Result<(), ArrowError> {
+        self.on_decoder(self.default_emit_idx, |decoder| decoder.append_default(lit))
+    }
+
+    fn decode(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), ArrowError> {
+        let (reader_idx, promotion) = match &mut self.plan {
+            UnionReadPlan::Passthrough => (Self::read_tag(buf)?, Promotion::Direct),
+            UnionReadPlan::ReaderUnion { lookup_table } => {
+                let idx = Self::read_tag(buf)?;
+                lookup_table.resolve(idx).ok_or_else(|| {
+                    ArrowError::ParseError(format!(
+                        "Union branch index {idx} not resolvable by reader schema"
+                    ))
+                })?
+            }
+            UnionReadPlan::FromSingle {
+                reader_idx,
+                promotion,
+            } => (*reader_idx, *promotion),
+            UnionReadPlan::ToSingle {
+                target,
+                lookup_table,
+            } => {
+                let idx = Self::read_tag(buf)?;
+                return match lookup_table.resolve(idx) {
+                    Some((_, promotion)) => target.decode_with_promotion(buf, promotion),
+                    None => Err(ArrowError::ParseError(format!(
+                        "Writer union branch {idx} does not resolve to reader type"
+                    ))),
+                };
+            }
+        };
+        let decoder = self.emit_to(reader_idx)?;
+        decoder.decode_with_promotion(buf, promotion)
+    }
+
+    /// Emits the buffered rows as one array and resets this decoder for the next batch.
+    /// `ToSingle` plans delegate to `target`; every other plan builds a dense `UnionArray`.
+    /// Errors from child flushes or from `UnionArray::try_new` are returned to the caller.
+    fn flush(&mut self, nulls: Option<NullBuffer>) -> Result<ArrayRef, ArrowError> {
+        if let UnionReadPlan::ToSingle { target, .. } = &mut self.plan {
+            return target.flush(nulls);
+        }
+        debug_assert!(
+            nulls.is_none(),
+            "UnionArray does not accept a validity bitmap; \
+                     nulls should have been materialized as a Null child during decode"
+        );
+        let children: Vec<ArrayRef> =
+            self.branches.iter_mut().map(|d| d.flush(None)).collect::<Result<_, _>>()?;
+        // Every child is now empty, so the dense offsets `emit_to` derives from `counts`
+        // must restart at zero; stale counts would point past the next batch's children.
+        self.counts.fill(0);
+        let type_ids = flush_values(&mut self.type_ids).into_iter().collect();
+        let offsets = Some(flush_values(&mut self.offsets).into_iter().collect());
+        let arr = UnionArray::try_new(self.fields.clone(), type_ids, offsets, children)
+            .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+        Ok(Arc::new(arr))
+    }
+}
+
+#[derive(Debug, Default)]
+struct UnionDecoderBuilder {
+    fields: Option<UnionFields>,
+    branches: Option<Vec<Decoder>>,
+    resolved: Option<ResolvedUnion>,
+    target: Option<Box<Decoder>>,
+}
+
+impl UnionDecoderBuilder {
+    fn new() -> Self {
+        Self::default()
+    }
+
+    fn with_fields(mut self, fields: UnionFields) -> Self {
+        self.fields = Some(fields);
+        self
+    }
+
+    fn with_branches(mut self, branches: Vec<Decoder>) -> Self {
+        self.branches = Some(branches);
+        self
+    }
+
+    fn with_resolved_union(mut self, resolved_union: ResolvedUnion) -> Self {
+        self.resolved = Some(resolved_union);
+        self
+    }
+
+    fn with_target(mut self, target: Box<Decoder>) -> Self {
+        self.target = Some(target);
+        self
+    }
+
+    fn build(self) -> Result<UnionDecoder, ArrowError> {
+        match (self.resolved, self.fields, self.branches, self.target) {
+            (resolved, Some(fields), Some(branches), None) => {
+                UnionDecoder::try_new(fields, branches, resolved)
+            }
+            (Some(info), None, None, Some(target))
+                if info.writer_is_union && !info.reader_is_union =>
+            {
+                UnionDecoder::try_new_from_writer_union(info, target)
+            }
+            _ => Err(ArrowError::InvalidArgumentError(
+                "Invalid UnionDecoderBuilder configuration: expected either \
+                 (fields + branches + resolved) with no target for reader-unions, or \
+                 (resolved + target) with no fields/branches for writer-union to single."
+                    .to_string(),
+            )),
+        }
+    }
+}
+
+#[derive(Debug, Copy, Clone)]
+enum NegativeBlockBehavior {
+    ProcessItems,
+    SkipBySize,
+}
+
+/// Skips a block-encoded sequence and returns the number of items it announced.
+/// Blocks that carry a byte size are skipped wholesale (`SkipBySize`); blocks
+/// without one fall back to calling `skip_item` once per item.
+#[inline]
+fn skip_blocks(
+    buf: &mut AvroCursor,
+    skip_item: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    let on_negative_count = NegativeBlockBehavior::SkipBySize;
+    process_blockwise(buf, skip_item, on_negative_count)
+}
+
+/// Builds an `Int32`-keyed dictionary array from the buffered enum `indices`.
+/// The dictionary values are `symbols`; `indices` is drained via `flush_primitive`.
+#[inline]
+fn flush_dict(
+    indices: &mut Vec<i32>,
+    symbols: &[String],
+    nulls: Option<NullBuffer>,
+) -> Result<ArrayRef, ArrowError> {
+    let keys = flush_primitive::<Int32Type>(indices, nulls);
+    let dictionary = StringArray::from_iter_values(symbols.iter().map(String::as_str));
+    let array = DictionaryArray::try_new(keys, Arc::new(dictionary))
+        .map_err(|e| ArrowError::ParseError(e.to_string()))?;
+    Ok(Arc::new(array))
+}
+
+#[inline]
+fn read_blocks(
+    buf: &mut AvroCursor,
+    decode_entry: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+) -> Result<usize, ArrowError> {
+    process_blockwise(buf, decode_entry, NegativeBlockBehavior::ProcessItems)
+}
+
+/// Drives `on_item` over an Avro block-encoded sequence until the zero-count terminator
+/// and returns the number of items announced. A negative count is followed by the block's
+/// byte size: `ProcessItems` ignores it, `SkipBySize` skips that many bytes instead of
+/// calling `on_item`. Malformed counts/sizes yield `ParseError` rather than wrapping casts.
+#[inline]
+fn process_blockwise(
+    buf: &mut AvroCursor,
+    mut on_item: impl FnMut(&mut AvroCursor) -> Result<(), ArrowError>,
+    negative_behavior: NegativeBlockBehavior,
+) -> Result<usize, ArrowError> {
+    let mut total = 0usize;
+    loop {
+        // Block layout: https://avro.apache.org/docs/1.11.1/specification/#maps
+        let block_count = buf.get_long()?;
+        // `unsigned_abs` cannot overflow, unlike `-block_count` for `i64::MIN`.
+        let count = usize::try_from(block_count.unsigned_abs()).map_err(|_| {
+            ArrowError::ParseError(format!("Block count {block_count} does not fit into usize"))
+        })?;
+        let skip_size = match block_count.cmp(&0) {
+            Ordering::Equal => break,
+            Ordering::Greater => None,
+            Ordering::Less => {
+                // A negative count is followed by a long of the size in bytes
+                let size_in_bytes = buf.get_long()?;
+                matches!(negative_behavior, NegativeBlockBehavior::SkipBySize)
+                    .then_some(size_in_bytes)
+            }
+        };
+        if let Some(size) = skip_size {
+            // Skip the entire block payload at once; a negative size is malformed input
+            let size = usize::try_from(size)
+                .map_err(|_| ArrowError::ParseError(format!("Invalid block size {size}")))?;
+            let _ = buf.get_fixed(size)?;
+        } else {
+            for _ in 0..count {
+                on_item(buf)?;
+            }
+        }
+        total = total
+            .checked_add(count)
+            .ok_or_else(|| ArrowError::ParseError("Block item count overflow".to_string()))?;
+    }
+    Ok(total)
+}
+
+#[inline]
+fn flush_values<T>(values: &mut Vec<T>) -> Vec<T> {
+    std::mem::replace(values, Vec::with_capacity(DEFAULT_CAPACITY))
+}
+
+#[inline]
+fn flush_offsets(offsets: &mut OffsetBufferBuilder<i32>) -> OffsetBuffer<i32> {
+    std::mem::replace(offsets, OffsetBufferBuilder::new(DEFAULT_CAPACITY)).finish()
+}
+
+#[inline]
+fn flush_primitive<T: ArrowPrimitiveType>(
+    values: &mut Vec<T::Native>,
+    nulls: Option<NullBuffer>,
+) -> PrimitiveArray<T> {
+    PrimitiveArray::new(flush_values(values).into(), nulls)
+}
+
+/// Reads one Avro decimal payload and returns it as exactly `N` big-endian bytes.
+///
+/// `size` selects how the payload is read from `buf`:
+/// * `Some(n)`: exactly `n` bytes are taken with `get_fixed`.
+/// * `None`: the bytes are taken with `get_bytes`.
+///
+/// The bytes are then passed to `sign_cast_to`, which copies an `N`-byte payload
+/// unchanged, sign-extends a shorter one, and narrows a longer one only when that
+/// loses no information (otherwise it returns an error).
+#[inline]
+fn read_decimal_bytes_be<const N: usize>(
+    buf: &mut AvroCursor<'_>,
+    size: &Option<usize>,
+) -> Result<[u8; N], ArrowError> {
+    let raw = match *size {
+        Some(width) => buf.get_fixed(width)?,
+        None => buf.get_bytes()?,
+    };
+    // `sign_cast_to` already handles `raw.len() == N` as a plain copy, so the
+    // exact-width case needs no dedicated branch here.
+    sign_cast_to::<N>(raw)
+}
+
+/// Converts a big-endian two's-complement integer of arbitrary length into exactly `N`
+/// bytes. This matches Avro's decimal binary encoding, whose payload is a big-endian
+/// two's-complement integer that must keep its sign and value when it is resized.
+///
+/// * `raw.len() == N`: the bytes are copied unchanged.
+/// * `raw.len() < N`: the value is sign-extended on the left (empty input yields zero).
+/// * `raw.len() > N`: the leading `raw.len() - N` bytes are dropped. This is lossless
+///   only when every dropped byte equals the sign-extension byte and the MSB of the
+///   first kept byte carries the same sign; otherwise a `ParseError` is returned.
+///
+/// For `N = 2`: `[0x7F]` widens to `[0x00, 0x7F]` and `[0xFF]` widens to `[0xFF, 0xFF]`;
+/// `[0xFF, 0xFF, 0xFE]` narrows to `[0xFF, 0xFE]`, whereas `[0x00, 0x80, 0x00]` is
+/// rejected because the kept bytes alone would read as a negative number.
+#[inline]
+fn sign_cast_to<const N: usize>(raw: &[u8]) -> Result<[u8; N], ArrowError> {
+    let len = raw.len();
+    // Fast path: exact width, just copy
+    if len == N {
+        let mut exact = [0u8; N];
+        exact.copy_from_slice(raw);
+        return Ok(exact);
+    }
+    // Determine sign byte from MSB of first byte (empty => positive)
+    let is_negative = raw.first().map_or(false, |&byte| (byte & 0x80) != 0);
+    let sign_byte: u8 = if is_negative { 0xFF } else { 0x00 };
+    // Pre-fill with sign byte to support sign extension
+    let mut out = [sign_byte; N];
+    if len < N {
+        // Widening: right-align the payload over the sign fill.
+        out[N - len..].copy_from_slice(raw);
+        return Ok(out);
+    }
+    // Narrowing: split the input into the dropped prefix and the `N` kept bytes.
+    let (dropped, kept) = raw.split_at(len - N);
+    // Any non-sign byte in the truncated prefix indicates overflow
+    let prefix_is_sign_fill = dropped.iter().all(|&byte| byte == sign_byte);
+    // The first kept byte must carry the same sign bit (vacuously true when `N == 0`)
+    let kept_sign_matches = kept
+        .first()
+        .map_or(true, |&byte| ((byte ^ sign_byte) & 0x80) == 0);
+    if !prefix_is_sign_fill || !kept_sign_matches {
+        return Err(ArrowError::ParseError(format!(
+            "Decimal value with {} bytes cannot be represented in {} bytes without overflow",
+            len, N
+        )));
+    }
+    out.copy_from_slice(kept);
+    Ok(out)
+}
+
+/// Returns whether rows `i` and `j` of `arr` hold the same value; two nulls are equal.
+#[cfg(feature = "avro_custom_types")]
+#[inline]
+fn values_equal_at(arr: &dyn Array, i: usize, j: usize) -> bool {
+    let (left_null, right_null) = (arr.is_null(i), arr.is_null(j));
+    if left_null || right_null {
+        // Equal only when both sides are null; a null never equals a value.
+        return left_null && right_null;
+    }
+    // Both valid: compare the two single-row slices.
+    let (left, right) = (arr.slice(i, 1), arr.slice(j, 1));
+    left == right
+}
+
+#[derive(Debug)]
+struct Projector {
+    writer_to_reader: Arc<[Option<usize>]>,
+    skip_decoders: Vec<Option<Skipper>>,
+    field_defaults: Vec<Option<AvroLiteral>>,
+    default_injections: Arc<[(usize, AvroLiteral)]>,
+}
+
+#[derive(Debug)]
+struct ProjectorBuilder<'a> {
+    rec: &'a ResolvedRecord,
+    reader_fields: Arc<[AvroField]>,
+}
+
+impl<'a> ProjectorBuilder<'a> {
+    /// Pairs a resolved record with a shared handle to the reader's fields.
+    #[inline]
+    fn try_new(rec: &'a ResolvedRecord, reader_fields: &Arc<[AvroField]>) -> Self {
+        let reader_fields = Arc::clone(reader_fields);
+        Self { rec, reader_fields }
+    }
+
+    /// Builds the `Projector`: per-reader-field defaults, the defaults to inject after
+    /// each record, and one optional `Skipper` per entry of `skip_fields`.
+    #[inline]
+    fn build(self) -> Result<Projector, ArrowError> {
+        let Self { rec, reader_fields } = self;
+        // `Some` only where the field's resolution is `ResolutionInfo::DefaultValue`.
+        let field_defaults: Vec<Option<AvroLiteral>> = reader_fields
+            .iter()
+            .map(|field| match field.data_type().resolution.as_ref() {
+                Some(ResolutionInfo::DefaultValue(literal)) => Some(literal.clone()),
+                _ => None,
+            })
+            .collect();
+        // Pair every index in `default_fields` with its literal; an index without a stored
+        // literal falls back to `AvroLiteral::Null`.
+        let default_injections: Vec<(usize, AvroLiteral)> = rec
+            .default_fields
+            .iter()
+            .map(|&idx| {
+                let literal = field_defaults.get(idx).cloned().flatten();
+                (idx, literal.unwrap_or(AvroLiteral::Null))
+            })
+            .collect();
+        // The first `Skipper::from_avro` failure aborts the build.
+        let skip_decoders = rec
+            .skip_fields
+            .iter()
+            .map(|slot| match slot {
+                Some(datatype) => Skipper::from_avro(datatype).map(Some),
+                None => Ok(None),
+            })
+            .collect::<Result<Vec<Option<Skipper>>, ArrowError>>()?;
+        Ok(Projector {
+            writer_to_reader: rec.writer_to_reader.clone(),
+            skip_decoders,
+            field_defaults,
+            default_injections: default_injections.into(),
+        })
+    }
+}
+
+impl Projector {
+    /// Appends the default for reader field `index` to `decoder`, or a null when that
+    /// field has no stored default literal.
+    #[inline]
+    fn project_default(&self, decoder: &mut Decoder, index: usize) -> Result<(), ArrowError> {
+        // Invariant: `field_defaults` holds one slot per reader field (it is filled in
+        // `ProjectorBuilder::build`). The indexing below panics if `index` is out of
+        // range, so callers are expected to pass a valid reader-field index.
+        match self.field_defaults[index].as_ref() {
+            Some(default_literal) => decoder.append_default(default_literal),
+            None => decoder.append_null(),
+        }
+    }
+
+    /// Consumes one writer record from `buf`: every mapped writer field is decoded into
+    /// its reader decoder in `encodings`, every unmapped field is skipped, and finally
+    /// the stored default literals are appended to their reader decoders.
+    #[inline]
+    fn project_record(
+        &mut self,
+        buf: &mut AvroCursor<'_>,
+        encodings: &mut [Decoder],
+    ) -> Result<(), ArrowError> {
+        debug_assert_eq!(
+            self.writer_to_reader.len(),
+            self.skip_decoders.len(),
+            "internal invariant: mapping and skipper lists must have equal length"
+        );
+        let skippers = self.skip_decoders.iter_mut();
+        for (i, (mapping, skipper_opt)) in self.writer_to_reader.iter().zip(skippers).enumerate() {
+            // Mapped writer fields are decoded straight into the reader's decoder.
+            if let Some(reader_index) = mapping {
+                encodings[*reader_index].decode(buf)?;
+                continue;
+            }
+            // Writer-only fields still have to be read past, which needs a skipper.
+            let Some(skipper) = skipper_opt.as_mut() else {
+                return Err(ArrowError::SchemaError(format!(
+                    "No skipper available for writer-only field at index {i}",
+                )));
+            };
+            skipper.skip(buf)?;
+        }
+        // Append the stored default literals; the first failure is returned.
+        self.default_injections
+            .iter()
+            .try_for_each(|(reader_index, lit)| encodings[*reader_index].append_default(lit))
+    }
+}
+
+/// Lightweight skipper for non-projected writer fields
+/// (fields present in the writer schema but omitted by the reader/projection);
+/// per Avro 1.11.1 schema resolution these fields are ignored.
+///
+/// <https://avro.apache.org/docs/1.11.1/specification/#schema-resolution>
+#[derive(Debug)]
+enum Skipper {
+    Null, // consumes no bytes
+    Boolean, // skipped with `get_bool`
+    Int32, // skipped with `get_int`; also built for `Date32` and `TimeMillis`
+    Int64, // skipped with `get_long`
+    Float32, // skipped with `get_float`
+    Float64, // skipped with `get_double`
+    Bytes, // skipped with `get_bytes`
+    String, // skipped with `get_bytes`; built for `Utf8` and `Utf8View`
+    TimeMicros, // skipped with `get_long`
+    TimestampMillis, // skipped with `get_long`
+    TimestampMicros, // skipped with `get_long`
+    TimestampNanos, // skipped with `get_long`
+    Fixed(usize), // payload: byte count passed to `get_fixed`
+    Decimal(Option<usize>), // `Some(n)`: `get_fixed(n)`; `None`: `get_bytes`
+    UuidString, // skipped with `get_bytes`
+    Enum, // skipped with `get_int`
+    DurationFixed12, // skipped with `get_fixed(12)`; built for `Codec::Interval`
+    List(Box<Skipper>), // payload: skipper applied to each item
+    Map(Box<Skipper>), // payload: skipper for values; keys are skipped with `get_bytes`
+    Struct(Vec<Skipper>), // payload: one skipper per field, applied in order
+    Union(Vec<Skipper>), // payload: one skipper per branch, selected by the decoded tag
+    Nullable(Nullability, Box<Skipper>), // null ordering plus the skipper for the non-null value
+    #[cfg(feature = "avro_custom_types")]
+    RunEndEncoded(Box<Skipper>), // delegates to the wrapped skipper
+}
+
+impl Skipper {
+    /// Builds the skipper that matches the codec of `dt`.
+    ///
+    /// Nested codecs (list items, map values, struct fields, union branches) are
+    /// converted recursively, and a nullable `dt` is wrapped in `Skipper::Nullable`.
+    ///
+    /// # Errors
+    /// Returns `ArrowError::SchemaError` when a union has more than `i32::MAX + 1`
+    /// branches, and propagates any error from a nested conversion.
+    fn from_avro(dt: &AvroDataType) -> Result<Self, ArrowError> {
+        let skipper = match dt.codec() {
+            // Primitive, time and timestamp codecs.
+            Codec::Null => Self::Null,
+            Codec::Boolean => Self::Boolean,
+            Codec::Float32 => Self::Float32,
+            Codec::Float64 => Self::Float64,
+            Codec::Int32 | Codec::Date32 | Codec::TimeMillis => Self::Int32,
+            Codec::Int64 => Self::Int64,
+            #[cfg(feature = "avro_custom_types")]
+            Codec::DurationNanos
+            | Codec::DurationMicros
+            | Codec::DurationMillis
+            | Codec::DurationSeconds => Self::Int64,
+            Codec::TimeMicros => Self::TimeMicros,
+            Codec::TimestampMillis(_) => Self::TimestampMillis,
+            Codec::TimestampMicros(_) => Self::TimestampMicros,
+            Codec::TimestampNanos(_) => Self::TimestampNanos,
+            // Byte payloads, enum and interval codecs.
+            Codec::Binary => Self::Bytes,
+            Codec::Utf8 | Codec::Utf8View => Self::String,
+            Codec::Uuid => Self::UuidString, // encoded as string
+            Codec::Fixed(sz) => Self::Fixed(*sz as usize),
+            Codec::Decimal(_, _, size) => Self::Decimal(*size),
+            Codec::Interval => Self::DurationFixed12,
+            Codec::Enum(_) => Self::Enum,
+            // Containers recurse into their element types.
+            Codec::List(item) => Self::List(Box::new(Self::from_avro(item)?)),
+            Codec::Map(values) => Self::Map(Box::new(Self::from_avro(values)?)),
+            Codec::Struct(fields) => {
+                let children: Result<Vec<Skipper>, ArrowError> = fields
+                    .iter()
+                    .map(|f| Self::from_avro(f.data_type()))
+                    .collect();
+                Self::Struct(children?)
+            }
+            Codec::Union(encodings, _, _) => {
+                let max_addr = (i32::MAX as usize) + 1;
+                if encodings.len() > max_addr {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Writer union has {} branches, which exceeds the maximum addressable \
+                         branches by an Avro int tag ({} + 1).",
+                        encodings.len(),
+                        i32::MAX
+                    )));
+                }
+                let branches: Result<Vec<Skipper>, ArrowError> =
+                    encodings.iter().map(Self::from_avro).collect();
+                Self::Union(branches?)
+            }
+            #[cfg(feature = "avro_custom_types")]
+            Codec::RunEndEncoded(inner, _w) => {
+                Self::RunEndEncoded(Box::new(Self::from_avro(inner)?))
+            }
+        };
+        Ok(match dt.nullability() {
+            Some(n) => Self::Nullable(n, Box::new(skipper)),
+            None => skipper,
+        })
+    }
+
+    /// Advances `buf` past one value described by this skipper, discarding what is read.
+    ///
+    /// # Errors
+    /// Propagates cursor read errors and returns `ArrowError::ParseError` for a
+    /// negative, oversized, or out-of-range union branch index.
+    fn skip(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), ArrowError> {
+        match self {
+            // Nothing is read from the cursor for a null.
+            Self::Null => {}
+            Self::Boolean => {
+                buf.get_bool()?;
+            }
+            Self::Int32 | Self::Enum => {
+                buf.get_int()?;
+            }
+            Self::Int64
+            | Self::TimeMicros
+            | Self::TimestampMillis
+            | Self::TimestampMicros
+            | Self::TimestampNanos => {
+                buf.get_long()?;
+            }
+            Self::Float32 => {
+                buf.get_float()?;
+            }
+            Self::Float64 => {
+                buf.get_double()?;
+            }
+            // Payloads read with `get_bytes`, including decimals that carry no fixed size.
+            Self::Bytes | Self::String | Self::UuidString | Self::Decimal(None) => {
+                buf.get_bytes()?;
+            }
+            // Payloads whose byte count is stored in the skipper itself.
+            Self::Fixed(len) | Self::Decimal(Some(len)) => {
+                buf.get_fixed(*len)?;
+            }
+            Self::DurationFixed12 => {
+                buf.get_fixed(12)?;
+            }
+            // Lists and maps are walked with `skip_blocks`; each map entry starts with its key.
+            Self::List(item) => {
+                skip_blocks(buf, |c| item.skip(c))?;
+            }
+            Self::Map(value) => {
+                skip_blocks(buf, |c| {
+                    c.get_bytes()?; // key
+                    value.skip(c)
+                })?;
+            }
+            Self::Struct(fields) => {
+                for field in fields {
+                    field.skip(buf)?;
+                }
+            }
+            Self::Union(encodings) => {
+                // Union tag must be ZigZag-decoded
+                let raw = buf.get_long()?;
+                if raw < 0 {
+                    return Err(ArrowError::ParseError(format!(
+                        "Negative union branch index {raw}"
+                    )));
+                }
+                let idx = usize::try_from(raw).map_err(|_| {
+                    ArrowError::ParseError(format!(
+                        "Union branch index {raw} does not fit into usize on this platform ({}-bit)",
+                        usize::BITS
+                    ))
+                })?;
+                let branch_count = encodings.len();
+                match encodings.get_mut(idx) {
+                    Some(encoding) => encoding.skip(buf)?,
+                    None => {
+                        return Err(ArrowError::ParseError(format!(
+                            "Union branch index {idx} out of range for skipper ({} branches)",
+                            branch_count
+                        )));
+                    }
+                }
+            }
+            // Branch 0 is the null branch for `NullFirst`; any other value is for `NullSecond`.
+            Self::Nullable(order, inner) => {
+                let branch = buf.read_vlq()?;
+                let null_branch_taken = match *order {
+                    Nullability::NullFirst => branch == 0,
+                    Nullability::NullSecond => branch != 0,
+                };
+                if !null_branch_taken {
+                    inner.skip(buf)?;
+                }
+            }
+            // Delegates to the wrapped skipper.
+            #[cfg(feature = "avro_custom_types")]
+            Self::RunEndEncoded(inner) => inner.skip(buf)?,
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::{
-        cast::AsArray, Array, Decimal128Array, DictionaryArray, FixedSizeBinaryArray,
-        IntervalMonthDayNanoArray, ListArray, MapArray, StringArray, StructArray,
-    };
+    use crate::codec::AvroFieldBuilder;
+    use crate::schema::{Attributes, ComplexType, Field, PrimitiveType, Record, Schema, TypeName};
+    use arrow_array::cast::AsArray;
+    use indexmap::IndexMap;
+    use std::collections::HashMap;
 
     fn encode_avro_int(value: i32) -> Vec<u8> {
         let mut buf = Vec::new();
@@ -493,40 +2194,512 @@ mod tests {
         AvroDataType::new(codec, Default::default(), None)
     }
 
-    #[test]
-    fn test_map_decoding_one_entry() {
-        let value_type = avro_from_codec(Codec::Utf8);
-        let map_type = avro_from_codec(Codec::Map(Arc::new(value_type)));
-        let mut decoder = Decoder::try_new(&map_type).unwrap();
-        // Encode a single map with one entry: {"hello": "world"}
-        let mut data = Vec::new();
-        data.extend_from_slice(&encode_avro_long(1));
-        data.extend_from_slice(&encode_avro_bytes(b"hello")); // key
-        data.extend_from_slice(&encode_avro_bytes(b"world")); // value
-        data.extend_from_slice(&encode_avro_long(0));
-        let mut cursor = AvroCursor::new(&data);
-        decoder.decode(&mut cursor).unwrap();
-        let array = decoder.flush(None).unwrap();
-        let map_arr = array.as_any().downcast_ref::<MapArray>().unwrap();
-        assert_eq!(map_arr.len(), 1); // one map
-        assert_eq!(map_arr.value_length(0), 1);
-        let entries = map_arr.value(0);
-        let struct_entries = entries.as_any().downcast_ref::<StructArray>().unwrap();
-        assert_eq!(struct_entries.len(), 1);
-        let key_arr = struct_entries
-            .column_by_name("key")
-            .unwrap()
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        let val_arr = struct_entries
-            .column_by_name("value")
-            .unwrap()
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap();
-        assert_eq!(key_arr.value(0), "hello");
-        assert_eq!(val_arr.value(0), "world");
+    /// Resolves `writer` against `reader` and returns the resolved datatype of the value.
+    ///
+    /// Each schema is wrapped in a record named `Root` with a single field `v`, the pair
+    /// is resolved through `AvroFieldBuilder`, and the datatype of that single field is
+    /// returned.
+    ///
+    /// Panics if resolution fails or if the resolved root is not a struct.
+    fn resolved_root_datatype(
+        writer: Schema<'static>,
+        reader: Schema<'static>,
+        use_utf8view: bool,
+        strict_mode: bool,
+    ) -> AvroDataType {
+        // Writer and reader get the identical wrapper, so the only difference between
+        // the two records is the type of field `v`.
+        let wrap_in_root = |inner: Schema<'static>| {
+            let only_field = Field {
+                name: "v",
+                r#type: inner,
+                default: None,
+                doc: None,
+                aliases: vec![],
+            };
+            Schema::Complex(ComplexType::Record(Record {
+                name: "Root",
+                namespace: None,
+                doc: None,
+                aliases: vec![],
+                fields: vec![only_field],
+                attributes: Attributes::default(),
+            }))
+        };
+        let writer_record = wrap_in_root(writer);
+        let reader_record = wrap_in_root(reader);
+
+        // Resolve the writer record against the reader record with the requested
+        // string representation and strictness.
+        let field = AvroFieldBuilder::new(&writer_record)
+            .with_reader_schema(&reader_record)
+            .with_utf8view(use_utf8view)
+            .with_strict_mode(strict_mode)
+            .build()
+            .expect("schema resolution should succeed");
+
+        // The resolved root is expected to be the wrapper struct; hand back the
+        // datatype of its single field.
+        match field.data_type().codec() {
+            Codec::Struct(fields) => fields[0].data_type().clone(),
+            other => panic!("expected wrapper struct, got {other:?}"),
+        }
+    }
+
+    fn decoder_for_promotion(
+        writer: PrimitiveType,
+        reader: PrimitiveType,
+        use_utf8view: bool,
+    ) -> Decoder {
+        // Lift both primitives into schemas, resolve them non-strictly, build the decoder.
+        let prim = |p: PrimitiveType| Schema::TypeName(TypeName::Primitive(p));
+        let resolved = resolved_root_datatype(prim(writer), prim(reader), use_utf8view, false);
+        Decoder::try_new(&resolved).unwrap()
+    }
+
+    fn make_avro_dt(codec: Codec, nullability: Option<Nullability>) -> AvroDataType {
+        AvroDataType::new(codec, Default::default(), nullability) // empty metadata map
+    }
+
+    #[cfg(feature = "avro_custom_types")]
+    fn encode_vlq_u64(mut x: u64) -> Vec<u8> {
+        let mut bytes = Vec::with_capacity(10); // 10 bytes hold any u64 at 7 bits per byte
+        while x > 0x7f {
+            // More than 7 bits remain: emit the low 7 bits with the continuation flag set.
+            bytes.push((x & 0x7f) as u8 | 0x80);
+            x >>= 7;
+        }
+        bytes.push(x as u8);
+        bytes
+    }
+
+    #[test]
+    fn test_union_resolution_writer_union_reader_union_reorder_and_promotion_dense() {
+        let writer_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+        ]);
+        let reader_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+        ]);
+        // The reader lists the branches in the opposite order and widens int to long.
+        let resolved = resolved_root_datatype(writer_schema, reader_schema, false, false);
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        // Writer branch 0 (int) carrying 7.
+        let mut int_record = encode_avro_long(0);
+        int_record.extend(encode_avro_int(7));
+        let mut int_cursor = AvroCursor::new(&int_record);
+        decoder.decode(&mut int_cursor).unwrap();
+        // Writer branch 1 (string) carrying "abc".
+        let mut str_record = encode_avro_long(1);
+        str_record.extend(encode_avro_bytes(b"abc"));
+        let mut str_cursor = AvroCursor::new(&str_record);
+        decoder.decode(&mut str_cursor).unwrap();
+
+        let flushed = decoder.flush(None).unwrap();
+        let dense = flushed
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("dense union output");
+
+        assert_eq!(
+            dense.type_id(0),
+            1,
+            "first value must select reader 'long' branch"
+        );
+        assert_eq!(dense.value_offset(0), 0);
+
+        assert_eq!(
+            dense.type_id(1),
+            0,
+            "second value must select reader 'string' branch"
+        );
+        assert_eq!(dense.value_offset(1), 0);
+
+        let longs = dense.child(1).as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.len(), 1);
+        assert_eq!(longs.value(0), 7);
+
+        let strings = dense.child(0).as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings.value(0), "abc");
+    }
+
+    #[test]
+    fn test_union_resolution_writer_union_reader_nonunion_promotion_int_to_long() {
+        let writer_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+        ]);
+        let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long));
+        // Resolve the writer union against the plain `long` reader.
+        let resolved = resolved_root_datatype(writer_schema, reader_schema, false, false);
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        // Writer branch 0 (int) carrying 5.
+        let mut payload = encode_avro_long(0);
+        payload.extend(encode_avro_int(5));
+        let mut cursor = AvroCursor::new(&payload);
+        decoder.decode(&mut cursor).unwrap();
+
+        let flushed = decoder.flush(None).unwrap();
+        let longs = flushed.as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.len(), 1);
+        assert_eq!(longs.value(0), 5);
+    }
+
+    #[test]
+    fn test_union_resolution_writer_union_reader_nonunion_mismatch_errors() {
+        let writer_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+        ]);
+        let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long));
+        // Resolve the writer union against the plain `long` reader.
+        let resolved = resolved_root_datatype(writer_schema, reader_schema, false, false);
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        // Writer branch 1 (string) carrying "z".
+        let mut payload = encode_avro_long(1);
+        payload.extend(encode_avro_bytes(b"z"));
+        let mut cursor = AvroCursor::new(&payload);
+        let outcome = decoder.decode(&mut cursor);
+        assert!(
+            outcome.is_err(),
+            "expected error when writer union branch does not resolve to reader non-union type"
+        );
+    }
+
+    #[test]
+    fn test_union_resolution_writer_nonunion_reader_union_selects_matching_branch() {
+        let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
+        let reader_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+        ]);
+        // Resolve the plain `int` writer against the reader union.
+        let resolved = resolved_root_datatype(writer_schema, reader_schema, false, false);
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        // The payload is a bare int with no union branch index in front of it.
+        let payload = encode_avro_int(6);
+        let mut cursor = AvroCursor::new(&payload);
+        decoder.decode(&mut cursor).unwrap();
+
+        let flushed = decoder.flush(None).unwrap();
+        let dense = flushed
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("dense union output");
+        assert_eq!(dense.len(), 1);
+        assert_eq!(
+            dense.type_id(0),
+            1,
+            "must resolve to reader 'long' branch (type_id 1)"
+        );
+        assert_eq!(dense.value_offset(0), 0);
+
+        let longs = dense.child(1).as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.len(), 1);
+        assert_eq!(longs.value(0), 6);
+
+        let strings = dense.child(0).as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(strings.len(), 0, "string branch must be empty");
+    }
+
+    #[test]
+    fn test_union_resolution_writer_union_reader_union_unmapped_branch_errors() {
+        let writer_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Boolean)),
+        ]);
+        let reader_schema = Schema::Union(vec![
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+            Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
+        ]);
+        // The reader union offers no branch for the writer's boolean.
+        let resolved = resolved_root_datatype(writer_schema, reader_schema, false, false);
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        // Writer branch 1 (boolean) followed by a single payload byte.
+        let mut payload = encode_avro_long(1);
+        payload.push(1);
+        let mut cursor = AvroCursor::new(&payload);
+        let outcome = decoder.decode(&mut cursor);
+        assert!(
+            outcome.is_err(),
+            "expected error for unmapped writer 'boolean' branch"
+        );
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_int_to_long() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Long, false);
+        assert!(matches!(decoder, Decoder::Int32ToInt64(_)));
+        for value in [0, 1, -2, 123456] {
+            let encoded = encode_avro_int(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let longs = flushed.as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.value(0), 0);
+        assert_eq!(longs.value(1), 1);
+        assert_eq!(longs.value(2), -2);
+        assert_eq!(longs.value(3), 123456);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_int_to_float() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Float, false);
+        assert!(matches!(decoder, Decoder::Int32ToFloat32(_)));
+        for value in [0, 42, -7] {
+            let encoded = encode_avro_int(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let floats = flushed.as_any().downcast_ref::<Float32Array>().unwrap();
+        assert_eq!(floats.value(0), 0.0);
+        assert_eq!(floats.value(1), 42.0);
+        assert_eq!(floats.value(2), -7.0);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_int_to_double() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Int, PrimitiveType::Double, false);
+        assert!(matches!(decoder, Decoder::Int32ToFloat64(_)));
+        for value in [1, -1, 10_000] {
+            let encoded = encode_avro_int(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
+        assert_eq!(doubles.value(0), 1.0);
+        assert_eq!(doubles.value(1), -1.0);
+        assert_eq!(doubles.value(2), 10_000.0);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_long_to_float() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Long, PrimitiveType::Float, false);
+        assert!(matches!(decoder, Decoder::Int64ToFloat32(_)));
+        for value in [0_i64, 1_000_000_i64, -123_i64] {
+            let encoded = encode_avro_long(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let floats = flushed.as_any().downcast_ref::<Float32Array>().unwrap();
+        assert_eq!(floats.value(0), 0.0);
+        assert_eq!(floats.value(1), 1_000_000.0);
+        assert_eq!(floats.value(2), -123.0);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_long_to_double() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Long, PrimitiveType::Double, false);
+        assert!(matches!(decoder, Decoder::Int64ToFloat64(_)));
+        for value in [2_i64, -2_i64, 9_223_372_i64] {
+            let encoded = encode_avro_long(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
+        assert_eq!(doubles.value(0), 2.0);
+        assert_eq!(doubles.value(1), -2.0);
+        assert_eq!(doubles.value(2), 9_223_372.0);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_float_to_double() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Float, PrimitiveType::Double, false);
+        assert!(matches!(decoder, Decoder::Float32ToFloat64(_)));
+        for value in [0.5_f32, -3.25_f32, 1.0e6_f32] {
+            let encoded = value.to_le_bytes().to_vec();
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let doubles = flushed.as_any().downcast_ref::<Float64Array>().unwrap();
+        assert_eq!(doubles.value(0), 0.5_f64);
+        assert_eq!(doubles.value(1), -3.25_f64);
+        assert_eq!(doubles.value(2), 1.0e6_f64);
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_bytes_to_string_utf8() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Bytes, PrimitiveType::String, false);
+        assert!(matches!(decoder, Decoder::BytesToString(_, _)));
+        for text in ["hello", "world", "héllo"] {
+            let encoded = encode_avro_bytes(text.as_bytes());
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let strings = flushed.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(strings.value(0), "hello");
+        assert_eq!(strings.value(1), "world");
+        assert_eq!(strings.value(2), "héllo");
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_bytes_to_string_utf8view_enabled() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::Bytes, PrimitiveType::String, true);
+        assert!(matches!(decoder, Decoder::BytesToString(_, _)));
+        let encoded = encode_avro_bytes(b"abc");
+        let mut cursor = AvroCursor::new(&encoded);
+        decoder.decode(&mut cursor).unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let strings = flushed.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(strings.value(0), "abc");
+    }
+
+    #[test]
+    fn test_schema_resolution_promotion_string_to_bytes() {
+        let mut decoder = decoder_for_promotion(PrimitiveType::String, PrimitiveType::Bytes, false);
+        assert!(matches!(decoder, Decoder::StringToBytes(_, _)));
+        for text in ["", "abc", "data"] {
+            let encoded = encode_avro_bytes(text.as_bytes());
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let binaries = flushed.as_any().downcast_ref::<BinaryArray>().unwrap();
+        assert_eq!(binaries.value(0), b"");
+        assert_eq!(binaries.value(1), b"abc");
+        assert_eq!(binaries.value(2), "data".as_bytes());
+    }
+
+    #[test]
+    fn test_schema_resolution_no_promotion_passthrough_int() {
+        let writer_type = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
+        let reader_type = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
+        // Each side becomes a record `Root` with one field `v`, resolved via AvroFieldBuilder.
+        let writer_root = Schema::Complex(ComplexType::Record(Record {
+            name: "Root",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![Field {
+                name: "v",
+                r#type: writer_type,
+                default: None,
+                doc: None,
+                aliases: vec![],
+            }],
+            attributes: Attributes::default(),
+        }));
+        let reader_root = Schema::Complex(ComplexType::Record(Record {
+            name: "Root",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![Field {
+                name: "v",
+                r#type: reader_type,
+                default: None,
+                doc: None,
+                aliases: vec![],
+            }],
+            attributes: Attributes::default(),
+        }));
+        let resolved_root = AvroFieldBuilder::new(&writer_root)
+            .with_reader_schema(&reader_root)
+            .with_utf8view(false)
+            .with_strict_mode(false)
+            .build()
+            .unwrap();
+        // Pull the datatype of field `v` out of the resolved wrapper struct.
+        let resolved = match resolved_root.data_type().codec() {
+            Codec::Struct(fields) => fields[0].data_type().clone(),
+            other => panic!("expected wrapper struct, got {other:?}"),
+        };
+        let mut decoder = Decoder::try_new(&resolved).unwrap();
+        assert!(matches!(decoder, Decoder::Int32(_)));
+        for value in [7, -9] {
+            let encoded = encode_avro_int(value);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let ints = flushed.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(ints.value(0), 7);
+        assert_eq!(ints.value(1), -9);
+    }
+
+    #[test]
+    fn test_schema_resolution_illegal_promotion_int_to_boolean_errors() {
+        let writer_type = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int));
+        let reader_type = Schema::TypeName(TypeName::Primitive(PrimitiveType::Boolean));
+        let writer_root = Schema::Complex(ComplexType::Record(Record {
+            name: "Root",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![Field {
+                name: "v",
+                r#type: writer_type,
+                default: None,
+                doc: None,
+                aliases: vec![],
+            }],
+            attributes: Attributes::default(),
+        }));
+        let reader_root = Schema::Complex(ComplexType::Record(Record {
+            name: "Root",
+            namespace: None,
+            doc: None,
+            aliases: vec![],
+            fields: vec![Field {
+                name: "v",
+                r#type: reader_type,
+                default: None,
+                doc: None,
+                aliases: vec![],
+            }],
+            attributes: Attributes::default(),
+        }));
+        let outcome = AvroFieldBuilder::new(&writer_root)
+            .with_reader_schema(&reader_root)
+            .with_utf8view(false)
+            .with_strict_mode(false)
+            .build();
+        assert!(outcome.is_err(), "expected error for illegal promotion");
+    }
+
+    #[test]
+    fn test_map_decoding_one_entry() {
+        let string_values = avro_from_codec(Codec::Utf8);
+        let map_of_strings = avro_from_codec(Codec::Map(Arc::new(string_values)));
+        let mut map_decoder = Decoder::try_new(&map_of_strings).unwrap();
+        // One map value: a block of 1 entry ("hello" -> "world"), then the 0 end marker.
+        let mut payload = Vec::new();
+        payload.extend_from_slice(&encode_avro_long(1));
+        payload.extend_from_slice(&encode_avro_bytes(b"hello")); // key
+        payload.extend_from_slice(&encode_avro_bytes(b"world")); // value
+        payload.extend_from_slice(&encode_avro_long(0));
+        let mut cursor = AvroCursor::new(&payload);
+        map_decoder.decode(&mut cursor).unwrap();
+        let flushed = map_decoder.flush(None).unwrap();
+        let maps = flushed.as_any().downcast_ref::<MapArray>().unwrap();
+        assert_eq!(maps.len(), 1); // one map
+        assert_eq!(maps.value_length(0), 1);
+        let first_map = maps.value(0);
+        let entry_struct = first_map.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(entry_struct.len(), 1);
+        let keys = entry_struct
+            .column_by_name("key")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        let values = entry_struct
+            .column_by_name("value")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(keys.value(0), "hello");
+        assert_eq!(values.value(0), "world");
     }
 
     #[test]
@@ -542,6 +2715,95 @@ mod tests {
         assert_eq!(map_arr.value_length(0), 0);
     }
 
+    #[test] // Decodes two 3-byte fixed values and checks the resulting FixedSizeBinaryArray.
+    fn test_fixed_decoding() {
+        let avro_type = avro_from_codec(Codec::Fixed(3));
+        let mut decoder = Decoder::try_new(&avro_type).expect("Failed to create decoder");
+
+        let data1 = [1u8, 2, 3]; // first value: raw bytes, no length prefix
+        let mut cursor1 = AvroCursor::new(&data1);
+        decoder
+            .decode(&mut cursor1)
+            .expect("Failed to decode data1");
+        assert_eq!(cursor1.position(), 3, "Cursor should advance by fixed size");
+        let data2 = [4u8, 5, 6]; // second value, fed through a separate cursor
+        let mut cursor2 = AvroCursor::new(&data2);
+        decoder
+            .decode(&mut cursor2)
+            .expect("Failed to decode data2");
+        assert_eq!(cursor2.position(), 3, "Cursor should advance by fixed size");
+        let array = decoder.flush(None).expect("Failed to flush decoder");
+        assert_eq!(array.len(), 2, "Array should contain two items");
+        let fixed_size_binary_array = array
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .expect("Failed to downcast to FixedSizeBinaryArray");
+        assert_eq!(
+            fixed_size_binary_array.value_length(),
+            3,
+            "Fixed size of binary values should be 3"
+        );
+        assert_eq!(
+            fixed_size_binary_array.value(0),
+            &[1, 2, 3],
+            "First item mismatch"
+        );
+        assert_eq!(
+            fixed_size_binary_array.value(1),
+            &[4, 5, 6],
+            "Second item mismatch"
+        );
+    }
+
+    #[test] // Flushing a Fixed(5) decoder with no decoded rows yields an empty 5-wide array.
+    fn test_fixed_decoding_empty() {
+        let avro_type = avro_from_codec(Codec::Fixed(5));
+        let mut decoder = Decoder::try_new(&avro_type).expect("Failed to create decoder");
+
+        let array = decoder // no decode() call precedes this flush
+            .flush(None)
+            .expect("Failed to flush decoder for empty input");
+
+        assert_eq!(array.len(), 0, "Array should be empty");
+        let fixed_size_binary_array = array
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .expect("Failed to downcast to FixedSizeBinaryArray for empty array");
+
+        assert_eq!(
+            fixed_size_binary_array.value_length(),
+            5, // width comes from the codec, not from any decoded data
+            "Fixed size of binary values should be 5 as per type"
+        );
+    }
+
+    #[test] // Decodes a UUID written as hyphenated text into a 16-byte FixedSizeBinary value.
+    fn test_uuid_decoding() {
+        let avro_type = avro_from_codec(Codec::Uuid);
+        let mut decoder = Decoder::try_new(&avro_type).expect("Failed to create decoder");
+        let uuid_str = "f81d4fae-7dec-11d0-a765-00a0c91e6bf6";
+        let data = encode_avro_bytes(uuid_str.as_bytes()); // length-prefixed text form
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).expect("Failed to decode data");
+        assert_eq!(
+            cursor.position(),
+            data.len(),
+            "Cursor should advance by varint size + data size"
+        );
+        let array = decoder.flush(None).expect("Failed to flush decoder");
+        let fixed_size_binary_array = array
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .expect("Array should be a FixedSizeBinaryArray");
+        assert_eq!(fixed_size_binary_array.len(), 1);
+        assert_eq!(fixed_size_binary_array.value_length(), 16);
+        let expected_bytes = [ // the hex digits of uuid_str, in order, hyphens removed
+            0xf8, 0x1d, 0x4f, 0xae, 0x7d, 0xec, 0x11, 0xd0, 0xa7, 0x65, 0x00, 0xa0, 0xc9, 0x1e,
+            0x6b, 0xf6,
+        ];
+        assert_eq!(fixed_size_binary_array.value(0), &expected_bytes);
+    }
+
     #[test]
     fn test_array_decoding() {
         let item_dt = avro_from_codec(Codec::Int32);
@@ -634,4 +2896,1862 @@ mod tests {
         assert_eq!(list_arr.len(), 1);
         assert_eq!(list_arr.value_length(0), 0);
     }
+
+    #[test] // Two 32-byte fixed decimals are expected to decode into a Decimal256Array.
+    fn test_decimal_decoding_fixed256() {
+        let dt = avro_from_codec(Codec::Decimal(50, Some(2), Some(32))); // presumably (precision, scale, size)
+        let mut decoder = Decoder::try_new(&dt).unwrap();
+        let row1 = [ // big-endian 0x3039 = 12345, rendered as "123.45" below
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x30, 0x39,
+        ];
+        let row2 = [ // sign-extended two's complement 0x..FF85 = -123, rendered as "-1.23"
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0x85,
+        ];
+        let mut data = Vec::new();
+        data.extend_from_slice(&row1);
+        data.extend_from_slice(&row2);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap(); // row1
+        decoder.decode(&mut cursor).unwrap(); // row2, read from the same cursor
+        let arr = decoder.flush(None).unwrap();
+        let dec = arr.as_any().downcast_ref::<Decimal256Array>().unwrap();
+        assert_eq!(dec.len(), 2);
+        assert_eq!(dec.value_as_string(0), "123.45");
+        assert_eq!(dec.value_as_string(1), "-1.23");
+    }
+
+    #[test] // Two 16-byte fixed decimals are expected to decode into a Decimal128Array.
+    fn test_decimal_decoding_fixed128() {
+        let dt = avro_from_codec(Codec::Decimal(28, Some(2), Some(16))); // presumably (precision, scale, size)
+        let mut decoder = Decoder::try_new(&dt).unwrap();
+        let row1 = [ // big-endian 0x3039 = 12345, rendered as "123.45" below
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x30, 0x39,
+        ];
+        let row2 = [ // sign-extended two's complement 0x..FF85 = -123, rendered as "-1.23"
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0x85,
+        ];
+        let mut data = Vec::new();
+        data.extend_from_slice(&row1);
+        data.extend_from_slice(&row2);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap(); // row1
+        decoder.decode(&mut cursor).unwrap(); // row2, read from the same cursor
+        let arr = decoder.flush(None).unwrap();
+        let dec = arr.as_any().downcast_ref::<Decimal128Array>().unwrap();
+        assert_eq!(dec.len(), 2);
+        assert_eq!(dec.value_as_string(0), "123.45");
+        assert_eq!(dec.value_as_string(1), "-1.23");
+    }
+
+    #[test] // A precision-5 decimal stored in 32-byte fixed values; array type depends on the feature.
+    fn test_decimal_decoding_fixed32_from_32byte_fixed_storage() {
+        let dt = avro_from_codec(Codec::Decimal(5, Some(2), Some(32))); // presumably (precision, scale, size)
+        let mut decoder = Decoder::try_new(&dt).unwrap();
+        let row1 = [ // big-endian 0x3039 = 12345, rendered as "123.45" below
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x30, 0x39,
+        ];
+        let row2 = [ // sign-extended two's complement 0x..FF85 = -123, rendered as "-1.23"
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0x85,
+        ];
+        let mut data = Vec::new();
+        data.extend_from_slice(&row1);
+        data.extend_from_slice(&row2);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap(); // row1
+        decoder.decode(&mut cursor).unwrap(); // row2, read from the same cursor
+        let arr = decoder.flush(None).unwrap();
+        #[cfg(feature = "small_decimals")] // with the feature: expect the narrow Decimal32 array
+        {
+            let dec = arr.as_any().downcast_ref::<Decimal32Array>().unwrap();
+            assert_eq!(dec.len(), 2);
+            assert_eq!(dec.value_as_string(0), "123.45");
+            assert_eq!(dec.value_as_string(1), "-1.23");
+        }
+        #[cfg(not(feature = "small_decimals"))] // without it: expect Decimal128, same values
+        {
+            let dec = arr.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            assert_eq!(dec.len(), 2);
+            assert_eq!(dec.value_as_string(0), "123.45");
+            assert_eq!(dec.value_as_string(1), "-1.23");
+        }
+    }
+
+    #[test] // A precision-5 decimal stored in 16-byte fixed values; array type depends on the feature.
+    fn test_decimal_decoding_fixed32_from_16byte_fixed_storage() {
+        let dt = avro_from_codec(Codec::Decimal(5, Some(2), Some(16))); // presumably (precision, scale, size)
+        let mut decoder = Decoder::try_new(&dt).unwrap();
+        let row1 = [ // big-endian 0x3039 = 12345, rendered as "123.45" below
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x30, 0x39,
+        ];
+        let row2 = [ // sign-extended two's complement 0x..FF85 = -123, rendered as "-1.23"
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0x85,
+        ];
+        let mut data = Vec::new();
+        data.extend_from_slice(&row1);
+        data.extend_from_slice(&row2);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap(); // row1
+        decoder.decode(&mut cursor).unwrap(); // row2, read from the same cursor
+
+        let arr = decoder.flush(None).unwrap();
+        #[cfg(feature = "small_decimals")] // with the feature: expect the narrow Decimal32 array
+        {
+            let dec = arr.as_any().downcast_ref::<Decimal32Array>().unwrap();
+            assert_eq!(dec.len(), 2);
+            assert_eq!(dec.value_as_string(0), "123.45");
+            assert_eq!(dec.value_as_string(1), "-1.23");
+        }
+        #[cfg(not(feature = "small_decimals"))] // without it: expect Decimal128, same values
+        {
+            let dec = arr.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            assert_eq!(dec.len(), 2);
+            assert_eq!(dec.value_as_string(0), "123.45");
+            assert_eq!(dec.value_as_string(1), "-1.23");
+        }
+    }
+
+    #[test] // Nullable bytes-backed decimal: value, null, value through a hand-built Nullable decoder.
+    fn test_decimal_decoding_bytes_with_nulls() {
+        let dt = avro_from_codec(Codec::Decimal(4, Some(1), None)); // None size: length-prefixed bytes below
+        let inner = Decoder::try_new(&dt).unwrap();
+        let mut decoder = Decoder::Nullable( // built directly rather than via Decoder::try_new
+            Nullability::NullSecond,
+            NullBufferBuilder::new(DEFAULT_CAPACITY),
+            Box::new(inner),
+            NullablePlan::ReadTag,
+        );
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(0)); // row 1 tag: branch 0 (value)
+        data.extend_from_slice(&encode_avro_bytes(&[0x04, 0xD2])); // 0x04D2 = 1234 -> "123.4"
+        data.extend_from_slice(&encode_avro_int(1)); // row 2 tag: branch 1 (null)
+        data.extend_from_slice(&encode_avro_int(0)); // row 3 tag: branch 0 (value)
+        data.extend_from_slice(&encode_avro_bytes(&[0xFB, 0x2E])); // 0xFB2E = -1234 -> "-123.4"
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let arr = decoder.flush(None).unwrap();
+        #[cfg(feature = "small_decimals")] // with the feature: expect the narrow Decimal32 array
+        {
+            let dec_arr = arr.as_any().downcast_ref::<Decimal32Array>().unwrap();
+            assert_eq!(dec_arr.len(), 3);
+            assert!(dec_arr.is_valid(0));
+            assert!(!dec_arr.is_valid(1));
+            assert!(dec_arr.is_valid(2));
+            assert_eq!(dec_arr.value_as_string(0), "123.4");
+            assert_eq!(dec_arr.value_as_string(2), "-123.4");
+        }
+        #[cfg(not(feature = "small_decimals"))] // without it: expect Decimal128, same values
+        {
+            let dec_arr = arr.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            assert_eq!(dec_arr.len(), 3);
+            assert!(dec_arr.is_valid(0));
+            assert!(!dec_arr.is_valid(1));
+            assert!(dec_arr.is_valid(2));
+            assert_eq!(dec_arr.value_as_string(0), "123.4");
+            assert_eq!(dec_arr.value_as_string(2), "-123.4");
+        }
+    }
+
+    #[test] // Nullable decimal stored in 16-byte fixed values: value, null, value.
+    fn test_decimal_decoding_bytes_with_nulls_fixed_size_narrow_result() {
+        let dt = avro_from_codec(Codec::Decimal(6, Some(2), Some(16))); // presumably (precision, scale, size)
+        let inner = Decoder::try_new(&dt).unwrap();
+        let mut decoder = Decoder::Nullable( // built directly rather than via Decoder::try_new
+            Nullability::NullSecond,
+            NullBufferBuilder::new(DEFAULT_CAPACITY),
+            Box::new(inner),
+            NullablePlan::ReadTag,
+        );
+        let row1 = [ // big-endian 0x01E240 = 123456, rendered as "1234.56" below
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+            0xE2, 0x40,
+        ];
+        let row3 = [ // two's complement of 123456, rendered as "-1234.56" below
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE,
+            0x1D, 0xC0,
+        ];
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(0)); // row 1 tag: branch 0 (value)
+        data.extend_from_slice(&row1); // fixed payload, no length prefix
+        data.extend_from_slice(&encode_avro_int(1)); // row 2 tag: branch 1 (null)
+        data.extend_from_slice(&encode_avro_int(0)); // row 3 tag: branch 0 (value)
+        data.extend_from_slice(&row3);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let arr = decoder.flush(None).unwrap();
+        #[cfg(feature = "small_decimals")] // with the feature: expect the narrow Decimal32 array
+        {
+            let dec_arr = arr.as_any().downcast_ref::<Decimal32Array>().unwrap();
+            assert_eq!(dec_arr.len(), 3);
+            assert!(dec_arr.is_valid(0));
+            assert!(!dec_arr.is_valid(1));
+            assert!(dec_arr.is_valid(2));
+            assert_eq!(dec_arr.value_as_string(0), "1234.56");
+            assert_eq!(dec_arr.value_as_string(2), "-1234.56");
+        }
+        #[cfg(not(feature = "small_decimals"))] // without it: expect Decimal128, same values
+        {
+            let dec_arr = arr.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            assert_eq!(dec_arr.len(), 3);
+            assert!(dec_arr.is_valid(0));
+            assert!(!dec_arr.is_valid(1));
+            assert!(dec_arr.is_valid(2));
+            assert_eq!(dec_arr.value_as_string(0), "1234.56");
+            assert_eq!(dec_arr.value_as_string(2), "-1234.56");
+        }
+    }
+
+    #[test] // Enum indices decode to dictionary keys; the symbols become the dictionary values.
+    fn test_enum_decoding() {
+        let symbols: Arc<[String]> = vec!["A", "B", "C"].into_iter().map(String::from).collect();
+        let avro_type = avro_from_codec(Codec::Enum(symbols.clone()));
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(2)); // index 2 -> "C"
+        data.extend_from_slice(&encode_avro_int(0)); // index 0 -> "A"
+        data.extend_from_slice(&encode_avro_int(1)); // index 1 -> "B"
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let array = decoder.flush(None).unwrap();
+        let dict_array = array
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        assert_eq!(dict_array.len(), 3);
+        let values = dict_array
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(values.value(0), "A");
+        assert_eq!(values.value(1), "B");
+        assert_eq!(values.value(2), "C");
+        assert_eq!(dict_array.keys().values(), &[2, 0, 1]); // keys equal the encoded indices
+    }
+
+    #[test] // Nullable enum with NullFirst: a null row becomes a null dictionary key.
+    fn test_enum_decoding_with_nulls() {
+        let symbols: Arc<[String]> = vec!["X", "Y"].into_iter().map(String::from).collect();
+        let enum_codec = Codec::Enum(symbols.clone());
+        let avro_type =
+            AvroDataType::new(enum_codec, Default::default(), Some(Nullability::NullFirst));
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_long(1)); // row 1 tag: branch 1 (value)
+        data.extend_from_slice(&encode_avro_int(1)); // enum index 1 -> "Y"
+        data.extend_from_slice(&encode_avro_long(0)); // row 2 tag: branch 0 (null)
+        data.extend_from_slice(&encode_avro_long(1)); // row 3 tag: branch 1 (value)
+        data.extend_from_slice(&encode_avro_int(0)); // enum index 0 -> "X"
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let array = decoder.flush(None).unwrap();
+        let dict_array = array
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        assert_eq!(dict_array.len(), 3);
+        assert!(dict_array.is_valid(0));
+        assert!(dict_array.is_null(1));
+        assert!(dict_array.is_valid(2));
+        let expected_keys = Int32Array::from(vec![Some(1), None, Some(0)]);
+        assert_eq!(dict_array.keys(), &expected_keys);
+        let values = dict_array
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(values.value(0), "X");
+        assert_eq!(values.value(1), "Y");
+    }
+
+    #[test] // Nullable Interval codec: value, null, value decoded to IntervalMonthDayNano.
+    fn test_duration_decoding_with_nulls() {
+        let duration_codec = Codec::Interval;
+        let avro_type = AvroDataType::new(
+            duration_codec,
+            Default::default(),
+            Some(Nullability::NullFirst),
+        );
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        // First value: 1 month, 2 days, 3 millis
+        data.extend_from_slice(&encode_avro_long(1)); // not null
+        let mut duration1 = Vec::new();
+        duration1.extend_from_slice(&1u32.to_le_bytes()); // months, little-endian u32
+        duration1.extend_from_slice(&2u32.to_le_bytes()); // days
+        duration1.extend_from_slice(&3u32.to_le_bytes()); // milliseconds
+        data.extend_from_slice(&duration1);
+        // Second value: null
+        data.extend_from_slice(&encode_avro_long(0)); // null
+        data.extend_from_slice(&encode_avro_long(1)); // third value, not null: 4 months, 5 days, 6 ms
+        let mut duration2 = Vec::new();
+        duration2.extend_from_slice(&4u32.to_le_bytes()); // months
+        duration2.extend_from_slice(&5u32.to_le_bytes()); // days
+        duration2.extend_from_slice(&6u32.to_le_bytes()); // milliseconds
+        data.extend_from_slice(&duration2);
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let array = decoder.flush(None).unwrap();
+        let interval_array = array
+            .as_any()
+            .downcast_ref::<IntervalMonthDayNanoArray>()
+            .unwrap();
+        assert_eq!(interval_array.len(), 3);
+        assert!(interval_array.is_valid(0));
+        assert!(interval_array.is_null(1));
+        assert!(interval_array.is_valid(2));
+        let expected = IntervalMonthDayNanoArray::from(vec![
+            Some(IntervalMonthDayNano {
+                months: 1,
+                days: 2,
+                nanoseconds: 3_000_000, // the 3 ms input expressed in nanoseconds
+            }),
+            None,
+            Some(IntervalMonthDayNano {
+                months: 4,
+                days: 5,
+                nanoseconds: 6_000_000, // the 6 ms input expressed in nanoseconds
+            }),
+        ]);
+        assert_eq!(interval_array, &expected);
+    }
+
+    #[test] // Flushing a non-nullable Interval decoder with no decoded rows yields an empty array.
+    fn test_duration_decoding_empty() {
+        let duration_codec = Codec::Interval;
+        let avro_type = AvroDataType::new(duration_codec, Default::default(), None);
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let array = decoder.flush(None).unwrap(); // no decode() call precedes this flush
+        assert_eq!(array.len(), 0);
+    }
+
+    #[test] // DurationSeconds codec: three longs decode unchanged into a DurationSecondArray.
+    #[cfg(feature = "avro_custom_types")]
+    fn test_duration_seconds_decoding() {
+        let avro_type = AvroDataType::new(Codec::DurationSeconds, Default::default(), None);
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        // Three values: 0, -1, 2
+        data.extend_from_slice(&encode_avro_long(0));
+        data.extend_from_slice(&encode_avro_long(-1));
+        data.extend_from_slice(&encode_avro_long(2));
+        let mut cursor = AvroCursor::new(&data);
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        decoder.decode(&mut cursor).unwrap();
+        let array = decoder.flush(None).unwrap();
+        let dur = array
+            .as_any()
+            .downcast_ref::<DurationSecondArray>()
+            .unwrap();
+        assert_eq!(dur.values(), &[0, -1, 2]); // same values, same order as encoded
+    }
+
+    #[test] // DurationMillis codec: three longs decode unchanged into a DurationMillisecondArray.
+    #[cfg(feature = "avro_custom_types")]
+    fn test_duration_milliseconds_decoding() {
+        let avro_type = AvroDataType::new(Codec::DurationMillis, Default::default(), None);
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        for v in [1i64, 0, -2] { // positive, zero and negative inputs
+            data.extend_from_slice(&encode_avro_long(v));
+        }
+        let mut cursor = AvroCursor::new(&data);
+        for _ in 0..3 { // one decode call per encoded value
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let array = decoder.flush(None).unwrap();
+        let dur = array
+            .as_any()
+            .downcast_ref::<DurationMillisecondArray>()
+            .unwrap();
+        assert_eq!(dur.values(), &[1, 0, -2]); // same values, same order as encoded
+    }
+
+    #[test] // DurationMicros codec: three longs decode unchanged into a DurationMicrosecondArray.
+    #[cfg(feature = "avro_custom_types")]
+    fn test_duration_microseconds_decoding() {
+        let avro_type = AvroDataType::new(Codec::DurationMicros, Default::default(), None);
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        for v in [5i64, -6, 7] { // includes one negative input
+            data.extend_from_slice(&encode_avro_long(v));
+        }
+        let mut cursor = AvroCursor::new(&data);
+        for _ in 0..3 { // one decode call per encoded value
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let array = decoder.flush(None).unwrap();
+        let dur = array
+            .as_any()
+            .downcast_ref::<DurationMicrosecondArray>()
+            .unwrap();
+        assert_eq!(dur.values(), &[5, -6, 7]); // same values, same order as encoded
+    }
+
+    #[test] // DurationNanos codec: three longs decode unchanged into a DurationNanosecondArray.
+    #[cfg(feature = "avro_custom_types")]
+    fn test_duration_nanoseconds_decoding() {
+        let avro_type = AvroDataType::new(Codec::DurationNanos, Default::default(), None);
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+        let mut data = Vec::new();
+        for v in [8i64, 9, -10] { // includes one negative input
+            data.extend_from_slice(&encode_avro_long(v));
+        }
+        let mut cursor = AvroCursor::new(&data);
+        for _ in 0..3 { // one decode call per encoded value
+            decoder.decode(&mut cursor).unwrap();
+        }
+        let array = decoder.flush(None).unwrap();
+        let dur = array
+            .as_any()
+            .downcast_ref::<DurationNanosecondArray>()
+            .unwrap();
+        assert_eq!(dur.values(), &[8, 9, -10]); // same values, same order as encoded
+    }
+
+    #[test] // A row that fails to decode must leave no trace in the flushed nullable array.
+    fn test_nullable_decode_error_bitmap_corruption() {
+        // Nullable Int32 with ['T','null'] encoding (NullSecond)
+        let avro_type = AvroDataType::new(
+            Codec::Int32,
+            Default::default(),
+            Some(Nullability::NullSecond),
+        );
+        let mut decoder = Decoder::try_new(&avro_type).unwrap();
+
+        // Row 1: union branch 1 (null)
+        let mut row1 = Vec::new();
+        row1.extend_from_slice(&encode_avro_int(1));
+
+        // Row 2: union branch 0 (non-null) but missing the int payload -> decode error
+        let mut row2 = Vec::new();
+        row2.extend_from_slice(&encode_avro_int(0)); // branch = 0 => non-null
+
+        // Row 3: union branch 0 (non-null) with correct int payload -> should succeed
+        let mut row3 = Vec::new();
+        row3.extend_from_slice(&encode_avro_int(0)); // branch
+        row3.extend_from_slice(&encode_avro_int(42)); // actual value
+
+        decoder.decode(&mut AvroCursor::new(&row1)).unwrap(); // each row gets its own cursor
+        assert!(decoder.decode(&mut AvroCursor::new(&row2)).is_err()); // truncated row must fail
+        decoder.decode(&mut AvroCursor::new(&row3)).unwrap(); // decoder stays usable afterwards
+
+        let array = decoder.flush(None).unwrap();
+
+        // Should contain 2 elements: row1 (null) and row3 (42)
+        assert_eq!(array.len(), 2);
+        let int_array = array.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert!(int_array.is_null(0)); // row1 is null
+        assert_eq!(int_array.value(1), 42); // row3 value is 42
+    }
+
+    #[test] // Encoded enum indices are translated through EnumResolution::mapping into keys.
+    fn test_enum_mapping_reordered_symbols() {
+        let reader_symbols: Arc<[String]> =
+            vec!["B".to_string(), "C".to_string(), "A".to_string()].into();
+        let mapping: Arc<[i32]> = Arc::from(vec![2, 0, 1]); // encoded index i -> key mapping[i]
+        let default_index: i32 = -1; // presumably "no default"; not exercised by this test
+        let mut dec = Decoder::Enum(
+            Vec::with_capacity(DEFAULT_CAPACITY),
+            reader_symbols.clone(),
+            Some(EnumResolution {
+                mapping,
+                default_index,
+            }),
+        );
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(0)); // expected key 2
+        data.extend_from_slice(&encode_avro_int(1)); // expected key 0
+        data.extend_from_slice(&encode_avro_int(2)); // expected key 1
+        let mut cur = AvroCursor::new(&data);
+        dec.decode(&mut cur).unwrap();
+        dec.decode(&mut cur).unwrap();
+        dec.decode(&mut cur).unwrap();
+        let arr = dec.flush(None).unwrap();
+        let dict = arr
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        let expected_keys = Int32Array::from(vec![2, 0, 1]);
+        assert_eq!(dict.keys(), &expected_keys);
+        let values = dict
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(values.value(0), "B"); // dictionary values keep the reader's symbol order
+        assert_eq!(values.value(1), "C");
+        assert_eq!(values.value(2), "A");
+    }
+
+    #[test] // An encoded index outside the mapping is expected to resolve to default_index.
+    fn test_enum_mapping_unknown_symbol_and_out_of_range_fall_back_to_default() {
+        let reader_symbols: Arc<[String]> = vec!["A".to_string(), "B".to_string()].into();
+        let default_index: i32 = 1; // fallback key, i.e. "B"
+        let mapping: Arc<[i32]> = Arc::from(vec![0, 1]); // NOTE(review): no -1 entry, so only out-of-range is hit
+        let mut dec = Decoder::Enum(
+            Vec::with_capacity(DEFAULT_CAPACITY),
+            reader_symbols.clone(),
+            Some(EnumResolution {
+                mapping,
+                default_index,
+            }),
+        );
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(0)); // maps to key 0
+        data.extend_from_slice(&encode_avro_int(1)); // maps to key 1
+        data.extend_from_slice(&encode_avro_int(99)); // beyond the mapping; expect the default
+        let mut cur = AvroCursor::new(&data);
+        dec.decode(&mut cur).unwrap();
+        dec.decode(&mut cur).unwrap();
+        dec.decode(&mut cur).unwrap();
+        let arr = dec.flush(None).unwrap();
+        let dict = arr
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        let expected_keys = Int32Array::from(vec![0, 1, 1]); // last key is default_index
+        assert_eq!(dict.keys(), &expected_keys);
+        let values = dict
+            .values()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(values.value(0), "A");
+        assert_eq!(values.value(1), "B");
+    }
+
+    #[test] // A mapping entry of -1 combined with default_index -1 must make decode() fail.
+    fn test_enum_mapping_unknown_symbol_without_default_errors() {
+        let reader_symbols: Arc<[String]> = vec!["A".to_string()].into();
+        let default_index: i32 = -1; // indicates no default at type-level
+        let mapping: Arc<[i32]> = Arc::from(vec![-1]); // encoded index 0 has no valid target
+        let mut dec = Decoder::Enum(
+            Vec::with_capacity(DEFAULT_CAPACITY),
+            reader_symbols,
+            Some(EnumResolution {
+                mapping,
+                default_index,
+            }),
+        );
+        let data = encode_avro_int(0); // hits the single -1 mapping entry
+        let mut cur = AvroCursor::new(&data);
+        let err = dec
+            .decode(&mut cur)
+            .expect_err("expected decode error for unresolved enum without default");
+        let msg = err.to_string();
+        assert!( // pins two fragments of the error text, not the whole message
+            msg.contains("not resolvable") && msg.contains("no default"),
+            "unexpected error message: {msg}"
+        );
+    }
+
+    fn make_record_resolved_decoder( // test helper: hand-builds a Decoder::Record with a Projector
+        reader_fields: &[(&str, DataType, bool)], // (name, Arrow type, nullable) per reader field
+        writer_to_reader: Vec<Option<usize>>, // stored as-is in the Projector
+        skip_decoders: Vec<Option<Skipper>>, // stored as-is in the Projector
+    ) -> Decoder {
+        let mut field_refs: Vec<FieldRef> = Vec::with_capacity(reader_fields.len());
+        let mut encodings: Vec<Decoder> = Vec::with_capacity(reader_fields.len());
+        for (name, dt, nullable) in reader_fields {
+            field_refs.push(Arc::new(ArrowField::new(*name, dt.clone(), *nullable)));
+            let enc = match dt { // only Int32, Int64 and Utf8 reader fields are supported
+                DataType::Int32 => Decoder::Int32(Vec::new()),
+                DataType::Int64 => Decoder::Int64(Vec::new()),
+                DataType::Utf8 => {
+                    Decoder::String(OffsetBufferBuilder::new(DEFAULT_CAPACITY), Vec::new())
+                }
+                other => panic!("Unsupported test reader field type: {other:?}"),
+            };
+            encodings.push(enc); // pushed in step with field_refs, so indices line up
+        }
+        let fields: Fields = field_refs.into();
+        Decoder::Record(
+            fields,
+            encodings,
+            Some(Projector {
+                writer_to_reader: Arc::from(writer_to_reader),
+                skip_decoders,
+                field_defaults: vec![None; reader_fields.len()], // no per-field defaults
+                default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), // none
+            }),
+        )
+    }
+
+    #[test] // A trailing Int32 with no reader mapping is consumed from the input but not emitted.
+    fn test_skip_writer_trailing_field_int32() {
+        let mut dec = make_record_resolved_decoder(
+            &[("id", arrow_schema::DataType::Int32, false)],
+            vec![Some(0), None], // first encoded field -> reader field 0; second has no target
+            vec![None, Some(super::Skipper::Int32)], // skipper for the unmapped second field
+        );
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(7)); // id
+        data.extend_from_slice(&encode_avro_int(999)); // value expected to be skipped
+        let mut cur = AvroCursor::new(&data);
+        dec.decode(&mut cur).unwrap();
+        assert_eq!(cur.position(), data.len()); // the skipped value's bytes were consumed too
+        let arr = dec.flush(None).unwrap();
+        let struct_arr = arr.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_arr.len(), 1);
+        let id = struct_arr
+            .column_by_name("id")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id.value(0), 7);
+    }
+
+    #[test] // An unmapped string between two mapped fields is skipped; both neighbours decode.
+    fn test_skip_writer_middle_field_string() {
+        let mut dec = make_record_resolved_decoder(
+            &[
+                ("id", DataType::Int32, false),
+                ("score", DataType::Int64, false),
+            ],
+            vec![Some(0), None, Some(1)], // encoded fields 0 and 2 map to reader 0 and 1
+            vec![None, Some(Skipper::String), None], // skipper for the unmapped middle field
+        );
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_int(42)); // id
+        data.extend_from_slice(&encode_avro_bytes(b"abcdef")); // string expected to be skipped
+        data.extend_from_slice(&encode_avro_long(1000)); // score
+        let mut cur = AvroCursor::new(&data);
+        dec.decode(&mut cur).unwrap();
+        assert_eq!(cur.position(), data.len()); // the skipped string's bytes were consumed too
+        let arr = dec.flush(None).unwrap();
+        let s = arr.as_any().downcast_ref::<StructArray>().unwrap();
+        let id = s
+            .column_by_name("id")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let score = s
+            .column_by_name("score")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        assert_eq!(id.value(0), 42);
+        assert_eq!(score.value(0), 1000);
+    }
+
+    #[test] // Skips an unmapped array whose block has a negative count followed by a byte size.
+    fn test_skip_writer_array_with_negative_block_count_fast() {
+        let mut dec = make_record_resolved_decoder(
+            &[("id", DataType::Int32, false)],
+            vec![None, Some(0)], // first encoded field is unmapped; second -> reader field 0
+            vec![Some(super::Skipper::List(Box::new(Skipper::Int32))), None],
+        );
+        let mut array_payload = Vec::new(); // the three array items, encoded back to back
+        array_payload.extend_from_slice(&encode_avro_int(1));
+        array_payload.extend_from_slice(&encode_avro_int(2));
+        array_payload.extend_from_slice(&encode_avro_int(3));
+        let mut data = Vec::new();
+        data.extend_from_slice(&encode_avro_long(-3)); // negative count: 3 items, byte size next
+        data.extend_from_slice(&encode_avro_long(array_payload.len() as i64)); // block byte size
+        data.extend_from_slice(&array_payload);
+        data.extend_from_slice(&encode_avro_long(0)); // zero block count ends the array
+        data.extend_from_slice(&encode_avro_int(5)); // id
+        let mut cur = AvroCursor::new(&data);
+        dec.decode(&mut cur).unwrap();
+        assert_eq!(cur.position(), data.len()); // the whole array was consumed
+        let arr = dec.flush(None).unwrap();
+        let s = arr.as_any().downcast_ref::<StructArray>().unwrap();
+        let id = s
+            .column_by_name("id")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id.len(), 1);
+        assert_eq!(id.value(0), 5);
+    }
+
+    /// Feeds a record whose first encoded value is an `int`-valued map written
+    /// as a single block with a negative entry count (followed by the block's
+    /// byte length). The decoder is given a `Skipper::Map` for that position,
+    /// so only the trailing `id` value should end up in the output.
+    #[test]
+    fn test_skip_writer_map_with_negative_block_count_fast() {
+        let skippers = vec![Some(Skipper::Map(Box::new(Skipper::Int32))), None];
+        let mut decoder = make_record_resolved_decoder(
+            &[("id", DataType::Int32, false)],
+            vec![None, Some(0)],
+            skippers,
+        );
+        // Two key/value pairs: ("k1", 10) and ("k2", 20).
+        let mut pairs = Vec::new();
+        pairs.extend(encode_avro_bytes(b"k1"));
+        pairs.extend(encode_avro_int(10));
+        pairs.extend(encode_avro_bytes(b"k2"));
+        pairs.extend(encode_avro_int(20));
+        // Block header: count = -2, then the byte size of the pairs.
+        let mut bytes = encode_avro_long(-2);
+        bytes.extend(encode_avro_long(pairs.len() as i64));
+        bytes.extend_from_slice(&pairs);
+        // A zero count terminates the map; the `id` value follows it.
+        bytes.extend(encode_avro_long(0));
+        bytes.extend(encode_avro_int(123));
+        let mut cursor = AvroCursor::new(&bytes);
+        decoder.decode(&mut cursor).unwrap();
+        // The whole input, including the skipped map, must be consumed.
+        assert_eq!(cursor.position(), bytes.len());
+        let flushed = decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let id_column = record.column_by_name("id").unwrap();
+        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(ids.len(), 1);
+        assert_eq!(ids.value(0), 123);
+    }
+
+    /// Decodes two rows whose first encoded value is handled by a
+    /// `Skipper::Nullable(NullFirst, Int32)`: one row with branch 0 and no
+    /// payload, one with branch 1 and an `int` payload. Both rows must be fully
+    /// consumed and only the `id` values kept.
+    #[test]
+    fn test_skip_writer_nullable_field_union_nullfirst() {
+        let skipped_field = super::Skipper::Nullable(
+            Nullability::NullFirst,
+            Box::new(super::Skipper::Int32),
+        );
+        let mut decoder = make_record_resolved_decoder(
+            &[("id", DataType::Int32, false)],
+            vec![None, Some(0)],
+            vec![Some(skipped_field), None],
+        );
+        // Row 1: branch 0 (nothing else to skip), then id = 5.
+        let mut first = encode_avro_long(0);
+        first.extend(encode_avro_int(5));
+        // Row 2: branch 1 carrying the int 123, then id = 7.
+        let mut second = encode_avro_long(1);
+        second.extend(encode_avro_int(123));
+        second.extend(encode_avro_int(7));
+        let mut first_cursor = AvroCursor::new(&first);
+        let mut second_cursor = AvroCursor::new(&second);
+        decoder.decode(&mut first_cursor).unwrap();
+        decoder.decode(&mut second_cursor).unwrap();
+        assert_eq!(first_cursor.position(), first.len());
+        assert_eq!(second_cursor.position(), second.len());
+        let flushed = decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let id_column = record.column_by_name("id").unwrap();
+        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(ids.len(), 2);
+        for (row, want) in [5, 7].into_iter().enumerate() {
+            assert_eq!(ids.value(row), want);
+        }
+    }
+
+    /// Test helper: builds an `AvroDataType` wrapping a dense `Codec::Union`.
+    ///
+    /// Each `(codec, name, data_type)` triple in `children` becomes one Avro
+    /// child encoding plus one nullable Arrow field; `type_ids` supplies the
+    /// Arrow type id for each child, in the same order. Panics if
+    /// `UnionFields::try_new` rejects the ids/fields.
+    fn make_dense_union_avro(
+        children: Vec<(Codec, &'_ str, DataType)>,
+        type_ids: Vec<i8>,
+    ) -> AvroDataType {
+        // Split every triple into its Avro encoding and its Arrow field.
+        let (encodings, arrow_fields): (Vec<AvroDataType>, Vec<arrow_schema::Field>) = children
+            .into_iter()
+            .map(|(codec, name, dt)| {
+                (
+                    AvroDataType::new(codec, Default::default(), None),
+                    arrow_schema::Field::new(name, dt, true),
+                )
+            })
+            .unzip();
+        let union_fields = UnionFields::try_new(type_ids, arrow_fields).unwrap();
+        AvroDataType::new(
+            Codec::Union(encodings.into(), union_fields, UnionMode::Dense),
+            Default::default(),
+            None,
+        )
+    }
+
+    /// Decodes three rows into a dense union whose children carry the custom
+    /// type ids 2 (`int`) and 5 (`string`), then checks the per-row type ids,
+    /// the per-row child offsets, and the contents of both child arrays.
+    #[test]
+    fn test_union_dense_two_children_custom_type_ids() {
+        let children = vec![
+            (Codec::Int32, "i", DataType::Int32),
+            (Codec::Utf8, "s", DataType::Utf8),
+        ];
+        let union_dt = make_dense_union_avro(children, vec![2, 5]);
+        let mut decoder = Decoder::try_new(&union_dt).unwrap();
+        // Each row is a branch index followed by that branch's value.
+        let mut first = encode_avro_long(0);
+        first.extend(encode_avro_int(7));
+        let mut second = encode_avro_long(1);
+        second.extend(encode_avro_bytes(b"x"));
+        let mut third = encode_avro_long(0);
+        third.extend(encode_avro_int(-1));
+        for row in [&first, &second, &third] {
+            decoder.decode(&mut AvroCursor::new(row)).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let union = flushed
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("expected UnionArray");
+        assert_eq!(union.len(), 3);
+        // Branch 0 maps to type id 2 and branch 1 to type id 5.
+        for (row, type_id) in [2, 5, 2].into_iter().enumerate() {
+            assert_eq!(union.type_id(row), type_id);
+        }
+        // Offsets count up independently inside each child.
+        for (row, offset) in [0, 0, 1].into_iter().enumerate() {
+            assert_eq!(union.value_offset(row), offset);
+        }
+        let ints = union
+            .child(2)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("int child");
+        assert_eq!(ints.len(), 2);
+        assert_eq!(ints.value(0), 7);
+        assert_eq!(ints.value(1), -1);
+        let strings = union
+            .child(5)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("string child");
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings.value(0), "x");
+    }
+
+    /// Decodes null / string / null rows into a dense union whose children use
+    /// type ids 42 (`null`) and 7 (`string`), then checks type ids, offsets, and
+    /// the length and contents of both children.
+    #[test]
+    fn test_union_dense_with_null_and_string_children() {
+        let children = vec![
+            (Codec::Null, "n", DataType::Null),
+            (Codec::Utf8, "s", DataType::Utf8),
+        ];
+        let union_dt = make_dense_union_avro(children, vec![42, 7]);
+        let mut decoder = Decoder::try_new(&union_dt).unwrap();
+        // A null row is just the branch index; a string row adds its bytes.
+        let null_row = encode_avro_long(0);
+        let mut text_row = encode_avro_long(1);
+        text_row.extend(encode_avro_bytes(b"abc"));
+        for row in [&null_row, &text_row, &null_row] {
+            decoder.decode(&mut AvroCursor::new(row)).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let union = flushed
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("expected UnionArray");
+        assert_eq!(union.len(), 3);
+        for (row, type_id) in [42, 7, 42].into_iter().enumerate() {
+            assert_eq!(union.type_id(row), type_id);
+        }
+        for (row, offset) in [0, 0, 1].into_iter().enumerate() {
+            assert_eq!(union.value_offset(row), offset);
+        }
+        let nulls = union
+            .child(42)
+            .as_any()
+            .downcast_ref::<NullArray>()
+            .expect("null child");
+        assert_eq!(nulls.len(), 2);
+        let strings = union
+            .child(7)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("string child");
+        assert_eq!(strings.len(), 1);
+        assert_eq!(strings.value(0), "abc");
+    }
+
+    #[test]
+    fn test_union_decode_negative_branch_index_errors() {
+        let union_dt = make_dense_union_avro(
+            vec![
+                (Codec::Int32, "i", DataType::Int32),
+                (Codec::Utf8, "s", DataType::Utf8),
+            ],
+            vec![0, 1],
+        );
+        let mut dec = Decoder::try_new(&union_dt).unwrap();
+        let row = encode_avro_long(-1); // decodes back to -1
+        let err = dec
+            .decode(&mut AvroCursor::new(&row))
+            .expect_err("expected error for negative branch index");
+        let msg = err.to_string();
+        assert!(
+            msg.contains("Negative union branch index"),
+            "unexpected error message: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_union_decode_out_of_range_branch_index_errors() {
+        let union_dt = make_dense_union_avro(
+            vec![
+                (Codec::Int32, "i", DataType::Int32),
+                (Codec::Utf8, "s", DataType::Utf8),
+            ],
+            vec![10, 11],
+        );
+        let mut dec = Decoder::try_new(&union_dt).unwrap();
+        let row = encode_avro_long(2);
+        let err = dec
+            .decode(&mut AvroCursor::new(&row))
+            .expect_err("expected error for out-of-range branch index");
+        let msg = err.to_string();
+        assert!(
+            msg.contains("out of range"),
+            "unexpected error message: {msg}"
+        );
+    }
+
+    /// Building a decoder for a `Codec::Union` in `UnionMode::Sparse` must fail
+    /// with "Sparse Arrow unions are not yet supported".
+    #[test]
+    fn test_union_sparse_mode_not_supported() {
+        let children: Vec<AvroDataType> = [Codec::Int32, Codec::Utf8]
+            .into_iter()
+            .map(|codec| AvroDataType::new(codec, Default::default(), None))
+            .collect();
+        let arrow_fields = vec![
+            arrow_schema::Field::new("i", DataType::Int32, true),
+            arrow_schema::Field::new("s", DataType::Utf8, true),
+        ];
+        let union_fields = UnionFields::try_new(vec![1, 3], arrow_fields).unwrap();
+        let dt = AvroDataType::new(
+            Codec::Union(children.into(), union_fields, UnionMode::Sparse),
+            Default::default(),
+            None,
+        );
+        let msg = Decoder::try_new(&dt)
+            .expect_err("sparse union should not be supported")
+            .to_string();
+        assert!(
+            msg.contains("Sparse Arrow unions are not yet supported"),
+            "unexpected error message: {msg}"
+        );
+    }
+
+    /// Test helper: assembles a `Decoder::Record` with a hand-built `Projector`.
+    ///
+    /// * `reader_fields` - `(name, data_type, nullable)` per reader field; only
+    ///   `Int32`, `Int64` and `Utf8` are supported, anything else panics.
+    /// * `field_defaults` - one optional default literal per reader field
+    ///   (length is asserted to match `reader_fields`).
+    /// * `default_injections` - `(index, literal)` pairs stored on the projector.
+    /// * `writer_to_reader_len` - number of entries in the projector's
+    ///   `writer_to_reader` and `skip_decoders` lists, all set to `None`.
+    fn make_record_decoder_with_projector_defaults(
+        reader_fields: &[(&str, DataType, bool)],
+        field_defaults: Vec<Option<AvroLiteral>>,
+        default_injections: Vec<(usize, AvroLiteral)>,
+        writer_to_reader_len: usize,
+    ) -> Decoder {
+        assert_eq!(
+            field_defaults.len(),
+            reader_fields.len(),
+            "field_defaults must have one entry per reader field"
+        );
+        // Arrow-side field descriptions, in reader order.
+        let field_refs: Vec<FieldRef> = reader_fields
+            .iter()
+            .map(|(name, dt, nullable)| Arc::new(ArrowField::new(*name, dt.clone(), *nullable)))
+            .collect();
+        // One empty child decoder per reader field, chosen by Arrow type.
+        let encodings: Vec<Decoder> = reader_fields
+            .iter()
+            .map(|(_, dt, _)| match dt {
+                DataType::Int32 => Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY)),
+                DataType::Int64 => Decoder::Int64(Vec::with_capacity(DEFAULT_CAPACITY)),
+                DataType::Utf8 => Decoder::String(
+                    OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+                    Vec::with_capacity(DEFAULT_CAPACITY),
+                ),
+                other => panic!("Unsupported test field type in helper: {other:?}"),
+            })
+            .collect();
+        let fields: Fields = field_refs.into();
+        let skip_decoders: Vec<Option<Skipper>> = std::iter::repeat_with(|| None::<Skipper>)
+            .take(writer_to_reader_len)
+            .collect();
+        let projector = Projector {
+            writer_to_reader: Arc::from(vec![None; writer_to_reader_len]),
+            skip_decoders,
+            field_defaults,
+            default_injections: Arc::from(default_injections),
+        };
+        Decoder::Record(fields, encodings, Some(projector))
+    }
+
+    /// `append_default` on an `Int32` decoder accepts an `Int` literal, and on
+    /// an `Int64` decoder accepts both `Int` and `Long` literals.
+    #[test]
+    fn test_default_append_int32_and_int64_from_int_and_long() {
+        // Int32 decoder fed a single Int default.
+        let mut int_decoder = Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY));
+        int_decoder.append_default(&AvroLiteral::Int(42)).unwrap();
+        let int_ref = int_decoder.flush(None).unwrap();
+        let ints = int_ref.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(ints.len(), 1);
+        assert_eq!(ints.value(0), 42);
+        // Int64 decoder fed an Int default followed by a Long default.
+        let mut long_decoder = Decoder::Int64(Vec::with_capacity(DEFAULT_CAPACITY));
+        for literal in [AvroLiteral::Int(5), AvroLiteral::Long(7)] {
+            long_decoder.append_default(&literal).unwrap();
+        }
+        let long_ref = long_decoder.flush(None).unwrap();
+        let longs = long_ref.as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(longs.len(), 2);
+        for (row, want) in [5, 7].into_iter().enumerate() {
+            assert_eq!(longs.value(row), want);
+        }
+    }
+
+    /// `append_default` stores a `Float` literal in a `Float32` decoder and a
+    /// `Double` literal in a `Float64` decoder unchanged.
+    #[test]
+    fn test_default_append_floats_and_doubles() {
+        let mut float_decoder = Decoder::Float32(Vec::with_capacity(DEFAULT_CAPACITY));
+        float_decoder
+            .append_default(&AvroLiteral::Float(1.5))
+            .unwrap();
+        let float_ref = float_decoder.flush(None).unwrap();
+        let floats = float_ref.as_any().downcast_ref::<Float32Array>().unwrap();
+        assert_eq!(floats.value(0), 1.5);
+        let mut double_decoder = Decoder::Float64(Vec::with_capacity(DEFAULT_CAPACITY));
+        double_decoder
+            .append_default(&AvroLiteral::Double(2.25))
+            .unwrap();
+        let double_ref = double_decoder.flush(None).unwrap();
+        let doubles = double_ref.as_any().downcast_ref::<Float64Array>().unwrap();
+        assert_eq!(doubles.value(0), 2.25);
+    }
+
+    /// Covers three cases: a `String` literal into a string decoder, a `Bytes`
+    /// literal into a binary decoder, and a `Bytes` literal into a string
+    /// decoder, which must fail with "Default for string must be string".
+    #[test]
+    fn test_default_append_string_and_bytes() {
+        // String literal -> string decoder.
+        let mut text_decoder = Decoder::String(
+            OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+            Vec::with_capacity(DEFAULT_CAPACITY),
+        );
+        let text_default = AvroLiteral::String("hi".into());
+        text_decoder.append_default(&text_default).unwrap();
+        let text_ref = text_decoder.flush(None).unwrap();
+        let texts = text_ref.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(texts.value(0), "hi");
+        // Bytes literal -> binary decoder.
+        let mut binary_decoder = Decoder::Binary(
+            OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+            Vec::with_capacity(DEFAULT_CAPACITY),
+        );
+        let binary_default = AvroLiteral::Bytes(vec![1, 2, 3]);
+        binary_decoder.append_default(&binary_default).unwrap();
+        let binary_ref = binary_decoder.flush(None).unwrap();
+        let binaries = binary_ref.as_any().downcast_ref::<BinaryArray>().unwrap();
+        assert_eq!(binaries.value(0), &[1, 2, 3]);
+        // Bytes literal -> string decoder is a type mismatch.
+        let mut mismatched_decoder = Decoder::String(
+            OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+            Vec::with_capacity(DEFAULT_CAPACITY),
+        );
+        let wrong_default = AvroLiteral::Bytes(vec![0x61, 0x62]);
+        let err = mismatched_decoder
+            .append_default(&wrong_default)
+            .unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Default for string must be string"),
+            "unexpected error: {err:?}"
+        );
+    }
+
+    /// A `NullFirst` nullable `Int32` decoder accepts a `Null` default (row
+    /// becomes null) followed by an `Int` default (row becomes 11).
+    #[test]
+    fn test_default_append_nullable_int32_null_and_value() {
+        let mut decoder = Decoder::Nullable(
+            Nullability::NullFirst,
+            NullBufferBuilder::new(DEFAULT_CAPACITY),
+            Box::new(Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY))),
+            NullablePlan::ReadTag,
+        );
+        for literal in [AvroLiteral::Null, AvroLiteral::Int(11)] {
+            decoder.append_default(&literal).unwrap();
+        }
+        let flushed = decoder.flush(None).unwrap();
+        let ints = flushed.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(ints.len(), 2);
+        assert!(ints.is_null(0));
+        assert_eq!(ints.value(1), 11);
+    }
+
+    /// An `Array` default of three `Int` literals appended to a list-of-int
+    /// decoder yields one list row holding `[1, 2, 3]`.
+    #[test]
+    fn test_default_append_array_of_ints() {
+        let item_dt = avro_from_codec(Codec::Int32);
+        let list_dt = avro_from_codec(Codec::List(Arc::new(item_dt)));
+        let mut decoder = Decoder::try_new(&list_dt).unwrap();
+        let default = AvroLiteral::Array(vec![
+            AvroLiteral::Int(1),
+            AvroLiteral::Int(2),
+            AvroLiteral::Int(3),
+        ]);
+        decoder.append_default(&default).unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let list = flushed.as_any().downcast_ref::<ListArray>().unwrap();
+        assert_eq!(list.len(), 1);
+        assert_eq!(list.value_length(0), 3);
+        let items = list.values().as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(items.values(), &[1, 2, 3]);
+    }
+
+    /// A `Map` default with two entries appended to a map-of-int decoder yields
+    /// one map row with two entries. Keys and values are compared as sets, so
+    /// entry order is not asserted.
+    #[test]
+    fn test_default_append_map_string_to_int() {
+        let value_dt = avro_from_codec(Codec::Int32);
+        let map_dt = avro_from_codec(Codec::Map(Arc::new(value_dt)));
+        let mut decoder = Decoder::try_new(&map_dt).unwrap();
+        let default_entries: IndexMap<String, AvroLiteral> = [("k1", 10), ("k2", 20)]
+            .into_iter()
+            .map(|(key, value)| (key.to_string(), AvroLiteral::Int(value)))
+            .collect();
+        decoder
+            .append_default(&AvroLiteral::Map(default_entries))
+            .unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let map = flushed.as_any().downcast_ref::<MapArray>().unwrap();
+        assert_eq!(map.len(), 1);
+        assert_eq!(map.value_length(0), 2);
+        // Keep the row's entries alive while the columns are borrowed from it.
+        let row = map.value(0);
+        let entries = row.as_any().downcast_ref::<StructArray>().unwrap();
+        let key_column = entries.column_by_name("key").unwrap();
+        let keys = key_column.as_any().downcast_ref::<StringArray>().unwrap();
+        let value_column = entries.column_by_name("value").unwrap();
+        let values = value_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        let expected_keys: std::collections::HashSet<&str> = ["k1", "k2"].into_iter().collect();
+        let actual_keys: std::collections::HashSet<&str> =
+            (0..keys.len()).map(|row| keys.value(row)).collect();
+        assert_eq!(actual_keys, expected_keys);
+        let expected_values: std::collections::HashSet<i32> = [10, 20].into_iter().collect();
+        let actual_values: std::collections::HashSet<i32> =
+            (0..values.len()).map(|row| values.value(row)).collect();
+        assert_eq!(actual_values, expected_values);
+    }
+
+    /// An `Enum("B")` default appended to an enum decoder with symbols
+    /// `A, B, C` is stored as dictionary key 1, and dictionary value 1 is "B".
+    #[test]
+    fn test_default_append_enum_by_symbol() {
+        let symbols: Arc<[String]> = ["A", "B", "C"].into_iter().map(String::from).collect();
+        let mut decoder = Decoder::Enum(Vec::with_capacity(DEFAULT_CAPACITY), symbols, None);
+        decoder
+            .append_default(&AvroLiteral::Enum("B".into()))
+            .unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let dict = flushed
+            .as_any()
+            .downcast_ref::<DictionaryArray<Int32Type>>()
+            .unwrap();
+        assert_eq!(dict.len(), 1);
+        // "B" sits at position 1 in the symbol list.
+        let expected_keys = Int32Array::from(vec![1]);
+        assert_eq!(dict.keys(), &expected_keys);
+        let dict_values = dict.values();
+        let symbols_out = dict_values
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(symbols_out.value(1), "B");
+    }
+
+    /// A UUID decoder accepts a `String` default and produces one 16-byte
+    /// fixed-size value; a `Bytes` default must fail with
+    /// "Default for uuid must be string".
+    #[test]
+    fn test_default_append_uuid_and_type_error() {
+        // Happy path: textual UUID.
+        let text = "123e4567-e89b-12d3-a456-426614174000";
+        let mut decoder = Decoder::Uuid(Vec::with_capacity(DEFAULT_CAPACITY));
+        decoder
+            .append_default(&AvroLiteral::String(text.into()))
+            .unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let uuids = flushed
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        assert_eq!(uuids.value_length(), 16);
+        assert_eq!(uuids.len(), 1);
+        // Error path: raw bytes are not accepted, even with the right length.
+        let mut rejecting = Decoder::Uuid(Vec::with_capacity(DEFAULT_CAPACITY));
+        let raw = AvroLiteral::Bytes(vec![0u8; 16]);
+        let err = rejecting.append_default(&raw).unwrap_err();
+        assert!(
+            err.to_string().contains("Default for uuid must be string"),
+            "unexpected error: {err:?}"
+        );
+    }
+
+    /// A `Fixed(4)` decoder accepts a 4-byte `Bytes` default and rejects a
+    /// 3-byte one with an error mentioning "Fixed default length".
+    #[test]
+    fn test_default_append_fixed_and_length_mismatch() {
+        // Happy path: exactly four bytes.
+        let mut decoder = Decoder::Fixed(4, Vec::with_capacity(DEFAULT_CAPACITY));
+        let exact = AvroLiteral::Bytes(vec![1, 2, 3, 4]);
+        decoder.append_default(&exact).unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let fixed = flushed
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        assert_eq!(fixed.value_length(), 4);
+        assert_eq!(fixed.value(0), &[1, 2, 3, 4]);
+        // Error path: one byte short.
+        let mut rejecting = Decoder::Fixed(4, Vec::with_capacity(DEFAULT_CAPACITY));
+        let short = AvroLiteral::Bytes(vec![1, 2, 3]);
+        let err = rejecting.append_default(&short).unwrap_err();
+        assert!(
+            err.to_string().contains("Fixed default length"),
+            "unexpected error: {err:?}"
+        );
+    }
+
+    /// A `Codec::Interval` decoder accepts a 12-byte default made of three
+    /// little-endian `u32` values (1, 2, 3) and yields months = 1, days = 2,
+    /// nanoseconds = 3_000_000. An 11-byte default must fail with
+    /// "Duration default must be exactly 12 bytes".
+    #[test]
+    fn test_default_append_duration_and_length_validation() {
+        let mut decoder = Decoder::try_new(&avro_from_codec(Codec::Interval)).unwrap();
+        // 3 x 4 little-endian bytes = 12 bytes total.
+        let payload: Vec<u8> = [1u32, 2u32, 3u32]
+            .iter()
+            .flat_map(|part| part.to_le_bytes())
+            .collect();
+        decoder
+            .append_default(&AvroLiteral::Bytes(payload))
+            .unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let intervals = flushed
+            .as_any()
+            .downcast_ref::<IntervalMonthDayNanoArray>()
+            .unwrap();
+        assert_eq!(intervals.len(), 1);
+        let interval = intervals.value(0);
+        assert_eq!(interval.months, 1);
+        assert_eq!(interval.days, 2);
+        assert_eq!(interval.nanoseconds, 3_000_000);
+        // Error path: one byte short of the required twelve.
+        let mut rejecting = Decoder::try_new(&avro_from_codec(Codec::Interval)).unwrap();
+        let short = AvroLiteral::Bytes(vec![0u8; 11]);
+        let err = rejecting.append_default(&short).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Duration default must be exactly 12 bytes"),
+            "unexpected error: {err:?}"
+        );
+    }
+
+    /// Appends two 32-byte `Bytes` defaults to a `Decimal(50, scale 2, size 32)`
+    /// decoder and checks they render as "123.45" and "-1.23".
+    #[test]
+    fn test_default_append_decimal256_from_bytes() {
+        let dt = avro_from_codec(Codec::Decimal(50, Some(2), Some(32)));
+        let mut decoder = Decoder::try_new(&dt).unwrap();
+        // All zero bytes except the trailing 0x30 0x39 (= 12345).
+        let mut positive = [0x00u8; 32];
+        positive[30] = 0x30;
+        positive[31] = 0x39;
+        decoder
+            .append_default(&AvroLiteral::Bytes(positive.to_vec()))
+            .unwrap();
+        // All 0xFF bytes except the trailing 0x85 (two's complement -123).
+        let mut negative = [0xFFu8; 32];
+        negative[31] = 0x85;
+        decoder
+            .append_default(&AvroLiteral::Bytes(negative.to_vec()))
+            .unwrap();
+        let flushed = decoder.flush(None).unwrap();
+        let decimals = flushed.as_any().downcast_ref::<Decimal256Array>().unwrap();
+        assert_eq!(decimals.len(), 2);
+        assert_eq!(decimals.value_as_string(0), "123.45");
+        assert_eq!(decimals.value_as_string(1), "-1.23");
+    }
+
+    /// Appends a `Map` default that only names field `a` to a record decoder
+    /// whose projector holds a default for field `b`; `a` must take the map's
+    /// value and `b` the projector's default.
+    #[test]
+    fn test_record_append_default_map_missing_fields_uses_projector_field_defaults() {
+        let reader_fields = [("a", DataType::Int32, false), ("b", DataType::Utf8, false)];
+        let projector_defaults = vec![None, Some(AvroLiteral::String("hi".into()))];
+        let mut record_decoder = make_record_decoder_with_projector_defaults(
+            &reader_fields,
+            projector_defaults,
+            vec![],
+            0,
+        );
+        // The record default supplies `a` only.
+        let partial: IndexMap<String, AvroLiteral> =
+            std::iter::once(("a".to_string(), AvroLiteral::Int(7))).collect();
+        record_decoder
+            .append_default(&AvroLiteral::Map(partial))
+            .unwrap();
+        let flushed = record_decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let a_column = record.column_by_name("a").unwrap();
+        let a_values = a_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        let b_column = record.column_by_name("b").unwrap();
+        let b_values = b_column.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(a_values.value(0), 7);
+        assert_eq!(b_values.value(0), "hi");
+    }
+
+    /// Appending a `Null` default to a record decoder must fill every field
+    /// from the projector's per-field defaults (`a` = 5, `b` = "x").
+    #[test]
+    fn test_record_append_default_null_uses_projector_field_defaults() {
+        let reader_fields = [("a", DataType::Int32, false), ("b", DataType::Utf8, false)];
+        let projector_defaults = vec![
+            Some(AvroLiteral::Int(5)),
+            Some(AvroLiteral::String("x".into())),
+        ];
+        let mut record_decoder = make_record_decoder_with_projector_defaults(
+            &reader_fields,
+            projector_defaults,
+            vec![],
+            0,
+        );
+        record_decoder.append_default(&AvroLiteral::Null).unwrap();
+        let flushed = record_decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let a_column = record.column_by_name("a").unwrap();
+        let a_values = a_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        let b_column = record.column_by_name("b").unwrap();
+        let b_values = b_column.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(a_values.value(0), 5);
+        assert_eq!(b_values.value(0), "x");
+    }
+
+    /// With no projector defaults at all, a `Map` default that only names `a`
+    /// must leave nullable field `b` null while `a` takes the supplied value.
+    #[test]
+    fn test_record_append_default_missing_fields_without_projector_defaults_yields_type_nulls_or_empties()
+     {
+        // Wraps a child decoder in a NullSecond nullable decoder.
+        let wrap_nullable = |inner: Decoder| {
+            Decoder::Nullable(
+                Nullability::NullSecond,
+                NullBufferBuilder::new(DEFAULT_CAPACITY),
+                Box::new(inner),
+                NullablePlan::ReadTag,
+            )
+        };
+        let field_refs: Vec<FieldRef> = [("a", DataType::Int32, true), ("b", DataType::Utf8, true)]
+            .into_iter()
+            .map(|(name, dt, is_nullable)| Arc::new(ArrowField::new(name, dt, is_nullable)))
+            .collect();
+        let encoders: Vec<Decoder> = vec![
+            wrap_nullable(Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY))),
+            wrap_nullable(Decoder::String(
+                OffsetBufferBuilder::new(DEFAULT_CAPACITY),
+                Vec::with_capacity(DEFAULT_CAPACITY),
+            )),
+        ];
+        let projector = Projector {
+            writer_to_reader: Arc::from(vec![]),
+            skip_decoders: vec![],
+            field_defaults: vec![None, None], // no defaults -> append_null
+            default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()),
+        };
+        let mut record_decoder = Decoder::Record(field_refs.into(), encoders, Some(projector));
+        // The record default supplies `a` only.
+        let partial: IndexMap<String, AvroLiteral> =
+            std::iter::once(("a".to_string(), AvroLiteral::Int(9))).collect();
+        record_decoder
+            .append_default(&AvroLiteral::Map(partial))
+            .unwrap();
+        let flushed = record_decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let a_column = record.column_by_name("a").unwrap();
+        let a_values = a_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        let b_column = record.column_by_name("b").unwrap();
+        let b_values = b_column.as_any().downcast_ref::<StringArray>().unwrap();
+        assert!(a_values.is_valid(0));
+        assert_eq!(a_values.value(0), 9);
+        assert!(b_values.is_null(0));
+    }
+
+    /// Decoding an empty input with a projector that has no writer fields but
+    /// two default injections must produce one row built entirely from the
+    /// injected literals (`id` = 99, `name` = "alice").
+    #[test]
+    fn test_projector_default_injection_when_writer_lacks_fields() {
+        let reader_fields = [
+            ("id", DataType::Int32, false),
+            ("name", DataType::Utf8, false),
+        ];
+        let injections = vec![
+            (0, AvroLiteral::Int(99)),
+            (1, AvroLiteral::String("alice".into())),
+        ];
+        let mut record_decoder = make_record_decoder_with_projector_defaults(
+            &reader_fields,
+            vec![None, None],
+            injections,
+            0,
+        );
+        // Nothing to read: the cursor is over an empty slice.
+        record_decoder.decode(&mut AvroCursor::new(&[])).unwrap();
+        let flushed = record_decoder.flush(None).unwrap();
+        let record = flushed.as_any().downcast_ref::<StructArray>().unwrap();
+        let id_column = record.column_by_name("id").unwrap();
+        let ids = id_column.as_any().downcast_ref::<Int32Array>().unwrap();
+        let name_column = record.column_by_name("name").unwrap();
+        let names = name_column.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(ids.value(0), 99);
+        assert_eq!(names.value(0), "alice");
+    }
+
+    /// Uses type ids (42, 7) that differ from the branch positions (0, 1) to
+    /// show that the decoded `UnionArray` reports the ids from `UnionFields`,
+    /// not the Avro branch indexes.
+    #[test]
+    fn union_type_ids_are_not_child_indexes() {
+        let encodings: Vec<AvroDataType> =
+            vec![avro_from_codec(Codec::Int32), avro_from_codec(Codec::Utf8)];
+        let int_field = Arc::new(ArrowField::new("a", DataType::Int32, true));
+        let text_field = Arc::new(ArrowField::new("b", DataType::Utf8, true));
+        let fields: UnionFields = [(42_i8, int_field), (7_i8, text_field)]
+            .into_iter()
+            .collect();
+        let union_codec = Codec::Union(encodings.into(), fields.clone(), UnionMode::Dense);
+        let dt = avro_from_codec(union_codec);
+        let mut decoder = Decoder::try_new(&dt).expect("decoder");
+        // Row 0 picks branch 1 (the string child).
+        let mut text_row = Vec::new();
+        text_row.extend_from_slice(&encode_avro_long(1));
+        text_row.extend_from_slice(&encode_avro_bytes("hi".as_bytes()));
+        decoder
+            .decode(&mut AvroCursor::new(&text_row))
+            .expect("decode b1");
+        // Row 1 picks branch 0 (the int child).
+        let mut int_row = Vec::new();
+        int_row.extend_from_slice(&encode_avro_long(0));
+        int_row.extend_from_slice(&encode_avro_int(5));
+        decoder
+            .decode(&mut AvroCursor::new(&int_row))
+            .expect("decode b0");
+        let flushed = decoder.flush(None).expect("flush");
+        let union = flushed
+            .as_any()
+            .downcast_ref::<UnionArray>()
+            .expect("union");
+        assert_eq!(union.len(), 2);
+        assert_eq!(union.type_id(0), 7, "type id must come from UnionFields");
+        assert_eq!(union.type_id(1), 42, "type id must come from UnionFields");
+        assert_eq!(union.value_offset(0), 0);
+        assert_eq!(union.value_offset(1), 0);
+        let texts = union
+            .child(7)
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert_eq!(texts.len(), 1);
+        assert_eq!(texts.value(0), "hi");
+        let ints = union
+            .child(42)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(ints.len(), 1);
+        assert_eq!(ints.value(0), 5);
+        // The field list itself still reports the ids in declaration order.
+        let declared_ids: Vec<i8> = fields.iter().map(|(type_id, _)| type_id).collect();
+        assert_eq!(declared_ids, vec![42_i8, 7_i8]);
+    }
+
+    /// `Skipper::from_avro` must return `Skipper::Int64` for each of the four
+    /// custom duration codecs.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn skipper_from_avro_maps_custom_duration_variants_to_int64() -> Result<(), ArrowError> {
+        let codecs = [
+            Codec::DurationNanos,
+            Codec::DurationMicros,
+            Codec::DurationMillis,
+            Codec::DurationSeconds,
+        ];
+        for codec in codecs {
+            let dt = make_avro_dt(codec.clone(), None);
+            let skipper = Skipper::from_avro(&dt)?;
+            assert!(
+                matches!(skipper, Skipper::Int64),
+                "expected Int64 skipper for {:?}, got {:?}",
+                codec,
+                skipper
+            );
+        }
+        Ok(())
+    }
+
+    /// For each custom duration codec, skipping an encoded long must consume
+    /// exactly the bytes of that long, across a range of positive, negative,
+    /// and large values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn skipper_skip_consumes_one_long_for_custom_durations() -> Result<(), ArrowError> {
+        let samples: [i64; 7] = [0, 1, -1, 150, -150, i64::MAX / 3, i64::MIN / 3];
+        let codecs = [
+            Codec::DurationNanos,
+            Codec::DurationMicros,
+            Codec::DurationMillis,
+            Codec::DurationSeconds,
+        ];
+        for codec in codecs {
+            let dt = make_avro_dt(codec.clone(), None);
+            let mut skipper = Skipper::from_avro(&dt)?;
+            for value in samples {
+                let encoded = encode_avro_long(value);
+                let mut cursor = AvroCursor::new(&encoded);
+                skipper.skip(&mut cursor)?;
+                let consumed = cursor.position();
+                assert_eq!(
+                    consumed,
+                    encoded.len(),
+                    "did not consume all bytes for {:?} value {}",
+                    codec,
+                    value
+                );
+            }
+        }
+        Ok(())
+    }
+
+    /// A `NullFirst` nullable `DurationNanos` type must map to
+    /// `Skipper::Nullable(NullFirst, Int64)`. Skipping tag 0 consumes one byte;
+    /// skipping tag 1 followed by `long(0)` consumes two.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn skipper_nullable_custom_duration_respects_null_first() -> Result<(), ArrowError> {
+        let dt = make_avro_dt(Codec::DurationNanos, Some(Nullability::NullFirst));
+        let mut skipper = Skipper::from_avro(&dt)?;
+        // Check the skipper's shape before exercising it.
+        if let Skipper::Nullable(Nullability::NullFirst, inner) = &skipper {
+            assert!(
+                matches!(**inner, Skipper::Int64),
+                "expected inner Int64, got {:?}",
+                inner
+            );
+        } else {
+            panic!("expected Nullable(NullFirst, Int64), got {:?}", skipper);
+        }
+        // Tag 0 alone: only the tag byte is consumed.
+        let tag_only = encode_vlq_u64(0);
+        let mut tag_cursor = AvroCursor::new(&tag_only);
+        skipper.skip(&mut tag_cursor)?;
+        assert_eq!(tag_cursor.position(), 1, "expected to consume only tag=0");
+        // Tag 1 followed by long(0): the tag and the value are both consumed.
+        let mut tag_and_value = encode_vlq_u64(1);
+        tag_and_value.extend(encode_avro_long(0));
+        let mut value_cursor = AvroCursor::new(&tag_and_value);
+        skipper.skip(&mut value_cursor)?;
+        assert_eq!(value_cursor.position(), 2, "expected to consume tag=1 + long(0)");
+        Ok(())
+    }
+
+    /// A `NullSecond` nullable duration must build `Nullable(NullSecond, Int64)` and
+    /// consume only the tag for tag 1, or the tag plus one long for tag 0.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn skipper_nullable_custom_duration_respects_null_second() -> Result<(), ArrowError> {
+        let dt = make_avro_dt(Codec::DurationMicros, Some(Nullability::NullSecond));
+        let mut skipper = Skipper::from_avro(&dt)?;
+
+        // Shape check: outer nullable wrapper first, then the wrapped skipper.
+        let Skipper::Nullable(Nullability::NullSecond, inner) = &skipper else {
+            panic!("expected Nullable(NullSecond, Int64), got {:?}", skipper);
+        };
+        assert!(
+            matches!(**inner, Skipper::Int64),
+            "expected inner Int64, got {:?}",
+            &**inner
+        );
+
+        // Tag 1 alone: only the single tag byte is consumed.
+        let tag_only = encode_vlq_u64(1);
+        let mut cursor = AvroCursor::new(&tag_only);
+        skipper.skip(&mut cursor)?;
+        assert_eq!(cursor.position(), 1, "expected to consume only tag=1");
+
+        // Tag 0 followed by long(-1): tag byte plus the full value encoding.
+        let value_bytes = encode_avro_long(-1);
+        let expected_len = 1 + value_bytes.len();
+        let mut tag_and_value = encode_vlq_u64(0);
+        tag_and_value.extend(value_bytes);
+        let mut cursor = AvroCursor::new(&tag_and_value);
+        skipper.skip(&mut cursor)?;
+        assert_eq!(
+            cursor.position(),
+            expected_len,
+            "expected to consume tag=0 + long(-1)"
+        );
+        Ok(())
+    }
+
+    /// `Codec::Interval` must map to `Skipper::DurationFixed12`, and skipping it
+    /// must advance the cursor by 12 bytes.
+    #[test]
+    fn skipper_interval_is_fixed12_and_skips_12_bytes() -> Result<(), ArrowError> {
+        let dt = make_avro_dt(Codec::Interval, None);
+        let mut skipper = Skipper::from_avro(&dt)?;
+        assert!(
+            matches!(skipper, Skipper::DurationFixed12),
+            "expected DurationFixed12, got {:?}",
+            skipper
+        );
+
+        let twelve_zero_bytes = vec![0u8; 12];
+        let mut cursor = AvroCursor::new(&twelve_zero_bytes);
+        skipper.skip(&mut cursor)?;
+        assert_eq!(cursor.position(), 12, "expected to consume 12 fixed bytes");
+        Ok(())
+    }
+
+    /// Decoding nine ints through a width-16 run-end-encoded type must yield a
+    /// `RunArray<Int16Type>` with three runs ending at 3, 5 and 9.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_width16_int32_basic_grouping() {
+        use arrow_array::RunArray;
+        use std::sync::Arc;
+        let value_type = Arc::new(avro_from_codec(Codec::Int32));
+        let ree_type = AvroDataType::new(
+            Codec::RunEndEncoded(value_type, 16),
+            Default::default(),
+            None,
+        );
+        let mut decoder = Decoder::try_new(&ree_type).expect("create REE decoder");
+
+        // Three runs: 1 x3, 2 x2, 3 x4.
+        let input: [i32; 9] = [1, 1, 1, 2, 2, 3, 3, 3, 3];
+        for v in input {
+            let encoded = encode_avro_int(v);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).expect("decode");
+        }
+
+        let flushed = decoder.flush(None).expect("flush");
+        let run_array = flushed
+            .as_any()
+            .downcast_ref::<RunArray<Int16Type>>()
+            .expect("RunArray<Int16Type>");
+        assert_eq!(run_array.len(), 9);
+        assert_eq!(run_array.run_ends().values(), &[3, 5, 9]);
+
+        let run_values = run_array
+            .values()
+            .as_ref()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("values Int32");
+        assert_eq!(run_values.values(), &[1, 2, 3]);
+    }
+
+    /// With a `NullSecond` nullable Int32 inside a width-32 run-end encoding,
+    /// consecutive nulls must be grouped into runs just like consecutive equal values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_width32_nullable_values_group_nulls() {
+        use arrow_array::RunArray;
+        use std::sync::Arc;
+        let nullable_int = AvroDataType::new(
+            Codec::Int32,
+            Default::default(),
+            Some(Nullability::NullSecond),
+        );
+        let ree_type = AvroDataType::new(
+            Codec::RunEndEncoded(Arc::new(nullable_int), 32),
+            Default::default(),
+            None,
+        );
+        let mut decoder = Decoder::try_new(&ree_type).expect("create REE decoder");
+
+        // Four runs: null x2, 7 x3, null x1, 5 x2.
+        let input: [Option<i32>; 8] = [
+            None,
+            None,
+            Some(7),
+            Some(7),
+            Some(7),
+            None,
+            Some(5),
+            Some(5),
+        ];
+        for item in input {
+            let mut encoded = Vec::new();
+            if let Some(v) = item {
+                // Tag 0 followed by the int payload.
+                encoded.extend_from_slice(&encode_vlq_u64(0));
+                encoded.extend_from_slice(&encode_avro_int(v));
+            } else {
+                // Tag 1 with no payload.
+                encoded.extend_from_slice(&encode_vlq_u64(1));
+            }
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).expect("decode");
+        }
+
+        let flushed = decoder.flush(None).expect("flush");
+        let run_array = flushed
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("RunArray<Int32Type>");
+        assert_eq!(run_array.len(), 8);
+        assert_eq!(run_array.run_ends().values(), &[2, 5, 6, 8]);
+
+        let run_values = run_array
+            .values()
+            .as_ref()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("values Int32 (nullable)");
+        assert_eq!(run_values.len(), 4);
+        assert!(run_values.is_null(0));
+        assert_eq!(run_values.value(1), 7);
+        assert!(run_values.is_null(2));
+        assert_eq!(run_values.value(3), 5);
+    }
+
+    /// Feeds plain Avro ints into a hand-built `Nullable` decoder that uses
+    /// `NullablePlan::FromSingle` with `Promotion::IntToDouble` around a run-end
+    /// encoded `Float64` decoder, then checks the runs and the promoted values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_decode_with_promotion_int_to_double_via_nullable_from_single() {
+        use arrow_array::RunArray;
+        let float_values = Box::new(Decoder::Float64(Vec::with_capacity(DEFAULT_CAPACITY)));
+        // First argument 8: bytes => Int64 run-ends.
+        let run_end_decoder = Decoder::RunEndEncoded(8, 0, float_values);
+        let mut decoder = Decoder::Nullable(
+            Nullability::NullSecond,
+            NullBufferBuilder::new(DEFAULT_CAPACITY),
+            Box::new(run_end_decoder),
+            NullablePlan::FromSingle {
+                promotion: Promotion::IntToDouble,
+            },
+        );
+
+        // Two runs: 1 x2, 2 x3.
+        let input = [1, 1, 2, 2, 2];
+        for v in input {
+            let encoded = encode_avro_int(v);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).expect("decode");
+        }
+
+        let flushed = decoder.flush(None).expect("flush");
+        let run_array = flushed
+            .as_any()
+            .downcast_ref::<RunArray<Int64Type>>()
+            .expect("RunArray<Int64Type>");
+        assert_eq!(run_array.len(), 5);
+        assert_eq!(run_array.run_ends().values(), &[2, 5]);
+
+        let run_values = run_array
+            .values()
+            .as_ref()
+            .as_any()
+            .downcast_ref::<Float64Array>()
+            .expect("values Float64");
+        assert_eq!(run_values.values(), &[1.0, 2.0]);
+    }
+
+    /// A run-end width of 3 must be rejected by `Decoder::try_new` with an error
+    /// that names the problem and lists the supported widths.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_unsupported_run_end_width_errors() {
+        use std::sync::Arc;
+        let value_type = Arc::new(avro_from_codec(Codec::Int32));
+        let unsupported = AvroDataType::new(
+            Codec::RunEndEncoded(value_type, 3),
+            Default::default(),
+            None,
+        );
+        let msg = Decoder::try_new(&unsupported)
+            .expect_err("must reject unsupported width")
+            .to_string();
+        let names_problem = msg.contains("Unsupported run-end width");
+        let lists_supported = msg.contains("16/32/64 bits or 2/4/8 bytes");
+        assert!(
+            names_problem && lists_supported,
+            "unexpected error message: {msg}"
+        );
+    }
+
+    /// Flushing a run-end-encoded decoder that has decoded nothing must produce an
+    /// empty `RunArray<Int32Type>` with no run ends and no values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_empty_input_is_empty_runarray() {
+        use arrow_array::RunArray;
+        use std::sync::Arc;
+        let value_type = Arc::new(avro_from_codec(Codec::Utf8));
+        let ree_type = AvroDataType::new(
+            Codec::RunEndEncoded(value_type, 4),
+            Default::default(),
+            None,
+        );
+        let mut decoder = Decoder::try_new(&ree_type).expect("create REE decoder");
+
+        // No decode calls: flush straight away.
+        let flushed = decoder.flush(None).expect("flush");
+        let run_array = flushed
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("RunArray<Int32Type>");
+        assert_eq!(run_array.len(), 0);
+        assert_eq!(run_array.run_ends().len(), 0);
+        assert_eq!(run_array.values().len(), 0);
+    }
+
+    /// Decoding strings through a width-32 run-end encoding must group only
+    /// adjacent equal values: a later repeat of "a" starts a new run.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_strings_grouping_width32_bits() {
+        use arrow_array::RunArray;
+        use std::sync::Arc;
+        let value_type = Arc::new(avro_from_codec(Codec::Utf8));
+        let ree_type = AvroDataType::new(
+            Codec::RunEndEncoded(value_type, 32),
+            Default::default(),
+            None,
+        );
+        let mut decoder = Decoder::try_new(&ree_type).expect("create REE decoder");
+
+        // Three runs: "a" x2, "bb" x3, "a" x1.
+        let input = ["a", "a", "bb", "bb", "bb", "a"];
+        for s in input {
+            let encoded = encode_avro_bytes(s.as_bytes());
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).expect("decode");
+        }
+
+        let flushed = decoder.flush(None).expect("flush");
+        let run_array = flushed
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("RunArray<Int32Type>");
+        assert_eq!(run_array.run_ends().values(), &[2, 5, 6]);
+
+        let run_values = run_array
+            .values()
+            .as_ref()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .expect("values String");
+        assert_eq!(run_values.len(), 3);
+        assert_eq!(run_values.value(0), "a");
+        assert_eq!(run_values.value(1), "bb");
+        assert_eq!(run_values.value(2), "a");
+    }
+
+    /// Smoke test compiled only without `avro_custom_types`: a plain Int32 decoder
+    /// must round-trip three values into an `Int32Array`.
+    #[cfg(not(feature = "avro_custom_types"))]
+    #[test]
+    fn test_no_custom_types_feature_smoke_decodes_plain_int32() {
+        let int_type = avro_from_codec(Codec::Int32);
+        let mut decoder = Decoder::try_new(&int_type).expect("create Int32 decoder");
+
+        let input = [1, 2, 3];
+        for v in input {
+            let encoded = encode_avro_int(v);
+            let mut cursor = AvroCursor::new(&encoded);
+            decoder.decode(&mut cursor).expect("decode");
+        }
+
+        let flushed = decoder.flush(None).expect("flush");
+        let ints = flushed
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Int32Array");
+        assert_eq!(ints.values(), &[1, 2, 3]);
+    }
+
+    /// `Codec::TimestampNanos(true)` must decode longs into a
+    /// `TimestampNanosecondArray` whose timezone is `"+00:00"`.
+    #[test]
+    fn test_timestamp_nanos_decoding_utc() {
+        let inputs = [0_i64, 1_i64, -1_i64, 1_234_567_890_i64];
+        let avro_type = avro_from_codec(Codec::TimestampNanos(true));
+        let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder");
+
+        // Concatenate all encoded values into one buffer read by a single cursor.
+        let mut data = Vec::new();
+        inputs
+            .iter()
+            .for_each(|&v| data.extend_from_slice(&encode_avro_long(v)));
+        let mut cur = AvroCursor::new(&data);
+        for _ in 0..inputs.len() {
+            decoder.decode(&mut cur).expect("decode nanos ts");
+        }
+
+        let array = decoder.flush(None).expect("flush nanos ts");
+        let ts = array
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .expect("TimestampNanosecondArray");
+        assert_eq!(ts.values(), &[0, 1, -1, 1_234_567_890]);
+
+        let DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, tz) = ts.data_type() else {
+            let other = ts.data_type();
+            panic!("expected Timestamp(Nanosecond, Some(\"+00:00\")), got {other:?}");
+        };
+        assert_eq!(tz.as_deref(), Some("+00:00"));
+    }
+
+    /// `Codec::TimestampNanos(false)` must decode longs into a
+    /// `TimestampNanosecondArray` with no timezone.
+    #[test]
+    fn test_timestamp_nanos_decoding_local() {
+        let inputs = [10_i64, 20_i64, -30_i64];
+        let avro_type = avro_from_codec(Codec::TimestampNanos(false));
+        let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder");
+
+        // Concatenate all encoded values into one buffer read by a single cursor.
+        let mut data = Vec::new();
+        inputs
+            .iter()
+            .for_each(|&v| data.extend_from_slice(&encode_avro_long(v)));
+        let mut cur = AvroCursor::new(&data);
+        for _ in 0..inputs.len() {
+            decoder.decode(&mut cur).expect("decode nanos ts");
+        }
+
+        let array = decoder.flush(None).expect("flush nanos ts");
+        let ts = array
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .expect("TimestampNanosecondArray");
+        assert_eq!(ts.values(), &[10, 20, -30]);
+
+        let DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, tz) = ts.data_type() else {
+            let other = ts.data_type();
+            panic!("expected Timestamp(Nanosecond, None), got {other:?}");
+        };
+        assert_eq!(tz.as_deref(), None);
+    }
+
+    /// A `NullFirst` nullable `TimestampNanos(false)` must decode a mix of tagged
+    /// values and a tagged null into a three-element array with one null slot.
+    #[test]
+    fn test_timestamp_nanos_decoding_with_nulls() {
+        let nullable_ts = AvroDataType::new(
+            Codec::TimestampNanos(false),
+            Default::default(),
+            Some(Nullability::NullFirst),
+        );
+        let mut decoder = Decoder::try_new(&nullable_ts).expect("create nullable TimestampNanos");
+
+        // Wire layout: (tag 1, 42), (tag 0), (tag 1, -7).
+        let wire_longs = [1_i64, 42, 0, 1, -7];
+        let mut data = Vec::new();
+        for v in wire_longs {
+            data.extend_from_slice(&encode_avro_long(v));
+        }
+        let mut cur = AvroCursor::new(&data);
+        (0..3).for_each(|_| {
+            decoder.decode(&mut cur).expect("decode nullable nanos ts");
+        });
+
+        let array = decoder.flush(None).expect("flush nullable nanos ts");
+        let ts = array
+            .as_any()
+            .downcast_ref::<TimestampNanosecondArray>()
+            .expect("TimestampNanosecondArray");
+        assert_eq!(ts.len(), 3);
+        assert!(ts.is_valid(0));
+        assert!(ts.is_null(1));
+        assert!(ts.is_valid(2));
+        assert_eq!(ts.value(0), 42);
+        assert_eq!(ts.value(2), -7);
+
+        let DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, tz) = ts.data_type() else {
+            let other = ts.data_type();
+            panic!("expected Timestamp(Nanosecond, None), got {other:?}");
+        };
+        assert_eq!(tz.as_deref(), None);
+    }
 }
diff --git a/arrow-avro/src/reader/vlq.rs b/arrow-avro/src/reader/vlq.rs
index b198a0d66f24..c0b471b466ea 100644
--- a/arrow-avro/src/reader/vlq.rs
+++ b/arrow-avro/src/reader/vlq.rs
@@ -84,7 +84,7 @@ fn read_varint_array(buf: [u8; 10]) -> Option<(u64, usize)> {
 #[cold]
 fn read_varint_slow(buf: &[u8]) -> Option<(u64, usize)> {
     let mut value = 0;
-    for (count, byte) in buf.iter().take(10).enumerate() {
+    for (count, _byte) in buf.iter().take(10).enumerate() {
         let byte = buf[count];
         value |= u64::from(byte & 0x7F) << (count * 7);
         if byte <= 0x7F {
diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs
index c3e4549c8c38..819ea1f16e9b 100644
--- a/arrow-avro/src/schema.rs
+++ b/arrow-avro/src/schema.rs
@@ -15,12 +15,69 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Avro Schema representations for Arrow.
+
+#[cfg(feature = "canonical_extension_types")]
+use arrow_schema::extension::ExtensionType;
+use arrow_schema::{
+    ArrowError, DataType, Field as ArrowField, IntervalUnit, Schema as ArrowSchema, TimeUnit,
+    UnionMode,
+};
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
+use serde_json::{Map as JsonMap, Value, json};
+#[cfg(feature = "sha256")]
+use sha2::{Digest, Sha256};
+use std::borrow::Cow;
+use std::cmp::PartialEq;
+use std::collections::hash_map::Entry;
+use std::collections::{HashMap, HashSet};
+use strum_macros::AsRefStr;
+
+/// The Avro single‑object encoding “magic” bytes (`0xC3 0x01`)
+pub const SINGLE_OBJECT_MAGIC: [u8; 2] = [0xC3, 0x01];
+
+/// The Confluent "magic" byte (`0x00`)
+pub const CONFLUENT_MAGIC: [u8; 1] = [0x00];
+
+/// The maximum possible length of a prefix.
+/// SHA256 (32) + single-object magic (2)
+pub const MAX_PREFIX_LEN: usize = 34;
 
-/// The metadata key used for storing the JSON encoded [`Schema`]
+/// The metadata key used for storing the JSON encoded `Schema`
 pub const SCHEMA_METADATA_KEY: &str = "avro.schema";
 
+/// Metadata key used to represent Avro enum symbols in an Arrow schema.
+pub const AVRO_ENUM_SYMBOLS_METADATA_KEY: &str = "avro.enum.symbols";
+
+/// Metadata key used to store the default value of a field in an Avro schema.
+pub const AVRO_FIELD_DEFAULT_METADATA_KEY: &str = "avro.field.default";
+
+/// Metadata key used to store the name of a type in an Avro schema.
+pub const AVRO_NAME_METADATA_KEY: &str = "avro.name";
+
+/// Metadata key used to store the namespace of a type in an Avro schema.
+pub const AVRO_NAMESPACE_METADATA_KEY: &str = "avro.namespace";
+
+/// Metadata key used to store the documentation for a type in an Avro schema.
+pub const AVRO_DOC_METADATA_KEY: &str = "avro.doc";
+
+/// Default name for the root record in an Avro schema.
+pub const AVRO_ROOT_RECORD_DEFAULT_NAME: &str = "topLevelRecord";
+
+/// Avro types are not nullable, with nullability instead encoded as a union
+/// where one of the variants is the null type.
+///
+/// To accommodate this, we special-case two-variant unions where one of the
+/// variants is the null type, and use this to derive Arrow's notion of nullability.
+#[derive(Debug, Copy, Clone, PartialEq, Default)]
+pub(crate) enum Nullability {
+    /// The nulls are encoded as the first union variant (this is the `Default`)
+    #[default]
+    NullFirst,
+    /// The nulls are encoded as the second union variant
+    NullSecond,
+}
+
 /// Either a [`PrimitiveType`] or a reference to a previously defined named type
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#names>
@@ -29,7 +86,7 @@ pub const SCHEMA_METADATA_KEY: &str = "avro.schema";
 /// A type name in an Avro schema
 ///
 /// This represents the different ways a type can be referenced in an Avro schema.
-pub enum TypeName<'a> {
+pub(crate) enum TypeName<'a> {
     /// A primitive type like null, boolean, int, etc.
     Primitive(PrimitiveType),
     /// A reference to another named type
@@ -39,9 +96,10 @@ pub enum TypeName<'a> {
 /// A primitive type
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#primitive-types>
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, AsRefStr)]
 #[serde(rename_all = "camelCase")]
-pub enum PrimitiveType {
+#[strum(serialize_all = "lowercase")]
+pub(crate) enum PrimitiveType {
     /// null: no value
     Null,
     /// boolean: a binary value
@@ -60,21 +118,21 @@ pub enum PrimitiveType {
     String,
 }
 
-/// Additional attributes within a [`Schema`]
+/// Additional attributes within a `Schema`
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#schema-declaration>
 #[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize, Serialize)]
 #[serde(rename_all = "camelCase")]
-pub struct Attributes<'a> {
+pub(crate) struct Attributes<'a> {
     /// A logical type name
     ///
     /// <https://avro.apache.org/docs/1.11.1/specification/#logical-types>
     #[serde(default)]
-    pub logical_type: Option<&'a str>,
+    pub(crate) logical_type: Option<&'a str>,
 
     /// Additional JSON attributes
     #[serde(flatten)]
-    pub additional: HashMap<&'a str, serde_json::Value>,
+    pub(crate) additional: HashMap<&'a str, Value>,
 }
 
 impl Attributes<'_> {
@@ -90,13 +148,13 @@ impl Attributes<'_> {
 /// A type definition that is not a variant of [`ComplexType`]
 #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
 #[serde(rename_all = "camelCase")]
-pub struct Type<'a> {
+pub(crate) struct Type<'a> {
     /// The type of this Avro data structure
     #[serde(borrow)]
-    pub r#type: TypeName<'a>,
+    pub(crate) r#type: TypeName<'a>,
     /// Additional attributes associated with this type
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
 }
 
 /// An Avro schema
@@ -105,7 +163,7 @@ pub struct Type<'a> {
 /// See <https://avro.apache.org/docs/1.11.1/specification/#schemas> for more details.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(untagged)]
-pub enum Schema<'a> {
+pub(crate) enum Schema<'a> {
     /// A direct type name (primitive or reference)
     #[serde(borrow)]
     TypeName(TypeName<'a>),
@@ -125,7 +183,7 @@ pub enum Schema<'a> {
 /// <https://avro.apache.org/docs/1.11.1/specification/#complex-types>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "camelCase")]
-pub enum ComplexType<'a> {
+pub(crate) enum ComplexType<'a> {
     /// Record type: a sequence of fields with names and types
     #[serde(borrow)]
     Record(Record<'a>),
@@ -147,125 +205,1580 @@ pub enum ComplexType<'a> {
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#schema-record>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Record<'a> {
+pub(crate) struct Record<'a> {
     /// Name of the record
     #[serde(borrow)]
-    pub name: &'a str,
+    pub(crate) name: &'a str,
     /// Optional namespace for the record, provides a way to organize names
     #[serde(borrow, default)]
-    pub namespace: Option<&'a str>,
+    pub(crate) namespace: Option<&'a str>,
     /// Optional documentation string for the record
     #[serde(borrow, default)]
-    pub doc: Option<&'a str>,
+    pub(crate) doc: Option<Cow<'a, str>>,
     /// Alternative names for this record
     #[serde(borrow, default)]
-    pub aliases: Vec<&'a str>,
+    pub(crate) aliases: Vec<&'a str>,
     /// The fields contained in this record
     #[serde(borrow)]
-    pub fields: Vec<Field<'a>>,
+    pub(crate) fields: Vec<Field<'a>>,
     /// Additional attributes for this record
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
+}
+
+/// Deserializes a field's `default` attribute, wrapping whatever JSON value is
+/// present in `Some`.
+///
+/// Because the value is always wrapped, an explicit JSON `null` is kept as
+/// `Some(Value::Null)` instead of collapsing to `None`.
+fn deserialize_default<'de, D>(deserializer: D) -> Result<Option<Value>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let value = Value::deserialize(deserializer)?;
+    Ok(Some(value))
 }
 
 /// A field within a [`Record`]
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Field<'a> {
+pub(crate) struct Field<'a> {
     /// Name of the field within the record
     #[serde(borrow)]
-    pub name: &'a str,
+    pub(crate) name: &'a str,
     /// Optional documentation for this field
     #[serde(borrow, default)]
-    pub doc: Option<&'a str>,
+    pub(crate) doc: Option<Cow<'a, str>>,
     /// The field's type definition
     #[serde(borrow)]
-    pub r#type: Schema<'a>,
+    pub(crate) r#type: Schema<'a>,
     /// Optional default value for this field
+    #[serde(deserialize_with = "deserialize_default", default)]
+    pub(crate) default: Option<Value>,
+    /// Alternative names (aliases) for this field (Avro spec: field-level aliases).
+    /// Borrowed from input JSON where possible.
     #[serde(borrow, default)]
-    pub default: Option<&'a str>,
+    pub(crate) aliases: Vec<&'a str>,
 }
 
 /// An enumeration
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#enums>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Enum<'a> {
+pub(crate) struct Enum<'a> {
     /// Name of the enum
     #[serde(borrow)]
-    pub name: &'a str,
+    pub(crate) name: &'a str,
     /// Optional namespace for the enum, provides organizational structure
     #[serde(borrow, default)]
-    pub namespace: Option<&'a str>,
+    pub(crate) namespace: Option<&'a str>,
     /// Optional documentation string describing the enum
     #[serde(borrow, default)]
-    pub doc: Option<&'a str>,
+    pub(crate) doc: Option<Cow<'a, str>>,
     /// Alternative names for this enum
     #[serde(borrow, default)]
-    pub aliases: Vec<&'a str>,
+    pub(crate) aliases: Vec<&'a str>,
     /// The symbols (values) that this enum can have
     #[serde(borrow)]
-    pub symbols: Vec<&'a str>,
+    pub(crate) symbols: Vec<&'a str>,
     /// Optional default value for this enum
     #[serde(borrow, default)]
-    pub default: Option<&'a str>,
+    pub(crate) default: Option<&'a str>,
     /// Additional attributes for this enum
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
 }
 
 /// An array
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#arrays>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Array<'a> {
+pub(crate) struct Array<'a> {
     /// The schema for items in this array
     #[serde(borrow)]
-    pub items: Box<Schema<'a>>,
+    pub(crate) items: Box<Schema<'a>>,
     /// Additional attributes for this array
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
 }
 
 /// A map
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#maps>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Map<'a> {
+pub(crate) struct Map<'a> {
     /// The schema for values in this map
     #[serde(borrow)]
-    pub values: Box<Schema<'a>>,
+    pub(crate) values: Box<Schema<'a>>,
     /// Additional attributes for this map
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
 }
 
 /// A fixed length binary array
 ///
 /// <https://avro.apache.org/docs/1.11.1/specification/#fixed>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct Fixed<'a> {
+pub(crate) struct Fixed<'a> {
     /// Name of the fixed type
     #[serde(borrow)]
-    pub name: &'a str,
+    pub(crate) name: &'a str,
     /// Optional namespace for the fixed type
     #[serde(borrow, default)]
-    pub namespace: Option<&'a str>,
+    pub(crate) namespace: Option<&'a str>,
     /// Alternative names for this fixed type
     #[serde(borrow, default)]
-    pub aliases: Vec<&'a str>,
+    pub(crate) aliases: Vec<&'a str>,
     /// The number of bytes in this fixed type
-    pub size: usize,
+    pub(crate) size: usize,
     /// Additional attributes for this fixed type
     #[serde(flatten)]
-    pub attributes: Attributes<'a>,
+    pub(crate) attributes: Attributes<'a>,
+}
+
+/// Options consumed by `AvroSchema::from_arrow_with_options` when converting an
+/// Arrow schema to Avro JSON.
+#[derive(Debug, Copy, Clone, PartialEq, Default)]
+pub(crate) struct AvroSchemaOptions {
+    /// Null-union ordering to apply; `None` falls back to `Nullability::default()`.
+    pub(crate) null_order: Option<Nullability>,
+    /// When `false`, Avro JSON already stored under `SCHEMA_METADATA_KEY` in the Arrow
+    /// schema metadata is returned as-is. When `true`, that shortcut is skipped and the
+    /// flag is forwarded to the per-field conversion.
+    pub(crate) strip_metadata: bool,
+}
+
+/// A wrapper for an Avro schema in its JSON string representation.
+///
+/// The string is stored as given; it is only parsed (and therefore validated)
+/// when the schema is deserialized, e.g. while computing a fingerprint.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct AvroSchema {
+    /// The Avro schema as a JSON string.
+    pub json_string: String,
+}
+
+impl TryFrom<&ArrowSchema> for AvroSchema {
+    type Error = ArrowError;
+
+    /// Builds an `AvroSchema` from an `ArrowSchema` using default options.
+    ///
+    /// No options are supplied to `AvroSchema::from_arrow_with_options`, so the
+    /// union null ordering comes from `Nullability::default()`.
+    fn try_from(schema: &ArrowSchema) -> Result<Self, Self::Error> {
+        let default_options = None;
+        Self::from_arrow_with_options(schema, default_options)
+    }
+}
+
+impl AvroSchema {
+    /// Creates a new `AvroSchema` from a JSON string.
+    ///
+    /// The string is stored without being parsed or validated.
+    pub fn new(json_string: String) -> Self {
+        Self { json_string }
+    }
+
+    /// Parses the stored JSON string into a `Schema` that borrows from `self`.
+    ///
+    /// # Errors
+    /// Returns `ArrowError::ParseError` if the JSON cannot be deserialized as an Avro schema.
+    pub(crate) fn schema(&self) -> Result<Schema<'_>, ArrowError> {
+        serde_json::from_str(self.json_string.as_str())
+            .map_err(|e| ArrowError::ParseError(format!("Invalid Avro schema JSON: {e}")))
+    }
+
+    /// Returns the fingerprint of the schema, computed using the specified [`FingerprintAlgorithm`].
+    ///
+    /// The fingerprint is computed over the schema's Parsed Canonical Form
+    /// as defined by the Avro specification. Depending on `hash_type`, this
+    /// will return one of the supported [`Fingerprint`] variants:
+    /// - [`Fingerprint::Rabin`] for [`FingerprintAlgorithm::Rabin`]
+    /// - `Fingerprint::MD5` for `FingerprintAlgorithm::MD5`
+    /// - `Fingerprint::SHA256` for `FingerprintAlgorithm::SHA256`
+    ///
+    /// Note: [`FingerprintAlgorithm::Id`] or [`FingerprintAlgorithm::Id64`] cannot be used to generate a fingerprint
+    /// and will result in an error. If you intend to use a Schema Registry ID-based
+    /// wire format, either use [`SchemaStore::set`] or load the [`Fingerprint::Id`] directly via [`Fingerprint::load_fingerprint_id`] or for
+    /// [`Fingerprint::Id64`] via [`Fingerprint::load_fingerprint_id64`].
+    ///
+    /// See also: <https://avro.apache.org/docs/1.11.1/specification/#schema-fingerprints>
+    ///
+    /// # Errors
+    /// Returns an error if deserializing the schema fails, if generating the
+    /// canonical form of the schema fails, or if `hash_type` is
+    /// [`FingerprintAlgorithm::Id`] or [`FingerprintAlgorithm::Id64`].
+    ///
+    /// # Examples
+    /// ```
+    /// use arrow_avro::schema::{AvroSchema, FingerprintAlgorithm};
+    ///
+    /// let avro = AvroSchema::new("\"string\"".to_string());
+    /// let fp = avro.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+    /// ```
+    pub fn fingerprint(&self, hash_type: FingerprintAlgorithm) -> Result<Fingerprint, ArrowError> {
+        Self::generate_fingerprint(&self.schema()?, hash_type)
+    }
+
+    /// Computes the fingerprint of an already-parsed `schema` with the given algorithm.
+    ///
+    /// The schema is first rendered to its canonical form, and the selected hash is
+    /// computed over that string. The `MD5` and `SHA256` arms are only compiled when
+    /// the `md5` / `sha256` crate features are enabled.
+    ///
+    /// # Errors
+    /// Returns `ArrowError::ComputeError` if the canonical form cannot be generated, and
+    /// `ArrowError::SchemaError` if `hash_type` is `Id` or `Id64`.
+    pub(crate) fn generate_fingerprint(
+        schema: &Schema,
+        hash_type: FingerprintAlgorithm,
+    ) -> Result<Fingerprint, ArrowError> {
+        let canonical = Self::generate_canonical_form(schema).map_err(|e| {
+            ArrowError::ComputeError(format!("Failed to generate canonical form for schema: {e}"))
+        })?;
+        match hash_type {
+            FingerprintAlgorithm::Rabin => {
+                Ok(Fingerprint::Rabin(compute_fingerprint_rabin(&canonical)))
+            }
+            // Registry IDs are assigned externally; they cannot be derived from the schema.
+            FingerprintAlgorithm::Id | FingerprintAlgorithm::Id64 => Err(ArrowError::SchemaError(
+                "FingerprintAlgorithm of Id or Id64 cannot be used to generate a fingerprint; \
+                if using Fingerprint::Id, pass the registry ID in instead using the set method."
+                    .to_string(),
+            )),
+            #[cfg(feature = "md5")]
+            FingerprintAlgorithm::MD5 => Ok(Fingerprint::MD5(compute_fingerprint_md5(&canonical))),
+            #[cfg(feature = "sha256")]
+            FingerprintAlgorithm::SHA256 => {
+                Ok(Fingerprint::SHA256(compute_fingerprint_sha256(&canonical)))
+            }
+        }
+    }
+
+    /// Generates the Parsed Canonical Form for the given `Schema`.
+    ///
+    /// The canonical form is a standardized JSON representation of the schema,
+    /// primarily used for generating a schema fingerprint for equality checking.
+    ///
+    /// This form strips attributes that do not affect the schema's identity,
+    /// such as `doc` fields, `aliases`, and any properties not defined in the
+    /// Avro specification.
+    ///
+    /// <https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas>
+    pub(crate) fn generate_canonical_form(schema: &Schema) -> Result<String, ArrowError> {
+        build_canonical(schema, None)
+    }
+
+    /// Build Avro JSON from an Arrow [`ArrowSchema`], applying the null-union order and
+    /// metadata-stripping flag carried by `options`.
+    ///
+    /// When `options` is `None`, `AvroSchemaOptions::default()` is used; a `null_order`
+    /// of `None` falls back to `Nullability::default()`.
+    ///
+    /// Unless `strip_metadata` is set, if the input Arrow schema already contains Avro
+    /// JSON in [`SCHEMA_METADATA_KEY`], that JSON is returned verbatim. Otherwise a new
+    /// record schema is generated: every Arrow field is converted with the selected null
+    /// order, and the record is named from [`AVRO_NAME_METADATA_KEY`] (sanitised), or
+    /// [`AVRO_ROOT_RECORD_DEFAULT_NAME`] when that key is absent.
+    ///
+    /// # Errors
+    /// Returns an error if any field conversion fails, or `ArrowError::SchemaError` if
+    /// the generated JSON cannot be serialized.
+    pub(crate) fn from_arrow_with_options(
+        schema: &ArrowSchema,
+        options: Option<AvroSchemaOptions>,
+    ) -> Result<AvroSchema, ArrowError> {
+        let opts = options.unwrap_or_default();
+        let order = opts.null_order.unwrap_or_default();
+        let strip = opts.strip_metadata;
+        // Shortcut: reuse Avro JSON already embedded in the Arrow metadata.
+        if !strip {
+            if let Some(json) = schema.metadata.get(SCHEMA_METADATA_KEY) {
+                return Ok(AvroSchema::new(json.clone()));
+            }
+        }
+        let mut name_gen = NameGenerator::default();
+        let fields_json = schema
+            .fields()
+            .iter()
+            .map(|f| arrow_field_to_avro(f, &mut name_gen, order, strip))
+            .collect::<Result<Vec<_>, _>>()?;
+        let record_name = schema
+            .metadata
+            .get(AVRO_NAME_METADATA_KEY)
+            .map_or(AVRO_ROOT_RECORD_DEFAULT_NAME, |s| s.as_str());
+        let mut record = JsonMap::with_capacity(schema.metadata.len() + 4);
+        record.insert("type".into(), Value::String("record".into()));
+        record.insert(
+            "name".into(),
+            Value::String(sanitise_avro_name(record_name)),
+        );
+        // `namespace` and `doc` are only emitted when present in the Arrow metadata.
+        if let Some(ns) = schema.metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+            record.insert("namespace".into(), Value::String(ns.clone()));
+        }
+        if let Some(doc) = schema.metadata.get(AVRO_DOC_METADATA_KEY) {
+            record.insert("doc".into(), Value::String(doc.clone()));
+        }
+        record.insert("fields".into(), Value::Array(fields_json));
+        extend_with_passthrough_metadata(&mut record, &schema.metadata);
+        let json_string = serde_json::to_string(&Value::Object(record))
+            .map_err(|e| ArrowError::SchemaError(format!("Serializing Avro JSON failed: {e}")))?;
+        Ok(AvroSchema::new(json_string))
+    }
+}
+
+/// A stack-allocated, fixed-size buffer for the prefix.
+///
+/// Only the first `len` bytes of `buf` are part of the prefix; the rest of the
+/// buffer is unused padding. Use [`Prefix::as_slice`] to read the prefix bytes.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Prefix {
+    /// Backing storage, sized for the longest supported prefix.
+    buf: [u8; MAX_PREFIX_LEN],
+    /// Number of leading bytes of `buf` that are in use.
+    len: u8,
+}
+
+impl Prefix {
+    /// Returns the prefix bytes, i.e. the first `len` bytes of the buffer.
+    #[inline]
+    pub(crate) fn as_slice(&self) -> &[u8] {
+        let used = usize::from(self.len);
+        &self.buf[..used]
+    }
+}
+
+/// Defines the strategy for generating the per-record prefix for an Avro binary stream.
+///
+/// The hash-based variants only name the algorithm, whereas `Id` and `Id64`
+/// also carry the registry ID itself.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum FingerprintStrategy {
+    /// Use the 64-bit Rabin fingerprint (default for single-object encoding).
+    #[default]
+    Rabin,
+    /// Use a Confluent Schema Registry 32-bit ID; the payload is that ID.
+    Id(u32),
+    /// Use an Apicurio Schema Registry 64-bit ID; the payload is that ID.
+    Id64(u64),
+    #[cfg(feature = "md5")]
+    /// Use the 128-bit MD5 fingerprint (requires the `md5` feature).
+    MD5,
+    #[cfg(feature = "sha256")]
+    /// Use the 256-bit SHA-256 fingerprint (requires the `sha256` feature).
+    SHA256,
+}
+
+impl From<Fingerprint> for FingerprintStrategy {
+    /// By-value convenience wrapper around the `From<&Fingerprint>` conversion.
+    fn from(fingerprint: Fingerprint) -> Self {
+        let borrowed: &Fingerprint = &fingerprint;
+        borrowed.into()
+    }
+}
+
+impl From<FingerprintAlgorithm> for FingerprintStrategy {
+    /// Maps an algorithm to the strategy of the same kind.
+    ///
+    /// An algorithm carries no registry ID, so the `Id` and `Id64` strategies
+    /// are created with a placeholder ID of `0`.
+    fn from(algorithm: FingerprintAlgorithm) -> Self {
+        match algorithm {
+            FingerprintAlgorithm::Rabin => Self::Rabin,
+            FingerprintAlgorithm::Id => Self::Id(0),
+            FingerprintAlgorithm::Id64 => Self::Id64(0),
+            #[cfg(feature = "md5")]
+            FingerprintAlgorithm::MD5 => Self::MD5,
+            #[cfg(feature = "sha256")]
+            FingerprintAlgorithm::SHA256 => Self::SHA256,
+        }
+    }
+}
+
+impl From<&Fingerprint> for FingerprintStrategy {
+    /// Derives the prefix strategy that matches a concrete fingerprint.
+    ///
+    /// Hash-based fingerprints map to the strategy naming their algorithm (the
+    /// hash value itself is not part of a strategy). Registry IDs are carried
+    /// over unchanged, so the resulting strategy refers to the same ID as the
+    /// fingerprint instead of a placeholder `0`. This keeps the conversion
+    /// consistent with `From<&FingerprintStrategy> for Fingerprint`, which
+    /// also preserves the ID.
+    fn from(f: &Fingerprint) -> Self {
+        match f {
+            Fingerprint::Rabin(_) => FingerprintStrategy::Rabin,
+            Fingerprint::Id(id) => FingerprintStrategy::Id(*id),
+            Fingerprint::Id64(id) => FingerprintStrategy::Id64(*id),
+            #[cfg(feature = "md5")]
+            Fingerprint::MD5(_) => FingerprintStrategy::MD5,
+            #[cfg(feature = "sha256")]
+            Fingerprint::SHA256(_) => FingerprintStrategy::SHA256,
+        }
+    }
+}
+
+/// Supported fingerprint algorithms for Avro schema identification.
+///
+/// The `Id` and `Id64` variants are not hash algorithms: they mark schemas that
+/// are identified by an externally assigned ID (e.g. a Schema Registry ID).
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
+pub enum FingerprintAlgorithm {
+    /// 64‑bit CRC‑64‑AVRO Rabin fingerprint.
+    #[default]
+    Rabin,
+    /// Represents a 32-bit fingerprint that is not based on a hash algorithm (e.g., a 32-bit Schema Registry ID).
+    Id,
+    /// Represents a 64-bit fingerprint that is not based on a hash algorithm (e.g., a 64-bit Schema Registry ID).
+    Id64,
+    #[cfg(feature = "md5")]
+    /// 128-bit MD5 message digest.
+    MD5,
+    #[cfg(feature = "sha256")]
+    /// 256-bit SHA-256 digest.
+    SHA256,
+}
+
+/// Extracts the algorithm that a fingerprint was created with.
+impl From<&Fingerprint> for FingerprintAlgorithm {
+    fn from(fingerprint: &Fingerprint) -> Self {
+        // Only the variant matters; the fingerprint payload is ignored.
+        match fingerprint {
+            Fingerprint::Rabin(_) => Self::Rabin,
+            Fingerprint::Id(_) => Self::Id,
+            Fingerprint::Id64(_) => Self::Id64,
+            #[cfg(feature = "md5")]
+            Fingerprint::MD5(_) => Self::MD5,
+            #[cfg(feature = "sha256")]
+            Fingerprint::SHA256(_) => Self::SHA256,
+        }
+    }
+}
+
+impl From<FingerprintStrategy> for FingerprintAlgorithm {
+    /// By-value convenience wrapper around the `From<&FingerprintStrategy>` conversion.
+    fn from(strategy: FingerprintStrategy) -> Self {
+        let borrowed: &FingerprintStrategy = &strategy;
+        borrowed.into()
+    }
+}
+
+impl From<&FingerprintStrategy> for FingerprintAlgorithm {
+    /// Returns the algorithm behind a strategy, dropping any registry ID it carries.
+    fn from(strategy: &FingerprintStrategy) -> Self {
+        match strategy {
+            FingerprintStrategy::Rabin => Self::Rabin,
+            FingerprintStrategy::Id(_) => Self::Id,
+            FingerprintStrategy::Id64(_) => Self::Id64,
+            #[cfg(feature = "md5")]
+            FingerprintStrategy::MD5 => Self::MD5,
+            #[cfg(feature = "sha256")]
+            FingerprintStrategy::SHA256 => Self::SHA256,
+        }
+    }
+}
+
+/// A schema fingerprint in one of the supported formats.
+///
+/// This is used as the key of the `HashMap` inside `SchemaStore`. A
+/// `SchemaStore` is expected to hold only the variant that matches its
+/// configured `FingerprintAlgorithm`; the enum makes the API uniform.
+///
+/// <https://avro.apache.org/docs/1.11.1/specification/#schema-fingerprints>
+/// <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Fingerprint {
+    /// A 64-bit Rabin fingerprint.
+    Rabin(u64),
+    /// A 32-bit Schema Registry ID.
+    Id(u32),
+    /// A 64-bit Schema Registry ID.
+    Id64(u64),
+    #[cfg(feature = "md5")]
+    /// A 128-bit MD5 fingerprint (requires the `md5` feature).
+    MD5([u8; 16]),
+    #[cfg(feature = "sha256")]
+    /// A 256-bit SHA-256 fingerprint (requires the `sha256` feature).
+    SHA256([u8; 32]),
+}
+
+impl From<FingerprintStrategy> for Fingerprint {
+    /// By-value convenience wrapper around the `From<&FingerprintStrategy>` conversion.
+    fn from(strategy: FingerprintStrategy) -> Self {
+        let borrowed: &FingerprintStrategy = &strategy;
+        borrowed.into()
+    }
+}
+
+impl From<&FingerprintStrategy> for Fingerprint {
+    /// Builds a fingerprint of the kind selected by the strategy.
+    ///
+    /// Registry IDs are copied from the strategy. Hash-based strategies carry
+    /// no hash value, so their fingerprints are created zero-filled.
+    fn from(strategy: &FingerprintStrategy) -> Self {
+        match *strategy {
+            FingerprintStrategy::Rabin => Self::Rabin(0),
+            FingerprintStrategy::Id(id) => Self::Id(id),
+            FingerprintStrategy::Id64(id) => Self::Id64(id),
+            #[cfg(feature = "md5")]
+            FingerprintStrategy::MD5 => Self::MD5([0; 16]),
+            #[cfg(feature = "sha256")]
+            FingerprintStrategy::SHA256 => Self::SHA256([0; 32]),
+        }
+    }
+}
+
+impl From<FingerprintAlgorithm> for Fingerprint {
+    /// Builds a zero-valued fingerprint of the kind named by the algorithm.
+    fn from(algorithm: FingerprintAlgorithm) -> Self {
+        match algorithm {
+            FingerprintAlgorithm::Rabin => Self::Rabin(0),
+            FingerprintAlgorithm::Id => Self::Id(0),
+            FingerprintAlgorithm::Id64 => Self::Id64(0),
+            #[cfg(feature = "md5")]
+            FingerprintAlgorithm::MD5 => Self::MD5([0; 16]),
+            #[cfg(feature = "sha256")]
+            FingerprintAlgorithm::SHA256 => Self::SHA256([0; 32]),
+        }
+    }
+}
+
+impl Fingerprint {
+    /// Loads a 32-bit Schema Registry ID (Confluent wire format).
+    ///
+    /// `id` is expected in big-endian wire order; it is converted to host
+    /// order before being wrapped.
+    ///
+    /// # Returns
+    /// A `Fingerprint::Id` holding the converted 32-bit ID.
+    pub fn load_fingerprint_id(id: u32) -> Self {
+        let host_order = u32::from_be(id);
+        Self::Id(host_order)
+    }
+
+    /// Loads a 64-bit Schema Registry ID (Apicurio wire format).
+    ///
+    /// `id` is expected in big-endian wire order; it is converted to host
+    /// order before being wrapped.
+    ///
+    /// # Returns
+    /// A `Fingerprint::Id64` holding the converted 64-bit ID.
+    pub fn load_fingerprint_id64(id: u64) -> Self {
+        let host_order = u64::from_be(id);
+        Self::Id64(host_order)
+    }
+
+    /// Serializes this fingerprint into the prefix that precedes each record.
+    ///
+    /// The layout is always a magic header followed by a payload, and depends
+    /// on the variant:
+    /// - **`Rabin(val)`**: `SINGLE_OBJECT_MAGIC` followed by `val` as
+    ///   little-endian bytes (Avro single-object encoding).
+    /// - **`Id(id)`**: `CONFLUENT_MAGIC` followed by the 4 big-endian bytes of
+    ///   `id` (Confluent wire format).
+    /// - **`Id64(id)`**: the same `CONFLUENT_MAGIC` header followed by the
+    ///   8 big-endian bytes of `id` (Apicurio wire format).
+    /// - **`MD5(bytes)`** (`md5` feature): a non-standard extension that writes
+    ///   `SINGLE_OBJECT_MAGIC` followed by `bytes`.
+    /// - **`SHA256(bytes)`** (`sha256` feature): a non-standard extension that
+    ///   writes `SINGLE_OBJECT_MAGIC` followed by `bytes`.
+    ///
+    /// # Returns
+    ///
+    /// A stack-allocated [`Prefix`] containing the serialized bytes.
+    pub(crate) fn make_prefix(&self) -> Prefix {
+        let mut buf = [0u8; MAX_PREFIX_LEN];
+        let len = match self {
+            Self::Rabin(hash) => {
+                let payload = hash.to_le_bytes();
+                write_prefix(&mut buf, &SINGLE_OBJECT_MAGIC, &payload)
+            }
+            Self::Id(id) => {
+                let payload = id.to_be_bytes();
+                write_prefix(&mut buf, &CONFLUENT_MAGIC, &payload)
+            }
+            Self::Id64(id) => {
+                let payload = id.to_be_bytes();
+                write_prefix(&mut buf, &CONFLUENT_MAGIC, &payload)
+            }
+            #[cfg(feature = "md5")]
+            Self::MD5(digest) => write_prefix(&mut buf, &SINGLE_OBJECT_MAGIC, digest),
+            #[cfg(feature = "sha256")]
+            Self::SHA256(digest) => write_prefix(&mut buf, &SINGLE_OBJECT_MAGIC, digest),
+        };
+        Prefix { buf, len }
+    }
+}
+
+/// Writes `magic` followed by `payload` to the start of `buf`.
+///
+/// Returns the number of bytes written (`MAGIC_LEN + PAYLOAD_LEN`). The
+/// combined length must fit into the buffer; this is checked with a
+/// `debug_assert!`, and slicing panics if it does not hold.
+fn write_prefix<const MAGIC_LEN: usize, const PAYLOAD_LEN: usize>(
+    buf: &mut [u8; MAX_PREFIX_LEN],
+    magic: &[u8; MAGIC_LEN],
+    payload: &[u8; PAYLOAD_LEN],
+) -> u8 {
+    debug_assert!(MAGIC_LEN + PAYLOAD_LEN <= MAX_PREFIX_LEN);
+    let total = MAGIC_LEN + PAYLOAD_LEN;
+    // Split the used region into the header part and the payload part.
+    let (header, body) = buf[..total].split_at_mut(MAGIC_LEN);
+    header.copy_from_slice(magic);
+    body.copy_from_slice(payload);
+    total as u8
+}
+
+/// An in-memory cache of Avro schemas, indexed by their fingerprint.
+///
+/// `SchemaStore` provides a mechanism to store and retrieve Avro schemas efficiently.
+/// Each schema is associated with a [`Fingerprint`], which is either generated from
+/// the schema's canonical form with a hashing algorithm or supplied by the caller
+/// (e.g. a Schema Registry ID).
+///
+/// A `SchemaStore` instance is configured to use a single [`FingerprintAlgorithm`] such as Rabin,
+/// or MD5 / SHA256 when the `md5` / `sha256` features are enabled, for all its operations.
+/// Schemas added with `register` have their fingerprint computed with this algorithm,
+/// while `set` stores a schema under a caller-provided fingerprint.
+/// Lookups must use a matching fingerprint.
+///
+/// # Examples
+///
+/// ```no_run
+/// // Create a new store with the default Rabin fingerprinting.
+/// use arrow_avro::schema::{AvroSchema, SchemaStore};
+///
+/// let mut store = SchemaStore::new();
+/// let schema = AvroSchema::new("\"string\"".to_string());
+/// // Register the schema to get its fingerprint.
+/// let fingerprint = store.register(schema.clone()).unwrap();
+/// // Use the fingerprint to look up the schema.
+/// let retrieved_schema = store.lookup(&fingerprint).cloned();
+/// assert_eq!(retrieved_schema, Some(schema));
+/// ```
+#[derive(Debug, Clone, Default)]
+pub struct SchemaStore {
+    /// The hashing algorithm used for generating fingerprints.
+    fingerprint_algorithm: FingerprintAlgorithm,
+    /// A map from a schema's fingerprint to the schema itself.
+    schemas: HashMap<Fingerprint, AvroSchema>,
+}
+
+impl TryFrom<HashMap<Fingerprint, AvroSchema>> for SchemaStore {
+    type Error = ArrowError;
+
+    /// Creates a `SchemaStore` from a `HashMap` of schemas keyed by fingerprint.
+    ///
+    /// The map is adopted as-is. The store's `FingerprintAlgorithm` is inferred
+    /// from the fingerprint keys, so that a store built from, for example,
+    /// `Fingerprint::Id` keys is configured for `FingerprintAlgorithm::Id`
+    /// rather than for the default algorithm. An empty map yields a store with
+    /// the default algorithm.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ArrowError::SchemaError` if the keys use more than one
+    /// fingerprint kind, because a store is configured for a single algorithm.
+    fn try_from(schemas: HashMap<Fingerprint, AvroSchema>) -> Result<Self, Self::Error> {
+        let mut algorithms = schemas.keys().map(|fp| FingerprintAlgorithm::from(fp));
+        let fingerprint_algorithm = match algorithms.next() {
+            None => FingerprintAlgorithm::default(),
+            Some(first) => {
+                // Every remaining key must agree with the first one.
+                if let Some(other) = algorithms.find(|algorithm| *algorithm != first) {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Cannot create SchemaStore from fingerprints of different kinds: \
+                         found both {first:?} and {other:?}"
+                    )));
+                }
+                first
+            }
+        };
+        Ok(Self {
+            fingerprint_algorithm,
+            schemas,
+        })
+    }
+}
+
+impl SchemaStore {
+    /// Creates an empty `SchemaStore` using the default fingerprinting algorithm (64-bit Rabin).
+    pub fn new() -> Self {
+        Self::new_with_type(FingerprintAlgorithm::default())
+    }
+
+    /// Creates an empty `SchemaStore` that uses the given `fingerprint_algorithm`.
+    pub fn new_with_type(fingerprint_algorithm: FingerprintAlgorithm) -> Self {
+        Self {
+            fingerprint_algorithm,
+            schemas: HashMap::new(),
+        }
+    }
+
+    /// Stores a schema under a caller-provided fingerprint.
+    /// Note: Confluent wire format implementations should leverage this method.
+    ///
+    /// If the fingerprint is not present yet, the schema is inserted. If it is
+    /// already present with an equal schema, the store is left unchanged.
+    ///
+    /// # Arguments
+    ///
+    /// * `fingerprint` - The `Fingerprint` to store the schema under.
+    /// * `schema` - The `AvroSchema` to store.
+    ///
+    /// # Returns
+    ///
+    /// The provided `Fingerprint` on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ArrowError::ComputeError` if the fingerprint is already mapped
+    /// to a different schema.
+    pub fn set(
+        &mut self,
+        fingerprint: Fingerprint,
+        schema: AvroSchema,
+    ) -> Result<Fingerprint, ArrowError> {
+        let occupied = match self.schemas.entry(fingerprint) {
+            Entry::Vacant(slot) => {
+                slot.insert(schema);
+                return Ok(fingerprint);
+            }
+            Entry::Occupied(slot) => slot,
+        };
+        // The fingerprint is taken: only an identical schema is acceptable.
+        if occupied.get() != &schema {
+            return Err(ArrowError::ComputeError(format!(
+                "Schema fingerprint collision detected for fingerprint {fingerprint:?}"
+            )));
+        }
+        Ok(fingerprint)
+    }
+
+    /// Computes the fingerprint of a schema and stores the schema under it.
+    ///
+    /// The fingerprint is calculated with the store's configured algorithm and
+    /// the schema is then stored as described for `set`. Stores configured
+    /// with `FingerprintAlgorithm::Id` or `FingerprintAlgorithm::Id64` cannot
+    /// compute a fingerprint; use `set` with an explicit fingerprint instead.
+    ///
+    /// # Arguments
+    ///
+    /// * `schema` - The `AvroSchema` to register.
+    ///
+    /// # Returns
+    ///
+    /// The computed `Fingerprint` on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `ArrowError` if the store uses an ID-based algorithm, if the
+    /// schema cannot be parsed or fingerprinted, or if `set` reports a
+    /// collision.
+    pub fn register(&mut self, schema: AvroSchema) -> Result<Fingerprint, ArrowError> {
+        let id_based = matches!(
+            self.fingerprint_algorithm,
+            FingerprintAlgorithm::Id | FingerprintAlgorithm::Id64
+        );
+        if id_based {
+            return Err(ArrowError::SchemaError(
+                "Invalid FingerprintAlgorithm; unable to generate fingerprint. \
+            Use the set method directly instead, providing a valid fingerprint"
+                    .to_string(),
+            ));
+        }
+        let parsed = schema.schema()?;
+        let fingerprint = AvroSchema::generate_fingerprint(&parsed, self.fingerprint_algorithm)?;
+        // `set` hands the fingerprint back on success.
+        self.set(fingerprint, schema)
+    }
+
+    /// Looks up a schema by its `Fingerprint`.
+    ///
+    /// # Arguments
+    ///
+    /// * `fingerprint` - A reference to the `Fingerprint` of the schema to look up.
+    ///
+    /// # Returns
+    ///
+    /// A reference to the stored `AvroSchema` if found, otherwise `None`.
+    pub fn lookup(&self, fingerprint: &Fingerprint) -> Option<&AvroSchema> {
+        let schemas = &self.schemas;
+        schemas.get(fingerprint)
+    }
+
+    /// Returns a `Vec` containing **all unique [`Fingerprint`]s** currently
+    /// held by this [`SchemaStore`].
+    ///
+    /// The order of the returned fingerprints is unspecified and should not be
+    /// relied upon.
+    pub fn fingerprints(&self) -> Vec<Fingerprint> {
+        let mut all = Vec::with_capacity(self.schemas.len());
+        all.extend(self.schemas.keys().copied());
+        all
+    }
+
+    /// Returns the `FingerprintAlgorithm` this store is configured with.
+    pub(crate) fn fingerprint_algorithm(&self) -> FingerprintAlgorithm {
+        let Self {
+            fingerprint_algorithm,
+            ..
+        } = self;
+        *fingerprint_algorithm
+    }
+}
+
+// Renders `s` as a JSON string literal, including the surrounding quotes and
+// any required escaping.
+fn quote(s: &str) -> Result<String, ArrowError> {
+    match serde_json::to_string(s) {
+        Ok(quoted) => Ok(quoted),
+        Err(e) => Err(ArrowError::ComputeError(format!(
+            "Failed to quote string: {e}"
+        ))),
+    }
+}
+
+// Avro names are defined by a `name` and an optional `namespace`.
+// The full name is composed of the namespace and the name, separated by a dot.
+//
+// The Avro specification defines two ways to specify a full name:
+// 1. The `name` attribute contains the full name (e.g., "a.b.c.d").
+//    In this case, the `namespace` attribute is ignored.
+// 2. The `name` attribute contains the simple name (e.g., "d") and the
+//    `namespace` attribute contains the namespace (e.g., "a.b.c").
+//
+// Each part of the name must match the regex `^[A-Za-z_][A-Za-z0-9_]*$`.
+// Complex paths with quotes or backticks like `a."hi".b` are not supported.
+//
+// This function constructs the full name and extracts the namespace,
+// handling both ways of specifying the name. It prioritizes a namespace
+// defined within the `name` attribute itself, then the explicit `namespace_attr`,
+// and finally the `enclosing_ns`.
+//
+// An empty namespace string denotes the null namespace: the full name is then
+// just `name` and no namespace is returned. In particular, an explicit empty
+// `namespace_attr` does not fall back to `enclosing_ns`.
+//
+// Returns `(full_name, namespace)`, where `namespace` is the namespace that
+// applies to the named type, or `None` for the null namespace.
+pub(crate) fn make_full_name(
+    name: &str,
+    namespace_attr: Option<&str>,
+    enclosing_ns: Option<&str>,
+) -> (String, Option<String>) {
+    // `name` already contains a dot then treat as full-name, ignore namespace.
+    if let Some((ns, _)) = name.rsplit_once('.') {
+        return (name.to_string(), Some(ns.to_string()));
+    }
+    match namespace_attr.or(enclosing_ns) {
+        Some(ns) if !ns.is_empty() => (format!("{ns}.{name}"), Some(ns.to_string())),
+        // No namespace at all, or the empty string that marks the null namespace.
+        _ => (name.to_string(), None),
+    }
+}
+
+// Renders `schema` in Avro Parsing Canonical Form.
+//
+// `enclosing_ns` is the namespace inherited from the closest enclosing named
+// type, if any; it is used to resolve names that carry no namespace of their
+// own. Primitive types and references become a quoted name, unions a JSON
+// array, and complex types a JSON object holding only the attributes kept by
+// the canonical form.
+fn build_canonical(schema: &Schema, enclosing_ns: Option<&str>) -> Result<String, ArrowError> {
+    let complex = match schema {
+        Schema::TypeName(tn) | Schema::Type(Type { r#type: tn, .. }) => {
+            return match tn {
+                TypeName::Primitive(primitive) => quote(primitive.as_ref()),
+                TypeName::Ref(reference) => {
+                    let (full_name, _) = make_full_name(reference, None, enclosing_ns);
+                    quote(&full_name)
+                }
+            };
+        }
+        Schema::Union(branches) => {
+            let mut rendered = Vec::with_capacity(branches.len());
+            for branch in branches.iter() {
+                rendered.push(build_canonical(branch, enclosing_ns)?);
+            }
+            return Ok(format!("[{}]", rendered.join(",")));
+        }
+        Schema::Complex(complex) => complex,
+    };
+    let canonical = match complex {
+        ComplexType::Record(record) => {
+            let (full_name, record_ns) =
+                make_full_name(record.name, record.namespace, enclosing_ns);
+            // Fields resolve their names against the record's namespace.
+            // `make_full_name` yields `None` only when there is no namespace
+            // to inherit, so no further fallback is needed here.
+            let field_ns = record_ns.as_deref();
+            let mut rendered_fields = Vec::with_capacity(record.fields.len());
+            for field in record.fields.iter() {
+                // PCF [STRIP] per Avro spec: keep only the attributes relevant to
+                // parsing ("name" and "type" for fields) and strip all others, such
+                // as doc, default, order, and aliases. See:
+                // https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas
+                let field_type = build_canonical(&field.r#type, field_ns)?;
+                rendered_fields.push(format!(
+                    r#"{{"name":{},"type":{}}}"#,
+                    quote(field.name)?,
+                    field_type
+                ));
+            }
+            let fields = rendered_fields.join(",");
+            format!(
+                r#"{{"name":{},"type":"record","fields":[{fields}]}}"#,
+                quote(&full_name)?,
+            )
+        }
+        ComplexType::Enum(enumeration) => {
+            let (full_name, _) =
+                make_full_name(enumeration.name, enumeration.namespace, enclosing_ns);
+            let mut quoted_symbols = Vec::with_capacity(enumeration.symbols.len());
+            for symbol in enumeration.symbols.iter() {
+                quoted_symbols.push(quote(symbol)?);
+            }
+            let symbols = quoted_symbols.join(",");
+            format!(
+                r#"{{"name":{},"type":"enum","symbols":[{symbols}]}}"#,
+                quote(&full_name)?
+            )
+        }
+        ComplexType::Array(array) => {
+            let items = build_canonical(&array.items, enclosing_ns)?;
+            format!(r#"{{"type":"array","items":{}}}"#, items)
+        }
+        ComplexType::Map(map) => {
+            let values = build_canonical(&map.values, enclosing_ns)?;
+            format!(r#"{{"type":"map","values":{}}}"#, values)
+        }
+        ComplexType::Fixed(fixed) => {
+            let (full_name, _) = make_full_name(fixed.name, fixed.namespace, enclosing_ns);
+            format!(
+                r#"{{"name":{},"type":"fixed","size":{}}}"#,
+                quote(&full_name)?,
+                fixed.size
+            )
+        }
+    };
+    Ok(canonical)
+}
+
+/// Constant of the 64‑bit Rabin fingerprint described in the Avro spec.
+///
+/// It is used both as the initial fingerprint value and as the XOR mask when
+/// building the lookup table.
+const EMPTY: u64 = 0xc15d_213a_a4d7_a795;
+
+/// Computes entry `i` of the polynomial‑division lookup table.
+///
+/// The value is shifted right eight times; every time the bit shifted out is
+/// set, `EMPTY` is XOR-ed in.
+///
+/// A `while` loop is used because `for` loops are not available in a
+/// `const fn` on stable Rust.
+const fn one_entry(i: usize) -> u64 {
+    let mut fp = i as u64;
+    let mut remaining_bits = 8;
+    while remaining_bits > 0 {
+        let shifted = fp >> 1;
+        fp = if fp & 1 == 1 { shifted ^ EMPTY } else { shifted };
+        remaining_bits -= 1;
+    }
+    fp
+}
+
+/// Builds the complete 256‑entry lookup table at compile time.
+///
+/// Entry `i` holds `one_entry(i)`. A `while` loop is used because `for` loops
+/// are not available in a `const fn` on stable Rust.
+const fn build_table() -> [u64; 256] {
+    let mut table = [0u64; 256];
+    let mut index = 0;
+    while index < table.len() {
+        table[index] = one_entry(index);
+        index += 1;
+    }
+    table
+}
+
+/// The pre‑computed 256‑entry lookup table for the Rabin fingerprint,
+/// produced by `build_table` and indexed by a single byte value.
+static FINGERPRINT_TABLE: [u64; 256] = build_table();
+
+/// Computes the 64-bit Rabin fingerprint of a canonical schema string.
+///
+/// Starting from `EMPTY`, every byte of the input is mixed into the running
+/// fingerprint through the pre-computed lookup table, following the algorithm
+/// given in the Avro specification for schema fingerprints.
+pub(crate) fn compute_fingerprint_rabin(canonical_form: &str) -> u64 {
+    canonical_form.bytes().fold(EMPTY, |fp, byte| {
+        // The low byte of the running fingerprint selects the table entry.
+        let idx = usize::from((fp as u8) ^ byte);
+        (fp >> 8) ^ FINGERPRINT_TABLE[idx]
+    })
+}
+
+#[cfg(feature = "md5")]
+/// Computes the **128‑bit MD5** fingerprint of the canonical form.
+///
+/// Returns the full MD5 digest of the string's bytes as a 16‑byte array
+/// (`[u8; 16]`).
+#[inline]
+pub(crate) fn compute_fingerprint_md5(canonical_form: &str) -> [u8; 16] {
+    md5::compute(canonical_form.as_bytes()).0
+}
+
+#[cfg(feature = "sha256")]
+/// Computes the **256‑bit SHA‑256** fingerprint of the canonical form.
+///
+/// Returns the full SHA‑256 digest of the string's bytes as a 32‑byte array
+/// (`[u8; 32]`).
+#[inline]
+pub(crate) fn compute_fingerprint_sha256(canonical_form: &str) -> [u8; 32] {
+    // One-shot hashing: equivalent to `new`, `update` and `finalize`.
+    Sha256::digest(canonical_form.as_bytes()).into()
+}
+
+// Returns `true` for metadata keys that are internal to Arrow: the key that
+// stores the Avro schema JSON and every key with the `ARROW:` prefix.
+#[inline]
+fn is_internal_arrow_key(key: &str) -> bool {
+    if key == SCHEMA_METADATA_KEY {
+        return true;
+    }
+    key.starts_with("ARROW:")
+}
+
+/// Copies Arrow schema metadata entries to the provided JSON map.
+///
+/// The following keys are skipped:
+/// - keys in the `avro.` namespace,
+/// - internal Arrow keys (see `is_internal_arrow_key`),
+/// - keys that are already present in `target`, so that pass-through
+///   metadata can never overwrite an attribute the caller has written
+///   (for example `type`, `name` or `fields` of a record).
+///
+/// Values that parse as JSON are inserted as JSON; otherwise the raw string
+/// is preserved.
+fn extend_with_passthrough_metadata(
+    target: &mut JsonMap<String, Value>,
+    metadata: &HashMap<String, String>,
+) {
+    for (meta_key, meta_val) in metadata {
+        if meta_key.starts_with("avro.") || is_internal_arrow_key(meta_key) {
+            continue;
+        }
+        // Attributes written by the caller take precedence over metadata.
+        if target.contains_key(meta_key.as_str()) {
+            continue;
+        }
+        let json_val =
+            serde_json::from_str(meta_val).unwrap_or_else(|_| Value::String(meta_val.clone()));
+        target.insert(meta_key.clone(), json_val);
+    }
+}
+
+// Turns an arbitrary string into a valid Avro field or type name.
+//
+// Every character other than an ASCII letter, an ASCII digit or `_` is
+// replaced with `_`. A name that would start with a digit is prefixed with
+// `_`, and an empty input becomes `_`.
+fn sanitise_avro_name(base_name: &str) -> String {
+    // One extra byte leaves room for a leading underscore.
+    let mut out = String::with_capacity(base_name.len() + 1);
+    for (position, ch) in base_name.chars().enumerate() {
+        if position == 0 && ch.is_ascii_digit() {
+            out.push('_');
+        }
+        let allowed = ch.is_ascii_alphanumeric() || ch == '_';
+        out.push(if allowed { ch } else { '_' });
+    }
+    if out.is_empty() {
+        out.push('_');
+    }
+    out
+}
+
+// Hands out unique, sanitized Avro names (see `make_unique`).
+#[derive(Default)]
+struct NameGenerator {
+    // Every name that has been handed out so far.
+    used: HashSet<String>,
+    // Per base name, the numeric suffix to try next when the name is taken.
+    counters: HashMap<String, usize>,
+}
+
+impl NameGenerator {
+    // Returns a sanitized name derived from `field_name` that has not been
+    // handed out before.
+    //
+    // The sanitized name itself is returned the first time it is requested.
+    // Later requests get `<name>_<n>`, with `n` counting up from 1 until an
+    // unused name is found.
+    fn make_unique(&mut self, field_name: &str) -> String {
+        let field_name = sanitise_avro_name(field_name);
+        if !self.used.contains(&field_name) {
+            self.used.insert(field_name.clone());
+            self.counters.insert(field_name.clone(), 1);
+            return field_name;
+        }
+        let counter = self.counters.entry(field_name.clone()).or_insert(1);
+        let mut candidate = format!("{field_name}_{}", *counter);
+        // `insert` returns `false` while the candidate is already taken.
+        while !self.used.insert(candidate.clone()) {
+            *counter += 1;
+            candidate = format!("{field_name}_{}", *counter);
+        }
+        candidate
+    }
+}
+
+// Attaches the `extras` attributes to an Avro schema JSON value.
+//
+// - No extras: the schema is returned unchanged.
+// - Object: the extras are merged into the object.
+// - Array (union): the extras are merged into the first branch that is not
+//   the string "null"; if there is no such branch they are dropped.
+// - Anything else: the value is wrapped as `{"type": <value>, ...extras}`.
+fn merge_extras(schema: Value, extras: JsonMap<String, Value>) -> Value {
+    if extras.is_empty() {
+        return schema;
+    }
+    let primitive = match schema {
+        Value::Object(mut attributes) => {
+            attributes.extend(extras);
+            return Value::Object(attributes);
+        }
+        Value::Array(mut branches) => {
+            // For unions, we cannot attach attributes to the array itself (per Avro spec).
+            // As a fallback for extension metadata, attach extras to the first non-null branch.
+            let first_non_null = branches
+                .iter()
+                .position(|branch| branch.as_str() != Some("null"));
+            if let Some(idx) = first_non_null {
+                let original = std::mem::take(&mut branches[idx]);
+                branches[idx] = merge_extras(original, extras);
+            }
+            return Value::Array(branches);
+        }
+        other => other,
+    };
+    let mut wrapped = JsonMap::with_capacity(extras.len() + 1);
+    wrapped.insert("type".into(), primitive);
+    wrapped.extend(extras);
+    Value::Object(wrapped)
+}
+
+// Returns `true` if `v` is the JSON string "null", i.e. the Avro null type.
+#[inline]
+fn is_avro_json_null(v: &Value) -> bool {
+    v.as_str() == Some("null")
+}
+
+// Makes an Avro schema JSON value nullable by turning it into a union with
+// "null", placing "null" first or last according to `null_order`.
+//
+// A value that is already a union containing "null" is returned unchanged.
+fn wrap_nullable(inner: Value, null_order: Nullability) -> Value {
+    let mut branches = match inner {
+        Value::Array(existing) => {
+            // If this site is already a union and already contains "null",
+            // preserve the branch order exactly. Reordering "null" breaks
+            // the correspondence between Arrow union child order (type_ids)
+            // and the Avro branch index written on the wire.
+            if existing.iter().any(is_avro_json_null) {
+                return Value::Array(existing);
+            }
+            existing
+        }
+        // A non-union schema becomes the only existing branch.
+        single => vec![single],
+    };
+    // Inject "null" without reordering the existing branches.
+    let null = Value::String("null".into());
+    match null_order {
+        Nullability::NullFirst => branches.insert(0, null),
+        Nullability::NullSecond => branches.push(null),
+    }
+    Value::Array(branches)
+}
+
+// Returns the smallest number of bytes (1..=32) of a `fixed` type that can
+// hold a decimal of precision `p`, saturating at 32 bytes.
+fn min_fixed_bytes_for_precision(p: usize) -> usize {
+    // From the spec: max precision for n=1..=32 bytes:
+    // [2,4,6,9,11,14,16,18,21,23,26,28,31,33,35,38,40,43,45,47,50,52,55,57,59,62,64,67,69,71,74,76]
+    const MAX_P: [usize; 32] = [
+        2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55, 57,
+        59, 62, 64, 67, 69, 71, 74, 76,
+    ];
+    // The table index is zero-based, while byte counts start at one.
+    match MAX_P.iter().position(|&max_p| p <= max_p) {
+        Some(index) => index + 1,
+        None => 32, // saturate at Decimal256
+    }
+}
+
+// Computes a signature string for one branch of an Avro union.
+//
+// - a JSON string yields `P:<type>`,
+// - a `record`, `enum` or `fixed` object yields `N:<type>:<name>`,
+// - an `array` or `map` object yields `C:<type>`,
+// - any other object yields `P:<type>`.
+//
+// Returns a schema error for a nested union, for an object that lacks a
+// string `type` (or the `name` of a named type), and for any other JSON value.
+fn union_branch_signature(branch: &Value) -> Result<String, ArrowError> {
+    let map = match branch {
+        Value::String(t) => return Ok(format!("P:{t}")),
+        Value::Object(map) => map,
+        Value::Array(_) => {
+            return Err(ArrowError::SchemaError(
+                "Avro union may not immediately contain another union".into(),
+            ));
+        }
+        _ => {
+            return Err(ArrowError::SchemaError(
+                "Invalid JSON for Avro union branch".into(),
+            ));
+        }
+    };
+    let t = match map.get("type").and_then(Value::as_str) {
+        Some(type_name) => type_name,
+        None => {
+            return Err(ArrowError::SchemaError(
+                "Union branch object missing string 'type'".into(),
+            ));
+        }
+    };
+    match t {
+        // Named types are told apart by their name.
+        "record" | "enum" | "fixed" => match map.get("name").and_then(Value::as_str) {
+            Some(name) => Ok(format!("N:{t}:{name}")),
+            None => Err(ArrowError::SchemaError(format!(
+                "Union branch '{t}' missing required 'name'"
+            ))),
+        },
+        "array" | "map" => Ok(format!("C:{t}")),
+        other => Ok(format!("P:{other}")),
+    }
+}
+
+/// Converts an Arrow [`DataType`] into Avro schema JSON.
+///
+/// Returns `(schema, extras)`: `schema` is the Avro type itself and `extras` is a map of
+/// additional attributes produced alongside it (for example `arrowTimeUnit` or
+/// `arrowUnionMode`). Merging the extras and wrapping nullability are left to the caller;
+/// this function never wraps the result in a `["null", ...]` union itself.
+///
+/// * `field_name` - fallback used to generate a unique Avro name for named types
+///   (`fixed`, `record`, `enum`) when `metadata` carries no `AVRO_NAME_METADATA_KEY`.
+/// * `metadata` - field metadata consulted for name/namespace, decimal `size`,
+///   `logicalType` and enum symbols.
+/// * `name_gen` - generator used to produce unique names for unnamed named types.
+/// * `null_order` - forwarded to nested conversions that wrap nullable children.
+/// * `strip` - when `true`, the `arrow*` hint attributes guarded by `!strip` are not
+///   recorded. The run-end-encoded annotation emitted under `avro_custom_types` is
+///   written regardless of this flag.
+///
+/// # Errors
+///
+/// * `ArrowError::SchemaError` for a decimal with negative scale or scale > precision,
+///   a `Map` whose entries are not a struct with at least two fields, an unsupported
+///   run-end index type, or a union with a nested union / duplicate branch signatures.
+/// * `ArrowError::ParseError` when the enum symbols metadata is not valid JSON.
+/// * `ArrowError::NotYetImplemented` for types without an Avro mapping (catch-all arm,
+///   compiled only when the `small_decimals` feature is disabled).
+fn datatype_to_avro(
+    dt: &DataType,
+    field_name: &str,
+    metadata: &HashMap<String, String>,
+    name_gen: &mut NameGenerator,
+    null_order: Nullability,
+    strip: bool,
+) -> Result<(Value, JsonMap<String, Value>), ArrowError> {
+    let mut extras = JsonMap::new();
+    // Shared by every decimal width: validates precision/scale and picks between a
+    // `fixed`-backed and a `bytes`-backed Avro decimal.
+    let mut handle_decimal = |precision: &u8, scale: &i8| -> Result<Value, ArrowError> {
+        if *scale < 0 {
+            return Err(ArrowError::SchemaError(format!(
+                "Invalid Avro decimal for field '{field_name}': scale ({scale}) must be >= 0"
+            )));
+        }
+        if (*scale as usize) > (*precision as usize) {
+            return Err(ArrowError::SchemaError(format!(
+                "Invalid Avro decimal for field '{field_name}': scale ({scale}) \
+                 must be <= precision ({precision})"
+            )));
+        }
+        let mut meta = JsonMap::from_iter([
+            ("logicalType".into(), json!("decimal")),
+            ("precision".into(), json!(*precision)),
+            ("scale".into(), json!(*scale)),
+        ]);
+        let mut fixed_size = metadata.get("size").and_then(|v| v.parse::<usize>().ok());
+        // A name or namespace in the metadata selects the `fixed` encoding even when no
+        // explicit size was recorded; the size is then derived from the precision.
+        let carries_name = metadata.contains_key(AVRO_NAME_METADATA_KEY)
+            || metadata.contains_key(AVRO_NAMESPACE_METADATA_KEY);
+        if fixed_size.is_none() && carries_name {
+            fixed_size = Some(min_fixed_bytes_for_precision(*precision as usize));
+        }
+        if let Some(size) = fixed_size {
+            meta.insert("type".into(), json!("fixed"));
+            meta.insert("size".into(), json!(size));
+            let chosen_name = metadata
+                .get(AVRO_NAME_METADATA_KEY)
+                .map(|s| sanitise_avro_name(s))
+                .unwrap_or_else(|| name_gen.make_unique(field_name));
+            meta.insert("name".into(), json!(chosen_name));
+            if let Some(ns) = metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+                meta.insert("namespace".into(), json!(ns));
+            }
+        } else {
+            // default to bytes-backed decimal
+            meta.insert("type".into(), json!("bytes"));
+        }
+        Ok(Value::Object(meta))
+    };
+    let val = match dt {
+        DataType::Null => Value::String("null".into()),
+        DataType::Boolean => Value::String("boolean".into()),
+        DataType::Int8 | DataType::Int16 | DataType::UInt8 | DataType::UInt16 | DataType::Int32 => {
+            Value::String("int".into())
+        }
+        DataType::UInt32 | DataType::Int64 | DataType::UInt64 => Value::String("long".into()),
+        DataType::Float16 | DataType::Float32 => Value::String("float".into()),
+        DataType::Float64 => Value::String("double".into()),
+        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Value::String("string".into()),
+        DataType::Binary | DataType::LargeBinary => Value::String("bytes".into()),
+        DataType::BinaryView => {
+            if !strip {
+                extras.insert("arrowBinaryView".into(), Value::Bool(true));
+            }
+            Value::String("bytes".into())
+        }
+        DataType::FixedSizeBinary(len) => {
+            // A 16-byte fixed value is emitted as an Avro `uuid` string only when the
+            // metadata (or the canonical extension type name) marks it as a UUID.
+            let md_is_uuid = metadata
+                .get("logicalType")
+                .map(|s| s.trim_matches('"') == "uuid")
+                .unwrap_or(false);
+            #[cfg(feature = "canonical_extension_types")]
+            let ext_is_uuid = metadata
+                .get(arrow_schema::extension::EXTENSION_TYPE_NAME_KEY)
+                .map(|v| v == arrow_schema::extension::Uuid::NAME || v == "uuid")
+                .unwrap_or(false);
+            #[cfg(not(feature = "canonical_extension_types"))]
+            let ext_is_uuid = false;
+            let is_uuid = (*len == 16) && (md_is_uuid || ext_is_uuid);
+            if is_uuid {
+                json!({ "type": "string", "logicalType": "uuid" })
+            } else {
+                let chosen_name = metadata
+                    .get(AVRO_NAME_METADATA_KEY)
+                    .map(|s| sanitise_avro_name(s))
+                    .unwrap_or_else(|| name_gen.make_unique(field_name));
+                let mut obj = JsonMap::from_iter([
+                    ("type".into(), json!("fixed")),
+                    ("name".into(), json!(chosen_name)),
+                    ("size".into(), json!(len)),
+                ]);
+                if let Some(ns) = metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+                    obj.insert("namespace".into(), json!(ns));
+                }
+                Value::Object(obj)
+            }
+        }
+        #[cfg(feature = "small_decimals")]
+        DataType::Decimal32(precision, scale) | DataType::Decimal64(precision, scale) => {
+            handle_decimal(precision, scale)?
+        }
+        DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => {
+            handle_decimal(precision, scale)?
+        }
+        DataType::Date32 => json!({ "type": "int", "logicalType": "date" }),
+        DataType::Date64 => json!({ "type": "long", "logicalType": "local-timestamp-millis" }),
+        DataType::Time32(unit) => match unit {
+            TimeUnit::Millisecond => json!({ "type": "int", "logicalType": "time-millis" }),
+            TimeUnit::Second => {
+                if !strip {
+                    extras.insert("arrowTimeUnit".into(), Value::String("second".into()));
+                }
+                Value::String("int".into())
+            }
+            _ => Value::String("int".into()),
+        },
+        DataType::Time64(unit) => match unit {
+            TimeUnit::Microsecond => json!({ "type": "long", "logicalType": "time-micros" }),
+            TimeUnit::Nanosecond => {
+                if !strip {
+                    extras.insert("arrowTimeUnit".into(), Value::String("nanosecond".into()));
+                }
+                Value::String("long".into())
+            }
+            _ => Value::String("long".into()),
+        },
+        DataType::Timestamp(unit, tz) => {
+            // Presence of a timezone selects the UTC-adjusted logical type; absence
+            // selects the `local-` variant.
+            let logical_type = match (unit, tz.is_some()) {
+                (TimeUnit::Millisecond, true) => "timestamp-millis",
+                (TimeUnit::Millisecond, false) => "local-timestamp-millis",
+                (TimeUnit::Microsecond, true) => "timestamp-micros",
+                (TimeUnit::Microsecond, false) => "local-timestamp-micros",
+                (TimeUnit::Nanosecond, true) => "timestamp-nanos",
+                (TimeUnit::Nanosecond, false) => "local-timestamp-nanos",
+                (TimeUnit::Second, _) => {
+                    // Second resolution gets no logical type here: emit a plain `long`
+                    // and record the unit as an extra instead.
+                    if !strip {
+                        extras.insert("arrowTimeUnit".into(), Value::String("second".into()));
+                    }
+                    return Ok((Value::String("long".into()), extras));
+                }
+            };
+            if !strip && matches!(unit, TimeUnit::Nanosecond) {
+                extras.insert("arrowTimeUnit".into(), Value::String("nanosecond".into()));
+            }
+            json!({ "type": "long", "logicalType": logical_type })
+        }
+        #[cfg(not(feature = "avro_custom_types"))]
+        DataType::Duration(_unit) => Value::String("long".into()),
+        #[cfg(feature = "avro_custom_types")]
+        DataType::Duration(unit) => {
+            // When the feature is enabled, create an Avro schema object
+            // with the correct `logicalType` annotation.
+            let logical_type = match unit {
+                TimeUnit::Second => "arrow.duration-seconds",
+                TimeUnit::Millisecond => "arrow.duration-millis",
+                TimeUnit::Microsecond => "arrow.duration-micros",
+                TimeUnit::Nanosecond => "arrow.duration-nanos",
+            };
+            json!({ "type": "long", "logicalType": logical_type })
+        }
+        DataType::Interval(IntervalUnit::MonthDayNano) => {
+            // Avro duration logical type: fixed(12) with months/days/millis per spec.
+            let chosen_name = metadata
+                .get(AVRO_NAME_METADATA_KEY)
+                .map(|s| sanitise_avro_name(s))
+                .unwrap_or_else(|| name_gen.make_unique(field_name));
+            let mut obj = JsonMap::from_iter([
+                ("type".into(), json!("fixed")),
+                ("name".into(), json!(chosen_name)),
+                ("size".into(), json!(12)),
+                ("logicalType".into(), json!("duration")),
+            ]);
+            if let Some(ns) = metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+                obj.insert("namespace".into(), json!(ns));
+            }
+            json!(obj)
+        }
+        DataType::Interval(IntervalUnit::YearMonth) => {
+            if !strip {
+                extras.insert(
+                    "arrowIntervalUnit".into(),
+                    Value::String("yearmonth".into()),
+                );
+            }
+            Value::String("long".into())
+        }
+        DataType::Interval(IntervalUnit::DayTime) => {
+            if !strip {
+                extras.insert("arrowIntervalUnit".into(), Value::String("daytime".into()));
+            }
+            Value::String("long".into())
+        }
+        DataType::List(child) | DataType::LargeList(child) => {
+            if matches!(dt, DataType::LargeList(_)) && !strip {
+                extras.insert("arrowLargeList".into(), Value::Bool(true));
+            }
+            let items_schema = process_datatype(
+                child.data_type(),
+                child.name(),
+                child.metadata(),
+                name_gen,
+                null_order,
+                child.is_nullable(),
+                strip,
+            )?;
+            json!({
+                "type": "array",
+                "items": items_schema
+            })
+        }
+        DataType::ListView(child) | DataType::LargeListView(child) => {
+            if matches!(dt, DataType::LargeListView(_)) && !strip {
+                extras.insert("arrowLargeList".into(), Value::Bool(true));
+            }
+            if !strip {
+                extras.insert("arrowListView".into(), Value::Bool(true));
+            }
+            let items_schema = process_datatype(
+                child.data_type(),
+                child.name(),
+                child.metadata(),
+                name_gen,
+                null_order,
+                child.is_nullable(),
+                strip,
+            )?;
+            json!({
+                "type": "array",
+                "items": items_schema
+            })
+        }
+        DataType::FixedSizeList(child, len) => {
+            if !strip {
+                extras.insert("arrowFixedSize".into(), json!(len));
+            }
+            let items_schema = process_datatype(
+                child.data_type(),
+                child.name(),
+                child.metadata(),
+                name_gen,
+                null_order,
+                child.is_nullable(),
+                strip,
+            )?;
+            json!({
+                "type": "array",
+                "items": items_schema
+            })
+        }
+        DataType::Map(entries, _) => {
+            // Only the value field (index 1) is translated. The length guard turns a
+            // hand-built entries struct with fewer than two fields into a SchemaError
+            // rather than an out-of-bounds panic.
+            let value_field = match entries.data_type() {
+                DataType::Struct(fs) if fs.len() >= 2 => &fs[1],
+                _ => {
+                    return Err(ArrowError::SchemaError(
+                        "Map 'entries' field must be Struct(key,value)".into(),
+                    ));
+                }
+            };
+            let values_schema = process_datatype(
+                value_field.data_type(),
+                value_field.name(),
+                value_field.metadata(),
+                name_gen,
+                null_order,
+                value_field.is_nullable(),
+                strip,
+            )?;
+            json!({
+                "type": "map",
+                "values": values_schema
+            })
+        }
+        DataType::Struct(fields) => {
+            let avro_fields = fields
+                .iter()
+                .map(|field| arrow_field_to_avro(field, name_gen, null_order, strip))
+                .collect::<Result<Vec<_>, _>>()?;
+            // Prefer avro.name/avro.namespace when provided on the struct field metadata
+            let chosen_name = metadata
+                .get(AVRO_NAME_METADATA_KEY)
+                .map(|s| sanitise_avro_name(s))
+                .unwrap_or_else(|| name_gen.make_unique(field_name));
+            let mut obj = JsonMap::from_iter([
+                ("type".into(), json!("record")),
+                ("name".into(), json!(chosen_name)),
+                ("fields".into(), Value::Array(avro_fields)),
+            ]);
+            if let Some(ns) = metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+                obj.insert("namespace".into(), json!(ns));
+            }
+            Value::Object(obj)
+        }
+        DataType::Dictionary(_, value) => {
+            // With enum symbols in the metadata the dictionary becomes an Avro enum;
+            // otherwise it is written as its (non-nullable) value type.
+            if let Some(j) = metadata.get(AVRO_ENUM_SYMBOLS_METADATA_KEY) {
+                let symbols: Vec<&str> =
+                    serde_json::from_str(j).map_err(|e| ArrowError::ParseError(e.to_string()))?;
+                // Prefer avro.name/namespace when provided for enums
+                let chosen_name = metadata
+                    .get(AVRO_NAME_METADATA_KEY)
+                    .map(|s| sanitise_avro_name(s))
+                    .unwrap_or_else(|| name_gen.make_unique(field_name));
+                let mut obj = JsonMap::from_iter([
+                    ("type".into(), json!("enum")),
+                    ("name".into(), json!(chosen_name)),
+                    ("symbols".into(), json!(symbols)),
+                ]);
+                if let Some(ns) = metadata.get(AVRO_NAMESPACE_METADATA_KEY) {
+                    obj.insert("namespace".into(), json!(ns));
+                }
+                Value::Object(obj)
+            } else {
+                process_datatype(
+                    value.as_ref(),
+                    field_name,
+                    metadata,
+                    name_gen,
+                    null_order,
+                    false,
+                    strip,
+                )?
+            }
+        }
+        #[cfg(feature = "avro_custom_types")]
+        DataType::RunEndEncoded(run_ends, values) => {
+            let bits = match run_ends.data_type() {
+                DataType::Int16 => 16,
+                DataType::Int32 => 32,
+                DataType::Int64 => 64,
+                other => {
+                    return Err(ArrowError::SchemaError(format!(
+                        "RunEndEncoded requires Int16/Int32/Int64 for run_ends, found: {other:?}"
+                    )));
+                }
+            };
+            // Build the value site schema, preserving its own nullability
+            let (value_schema, value_extras) = datatype_to_avro(
+                values.data_type(),
+                values.name(),
+                values.metadata(),
+                name_gen,
+                null_order,
+                strip,
+            )?;
+            let mut merged = merge_extras(value_schema, value_extras);
+            if values.is_nullable() {
+                merged = wrap_nullable(merged, null_order);
+            }
+            // These extras replace (not extend) the outer `extras` map and are emitted
+            // regardless of `strip`.
+            let mut extras = JsonMap::new();
+            extras.insert("logicalType".into(), json!("arrow.run-end-encoded"));
+            extras.insert("arrow.runEndIndexBits".into(), json!(bits));
+            return Ok((merged, extras));
+        }
+        #[cfg(not(feature = "avro_custom_types"))]
+        DataType::RunEndEncoded(_run_ends, values) => {
+            // Without custom types the run-end encoding is dropped: only the value
+            // type's schema is returned, with no extras.
+            let (value_schema, _extras) = datatype_to_avro(
+                values.data_type(),
+                values.name(),
+                values.metadata(),
+                name_gen,
+                null_order,
+                strip,
+            )?;
+            return Ok((value_schema, JsonMap::new()));
+        }
+        DataType::Union(fields, mode) => {
+            let mut branches: Vec<Value> = Vec::with_capacity(fields.len());
+            let mut type_ids: Vec<i32> = Vec::with_capacity(fields.len());
+            for (type_id, field_ref) in fields.iter() {
+                // NOTE: `process_datatype` would wrap nullability; force is_nullable=false here.
+                let (branch_schema, _branch_extras) = datatype_to_avro(
+                    field_ref.data_type(),
+                    field_ref.name(),
+                    field_ref.metadata(),
+                    name_gen,
+                    null_order,
+                    strip,
+                )?;
+                // Avro unions cannot immediately contain another union
+                if matches!(branch_schema, Value::Array(_)) {
+                    return Err(ArrowError::SchemaError(
+                        "Avro union may not immediately contain another union".into(),
+                    ));
+                }
+                branches.push(branch_schema);
+                type_ids.push(type_id as i32);
+            }
+            // Reject unions whose branches share a signature as computed by
+            // `union_branch_signature`.
+            let mut seen: HashSet<String> = HashSet::with_capacity(branches.len());
+            for b in &branches {
+                let sig = union_branch_signature(b)?;
+                if !seen.insert(sig) {
+                    return Err(ArrowError::SchemaError(
+                        "Avro union contains duplicate branch types (disallowed by spec)".into(),
+                    ));
+                }
+            }
+            if !strip {
+                extras.insert(
+                    "arrowUnionMode".into(),
+                    Value::String(
+                        match mode {
+                            UnionMode::Sparse => "sparse",
+                            UnionMode::Dense => "dense",
+                        }
+                        .to_string(),
+                    ),
+                );
+                extras.insert(
+                    "arrowUnionTypeIds".into(),
+                    Value::Array(type_ids.into_iter().map(|id| json!(id)).collect()),
+                );
+            }
+            Value::Array(branches)
+        }
+        #[cfg(not(feature = "small_decimals"))]
+        other => {
+            return Err(ArrowError::NotYetImplemented(format!(
+                "Arrow type {other:?} has no Avro representation"
+            )));
+        }
+    };
+    Ok((val, extras))
+}
+
+/// Produces the complete Avro schema JSON for one Arrow type site.
+///
+/// Runs `datatype_to_avro`, folds the returned extras into the schema via
+/// `merge_extras`, and, when `is_nullable` is set, passes the result through
+/// `wrap_nullable` using `null_order`.
+///
+/// # Errors
+///
+/// Propagates any error returned by `datatype_to_avro`.
+fn process_datatype(
+    dt: &DataType,
+    field_name: &str,
+    metadata: &HashMap<String, String>,
+    name_gen: &mut NameGenerator,
+    null_order: Nullability,
+    is_nullable: bool,
+    strip: bool,
+) -> Result<Value, ArrowError> {
+    let (avro_schema, side_attrs) =
+        datatype_to_avro(dt, field_name, metadata, name_gen, null_order, strip)?;
+    let combined = merge_extras(avro_schema, side_attrs);
+    // Nullability is applied last so the extras end up inside the wrapped schema.
+    Ok(if is_nullable {
+        wrap_nullable(combined, null_order)
+    } else {
+        combined
+    })
+}
+
+/// Converts an Arrow field into the JSON object for an Avro record field.
+///
+/// The object always holds `name` (the sanitised Arrow field name) and `type`
+/// (from `process_datatype`, honouring the field's nullability). Metadata entries
+/// not rejected by `is_internal_arrow_key` are then copied in: the doc key becomes
+/// `doc` as a plain string, the default key becomes `default`, and every other key
+/// is kept under its own name. Apart from `doc`, values are parsed as JSON and fall
+/// back to a JSON string when parsing fails.
+///
+/// # Errors
+///
+/// Propagates any error returned by `process_datatype`.
+fn arrow_field_to_avro(
+    field: &ArrowField,
+    name_gen: &mut NameGenerator,
+    null_order: Nullability,
+    strip: bool,
+) -> Result<Value, ArrowError> {
+    let avro_name = sanitise_avro_name(field.name());
+    let type_json = process_datatype(
+        field.data_type(),
+        &avro_name,
+        field.metadata(),
+        name_gen,
+        null_order,
+        field.is_nullable(),
+        strip,
+    )?;
+    // Interpret a metadata value as JSON, falling back to a plain JSON string.
+    let json_or_string = |raw: &str| -> Value {
+        serde_json::from_str(raw).unwrap_or_else(|_| Value::String(raw.to_owned()))
+    };
+    // Room for `name`, `type` and one spare slot on top of the metadata entries.
+    let mut map = JsonMap::with_capacity(field.metadata().len() + 3);
+    map.insert("name".into(), Value::String(avro_name));
+    map.insert("type".into(), type_json);
+    for (key, raw) in field.metadata() {
+        if is_internal_arrow_key(key) {
+            continue;
+        }
+        let (attr_name, attr_value): (String, Value) = match key.as_str() {
+            AVRO_DOC_METADATA_KEY => ("doc".into(), Value::String(raw.clone())),
+            AVRO_FIELD_DEFAULT_METADATA_KEY => ("default".into(), json_or_string(raw.as_str())),
+            _ => (key.clone(), json_or_string(raw.as_str())),
+        };
+        map.insert(attr_name, attr_value);
+    }
+    Ok(Value::Object(map))
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::codec::{AvroDataType, AvroField};
-    use arrow_schema::{DataType, Fields, TimeUnit};
+    use crate::codec::{AvroField, AvroFieldBuilder};
+    use arrow_schema::{DataType, Fields, SchemaBuilder, TimeUnit, UnionFields};
     use serde_json::json;
+    use std::sync::Arc;
+
+    /// Test fixture: the schema for the Avro primitive type `int`.
+    fn int_schema() -> Schema<'static> {
+        let primitive = TypeName::Primitive(PrimitiveType::Int);
+        Schema::TypeName(primitive)
+    }
+
+    /// Test fixture: record `record1` in namespace `test.namespace` with a documented
+    /// `int` field (`field1`) followed by an undocumented `string` field (`field2`).
+    fn record_schema() -> Schema<'static> {
+        let int_field = Field {
+            name: "field1",
+            doc: Some(Cow::from("An integer field")),
+            r#type: int_schema(),
+            default: None,
+            aliases: vec![],
+        };
+        let string_field = Field {
+            name: "field2",
+            doc: None,
+            r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
+            default: None,
+            aliases: vec![],
+        };
+        let record = Record {
+            name: "record1",
+            namespace: Some("test.namespace"),
+            doc: Some(Cow::from("A test record")),
+            aliases: vec![],
+            fields: vec![int_field, string_field],
+            attributes: Attributes::default(),
+        };
+        Schema::Complex(ComplexType::Record(record))
+    }
+
+    /// Test helper: builds an Arrow schema whose only field is `field`.
+    fn single_field_schema(field: ArrowField) -> arrow_schema::Schema {
+        let mut builder = SchemaBuilder::new();
+        builder.push(field);
+        builder.finish()
+    }
+
+    /// Test helper: panics unless `avro_json` contains `needle` as a substring.
+    /// The panic message reports both the needle and the full JSON text.
+    fn assert_json_contains(avro_json: &str, needle: &str) {
+        if !avro_json.contains(needle) {
+            panic!("JSON did not contain `{needle}` : {avro_json}");
+        }
+    }
 
     #[test]
     fn test_deserialize() {
@@ -370,6 +1883,7 @@ mod tests {
                         Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
                     ]),
                     default: None,
+                    aliases: vec![],
                 },],
                 attributes: Default::default(),
             }))
@@ -401,6 +1915,7 @@ mod tests {
                         doc: None,
                         r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)),
                         default: None,
+                        aliases: vec![],
                     },
                     Field {
                         name: "next",
@@ -410,6 +1925,7 @@ mod tests {
                             Schema::TypeName(TypeName::Ref("LongList")),
                         ]),
                         default: None,
+                        aliases: vec![],
                     }
                 ],
                 attributes: Attributes::default(),
@@ -463,6 +1979,7 @@ mod tests {
                             Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
                         ]),
                         default: None,
+                        aliases: vec![],
                     },
                     Field {
                         name: "timestamp_col",
@@ -472,27 +1989,31 @@ mod tests {
                             Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
                         ]),
                         default: None,
+                        aliases: vec![],
                     }
                 ],
                 attributes: Default::default(),
             }))
         );
         let codec = AvroField::try_from(&schema).unwrap();
-        assert_eq!(
-            codec.field(),
-            arrow_schema::Field::new(
-                "topLevelRecord",
-                DataType::Struct(Fields::from(vec![
-                    arrow_schema::Field::new("id", DataType::Int32, true),
-                    arrow_schema::Field::new(
-                        "timestamp_col",
-                        DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
-                        true
-                    ),
-                ])),
-                false
-            )
-        );
+        let expected_arrow_field = arrow_schema::Field::new(
+            "topLevelRecord",
+            DataType::Struct(Fields::from(vec![
+                arrow_schema::Field::new("id", DataType::Int32, true),
+                arrow_schema::Field::new(
+                    "timestamp_col",
+                    DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
+                    true,
+                ),
+            ])),
+            false,
+        )
+        .with_metadata(std::collections::HashMap::from([(
+            AVRO_NAME_METADATA_KEY.to_string(),
+            "topLevelRecord".to_string(),
+        )]));
+
+        assert_eq!(codec.field(), expected_arrow_field);
 
         let schema: Schema = serde_json::from_str(
             r#"{
@@ -527,6 +2048,7 @@ mod tests {
                             attributes: Default::default(),
                         })),
                         default: None,
+                        aliases: vec![],
                     },
                     Field {
                         name: "clientProtocol",
@@ -536,12 +2058,14 @@ mod tests {
                             Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
                         ]),
                         default: None,
+                        aliases: vec![],
                     },
                     Field {
                         name: "serverHash",
                         doc: None,
                         r#type: Schema::TypeName(TypeName::Ref("MD5")),
                         default: None,
+                        aliases: vec![],
                     },
                     Field {
                         name: "meta",
@@ -556,10 +2080,1061 @@ mod tests {
                             })),
                         ]),
                         default: None,
+                        aliases: vec![],
                     }
                 ],
                 attributes: Default::default(),
             }))
         );
     }
+
+    #[test]
+    fn test_canonical_form_generation_comprehensive_record() {
+        // NOTE: This writer schema is identical to the one used in test_deserialize_comprehensive.
+        let json_str = r#"{
+          "type": "record",
+          "name": "E2eComprehensive",
+          "namespace": "org.apache.arrow.avrotests.v1",
+          "doc": "Comprehensive Avro writer schema to exercise arrow-avro Reader/Decoder paths.",
+          "fields": [
+            {"name": "id", "type": "long", "doc": "Primary row id", "aliases": ["identifier"]},
+            {"name": "flag", "type": "boolean", "default": true, "doc": "A sample boolean with default true"},
+            {"name": "ratio_f32", "type": "float", "default": 0.0, "doc": "Float32 example"},
+            {"name": "ratio_f64", "type": "double", "default": 0.0, "doc": "Float64 example"},
+            {"name": "count_i32", "type": "int", "default": 0, "doc": "Int32 example"},
+            {"name": "count_i64", "type": "long", "default": 0, "doc": "Int64 example"},
+            {"name": "opt_i32_nullfirst", "type": ["null", "int"], "default": null, "doc": "Nullable int (null-first)"},
+            {"name": "opt_str_nullsecond", "type": ["string", "null"], "default": "", "aliases": ["old_opt_str"], "doc": "Nullable string (null-second). Default is empty string."},
+            {"name": "tri_union_prim", "type": ["int", "string", "boolean"], "default": 0, "doc": "Union[int, string, boolean] with default on first branch (int=0)."},
+            {"name": "str_utf8", "type": "string", "default": "default", "doc": "Plain Utf8 string (Reader may use Utf8View)."},
+            {"name": "raw_bytes", "type": "bytes", "default": "", "doc": "Raw bytes field"},
+            {"name": "fx16_plain", "type": {"type": "fixed", "name": "Fx16", "namespace": "org.apache.arrow.avrotests.v1.types", "aliases": ["Fixed16Old"], "size": 16}, "doc": "Plain fixed(16)"},
+            {"name": "dec_bytes_s10_2", "type": {"type": "bytes", "logicalType": "decimal", "precision": 10, "scale": 2}, "doc": "Decimal encoded on bytes, precision 10, scale 2"},
+            {"name": "dec_fix_s20_4", "type": {"type": "fixed", "name": "DecFix20", "namespace": "org.apache.arrow.avrotests.v1.types", "size": 20, "logicalType": "decimal", "precision": 20, "scale": 4}, "doc": "Decimal encoded on fixed(20), precision 20, scale 4"},
+            {"name": "uuid_str", "type": {"type": "string", "logicalType": "uuid"}, "doc": "UUID logical type on string"},
+            {"name": "d_date", "type": {"type": "int", "logicalType": "date"}, "doc": "Date32: days since 1970-01-01"},
+            {"name": "t_millis", "type": {"type": "int", "logicalType": "time-millis"}, "doc": "Time32-millis"},
+            {"name": "t_micros", "type": {"type": "long", "logicalType": "time-micros"}, "doc": "Time64-micros"},
+            {"name": "ts_millis_utc", "type": {"type": "long", "logicalType": "timestamp-millis"}, "doc": "Timestamp ms (UTC)"},
+            {"name": "ts_micros_utc", "type": {"type": "long", "logicalType": "timestamp-micros"}, "doc": "Timestamp µs (UTC)"},
+            {"name": "ts_millis_local", "type": {"type": "long", "logicalType": "local-timestamp-millis"}, "doc": "Local timestamp ms"},
+            {"name": "ts_micros_local", "type": {"type": "long", "logicalType": "local-timestamp-micros"}, "doc": "Local timestamp µs"},
+            {"name": "interval_mdn", "type": {"type": "fixed", "name": "Dur12", "namespace": "org.apache.arrow.avrotests.v1.types", "size": 12, "logicalType": "duration"}, "doc": "Duration: fixed(12) little-endian (months, days, millis)"},
+            {"name": "status", "type": {"type": "enum", "name": "Status", "namespace": "org.apache.arrow.avrotests.v1.types", "symbols": ["UNKNOWN", "NEW", "PROCESSING", "DONE"], "aliases": ["State"], "doc": "Processing status enum with default"}, "default": "UNKNOWN", "doc": "Enum field using default when resolving"},
+            {"name": "arr_union", "type": {"type": "array", "items": ["long", "string", "null"]}, "default": [], "doc": "Array whose items are a union[long,string,null]"},
+            {"name": "map_union", "type": {"type": "map", "values": ["null", "double", "string"]}, "default": {}, "doc": "Map whose values are a union[null,double,string]"},
+            {"name": "address", "type": {"type": "record", "name": "Address", "namespace": "org.apache.arrow.avrotests.v1.types", "doc": "Postal address with defaults and field alias", "fields": [
+                {"name": "street", "type": "string", "default": "", "aliases": ["street_name"], "doc": "Street (field alias = street_name)"},
+                {"name": "zip", "type": "int", "default": 0, "doc": "ZIP/postal code"},
+                {"name": "country", "type": "string", "default": "US", "doc": "Country code"}
+            ]}, "doc": "Embedded Address record"},
+            {"name": "maybe_auth", "type": {"type": "record", "name": "MaybeAuth", "namespace": "org.apache.arrow.avrotests.v1.types", "doc": "Optional auth token model", "fields": [
+                {"name": "user", "type": "string", "doc": "Username"},
+                {"name": "token", "type": ["null", "bytes"], "default": null, "doc": "Nullable auth token"}
+            ]}},
+            {"name": "union_enum_record_array_map", "type": [
+                {"type": "enum", "name": "Color", "namespace": "org.apache.arrow.avrotests.v1.types", "symbols": ["RED", "GREEN", "BLUE"], "doc": "Color enum"},
+                {"type": "record", "name": "RecA", "namespace": "org.apache.arrow.avrotests.v1.types", "fields": [{"name": "a", "type": "int"}, {"name": "b", "type": "string"}]},
+                {"type": "record", "name": "RecB", "namespace": "org.apache.arrow.avrotests.v1.types", "fields": [{"name": "x", "type": "long"}, {"name": "y", "type": "bytes"}]},
+                {"type": "array", "items": "long"},
+                {"type": "map", "values": "string"}
+            ], "doc": "Union of enum, two records, array, and map"},
+            {"name": "union_date_or_fixed4", "type": [
+                {"type": "int", "logicalType": "date"},
+                {"type": "fixed", "name": "Fx4", "size": 4}
+            ], "doc": "Union of date(int) or fixed(4)"},
+            {"name": "union_interval_or_string", "type": [
+                {"type": "fixed", "name": "Dur12U", "size": 12, "logicalType": "duration"},
+                "string"
+            ], "doc": "Union of duration(fixed12) or string"},
+            {"name": "union_uuid_or_fixed10", "type": [
+                {"type": "string", "logicalType": "uuid"},
+                {"type": "fixed", "name": "Fx10", "size": 10}
+            ], "doc": "Union of UUID string or fixed(10)"},
+            {"name": "array_records_with_union", "type": {"type": "array", "items": {
+                "type": "record", "name": "KV", "namespace": "org.apache.arrow.avrotests.v1.types",
+                "fields": [
+                    {"name": "key", "type": "string"},
+                    {"name": "val", "type": ["null", "int", "long"], "default": null}
+                ]
+            }}, "doc": "Array<record{key, val: union[null,int,long]}>", "default": []},
+            {"name": "union_map_or_array_int", "type": [
+                {"type": "map", "values": "int"},
+                {"type": "array", "items": "int"}
+            ], "doc": "Union[map<string,int>, array<int>]"},
+            {"name": "renamed_with_default", "type": "int", "default": 42, "aliases": ["old_count"], "doc": "Field with alias and default"},
+            {"name": "person", "type": {"type": "record", "name": "PersonV2", "namespace": "com.example.v2", "aliases": ["com.example.Person"], "doc": "Person record with alias pointing to previous namespace/name", "fields": [
+                {"name": "name", "type": "string"},
+                {"name": "age", "type": "int", "default": 0}
+            ]}, "doc": "Record using type alias for schema evolution tests"}
+          ]
+        }"#;
+        let avro = AvroSchema::new(json_str.to_string()); // wrap the raw JSON text
+        let parsed = avro.schema().expect("schema should deserialize"); // parse it into a `Schema`
+        let expected_canonical_form = r#"{"name":"org.apache.arrow.avrotests.v1.E2eComprehensive","type":"record","fields":[{"name":"id","type":"long"},{"name":"flag","type":"boolean"},{"name":"ratio_f32","type":"float"},{"name":"ratio_f64","type":"double"},{"name":"count_i32","type":"int"},{"name":"count_i64","type":"long"},{"name":"opt_i32_nullfirst","type":["null","int"]},{"name":"opt_str_nullsecond","type":["string","null"]},{"name":"tri_union_prim","type":["int","string","boolean"]},{"name":"str_utf8","type":"string"},{"name":"raw_bytes","type":"bytes"},{"name":"fx16_plain","type":{"name":"org.apache.arrow.avrotests.v1.types.Fx16","type":"fixed","size":16}},{"name":"dec_bytes_s10_2","type":"bytes"},{"name":"dec_fix_s20_4","type":{"name":"org.apache.arrow.avrotests.v1.types.DecFix20","type":"fixed","size":20}},{"name":"uuid_str","type":"string"},{"name":"d_date","type":"int"},{"name":"t_millis","type":"int"},{"name":"t_micros","type":"long"},{"name":"ts_millis_utc","type":"long"},{"name":"ts_micros_utc","type":"long"},{"name":"ts_millis_local","type":"long"},{"name":"ts_micros_local","type":"long"},{"name":"interval_mdn","type":{"name":"org.apache.arrow.avrotests.v1.types.Dur12","type":"fixed","size":12}},{"name":"status","type":{"name":"org.apache.arrow.avrotests.v1.types.Status","type":"enum","symbols":["UNKNOWN","NEW","PROCESSING","DONE"]}},{"name":"arr_union","type":{"type":"array","items":["long","string","null"]}},{"name":"map_union","type":{"type":"map","values":["null","double","string"]}},{"name":"address","type":{"name":"org.apache.arrow.avrotests.v1.types.Address","type":"record","fields":[{"name":"street","type":"string"},{"name":"zip","type":"int"},{"name":"country","type":"string"}]}},{"name":"maybe_auth","type":{"name":"org.apache.arrow.avrotests.v1.types.MaybeAuth","type":"record","fields":[{"name":"user","type":"string"},{"name":"token","type":["null","bytes"]}]}},{"name":"union_enum_record_array_map","type":[{"name":"org.apache.arrow.avrotests
.v1.types.Color","type":"enum","symbols":["RED","GREEN","BLUE"]},{"name":"org.apache.arrow.avrotests.v1.types.RecA","type":"record","fields":[{"name":"a","type":"int"},{"name":"b","type":"string"}]},{"name":"org.apache.arrow.avrotests.v1.types.RecB","type":"record","fields":[{"name":"x","type":"long"},{"name":"y","type":"bytes"}]},{"type":"array","items":"long"},{"type":"map","values":"string"}]},{"name":"union_date_or_fixed4","type":["int",{"name":"org.apache.arrow.avrotests.v1.Fx4","type":"fixed","size":4}]},{"name":"union_interval_or_string","type":[{"name":"org.apache.arrow.avrotests.v1.Dur12U","type":"fixed","size":12},"string"]},{"name":"union_uuid_or_fixed10","type":["string",{"name":"org.apache.arrow.avrotests.v1.Fx10","type":"fixed","size":10}]},{"name":"array_records_with_union","type":{"type":"array","items":{"name":"org.apache.arrow.avrotests.v1.types.KV","type":"record","fields":[{"name":"key","type":"string"},{"name":"val","type":["null","int","long"]}]}}},{"name":"union_map_or_array_int","type":[{"type":"map","values":"int"},{"type":"array","items":"int"}]},{"name":"renamed_with_default","type":"int"},{"name":"person","type":{"name":"com.example.v2.PersonV2","type":"record","fields":[{"name":"name","type":"string"},{"name":"age","type":"int"}]}}]}"#; // expected text carries no doc, default, aliases or logicalType entries
+        let canonical_form =
+            AvroSchema::generate_canonical_form(&parsed).expect("canonical form should be built");
+        assert_eq!(
+            canonical_form, expected_canonical_form,
+            "Canonical form must match Avro spec PCF exactly"
+        );
+    }
+
+    #[test]
+    fn test_new_schema_store() {
+        // A store created via `new()` must not contain any schemas yet.
+        assert!(SchemaStore::new().schemas.is_empty());
+    }
+
+    #[test]
+    fn test_try_from_schemas_rabin() {
+        // Two distinct schemas: a primitive and a record, built from their JSON form.
+        let primitive = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let record = AvroSchema::new(serde_json::to_string(&record_schema()).unwrap());
+
+        // Key each schema by its Rabin fingerprint and convert the map into a store.
+        let by_fingerprint: HashMap<Fingerprint, AvroSchema> = HashMap::from([
+            (
+                primitive.fingerprint(FingerprintAlgorithm::Rabin).unwrap(),
+                primitive.clone(),
+            ),
+            (
+                record.fingerprint(FingerprintAlgorithm::Rabin).unwrap(),
+                record.clone(),
+            ),
+        ]);
+        let store = SchemaStore::try_from(by_fingerprint).unwrap();
+
+        // Fingerprints are recomputed here rather than reusing the insertion keys,
+        // so the lookups also rely on the fingerprint being reproducible.
+        let primitive_fp = primitive.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+        assert_eq!(store.lookup(&primitive_fp).cloned(), Some(primitive));
+
+        let record_fp = record.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+        assert_eq!(store.lookup(&record_fp).cloned(), Some(record));
+    }
+
+    #[test]
+    fn test_try_from_with_duplicates() {
+        let primitive = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let record = AvroSchema::new(serde_json::to_string(&record_schema()).unwrap());
+
+        // The primitive schema is listed twice under the same fingerprint. A `HashMap`
+        // keeps at most one entry per key, so the duplicate is already collapsed
+        // before the map is converted into a store.
+        let by_fingerprint: HashMap<Fingerprint, AvroSchema> = HashMap::from([
+            (
+                primitive.fingerprint(FingerprintAlgorithm::Rabin).unwrap(),
+                primitive.clone(),
+            ),
+            (
+                record.fingerprint(FingerprintAlgorithm::Rabin).unwrap(),
+                record.clone(),
+            ),
+            // Duplicate of the primitive schema entry
+            (
+                primitive.fingerprint(FingerprintAlgorithm::Rabin).unwrap(),
+                primitive.clone(),
+            ),
+        ]);
+
+        let store = SchemaStore::try_from(by_fingerprint).unwrap();
+        assert_eq!(store.schemas.len(), 2);
+
+        // Only the primitive schema is looked up afterwards.
+        let primitive_fp = primitive.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+        assert_eq!(store.lookup(&primitive_fp).cloned(), Some(primitive));
+    }
+
+    #[test]
+    fn test_register_and_lookup_rabin() {
+        let int_avro = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        // `register` on a store built with `new()` is expected to yield a Rabin
+        // fingerprint; every other variant fails the test immediately.
+        let rabin = match store.register(int_avro.clone()).unwrap() {
+            Fingerprint::Rabin(value) => value,
+            Fingerprint::Id(_) => {
+                unreachable!("This test should only generate Rabin fingerprints")
+            }
+            Fingerprint::Id64(_) => {
+                unreachable!("This test should only generate Rabin fingerprints")
+            }
+            #[cfg(feature = "md5")]
+            Fingerprint::MD5(_) => {
+                unreachable!("This test should only generate Rabin fingerprints")
+            }
+            #[cfg(feature = "sha256")]
+            Fingerprint::SHA256(_) => {
+                unreachable!("This test should only generate Rabin fingerprints")
+            }
+        };
+
+        // The returned fingerprint resolves to the registered schema ...
+        assert_eq!(
+            store.lookup(&Fingerprint::Rabin(rabin)).cloned(),
+            Some(int_avro.clone())
+        );
+        // ... while the neighbouring fingerprint value is unknown to the store.
+        let neighbour = Fingerprint::Rabin(rabin.wrapping_add(1));
+        assert!(store.lookup(&neighbour).is_none());
+    }
+
+    #[test]
+    fn test_set_and_lookup_id() {
+        const ID: u32 = 42;
+        let key = Fingerprint::Id(ID);
+        let int_avro = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        // `set` hands back the fingerprint the schema was stored under.
+        assert_eq!(store.set(key, int_avro.clone()).unwrap(), key);
+        assert_eq!(store.lookup(&key).cloned(), Some(int_avro.clone()));
+        assert!(store.lookup(&Fingerprint::Id(ID.wrapping_add(1))).is_none());
+    }
+
+    #[test]
+    fn test_set_and_lookup_id64() {
+        const ID64: u64 = 0xDEAD_BEEF_DEAD_BEEF;
+        let key = Fingerprint::Id64(ID64);
+        let neighbour = Fingerprint::Id64(ID64.wrapping_add(1));
+        let int_avro = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        let got = store.set(key, int_avro.clone()).unwrap();
+        assert_eq!(got, key, "set should return the same Id64 fingerprint");
+
+        let found = store.lookup(&key).cloned();
+        assert_eq!(
+            found,
+            Some(int_avro.clone()),
+            "lookup should find the schema by Id64"
+        );
+
+        let missing = store.lookup(&neighbour).is_none();
+        assert!(missing, "lookup with a different Id64 must return None");
+    }
+
+    #[test]
+    fn test_fingerprint_id64_conversions() {
+        // Every conversion keeps the Id64 variant; converted payloads are asserted to be 0.
+        let algo: FingerprintAlgorithm = (&Fingerprint::Id64(123)).into();
+        assert_eq!(algo, FingerprintAlgorithm::Id64);
+        assert!(matches!(Fingerprint::from(FingerprintAlgorithm::Id64), Fingerprint::Id64(0)));
+        let strategy: FingerprintStrategy = Fingerprint::Id64(5).into();
+        assert!(matches!(strategy, FingerprintStrategy::Id64(0)));
+        let algo_again: FingerprintAlgorithm = strategy.into();
+        assert_eq!(algo_again, FingerprintAlgorithm::Id64);
+    }
+
+    #[test]
+    fn test_register_duplicate_schema() {
+        // Each call builds a fresh, independent copy of the same primitive schema.
+        let make = || AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        let first_fp = store.register(make()).unwrap();
+        let second_fp = store.register(make()).unwrap();
+        assert_eq!(first_fp, second_fp);
+        assert_eq!(store.schemas.len(), 1);
+    }
+
+    #[test]
+    fn test_set_and_lookup_with_provided_fingerprint() {
+        let int_avro = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        // The caller supplies the key: here, the schema's own Rabin fingerprint.
+        let key = int_avro.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+        let mut store = SchemaStore::new();
+        assert_eq!(store.set(key, int_avro.clone()).unwrap(), key);
+        assert_eq!(store.lookup(&key).cloned(), Some(int_avro));
+    }
+
+    #[test]
+    fn test_set_duplicate_same_schema_ok() {
+        let int_avro = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let key = int_avro.fingerprint(FingerprintAlgorithm::Rabin).unwrap();
+        let mut store = SchemaStore::new();
+        let _first = store.set(key, int_avro.clone()).unwrap();
+        let _second = store.set(key, int_avro.clone()).unwrap(); // same pair again: must be Ok
+        assert_eq!(store.schemas.len(), 1);
+    }
+
+    #[test]
+    fn test_set_duplicate_different_schema_collision_error() {
+        // One shared `Fingerprint::Id` key stands in for a collision between two schemas.
+        let shared_key = Fingerprint::Id(123);
+        let primitive = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let record = AvroSchema::new(serde_json::to_string(&record_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        let _stored = store.set(shared_key, primitive).unwrap();
+        // A different schema under the same key must be rejected with a collision error.
+        let rendered = store.set(shared_key, record).unwrap_err().to_string();
+        assert!(rendered.contains("Schema fingerprint collision"));
+    }
+
+    #[test]
+    fn test_canonical_form_generation_primitive() {
+        let primitive = int_schema();
+        // A bare primitive canonicalizes to its JSON-quoted type name.
+        assert_eq!(AvroSchema::generate_canonical_form(&primitive).unwrap(), r#""int""#);
+    }
+
+    #[test]
+    fn test_canonical_form_generation_record() {
+        // Expected form: fully qualified name first, then the type, then the fields.
+        let want = r#"{"name":"test.namespace.record1","type":"record","fields":[{"name":"field1","type":"int"},{"name":"field2","type":"string"}]}"#;
+        let record = record_schema();
+        assert_eq!(AvroSchema::generate_canonical_form(&record).unwrap(), want);
+    }
+
+    #[test]
+    fn test_fingerprint_calculation() {
+        // Pins the Rabin fingerprint of a fixed input string to a known constant.
+        let input = r#"{"fields":[{"name":"a","type":"long"},{"name":"b","type":"string"}],"name":"test","type":"record"}"#;
+        let expected = 10505236152925314060;
+        assert_eq!(compute_fingerprint_rabin(input), expected);
+    }
+
+    #[test]
+    fn test_register_and_lookup_complex_schema() {
+        // The registered fingerprint must equal the Rabin fingerprint of this canonical form.
+        let pcf = r#"{"name":"test.namespace.record1","type":"record","fields":[{"name":"field1","type":"int"},{"name":"field2","type":"string"}]}"#;
+        let want = Fingerprint::Rabin(compute_fingerprint_rabin(pcf));
+        let record = AvroSchema::new(serde_json::to_string(&record_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        let got = store.register(record.clone()).unwrap();
+        assert_eq!(got, want);
+        assert_eq!(store.lookup(&got).cloned(), Some(record));
+    }
+
+    #[test]
+    fn test_fingerprints_returns_all_keys() {
+        // Register one primitive and one record schema, keeping the returned fingerprints.
+        let primitive = AvroSchema::new(serde_json::to_string(&int_schema()).unwrap());
+        let record = AvroSchema::new(serde_json::to_string(&record_schema()).unwrap());
+        let mut store = SchemaStore::new();
+        let registered = [
+            store.register(primitive).unwrap(),
+            store.register(record).unwrap(),
+        ];
+
+        // Every fingerprint handed out by `register` must be listed, and nothing else.
+        let listed = store.fingerprints();
+        assert_eq!(listed.len(), 2);
+        for fp in &registered {
+            assert!(listed.contains(fp));
+        }
+    }
+
+    #[test]
+    fn test_canonical_form_strips_attributes() {
+        let annotated_field = Field {
+            name: "f1",
+            doc: Some(Cow::from("field doc")), // absent from the expected output
+            r#type: Schema::Type(Type {
+                r#type: TypeName::Primitive(PrimitiveType::Bytes),
+                attributes: Attributes {
+                    logical_type: None,
+                    additional: HashMap::from([("precision", json!(4))]), // extra attribute
+                },
+            }),
+            default: None,
+            aliases: vec![],
+        };
+        let annotated_record = Schema::Complex(ComplexType::Record(Record {
+            name: "record_with_attrs",
+            namespace: None,
+            doc: Some(Cow::from("This doc should be stripped")),
+            aliases: vec!["alias1", "alias2"],
+            fields: vec![annotated_field],
+            attributes: Attributes {
+                logical_type: None,
+                additional: HashMap::from([("custom_attr", json!("value"))]),
+            },
+        }));
+        let want = r#"{"name":"record_with_attrs","type":"record","fields":[{"name":"f1","type":"bytes"}]}"#;
+        assert_eq!(AvroSchema::generate_canonical_form(&annotated_record).unwrap(), want);
+    }
+
+    #[test]
+    fn test_primitive_mappings() {
+        // Arrow primitive type paired with the Avro type token expected in the schema JSON.
+        let expectations = [
+            (DataType::Boolean, "\"boolean\""),
+            (DataType::Int8, "\"int\""),
+            (DataType::Int16, "\"int\""),
+            (DataType::Int32, "\"int\""),
+            (DataType::Int64, "\"long\""),
+            (DataType::UInt8, "\"int\""),
+            (DataType::UInt16, "\"int\""),
+            (DataType::UInt32, "\"long\""),
+            (DataType::UInt64, "\"long\""),
+            (DataType::Float16, "\"float\""),
+            (DataType::Float32, "\"float\""),
+            (DataType::Float64, "\"double\""),
+            (DataType::Utf8, "\"string\""),
+            (DataType::Binary, "\"bytes\""),
+        ];
+        for (arrow_type, token) in expectations {
+            let schema = single_field_schema(ArrowField::new("col", arrow_type, false));
+            let avro = AvroSchema::try_from(&schema).unwrap();
+            assert_json_contains(&avro.json_string, token);
+        }
+    }
+
+    #[test]
+    fn test_temporal_mappings() {
+        // Arrow temporal type paired with the `logicalType` entry expected in the Avro JSON.
+        let expectations = [
+            (DataType::Date32, "\"logicalType\":\"date\""),
+            (
+                DataType::Time32(TimeUnit::Millisecond),
+                "\"logicalType\":\"time-millis\"",
+            ),
+            (
+                DataType::Time64(TimeUnit::Microsecond),
+                "\"logicalType\":\"time-micros\"",
+            ),
+            (
+                DataType::Timestamp(TimeUnit::Millisecond, None),
+                "\"logicalType\":\"local-timestamp-millis\"",
+            ),
+            (
+                DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
+                "\"logicalType\":\"timestamp-micros\"",
+            ),
+        ];
+        for (arrow_type, needle) in expectations {
+            let schema = single_field_schema(ArrowField::new("ts", arrow_type, true));
+            let avro = AvroSchema::try_from(&schema).unwrap();
+            assert_json_contains(&avro.json_string, needle);
+        }
+    }
+
+    #[test]
+    fn test_decimal_and_uuid() {
+        // Decimal128(25, 2): logical type, precision and scale must all appear in the JSON.
+        let amount = ArrowField::new("amount", DataType::Decimal128(25, 2), false);
+        let decimal_schema = single_field_schema(amount);
+        let decimal_avro = AvroSchema::try_from(&decimal_schema).unwrap();
+        for needle in ["\"logicalType\":\"decimal\"", "\"precision\":25", "\"scale\":2"] {
+            assert_json_contains(&decimal_avro.json_string, needle);
+        }
+        // FixedSizeBinary(16) whose field metadata carries `logicalType = uuid`.
+        let meta = HashMap::from([("logicalType".to_string(), "uuid".to_string())]);
+        let id = ArrowField::new("id", DataType::FixedSizeBinary(16), false).with_metadata(meta);
+        let uuid_schema = single_field_schema(id);
+        let uuid_avro = AvroSchema::try_from(&uuid_schema).unwrap();
+        assert_json_contains(&uuid_avro.json_string, "\"logicalType\":\"uuid\"");
+    }
+
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_interval_duration() {
+        // Interval(MonthDayNano): expect the `duration` logical type with size 12.
+        let month_day_nano = DataType::Interval(IntervalUnit::MonthDayNano);
+        let interval_schema = single_field_schema(ArrowField::new("span", month_day_nano, false));
+        let interval_avro = AvroSchema::try_from(&interval_schema).unwrap();
+        for needle in ["\"logicalType\":\"duration\"", "\"size\":12"] {
+            assert_json_contains(&interval_avro.json_string, needle);
+        }
+
+        // Duration(Nanosecond): expect the `arrow.duration-nanos` logical type.
+        let nanos = DataType::Duration(TimeUnit::Nanosecond);
+        let duration_schema = single_field_schema(ArrowField::new("latency", nanos, false));
+        let duration_avro = AvroSchema::try_from(&duration_schema).unwrap();
+        assert_json_contains(
+            &duration_avro.json_string,
+            "\"logicalType\":\"arrow.duration-nanos\"",
+        );
+    }
+
+    #[test]
+    fn test_complex_types() {
+        // List<Int32> is expected to become an Avro array with an `items` entry.
+        let item = Arc::new(ArrowField::new("item", DataType::Int32, true));
+        let numbers = ArrowField::new("numbers", DataType::List(item), false);
+        let list_avro = AvroSchema::try_from(&single_field_schema(numbers)).unwrap();
+        assert_json_contains(&list_avro.json_string, "\"type\":\"array\"");
+        assert_json_contains(&list_avro.json_string, "\"items\"");
+
+        // Map with Utf8 keys and nullable Boolean values: Avro map with a `values` entry.
+        let entry_fields = Fields::from(vec![
+            ArrowField::new("key", DataType::Utf8, false),
+            ArrowField::new("value", DataType::Boolean, true),
+        ]);
+        let entries = Arc::new(ArrowField::new("entries", DataType::Struct(entry_fields), false));
+        let props = ArrowField::new("props", DataType::Map(entries, false), false);
+        let map_avro = AvroSchema::try_from(&single_field_schema(props)).unwrap();
+        assert_json_contains(&map_avro.json_string, "\"type\":\"map\"");
+        assert_json_contains(&map_avro.json_string, "\"values\"");
+
+        // Nullable struct: Avro record, with "null" appearing in the generated JSON.
+        let person_fields = Fields::from(vec![
+            ArrowField::new("f1", DataType::Int64, false),
+            ArrowField::new("f2", DataType::Utf8, true),
+        ]);
+        let person = ArrowField::new("person", DataType::Struct(person_fields), true);
+        let struct_avro = AvroSchema::try_from(&single_field_schema(person)).unwrap();
+        assert_json_contains(&struct_avro.json_string, "\"type\":\"record\"");
+        assert_json_contains(&struct_avro.json_string, "\"null\"");
+    }
+
+    #[test]
+    fn test_enum_dictionary() {
+        // The enum symbols travel as a JSON array string in the Arrow field metadata.
+        let symbols = HashMap::from([(
+            AVRO_ENUM_SYMBOLS_METADATA_KEY.to_string(),
+            "[\"OPEN\",\"CLOSED\"]".to_string(),
+        )]);
+        let dictionary = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
+        let status = ArrowField::new("status", dictionary, false).with_metadata(symbols);
+        let avro = AvroSchema::try_from(&single_field_schema(status)).unwrap();
+        for needle in ["\"type\":\"enum\"", "\"symbols\":[\"OPEN\",\"CLOSED\"]"] {
+            assert_json_contains(&avro.json_string, needle);
+        }
+    }
+
+    #[test]
+    fn test_run_end_encoded() {
+        // Run-end encoded Utf8 values: the generated JSON must mention "string".
+        let run_ends = Arc::new(ArrowField::new("run_ends", DataType::Int32, false));
+        let values = Arc::new(ArrowField::new("values", DataType::Utf8, false));
+        let text = ArrowField::new("text", DataType::RunEndEncoded(run_ends, values), false);
+        let schema = single_field_schema(text);
+        let avro = AvroSchema::try_from(&schema).unwrap();
+        assert_json_contains(&avro.json_string, "\"string\"");
+    }
+
+    #[test]
+    fn test_dense_union() {
+        // Dense union with non-contiguous type ids 2 (Int32) and 7 (nullable Utf8).
+        let branches: UnionFields = vec![
+            (2i8, Arc::new(ArrowField::new("a", DataType::Int32, false))),
+            (7i8, Arc::new(ArrowField::new("b", DataType::Utf8, true))),
+        ]
+        .into_iter()
+        .collect();
+        let union_field = ArrowField::new("u", DataType::Union(branches, UnionMode::Dense), false);
+        let arrow_schema = single_field_schema(union_field);
+        let avro = AvroSchema::try_from(&arrow_schema)
+            .expect("Arrow Union -> Avro union conversion should succeed");
+        // Walk the generated JSON down to the type of field `u`.
+        let root: serde_json::Value = serde_json::from_str(&avro.json_string).unwrap();
+        let record_fields = root
+            .get("fields")
+            .and_then(Value::as_array)
+            .expect("fields array");
+        let is_u = |f: &&Value| f.get("name").and_then(Value::as_str) == Some("u");
+        let u_type = record_fields
+            .iter()
+            .find(is_u)
+            .expect("field 'u'")
+            .get("type")
+            .expect("u.type");
+        let branches_json = u_type.as_array().expect("u.type must be Avro union array");
+        assert_eq!(branches_json.len(), 2, "expected two union branches");
+        // The first branch is an object carrying the Arrow union mode and type ids.
+        let head = branches_json[0]
+            .as_object()
+            .expect("first branch should be an object with metadata");
+        assert_eq!(head.get("type").and_then(Value::as_str), Some("int"));
+        assert_eq!(head.get("arrowUnionMode").and_then(Value::as_str), Some("dense"));
+        let ids_json = head.get("arrowUnionTypeIds").and_then(Value::as_array);
+        let type_ids: Vec<i64> = ids_json
+            .expect("arrowUnionTypeIds array")
+            .iter()
+            .map(|n| n.as_i64().expect("i64"))
+            .collect();
+        assert_eq!(type_ids, vec![2, 7], "type id ordering should be preserved");
+        assert_eq!(branches_json[1], Value::String("string".into()));
+    }
+
+    /// The Avro JSON generated from a one-column Arrow schema must parse back
+    /// into a complex Avro schema.
+    #[test]
+    fn round_trip_primitive() {
+        let columns = vec![ArrowField::new("f1", DataType::Int32, false)];
+        let arrow_schema = ArrowSchema::new(columns);
+        let avro_schema = AvroSchema::try_from(&arrow_schema).unwrap();
+        let parsed = avro_schema.schema().unwrap();
+        let is_complex = matches!(parsed, Schema::Complex(_));
+        assert!(is_complex);
+    }
+
+    /// Field names that are not valid Avro names must show up sanitized, and
+    /// colliding sanitized names must be disambiguated with a numeric suffix.
+    #[test]
+    fn test_name_generator_sanitization_and_uniqueness() {
+        let columns: Vec<ArrowField> = ["weird-name", "weird name", "123bad"]
+            .iter()
+            .map(|name| ArrowField::new(*name, DataType::FixedSizeBinary(8), false))
+            .collect();
+        let arrow_schema = ArrowSchema::new(columns);
+        let avro = AvroSchema::try_from(&arrow_schema).unwrap();
+        for needle in [
+            "\"name\":\"weird_name\"",
+            "\"name\":\"weird_name_1\"",
+            "\"name\":\"_123bad\"",
+        ] {
+            assert_json_contains(&avro.json_string, needle);
+        }
+    }
+
+    /// A nullable `Date64` column must produce Avro JSON containing the
+    /// `local-timestamp-millis` logical type.
+    #[test]
+    fn test_date64_logical_type_mapping() {
+        let arrow_schema = single_field_schema(ArrowField::new("d", DataType::Date64, true));
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"logicalType\":\"local-timestamp-millis\"");
+    }
+
+    /// A list of microsecond durations must produce Avro JSON containing the
+    /// `arrow.duration-micros` logical type.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_duration_list_extras_propagated() {
+        let item = Arc::new(ArrowField::new(
+            "lat",
+            DataType::Duration(TimeUnit::Microsecond),
+            false,
+        ));
+        let list_field = ArrowField::new("durations", DataType::List(item), false);
+        let arrow_schema = single_field_schema(list_field);
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"logicalType\":\"arrow.duration-micros\"");
+    }
+
+    /// A year-month interval column must be tagged with
+    /// `"arrowIntervalUnit":"yearmonth"` in the generated Avro JSON.
+    #[test]
+    fn test_interval_yearmonth_extra() {
+        let interval_type = DataType::Interval(IntervalUnit::YearMonth);
+        let arrow_schema = single_field_schema(ArrowField::new("iv", interval_type, false));
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"arrowIntervalUnit\":\"yearmonth\"");
+    }
+
+    /// A day-time interval column must be tagged with
+    /// `"arrowIntervalUnit":"daytime"` in the generated Avro JSON.
+    #[test]
+    fn test_interval_daytime_extra() {
+        let interval_type = DataType::Interval(IntervalUnit::DayTime);
+        let arrow_schema = single_field_schema(ArrowField::new("iv_dt", interval_type, false));
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"arrowIntervalUnit\":\"daytime\"");
+    }
+
+    /// A fixed-size list of length 3 must record `"arrowFixedSize":3` in the
+    /// generated Avro JSON.
+    #[test]
+    fn test_fixed_size_list_extra() {
+        let item = Arc::new(ArrowField::new("item", DataType::Int32, false));
+        let list_field = ArrowField::new("triples", DataType::FixedSizeList(item, 3), false);
+        let arrow_schema = single_field_schema(list_field);
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"arrowFixedSize\":3");
+    }
+
+    /// A map whose values are second-resolution durations must produce Avro
+    /// JSON containing the `arrow.duration-seconds` logical type.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_map_duration_value_extra() {
+        let entry_fields = Fields::from(vec![
+            ArrowField::new("key", DataType::Utf8, false),
+            ArrowField::new("value", DataType::Duration(TimeUnit::Second), true),
+        ]);
+        let entries = Arc::new(ArrowField::new(
+            "entries",
+            DataType::Struct(entry_fields),
+            false,
+        ));
+        let map_field = ArrowField::new("metrics", DataType::Map(entries, false), false);
+        let arrow_schema = single_field_schema(map_field);
+        let json = AvroSchema::try_from(&arrow_schema).unwrap().json_string;
+        assert_json_contains(&json, "\"logicalType\":\"arrow.duration-seconds\"");
+    }
+
+    /// Parses a record schema whose fields carry non-string defaults (number,
+    /// array, map, nested record, union) and checks the Arrow field produced
+    /// by the Avro -> Arrow conversion against a hand-built expectation.
+    #[test]
+    fn test_schema_with_non_string_defaults_decodes_successfully() {
+        let schema_json = r#"{
+            "type": "record",
+            "name": "R",
+            "fields": [
+                {"name": "a", "type": "int", "default": 0},
+                {"name": "b", "type": {"type": "array", "items": "long"}, "default": [1, 2, 3]},
+                {"name": "c", "type": {"type": "map", "values": "double"}, "default": {"x": 1.5, "y": 2.5}},
+                {"name": "inner", "type": {"type": "record", "name": "Inner", "fields": [
+                    {"name": "flag", "type": "boolean", "default": true},
+                    {"name": "name", "type": "string", "default": "hi"}
+                ]}, "default": {"flag": false, "name": "d"}},
+                {"name": "u", "type": ["int", "null"], "default": 42}
+            ]
+        }"#;
+        let schema: Schema = serde_json::from_str(schema_json).expect("schema should parse");
+        if !matches!(&schema, Schema::Complex(ComplexType::Record(_))) {
+            panic!("expected record schema, got: {:?}", &schema);
+        }
+
+        // Avro -> Arrow conversion under test.
+        let arrow_field = crate::codec::AvroField::try_from(&schema)
+            .expect("Avro->Arrow conversion should succeed")
+            .field();
+
+        // Metadata map recording the Avro name of a record.
+        let named = |avro_name: &str| {
+            std::collections::HashMap::from([(
+                AVRO_NAME_METADATA_KEY.to_string(),
+                avro_name.to_string(),
+            )])
+        };
+
+        // Expected Arrow shape, one child at a time.
+        let expected_b = ArrowField::new(
+            "b",
+            DataType::List(Arc::new(ArrowField::new(
+                arrow_schema::Field::LIST_FIELD_DEFAULT_NAME,
+                DataType::Int64,
+                false,
+            ))),
+            false,
+        );
+        let map_entries = ArrowField::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                ArrowField::new("key", DataType::Utf8, false),
+                ArrowField::new("value", DataType::Float64, false),
+            ])),
+            false,
+        );
+        let expected_c = ArrowField::new("c", DataType::Map(Arc::new(map_entries), false), false);
+        let expected_inner = ArrowField::new(
+            "inner",
+            DataType::Struct(Fields::from(vec![
+                ArrowField::new("flag", DataType::Boolean, false),
+                ArrowField::new("name", DataType::Utf8, false),
+            ])),
+            false,
+        )
+        .with_metadata(named("Inner"));
+        let root_children = Fields::from(vec![
+            ArrowField::new("a", DataType::Int32, false),
+            expected_b,
+            expected_c,
+            expected_inner,
+            ArrowField::new("u", DataType::Int32, true),
+        ]);
+        let expected =
+            ArrowField::new("R", DataType::Struct(root_children), false).with_metadata(named("R"));
+        assert_eq!(arrow_field, expected);
+    }
+
+    /// `try_from` and `from_arrow_with_options(.., None)` must generate the
+    /// same Avro JSON for the same Arrow schema.
+    #[test]
+    fn default_order_is_consistent() {
+        let columns = vec![ArrowField::new("s", DataType::Utf8, true)];
+        let arrow_schema = ArrowSchema::new(columns);
+        let via_try_from = AvroSchema::try_from(&arrow_schema).unwrap();
+        let via_options = AvroSchema::from_arrow_with_options(&arrow_schema, None).unwrap();
+        assert_eq!(via_try_from.json_string, via_options.json_string);
+    }
+
+    /// Named-type union branches (`record`, `enum`, `fixed`) without a `name`
+    /// must be rejected with a message that identifies the branch type.
+    #[test]
+    fn test_union_branch_missing_name_errors() {
+        for kind in ["record", "enum", "fixed"] {
+            let nameless = json!({ "type": kind });
+            let message = union_branch_signature(&nameless)
+                .unwrap_err()
+                .to_string();
+            let wanted = format!("Union branch '{kind}' missing required 'name'");
+            assert!(
+                message.contains(&wanted),
+                "expected missing-name error for {kind}, got: {message}"
+            );
+        }
+    }
+
+    /// The signature of a named-type union branch must have the form
+    /// `N:<type>:<name>`.
+    #[test]
+    fn test_union_branch_named_type_signature_includes_name() {
+        let cases = [
+            (json!({ "type": "record", "name": "Foo" }), "N:record:Foo"),
+            (
+                json!({ "type": "enum", "name": "Color", "symbols": ["R", "G", "B"] }),
+                "N:enum:Color",
+            ),
+            (
+                json!({ "type": "fixed", "name": "Bytes16", "size": 16 }),
+                "N:fixed:Bytes16",
+            ),
+        ];
+        for (branch, signature) in &cases {
+            assert_eq!(union_branch_signature(branch).unwrap(), *signature);
+        }
+    }
+
+    /// A reader field that lists the writer's field name as an alias must
+    /// resolve to the reader's field name, even without a default.
+    #[test]
+    fn test_record_field_alias_resolution_without_default() {
+        let writer_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"old","type":"int"}]
+        }"#;
+        let reader_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"new","aliases":["old"],"type":"int"}]
+        }"#;
+        let writer: Schema = serde_json::from_str(writer_src).unwrap();
+        let reader: Schema = serde_json::from_str(reader_src).unwrap();
+
+        let builder = AvroFieldBuilder::new(&writer)
+            .with_reader_schema(&reader)
+            .with_utf8view(false)
+            .with_strict_mode(false);
+        let resolved = builder.build().unwrap();
+
+        let children = Fields::from(vec![ArrowField::new("new", DataType::Int32, false)]);
+        let expected = ArrowField::new("R", DataType::Struct(children), false);
+        assert_eq!(resolved.field(), expected);
+    }
+
+    /// In strict mode, an alias shared by two writer fields must make schema
+    /// resolution fail with an "Ambiguous alias" error.
+    #[test]
+    fn test_record_field_alias_ambiguous_in_strict_mode_errors() {
+        let writer_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[
+            {"name":"a","type":"int","aliases":["old"]},
+            {"name":"b","type":"int","aliases":["old"]}
+          ]
+        }"#;
+        let reader_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"target","type":"int","aliases":["old"]}]
+        }"#;
+        let writer: Schema = serde_json::from_str(writer_src).unwrap();
+        let reader: Schema = serde_json::from_str(reader_src).unwrap();
+
+        let outcome = AvroFieldBuilder::new(&writer)
+            .with_reader_schema(&reader)
+            .with_utf8view(false)
+            .with_strict_mode(true)
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(
+            message.contains("Ambiguous alias 'old'"),
+            "expected ambiguous-alias error, got: {message}"
+        );
+    }
+
+    /// In non-strict mode, a writer field whose alias equals the reader field
+    /// name must resolve to that reader field.
+    #[test]
+    fn test_pragmatic_writer_field_alias_mapping_non_strict() {
+        let writer_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"before","type":"int","aliases":["now"]}]
+        }"#;
+        let reader_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"now","type":"int"}]
+        }"#;
+        let writer: Schema = serde_json::from_str(writer_src).unwrap();
+        let reader: Schema = serde_json::from_str(reader_src).unwrap();
+
+        let builder = AvroFieldBuilder::new(&writer)
+            .with_reader_schema(&reader)
+            .with_utf8view(false)
+            .with_strict_mode(false);
+        let resolved = builder.build().unwrap();
+
+        let children = Fields::from(vec![ArrowField::new("now", DataType::Int32, false)]);
+        let expected = ArrowField::new("R", DataType::Struct(children), false);
+        assert_eq!(resolved.field(), expected);
+    }
+
+    /// A reader-only field typed `["null","int"]` with no explicit default
+    /// must resolve successfully and be recorded with a `null` default in the
+    /// Arrow field metadata.
+    #[test]
+    fn test_missing_reader_field_null_first_no_default_is_ok() {
+        let writer_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"a","type":"int"}]
+        }"#;
+        let reader_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[
+            {"name":"a","type":"int"},
+            {"name":"b","type":["null","int"]}
+          ]
+        }"#;
+        let writer: Schema = serde_json::from_str(writer_src).unwrap();
+        let reader: Schema = serde_json::from_str(reader_src).unwrap();
+
+        let builder = AvroFieldBuilder::new(&writer)
+            .with_reader_schema(&reader)
+            .with_utf8view(false)
+            .with_strict_mode(false);
+        let resolved = builder.build().unwrap();
+
+        let null_default = HashMap::from([(
+            AVRO_FIELD_DEFAULT_METADATA_KEY.to_string(),
+            "null".to_string(),
+        )]);
+        let children = Fields::from(vec![
+            ArrowField::new("a", DataType::Int32, false),
+            ArrowField::new("b", DataType::Int32, true).with_metadata(null_default),
+        ]);
+        let expected = ArrowField::new("R", DataType::Struct(children), false);
+        assert_eq!(resolved.field(), expected);
+    }
+
+    /// A reader-only field typed `["int","null"]` with no default must make
+    /// schema resolution fail with a missing-default error.
+    #[test]
+    fn test_missing_reader_field_null_second_without_default_errors() {
+        let writer_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[{"name":"a","type":"int"}]
+        }"#;
+        let reader_src = r#"{
+          "type":"record",
+          "name":"R",
+          "fields":[
+            {"name":"a","type":"int"},
+            {"name":"b","type":["int","null"]}
+          ]
+        }"#;
+        let writer: Schema = serde_json::from_str(writer_src).unwrap();
+        let reader: Schema = serde_json::from_str(reader_src).unwrap();
+
+        let outcome = AvroFieldBuilder::new(&writer)
+            .with_reader_schema(&reader)
+            .with_utf8view(false)
+            .with_strict_mode(false)
+            .build();
+        let message = outcome.unwrap_err().to_string();
+        assert!(
+            message.contains("must have a default value"),
+            "expected missing-default error, got: {message}"
+        );
+    }
+
+    /// With `strip_metadata = false`, an Avro schema embedded in the Arrow
+    /// schema metadata must be returned verbatim.
+    #[test]
+    fn test_from_arrow_with_options_respects_schema_metadata_when_not_stripping() {
+        let embedded =
+            r#"{"type":"record","name":"Injected","fields":[{"name":"ignored","type":"int"}]}"#
+                .to_string();
+        let metadata = HashMap::from([
+            (SCHEMA_METADATA_KEY.to_string(), embedded.clone()),
+            ("custom".to_string(), "123".to_string()),
+        ]);
+        let columns = vec![ArrowField::new("x", DataType::Int32, true)];
+        let arrow_schema = ArrowSchema::new_with_metadata(columns, metadata);
+        let options = AvroSchemaOptions {
+            null_order: Some(Nullability::NullSecond),
+            strip_metadata: false,
+        };
+        let out = AvroSchema::from_arrow_with_options(&arrow_schema, Some(options)).unwrap();
+        assert_eq!(
+            out.json_string, embedded,
+            "When strip_metadata=false and avro.schema is present, return the embedded JSON verbatim"
+        );
+        // The returned JSON must still parse and describe the injected record.
+        let parsed: Value = serde_json::from_str(&out.json_string).unwrap();
+        assert_eq!(parsed.get("type").and_then(Value::as_str), Some("record"));
+        assert_eq!(parsed.get("name").and_then(Value::as_str), Some("Injected"));
+    }
+
+    /// With `strip_metadata = true`, an embedded Avro schema must be ignored
+    /// (the schema is regenerated as `topLevelRecord`) while other metadata
+    /// entries still appear in the output.
+    #[test]
+    fn test_from_arrow_with_options_ignores_schema_metadata_when_stripping_and_keeps_passthrough() {
+        let embedded =
+            r#"{"type":"record","name":"Injected","fields":[{"name":"ignored","type":"int"}]}"#
+                .to_string();
+        let metadata = HashMap::from([
+            (SCHEMA_METADATA_KEY.to_string(), embedded),
+            ("custom_meta".to_string(), "7".to_string()),
+        ]);
+        let columns = vec![ArrowField::new("x", DataType::Int32, true)];
+        let arrow_schema = ArrowSchema::new_with_metadata(columns, metadata);
+        let options = AvroSchemaOptions {
+            null_order: Some(Nullability::NullFirst),
+            strip_metadata: true,
+        };
+        let out = AvroSchema::from_arrow_with_options(&arrow_schema, Some(options)).unwrap();
+        for needle in [
+            "\"type\":\"record\"",
+            "\"name\":\"topLevelRecord\"",
+            "\"custom_meta\":7",
+        ] {
+            assert_json_contains(&out.json_string, needle);
+        }
+    }
+
+    /// With `NullFirst`, a nullable string column must be emitted as the
+    /// union `["null", "string"]`.
+    #[test]
+    fn test_from_arrow_with_options_null_first_for_nullable_primitive() {
+        let arrow_schema = single_field_schema(ArrowField::new("s", DataType::Utf8, true));
+        let options = AvroSchemaOptions {
+            null_order: Some(Nullability::NullFirst),
+            strip_metadata: true,
+        };
+        let out = AvroSchema::from_arrow_with_options(&arrow_schema, Some(options)).unwrap();
+        let parsed: Value = serde_json::from_str(&out.json_string).unwrap();
+        let branches = parsed["fields"][0]["type"]
+            .as_array()
+            .expect("nullable primitive should be Avro union array");
+        assert_eq!(branches[0], Value::from("null"));
+        assert_eq!(branches[1], Value::from("string"));
+    }
+
+    /// With `NullSecond`, a nullable string column must be emitted as the
+    /// union `["string", "null"]`.
+    #[test]
+    fn test_from_arrow_with_options_null_second_for_nullable_primitive() {
+        let arrow_schema = single_field_schema(ArrowField::new("s", DataType::Utf8, true));
+        let options = AvroSchemaOptions {
+            null_order: Some(Nullability::NullSecond),
+            strip_metadata: true,
+        };
+        let out = AvroSchema::from_arrow_with_options(&arrow_schema, Some(options)).unwrap();
+        let parsed: Value = serde_json::from_str(&out.json_string).unwrap();
+        let branches = parsed["fields"][0]["type"]
+            .as_array()
+            .expect("nullable primitive should be Avro union array");
+        assert_eq!(branches[0], Value::from("string"));
+        assert_eq!(branches[1], Value::from("null"));
+    }
+
+    /// The Arrow union extras (`arrowUnionMode`, `arrowUnionTypeIds`) must be
+    /// present on a union branch when `strip_metadata = false` and absent when
+    /// `strip_metadata = true`, in which case the branches are plain type names.
+    #[test]
+    fn test_from_arrow_with_options_union_extras_respected_by_strip_metadata() {
+        let members = vec![
+            (2i8, Arc::new(ArrowField::new("a", DataType::Int32, false))),
+            (7i8, Arc::new(ArrowField::new("b", DataType::Utf8, false))),
+        ];
+        let union_fields: UnionFields = members.into_iter().collect();
+        let arrow_schema = single_field_schema(ArrowField::new(
+            "u",
+            DataType::Union(union_fields, UnionMode::Dense),
+            true,
+        ));
+        // Runs the conversion with null-first ordering and the given strip flag,
+        // returning the parsed Avro JSON.
+        let convert = |strip_metadata: bool| -> Value {
+            let options = AvroSchemaOptions {
+                null_order: Some(Nullability::NullFirst),
+                strip_metadata,
+            };
+            let avro = AvroSchema::from_arrow_with_options(&arrow_schema, Some(options)).unwrap();
+            serde_json::from_str(&avro.json_string).unwrap()
+        };
+
+        // Extras retained.
+        let kept = convert(false);
+        let kept_branches = kept["fields"][0]["type"].as_array().expect("union array");
+        let annotated = kept_branches
+            .iter()
+            .find_map(Value::as_object)
+            .expect("expected an object branch with extras");
+        assert_eq!(annotated.get("type").and_then(Value::as_str), Some("int"));
+        assert_eq!(
+            annotated.get("arrowUnionMode").and_then(Value::as_str),
+            Some("dense")
+        );
+        let mut type_ids: Vec<i64> = Vec::new();
+        for id in annotated["arrowUnionTypeIds"]
+            .as_array()
+            .expect("arrowUnionTypeIds array")
+        {
+            type_ids.push(id.as_i64().expect("i64"));
+        }
+        assert_eq!(type_ids, vec![2, 7]);
+
+        // Extras stripped.
+        let stripped = convert(true);
+        let stripped_branches = stripped["fields"][0]["type"]
+            .as_array()
+            .expect("union array");
+        assert!(
+            stripped_branches
+                .iter()
+                .filter_map(Value::as_object)
+                .all(|m| !m.contains_key("arrowUnionMode")),
+            "extras must be removed when strip_metadata=true"
+        );
+        assert_eq!(stripped_branches[0], Value::String("null".into()));
+        assert_eq!(stripped_branches[1], Value::String("int".into()));
+        assert_eq!(stripped_branches[2], Value::String("string".into()));
+    }
 }
diff --git a/arrow-avro/src/writer/encoder.rs b/arrow-avro/src/writer/encoder.rs
new file mode 100644
index 000000000000..ef9e02c8faf1
--- /dev/null
+++ b/arrow-avro/src/writer/encoder.rs
@@ -0,0 +1,3048 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Avro Encoder for Arrow types.
+
+use crate::codec::{AvroDataType, AvroField, Codec};
+use crate::schema::{Fingerprint, Nullability, Prefix};
+use arrow_array::cast::AsArray;
+use arrow_array::types::{
+    ArrowPrimitiveType, Date32Type, DurationMicrosecondType, DurationMillisecondType,
+    DurationNanosecondType, DurationSecondType, Float32Type, Float64Type, Int16Type, Int32Type,
+    Int64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
+    Time32MillisecondType, Time64MicrosecondType, TimestampMicrosecondType,
+    TimestampMillisecondType,
+};
+use arrow_array::types::{
+    RunEndIndexType, Time32SecondType, TimestampNanosecondType, TimestampSecondType,
+};
+use arrow_array::{
+    Array, BinaryViewArray, Decimal128Array, Decimal256Array, DictionaryArray,
+    FixedSizeBinaryArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
+    GenericListViewArray, GenericStringArray, LargeListArray, LargeListViewArray, ListArray,
+    ListViewArray, MapArray, OffsetSizeTrait, PrimitiveArray, RecordBatch, RunArray, StringArray,
+    StringViewArray, StructArray, UnionArray,
+};
+#[cfg(feature = "small_decimals")]
+use arrow_array::{Decimal32Array, Decimal64Array};
+use arrow_buffer::{ArrowNativeType, NullBuffer};
+use arrow_schema::{
+    ArrowError, DataType, Field, IntervalUnit, Schema as ArrowSchema, TimeUnit, UnionMode,
+};
+use std::io::Write;
+use std::sync::Arc;
+use uuid::Uuid;
+
+/// Write one Avro `long`: the value is ZigZag-mapped to an unsigned integer and
+/// emitted as a little-endian base-128 varint, assembled in a stack buffer and
+/// written with a single `write_all`.
+///
+/// Returns `ArrowError::IoError` if the underlying writer fails.
+///
+/// Spec: <https://avro.apache.org/docs/1.11.1/specification/#binary-encoding>
+#[inline]
+pub(crate) fn write_long<W: Write + ?Sized>(out: &mut W, value: i64) -> Result<(), ArrowError> {
+    // ZigZag: interleave negative and non-negative values so small magnitudes stay short.
+    let mut remaining = ((value << 1) ^ (value >> 63)) as u64;
+    // A 64-bit varint needs at most 10 groups of 7 bits.
+    let mut encoded = [0u8; 10];
+    let mut used = 0usize;
+    loop {
+        let low_bits = (remaining & 0x7F) as u8;
+        remaining >>= 7;
+        if remaining == 0 {
+            // Final group: continuation bit clear.
+            encoded[used] = low_bits;
+            used += 1;
+            break;
+        }
+        // More groups follow: set the continuation bit.
+        encoded[used] = low_bits | 0x80;
+        used += 1;
+    }
+    match out.write_all(&encoded[..used]) {
+        Ok(()) => Ok(()),
+        Err(e) => Err(ArrowError::IoError(format!("write long: {e}"), e)),
+    }
+}
+
+/// Write one Avro `int` by widening it to `i64` and delegating to `write_long`.
+#[inline]
+fn write_int<W: Write + ?Sized>(out: &mut W, value: i32) -> Result<(), ArrowError> {
+    let widened = i64::from(value);
+    write_long(out, widened)
+}
+
+/// Write `bytes` preceded by its length encoded with `write_long`.
+///
+/// Returns `ArrowError::IoError` if either write fails.
+#[inline]
+fn write_len_prefixed<W: Write + ?Sized>(out: &mut W, bytes: &[u8]) -> Result<(), ArrowError> {
+    let byte_count = bytes.len() as i64;
+    write_long(out, byte_count)?;
+    match out.write_all(bytes) {
+        Ok(()) => Ok(()),
+        Err(e) => Err(ArrowError::IoError(format!("write bytes: {e}"), e)),
+    }
+}
+
+/// Write a boolean as a single byte: `1` for `true`, `0` for `false`.
+#[inline]
+fn write_bool<W: Write + ?Sized>(out: &mut W, v: bool) -> Result<(), ArrowError> {
+    let encoded = [u8::from(v)];
+    match out.write_all(&encoded) {
+        Ok(()) => Ok(()),
+        Err(e) => Err(ArrowError::IoError(format!("write bool: {e}"), e)),
+    }
+}
+
+/// Return the shortest suffix of `be` that encodes the same signed integer in
+/// two's-complement big-endian form (used for Avro decimal over `bytes`).
+///
+/// Leading bytes equal to the sign byte (`0x00` for non-negative, `0xFF` for
+/// negative) are redundant and dropped, except that one is kept whenever the
+/// next byte's top bit would otherwise flip the sign. An input made up solely
+/// of sign bytes collapses to its last byte; an empty input is returned as is.
+///
+/// See Avro spec: decimal over `bytes` uses two's-complement big-endian
+/// representation of the unscaled integer value. 1.11.1 specification.
+#[inline]
+fn minimal_twos_complement(be: &[u8]) -> &[u8] {
+    let first = match be.first() {
+        Some(&byte) => byte,
+        None => return be,
+    };
+    let sign_byte: u8 = if first & 0x80 != 0 { 0xFF } else { 0x00 };
+    // Length of the leading run of bytes equal to the sign byte.
+    let run = be.iter().take_while(|&&byte| byte == sign_byte).count();
+    match run {
+        // The first byte already carries information.
+        0 => be,
+        // Nothing but sign bytes: a single one represents the value.
+        r if r == be.len() => &be[r - 1..],
+        r => {
+            // If the first non-sign byte's top bit disagrees with the sign,
+            // one sign byte has to stay in front of it.
+            let needs_sign_byte = ((be[r] ^ sign_byte) & 0x80) != 0;
+            let start = if needs_sign_byte { r - 1 } else { r };
+            &be[start..]
+        }
+    }
+}
+
+/// Write the big-endian two's-complement integer `src_be` as exactly `n` bytes.
+///
+/// - Same length as `n`: the bytes are written unchanged.
+/// - Shorter than `n`: the value is sign-extended by writing `n - len` copies
+///   of the sign byte (`0x00` for non-negative, `0xFF` for negative) first.
+/// - Longer than `n`: the leading `len - n` bytes are dropped. This fails with
+///   `ArrowError::InvalidArgumentError` unless every dropped byte equals the
+///   sign byte and the first retained byte has the same sign bit. When `n` is
+///   zero and every byte is a sign byte, nothing is written and `Ok` is returned.
+///
+/// Writer failures are reported as `ArrowError::IoError`.
+///
+/// Used for encoding Avro decimal values into `fixed(N)` fields.
+#[inline]
+fn write_sign_extended<W: Write + ?Sized>(
+    out: &mut W,
+    src_be: &[u8],
+    n: usize,
+) -> Result<(), ArrowError> {
+    // Shared wrapper for every writer failure in this function.
+    let io_err =
+        |e: std::io::Error| ArrowError::IoError(format!("write decimal fixed: {e}"), e);
+    let len = src_be.len();
+    // An empty input is treated as non-negative.
+    let negative = src_be.first().is_some_and(|&byte| byte & 0x80 != 0);
+    let sign_byte: u8 = if negative { 0xFF } else { 0x00 };
+    match len.cmp(&n) {
+        std::cmp::Ordering::Equal => out.write_all(src_be).map_err(io_err),
+        std::cmp::Ordering::Greater => {
+            let (dropped, kept) = src_be.split_at(len - n);
+            let dropped_is_sign = dropped.iter().all(|&byte| byte == sign_byte);
+            let fits = match kept.first() {
+                // n == 0: representable only if the whole input is sign bytes.
+                None => dropped_is_sign,
+                // The first retained byte must keep the original sign bit.
+                Some(&lead) => dropped_is_sign && ((lead ^ sign_byte) & 0x80) == 0,
+            };
+            if !fits {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Decimal value with {len} bytes cannot be represented in {n} bytes without overflow",
+                )));
+            }
+            if kept.is_empty() {
+                // Nothing to emit for a zero-width target.
+                return Ok(());
+            }
+            out.write_all(kept).map_err(io_err)
+        }
+        std::cmp::Ordering::Less => {
+            // Emit the padding in chunks of at most 64 bytes from a local
+            // array so no heap allocation is needed, then the payload.
+            let pad = [sign_byte; 64];
+            let mut remaining = n - len;
+            while remaining > 0 {
+                let chunk = remaining.min(pad.len());
+                out.write_all(&pad[..chunk]).map_err(io_err)?;
+                remaining -= chunk;
+            }
+            out.write_all(src_be).map_err(io_err)
+        }
+    }
+}
+
+/// Emit the one-byte union branch marker that precedes an optional value.
+///
+/// The byte is obtained from `union_value_branch_byte(null_order, is_null)`.
+/// Avro union branch indices are 0-based:
+/// - Null-first (default): null => 0, value => 1
+/// - Null-second (Impala): value => 0, null => 1
+///
+/// Returns `ArrowError::IoError` if the underlying writer fails.
+fn write_optional_index<W: Write + ?Sized>(
+    out: &mut W,
+    is_null: bool,
+    null_order: Nullability,
+) -> Result<(), ArrowError> {
+    let branch = [union_value_branch_byte(null_order, is_null)];
+    match out.write_all(&branch) {
+        Ok(()) => Ok(()),
+        Err(e) => Err(ArrowError::IoError(format!("write union branch: {e}"), e)),
+    }
+}
+
+/// Nullability state stored alongside an `Encoder` inside a `FieldEncoder`.
+#[derive(Debug, Clone)]
+enum NullState<'a> {
+    /// Carries no null information.
+    NonNullable,
+    /// Carries only a single precomputed byte.
+    /// NOTE(review): presumably the union branch byte written before every
+    /// value when the site is nullable but the array holds no nulls; confirm
+    /// against the encode path.
+    NullableNoNulls {
+        union_value_byte: u8,
+    },
+    /// Carries a borrowed null buffer together with the Avro null ordering.
+    /// NOTE(review): presumably consulted per row to pick the union branch;
+    /// confirm against the encode path.
+    Nullable {
+        nulls: &'a NullBuffer,
+        null_order: Nullability,
+    },
+}
+
+/// Arrow to Avro FieldEncoder:
+/// - Holds the inner `Encoder` (by value)
+/// - Carries the per-site nullability **state** as a single enum that enforces invariants
+pub(crate) struct FieldEncoder<'a> {
+    // Value encoder selected for this field's array.
+    encoder: Encoder<'a>,
+    // Nullability state for this field; see `NullState` for the possible cases.
+    null_state: NullState<'a>,
+}
+
+impl<'a> FieldEncoder<'a> {
+    fn make_encoder(
+        array: &'a dyn Array,
+        plan: &FieldPlan,
+        nullability: Option<Nullability>,
+    ) -> Result<Self, ArrowError> {
+        let encoder = match plan {
+            FieldPlan::Scalar => match array.data_type() {
+                DataType::Null => Encoder::Null,
+                DataType::Boolean => Encoder::Boolean(BooleanEncoder(array.as_boolean())),
+                DataType::Utf8 => {
+                    Encoder::Utf8(Utf8GenericEncoder::<i32>(array.as_string::<i32>()))
+                }
+                DataType::LargeUtf8 => {
+                    Encoder::Utf8Large(Utf8GenericEncoder::<i64>(array.as_string::<i64>()))
+                }
+                DataType::Utf8View => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<StringViewArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected StringViewArray".into())
+                        })?;
+                    Encoder::Utf8View(Utf8ViewEncoder(arr))
+                }
+                DataType::BinaryView => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<BinaryViewArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected BinaryViewArray".into())
+                        })?;
+                    Encoder::BinaryView(BinaryViewEncoder(arr))
+                }
+                DataType::Int32 => Encoder::Int(IntEncoder(array.as_primitive::<Int32Type>())),
+                DataType::Int64 => Encoder::Long(LongEncoder(array.as_primitive::<Int64Type>())),
+                DataType::Date32 => Encoder::Date32(IntEncoder(array.as_primitive::<Date32Type>())),
+                DataType::Date64 => {
+                    return Err(ArrowError::NotYetImplemented(
+                        "Avro logical type 'date' is days since epoch (int). Arrow Date64 (ms) has no direct Avro logical type; cast to Date32 or to a Timestamp."
+                            .into(),
+                    ));
+                }
+                DataType::Time32(TimeUnit::Second) => Encoder::Time32SecsToMillis(
+                    Time32SecondsToMillisEncoder(array.as_primitive::<Time32SecondType>()),
+                ),
+                DataType::Time32(TimeUnit::Millisecond) => {
+                    Encoder::Time32Millis(IntEncoder(array.as_primitive::<Time32MillisecondType>()))
+                }
+                DataType::Time32(TimeUnit::Microsecond) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "Arrow Time32 only supports Second or Millisecond. Use Time64 for microseconds."
+                            .into(),
+                    ));
+                }
+                DataType::Time32(TimeUnit::Nanosecond) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "Arrow Time32 only supports Second or Millisecond. Use Time64 for nanoseconds."
+                            .into(),
+                    ));
+                }
+                DataType::Time64(TimeUnit::Microsecond) => Encoder::Time64Micros(LongEncoder(
+                    array.as_primitive::<Time64MicrosecondType>(),
+                )),
+                DataType::Time64(TimeUnit::Nanosecond) => {
+                    return Err(ArrowError::NotYetImplemented(
+                        "Avro writer does not support time-nanos; cast to Time64(Microsecond)."
+                            .into(),
+                    ));
+                }
+                DataType::Time64(TimeUnit::Millisecond) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "Arrow Time64 with millisecond unit is not a valid Arrow type (use Time32 for millis)."
+                            .into(),
+                    ));
+                }
+                DataType::Time64(TimeUnit::Second) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "Arrow Time64 with second unit is not a valid Arrow type (use Time32 for seconds)."
+                            .into(),
+                    ));
+                }
+                DataType::Float32 => {
+                    Encoder::Float32(F32Encoder(array.as_primitive::<Float32Type>()))
+                }
+                DataType::Float64 => {
+                    Encoder::Float64(F64Encoder(array.as_primitive::<Float64Type>()))
+                }
+                DataType::Binary => Encoder::Binary(BinaryEncoder(array.as_binary::<i32>())),
+                DataType::LargeBinary => {
+                    Encoder::LargeBinary(BinaryEncoder(array.as_binary::<i64>()))
+                }
+                DataType::FixedSizeBinary(_len) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<FixedSizeBinaryArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected FixedSizeBinaryArray".into())
+                        })?;
+                    Encoder::Fixed(FixedEncoder(arr))
+                }
+                DataType::Timestamp(unit, _) => match unit {
+                    TimeUnit::Second => {
+                        Encoder::TimestampSecsToMillis(TimestampSecondsToMillisEncoder(
+                            array.as_primitive::<TimestampSecondType>(),
+                        ))
+                    }
+                    TimeUnit::Millisecond => Encoder::TimestampMillis(LongEncoder(
+                        array.as_primitive::<TimestampMillisecondType>(),
+                    )),
+                    TimeUnit::Microsecond => Encoder::TimestampMicros(LongEncoder(
+                        array.as_primitive::<TimestampMicrosecondType>(),
+                    )),
+                    TimeUnit::Nanosecond => Encoder::TimestampNanos(LongEncoder(
+                        array.as_primitive::<TimestampNanosecondType>(),
+                    )),
+                },
+                DataType::Interval(unit) => match unit {
+                    IntervalUnit::MonthDayNano => Encoder::IntervalMonthDayNano(DurationEncoder(
+                        array.as_primitive::<IntervalMonthDayNanoType>(),
+                    )),
+                    IntervalUnit::YearMonth => Encoder::IntervalYearMonth(DurationEncoder(
+                        array.as_primitive::<IntervalYearMonthType>(),
+                    )),
+                    IntervalUnit::DayTime => Encoder::IntervalDayTime(DurationEncoder(
+                        array.as_primitive::<IntervalDayTimeType>(),
+                    )),
+                },
+                DataType::Duration(tu) => match tu {
+                    TimeUnit::Second => Encoder::DurationSeconds(LongEncoder(
+                        array.as_primitive::<DurationSecondType>(),
+                    )),
+                    TimeUnit::Millisecond => Encoder::DurationMillis(LongEncoder(
+                        array.as_primitive::<DurationMillisecondType>(),
+                    )),
+                    TimeUnit::Microsecond => Encoder::DurationMicros(LongEncoder(
+                        array.as_primitive::<DurationMicrosecondType>(),
+                    )),
+                    TimeUnit::Nanosecond => Encoder::DurationNanos(LongEncoder(
+                        array.as_primitive::<DurationNanosecondType>(),
+                    )),
+                },
+                other => {
+                    return Err(ArrowError::NotYetImplemented(format!(
+                        "Avro scalar type not yet supported: {other:?}"
+                    )));
+                }
+            },
+            FieldPlan::Struct { bindings } => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<StructArray>()
+                    .ok_or_else(|| ArrowError::SchemaError("Expected StructArray".into()))?;
+                Encoder::Struct(Box::new(StructEncoder::try_new(arr, bindings)?))
+            }
+            FieldPlan::List {
+                items_nullability,
+                item_plan,
+            } => match array.data_type() {
+                DataType::List(_) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<ListArray>()
+                        .ok_or_else(|| ArrowError::SchemaError("Expected ListArray".into()))?;
+                    Encoder::List(Box::new(ListEncoder32::try_new(
+                        arr,
+                        *items_nullability,
+                        item_plan.as_ref(),
+                    )?))
+                }
+                DataType::LargeList(_) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<LargeListArray>()
+                        .ok_or_else(|| ArrowError::SchemaError("Expected LargeListArray".into()))?;
+                    Encoder::LargeList(Box::new(ListEncoder64::try_new(
+                        arr,
+                        *items_nullability,
+                        item_plan.as_ref(),
+                    )?))
+                }
+                DataType::ListView(_) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<ListViewArray>()
+                        .ok_or_else(|| ArrowError::SchemaError("Expected ListViewArray".into()))?;
+                    Encoder::ListView(Box::new(ListViewEncoder32::try_new(
+                        arr,
+                        *items_nullability,
+                        item_plan.as_ref(),
+                    )?))
+                }
+                DataType::LargeListView(_) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<LargeListViewArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected LargeListViewArray".into())
+                        })?;
+                    Encoder::LargeListView(Box::new(ListViewEncoder64::try_new(
+                        arr,
+                        *items_nullability,
+                        item_plan.as_ref(),
+                    )?))
+                }
+                DataType::FixedSizeList(_, _) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<FixedSizeListArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected FixedSizeListArray".into())
+                        })?;
+                    Encoder::FixedSizeList(Box::new(FixedSizeListEncoder::try_new(
+                        arr,
+                        *items_nullability,
+                        item_plan.as_ref(),
+                    )?))
+                }
+                other => {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Avro array site requires Arrow List/LargeList/ListView/LargeListView/FixedSizeList, found: {other:?}"
+                    )));
+                }
+            },
+            FieldPlan::Decimal { size } => match array.data_type() {
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal32(_, _) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<Decimal32Array>()
+                        .ok_or_else(|| ArrowError::SchemaError("Expected Decimal32Array".into()))?;
+                    Encoder::Decimal32(DecimalEncoder::<4, Decimal32Array>::new(arr, *size))
+                }
+                #[cfg(feature = "small_decimals")]
+                DataType::Decimal64(_, _) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<Decimal64Array>()
+                        .ok_or_else(|| ArrowError::SchemaError("Expected Decimal64Array".into()))?;
+                    Encoder::Decimal64(DecimalEncoder::<8, Decimal64Array>::new(arr, *size))
+                }
+                DataType::Decimal128(_, _) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<Decimal128Array>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected Decimal128Array".into())
+                        })?;
+                    Encoder::Decimal128(DecimalEncoder::<16, Decimal128Array>::new(arr, *size))
+                }
+                DataType::Decimal256(_, _) => {
+                    let arr = array
+                        .as_any()
+                        .downcast_ref::<Decimal256Array>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected Decimal256Array".into())
+                        })?;
+                    Encoder::Decimal256(DecimalEncoder::<32, Decimal256Array>::new(arr, *size))
+                }
+                other => {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Avro decimal site requires Arrow Decimal 32, 64, 128, or 256, found: {other:?}"
+                    )));
+                }
+            },
+            FieldPlan::Uuid => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<FixedSizeBinaryArray>()
+                    .ok_or_else(|| {
+                        ArrowError::SchemaError("Expected FixedSizeBinaryArray".into())
+                    })?;
+                Encoder::Uuid(UuidEncoder(arr))
+            }
+            FieldPlan::Map {
+                values_nullability,
+                value_plan,
+            } => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<MapArray>()
+                    .ok_or_else(|| ArrowError::SchemaError("Expected MapArray".into()))?;
+                Encoder::Map(Box::new(MapEncoder::try_new(
+                    arr,
+                    *values_nullability,
+                    value_plan.as_ref(),
+                )?))
+            }
+            FieldPlan::Enum { symbols } => match array.data_type() {
+                DataType::Dictionary(key_dt, value_dt) => {
+                    if **key_dt != DataType::Int32 || **value_dt != DataType::Utf8 {
+                        return Err(ArrowError::SchemaError(
+                            "Avro enum requires Dictionary<Int32, Utf8>".into(),
+                        ));
+                    }
+                    let dict = array
+                        .as_any()
+                        .downcast_ref::<DictionaryArray<Int32Type>>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Expected DictionaryArray<Int32>".into())
+                        })?;
+                    let values = dict
+                        .values()
+                        .as_any()
+                        .downcast_ref::<StringArray>()
+                        .ok_or_else(|| {
+                            ArrowError::SchemaError("Dictionary values must be Utf8".into())
+                        })?;
+                    if values.len() != symbols.len() {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Enum symbol length {} != dictionary size {}",
+                            symbols.len(),
+                            values.len()
+                        )));
+                    }
+                    for i in 0..values.len() {
+                        if values.value(i) != symbols[i].as_str() {
+                            return Err(ArrowError::SchemaError(format!(
+                                "Enum symbol mismatch at {i}: schema='{}' dict='{}'",
+                                symbols[i],
+                                values.value(i)
+                            )));
+                        }
+                    }
+                    let keys = dict.keys();
+                    Encoder::Enum(EnumEncoder { keys })
+                }
+                other => {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Avro enum site requires DataType::Dictionary, found: {other:?}"
+                    )));
+                }
+            },
+            FieldPlan::Union { bindings } => {
+                let arr = array
+                    .as_any()
+                    .downcast_ref::<UnionArray>()
+                    .ok_or_else(|| ArrowError::SchemaError("Expected UnionArray".into()))?;
+                Encoder::Union(Box::new(UnionEncoder::try_new(arr, bindings)?))
+            }
+            FieldPlan::RunEndEncoded {
+                values_nullability,
+                value_plan,
+            } => {
+                // Helper closure to build a typed RunEncodedEncoder<R>
+                let build = |run_arr_any: &'a dyn Array| -> Result<Encoder<'a>, ArrowError> {
+                    if let Some(arr) = run_arr_any.as_any().downcast_ref::<RunArray<Int16Type>>() {
+                        return Ok(Encoder::RunEncoded16(Box::new(RunEncodedEncoder::<
+                            Int16Type,
+                        >::new(
+                            arr,
+                            FieldEncoder::make_encoder(
+                                arr.values().as_ref(),
+                                value_plan.as_ref(),
+                                *values_nullability,
+                            )?,
+                        ))));
+                    }
+                    if let Some(arr) = run_arr_any.as_any().downcast_ref::<RunArray<Int32Type>>() {
+                        return Ok(Encoder::RunEncoded32(Box::new(RunEncodedEncoder::<
+                            Int32Type,
+                        >::new(
+                            arr,
+                            FieldEncoder::make_encoder(
+                                arr.values().as_ref(),
+                                value_plan.as_ref(),
+                                *values_nullability,
+                            )?,
+                        ))));
+                    }
+                    if let Some(arr) = run_arr_any.as_any().downcast_ref::<RunArray<Int64Type>>() {
+                        return Ok(Encoder::RunEncoded64(Box::new(RunEncodedEncoder::<
+                            Int64Type,
+                        >::new(
+                            arr,
+                            FieldEncoder::make_encoder(
+                                arr.values().as_ref(),
+                                value_plan.as_ref(),
+                                *values_nullability,
+                            )?,
+                        ))));
+                    }
+                    Err(ArrowError::SchemaError(
+                        "Unsupported run-ends index type for RunEndEncoded; expected Int16/Int32/Int64"
+                            .into(),
+                    ))
+                };
+                build(array)?
+            }
+        };
+        // Compute the effective null state from writer-declared nullability and data nulls.
+        let null_state = match nullability {
+            None => NullState::NonNullable,
+            Some(null_order) => {
+                match array.nulls() {
+                    Some(nulls) if array.null_count() > 0 => {
+                        NullState::Nullable { nulls, null_order }
+                    }
+                    _ => NullState::NullableNoNulls {
+                        // Nullable site with no null buffer for this view
+                        union_value_byte: union_value_branch_byte(null_order, false),
+                    },
+                }
+            }
+        };
+        Ok(Self {
+            encoder,
+            null_state,
+        })
+    }
+
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        // Nullable sites emit their union branch marker ahead of the value; a null
+        // row stops right after that marker because it has no value to write.
+        match &self.null_state {
+            NullState::NonNullable => {}
+            NullState::NullableNoNulls { union_value_byte } => out
+                .write_all(&[*union_value_byte])
+                .map_err(|e| ArrowError::IoError(format!("write union value branch: {e}"), e))?,
+            NullState::Nullable { nulls, null_order } => match nulls.is_null(idx) {
+                true => return write_optional_index(out, true, *null_order),
+                false => write_optional_index(out, false, *null_order)?,
+            },
+        }
+        self.encoder.encode(out, idx)
+    }
+}
+
+fn union_value_branch_byte(null_order: Nullability, is_null: bool) -> u8 {
+    let differs = (null_order == Nullability::default()) != is_null; // order flag vs. nullness
+    if differs { 0x02 } else { 0x00 }
+}
+
+/// Per-site encoder plan for a field. The plan mirrors the Avro schema's nesting so that
+/// the branch order declared at each nested optional site can be honored exactly.
+#[derive(Debug, Clone)]
+enum FieldPlan {
+    /// Leaf scalar/logical type; the encoder is picked from the Arrow array's data type
+    Scalar,
+    /// Avro record bound to an Arrow Struct; one binding per Avro field, in Avro order
+    Struct { bindings: Vec<FieldBinding> },
+    /// Avro array (any Arrow list flavor) with item-site nullability and the item plan
+    List {
+        items_nullability: Option<Nullability>,
+        item_plan: Box<FieldPlan>,
+    },
+    /// Avro decimal logical type: `size: None` => bytes(decimal), `Some(n)` => fixed(n)
+    Decimal { size: Option<usize> },
+    /// Avro UUID logical type, sourced from an Arrow `FixedSizeBinary(16)` field
+    Uuid,
+    /// Avro map with value-site nullability and the nested plan for the map values
+    Map {
+        values_nullability: Option<Nullability>,
+        value_plan: Box<FieldPlan>,
+    },
+    /// Avro enum; maps to Arrow `Dictionary<Int32, Utf8>` whose dictionary values must
+    /// equal the Avro enum `symbols`, in the same order.
+    Enum { symbols: Arc<[String]> },
+    /// Avro union, maps to Arrow Union.
+    Union { bindings: Vec<FieldBinding> },
+    /// Avro RunEndEncoded site. Values are encoded per logical row by mapping the
+    /// row index to its containing run and emitting that run's value with `value_plan`.
+    RunEndEncoded {
+        values_nullability: Option<Nullability>,
+        value_plan: Box<FieldPlan>,
+    },
+}
+
+#[derive(Debug, Clone)]
+struct FieldBinding {
+    /// Index of the Arrow field bound to this Avro site (e.g. schema column or struct child)
+    arrow_index: usize,
+    /// Null-branch order when this site is nullable; `None` for required fields
+    nullability: Option<Nullability>,
+    /// Encoding plan for this site's value, including any nested sites
+    plan: FieldPlan,
+}
+
+/// Builder that resolves an Avro root against an Arrow schema into a `RecordEncoder`
+#[derive(Debug)]
+pub(crate) struct RecordEncoderBuilder<'a> {
+    avro_root: &'a AvroField, // root Avro field; `build` requires it to be a record
+    arrow_schema: &'a ArrowSchema, // Arrow schema whose fields are matched by name
+    fingerprint: Option<Fingerprint>, // if set, turned into the per-record prefix
+}
+
+impl<'a> RecordEncoderBuilder<'a> {
+    /// Starts a builder for the given Avro root field and Arrow schema; no
+    /// fingerprint is configured yet.
+    pub(crate) fn new(avro_root: &'a AvroField, arrow_schema: &'a ArrowSchema) -> Self {
+        Self {
+            avro_root,
+            arrow_schema,
+            fingerprint: None,
+        }
+    }
+
+    /// Sets (or clears, with `None`) the fingerprint used to derive the record prefix.
+    pub(crate) fn with_fingerprint(mut self, fingerprint: Option<Fingerprint>) -> Self {
+        self.fingerprint = fingerprint;
+        self
+    }
+
+    /// Produces the `RecordEncoder`: visits the root record's fields in Avro order and
+    /// binds each one, by name, to a column of the Arrow schema.
+    pub(crate) fn build(self) -> Result<RecordEncoder, ArrowError> {
+        let schema = self.arrow_schema;
+        let Codec::Struct(root_fields) = self.avro_root.data_type().codec() else {
+            return Err(ArrowError::SchemaError(
+                "Top-level Avro schema must be a record/struct".into(),
+            ));
+        };
+        let columns = root_fields
+            .iter()
+            .map(|root_field| {
+                let name = root_field.name();
+                let arrow_index = schema.index_of(name).map_err(|e| {
+                    ArrowError::SchemaError(format!("Schema mismatch for field '{name}': {e}"))
+                })?;
+                let site_dt = root_field.data_type();
+                Ok(FieldBinding {
+                    arrow_index,
+                    nullability: site_dt.nullability(),
+                    plan: FieldPlan::build(site_dt, schema.field(arrow_index))?,
+                })
+            })
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+        let prefix = self.fingerprint.map(|fp| fp.make_prefix());
+        Ok(RecordEncoder { columns, prefix })
+    }
+}
+
+/// A pre-computed plan for encoding a `RecordBatch` to Avro.
+///
+/// Derived from an Avro schema and an Arrow schema: each top-level Avro field is
+/// bound to an Arrow column and carries a nested encoding plan for that column.
+#[derive(Debug, Clone)]
+pub(crate) struct RecordEncoder {
+    /// One binding per top-level Avro field, in Avro field order.
+    columns: Vec<FieldBinding>,
+    /// Optional pre-built, variable-length prefix written before each record.
+    prefix: Option<Prefix>,
+}
+
+impl RecordEncoder {
+    /// Builds one `FieldEncoder` per planned column, borrowing the arrays of `batch`.
+    ///
+    /// Fails with `SchemaError` when a planned column index is missing from `batch`;
+    /// errors from `FieldEncoder::make_encoder` are passed through unchanged.
+    fn prepare_for_batch<'a>(
+        &'a self,
+        batch: &'a RecordBatch,
+    ) -> Result<Vec<FieldEncoder<'a>>, ArrowError> {
+        let arrays = batch.columns();
+        self.columns
+            .iter()
+            .map(|binding| {
+                let arrow_index = binding.arrow_index;
+                let Some(array) = arrays.get(arrow_index) else {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Column index {arrow_index} out of range"
+                    )));
+                };
+                // Without `avro_custom_types`, a run-end-encoded column is handed no
+                // site-level nullability, whatever its binding declares.
+                #[cfg(not(feature = "avro_custom_types"))]
+                let site_nullability = match &binding.plan {
+                    FieldPlan::RunEndEncoded { .. } => None,
+                    _ => binding.nullability,
+                };
+                #[cfg(feature = "avro_custom_types")]
+                let site_nullability = binding.nullability;
+                FieldEncoder::make_encoder(array.as_ref(), &binding.plan, site_nullability)
+            })
+            .collect()
+    }
+
+    /// Encode a `RecordBatch` using this encoder plan.
+    ///
+    /// Rows are written in order; each row is the optional prefix followed by every
+    /// planned column's value. Encoding stops at the first error.
+    ///
+    /// Tip: Wrap `out` in a `std::io::BufWriter` to reduce the overhead of many small writes.
+    pub(crate) fn encode<W: Write>(
+        &self,
+        out: &mut W,
+        batch: &RecordBatch,
+    ) -> Result<(), ArrowError> {
+        let mut column_encoders = self.prepare_for_batch(batch)?;
+        // Resolve the prefix bytes once; the same bytes precede every record.
+        let prefix = self.prefix.as_ref().map(|p| p.as_slice());
+        for row in 0..batch.num_rows() {
+            if let Some(bytes) = prefix {
+                out.write_all(bytes)
+                    .map_err(|e| ArrowError::IoError(format!("write prefix: {e}"), e))?;
+            }
+            for enc in &mut column_encoders {
+                enc.encode(out, row)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+fn find_struct_child_index(fields: &arrow_schema::Fields, name: &str) -> Option<usize> {
+    fields.iter().map(|f| f.name()).position(|n| n == name) // first exact-name match
+}
+
+fn find_map_value_field_index(fields: &arrow_schema::Fields) -> Option<usize> {
+    // Look up "value", then "values"; otherwise take the second of exactly two children.
+    let fallback = (fields.len() == 2).then_some(1);
+    let by_name = |name| find_struct_child_index(fields, name);
+    by_name("value").or_else(|| by_name("values")).or(fallback)
+}
+
+impl FieldPlan {
+    fn build(avro_dt: &AvroDataType, arrow_field: &Field) -> Result<Self, ArrowError> {
+        #[cfg(not(feature = "avro_custom_types"))]
+        if let DataType::RunEndEncoded(_re_field, values_field) = arrow_field.data_type() {
+            let values_nullability = avro_dt.nullability();
+            let value_site_dt: &AvroDataType = match avro_dt.codec() {
+                Codec::Union(branches, _, _) => branches
+                    .iter()
+                    .find(|b| !matches!(b.codec(), Codec::Null))
+                    .ok_or_else(|| {
+                        ArrowError::SchemaError(
+                            "Avro union at RunEndEncoded site has no non-null branch".into(),
+                        )
+                    })?,
+                _ => avro_dt,
+            };
+            return Ok(FieldPlan::RunEndEncoded {
+                values_nullability,
+                value_plan: Box::new(FieldPlan::build(value_site_dt, values_field.as_ref())?),
+            });
+        }
+        if let DataType::FixedSizeBinary(len) = arrow_field.data_type() {
+            // Extension-based detection (only when the feature is enabled)
+            let ext_is_uuid = {
+                #[cfg(feature = "canonical_extension_types")]
+                {
+                    matches!(
+                        arrow_field.extension_type_name(),
+                        Some("arrow.uuid") | Some("uuid")
+                    )
+                }
+                #[cfg(not(feature = "canonical_extension_types"))]
+                {
+                    false
+                }
+            };
+            let md_is_uuid = arrow_field
+                .metadata()
+                .get("logicalType")
+                .map(|s| s.as_str())
+                == Some("uuid");
+            if ext_is_uuid || md_is_uuid {
+                if *len != 16 {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "logicalType=uuid requires FixedSizeBinary(16)".into(),
+                    ));
+                }
+                return Ok(FieldPlan::Uuid);
+            }
+        }
+        match avro_dt.codec() {
+            Codec::Struct(avro_fields) => {
+                let fields = match arrow_field.data_type() {
+                    DataType::Struct(struct_fields) => struct_fields,
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Avro struct maps to Arrow Struct, found: {other:?}"
+                        )));
+                    }
+                };
+                let mut bindings = Vec::with_capacity(avro_fields.len());
+                for avro_field in avro_fields.iter() {
+                    let name = avro_field.name().to_string();
+                    let idx = find_struct_child_index(fields, &name).ok_or_else(|| {
+                        ArrowError::SchemaError(format!(
+                            "Struct field '{name}' not present in Arrow field '{}'",
+                            arrow_field.name()
+                        ))
+                    })?;
+                    bindings.push(FieldBinding {
+                        arrow_index: idx,
+                        nullability: avro_field.data_type().nullability(),
+                        plan: FieldPlan::build(avro_field.data_type(), fields[idx].as_ref())?,
+                    });
+                }
+                Ok(FieldPlan::Struct { bindings })
+            }
+            Codec::List(items_dt) => match arrow_field.data_type() {
+                DataType::List(field_ref)
+                | DataType::LargeList(field_ref)
+                | DataType::ListView(field_ref)
+                | DataType::LargeListView(field_ref) => Ok(FieldPlan::List {
+                    items_nullability: items_dt.nullability(),
+                    item_plan: Box::new(FieldPlan::build(items_dt.as_ref(), field_ref.as_ref())?),
+                }),
+                DataType::FixedSizeList(field_ref, _len) => Ok(FieldPlan::List {
+                    items_nullability: items_dt.nullability(),
+                    item_plan: Box::new(FieldPlan::build(items_dt.as_ref(), field_ref.as_ref())?),
+                }),
+                other => Err(ArrowError::SchemaError(format!(
+                    "Avro array maps to Arrow List/LargeList/ListView/LargeListView/FixedSizeList, found: {other:?}"
+                ))),
+            },
+            Codec::Map(values_dt) => {
+                let entries_field = match arrow_field.data_type() {
+                    DataType::Map(entries, _sorted) => entries.as_ref(),
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Avro map maps to Arrow DataType::Map, found: {other:?}"
+                        )));
+                    }
+                };
+                let entries_struct_fields = match entries_field.data_type() {
+                    DataType::Struct(fs) => fs,
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Arrow Map entries must be Struct, found: {other:?}"
+                        )));
+                    }
+                };
+                let value_idx =
+                    find_map_value_field_index(entries_struct_fields).ok_or_else(|| {
+                        ArrowError::SchemaError("Map entries struct missing value field".into())
+                    })?;
+                let value_field = entries_struct_fields[value_idx].as_ref();
+                let value_plan = FieldPlan::build(values_dt.as_ref(), value_field)?;
+                Ok(FieldPlan::Map {
+                    values_nullability: values_dt.nullability(),
+                    value_plan: Box::new(value_plan),
+                })
+            }
+            Codec::Enum(symbols) => match arrow_field.data_type() {
+                DataType::Dictionary(key_dt, value_dt) => {
+                    if **key_dt != DataType::Int32 {
+                        return Err(ArrowError::SchemaError(
+                            "Avro enum requires Dictionary<Int32, Utf8>".into(),
+                        ));
+                    }
+                    if **value_dt != DataType::Utf8 {
+                        return Err(ArrowError::SchemaError(
+                            "Avro enum requires Dictionary<Int32, Utf8>".into(),
+                        ));
+                    }
+                    Ok(FieldPlan::Enum {
+                        symbols: symbols.clone(),
+                    })
+                }
+                other => Err(ArrowError::SchemaError(format!(
+                    "Avro enum maps to Arrow Dictionary<Int32, Utf8>, found: {other:?}"
+                ))),
+            },
+            // decimal site (bytes or fixed(N)) with precision/scale validation
+            Codec::Decimal(precision, scale_opt, fixed_size_opt) => {
+                let (ap, as_) = match arrow_field.data_type() {
+                    #[cfg(feature = "small_decimals")]
+                    DataType::Decimal32(p, s) => (*p as usize, *s as i32),
+                    #[cfg(feature = "small_decimals")]
+                    DataType::Decimal64(p, s) => (*p as usize, *s as i32),
+                    DataType::Decimal128(p, s) => (*p as usize, *s as i32),
+                    DataType::Decimal256(p, s) => (*p as usize, *s as i32),
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Avro decimal requires Arrow decimal, got {other:?} for field '{}'",
+                            arrow_field.name()
+                        )));
+                    }
+                };
+                let sc = scale_opt.unwrap_or(0) as i32; // Avro scale defaults to 0 if absent
+                if ap != *precision || as_ != sc {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Decimal precision/scale mismatch for field '{}': Avro({precision},{sc}) vs Arrow({ap},{as_})",
+                        arrow_field.name()
+                    )));
+                }
+                Ok(FieldPlan::Decimal {
+                    size: *fixed_size_opt,
+                })
+            }
+            Codec::Interval => match arrow_field.data_type() {
+                DataType::Interval(
+                    IntervalUnit::MonthDayNano | IntervalUnit::YearMonth | IntervalUnit::DayTime,
+                ) => Ok(FieldPlan::Scalar),
+                other => Err(ArrowError::SchemaError(format!(
+                    "Avro duration logical type requires Arrow Interval(MonthDayNano), found: {other:?}"
+                ))),
+            },
+            Codec::Union(avro_branches, _, UnionMode::Dense) => {
+                let arrow_union_fields = match arrow_field.data_type() {
+                    DataType::Union(fields, UnionMode::Dense) => fields,
+                    DataType::Union(_, UnionMode::Sparse) => {
+                        return Err(ArrowError::NotYetImplemented(
+                            "Sparse Arrow unions are not yet supported".to_string(),
+                        ));
+                    }
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Avro union maps to Arrow Union, found: {other:?}"
+                        )));
+                    }
+                };
+                if avro_branches.len() != arrow_union_fields.len() {
+                    return Err(ArrowError::SchemaError(format!(
+                        "Mismatched number of branches between Avro union ({}) and Arrow union ({}) for field '{}'",
+                        avro_branches.len(),
+                        arrow_union_fields.len(),
+                        arrow_field.name()
+                    )));
+                }
+                let bindings = avro_branches
+                    .iter()
+                    .zip(arrow_union_fields.iter())
+                    .enumerate()
+                    .map(|(i, (avro_branch, (_, arrow_child_field)))| {
+                        Ok(FieldBinding {
+                            arrow_index: i,
+                            nullability: avro_branch.nullability(),
+                            plan: FieldPlan::build(avro_branch, arrow_child_field)?,
+                        })
+                    })
+                    .collect::<Result<Vec<_>, ArrowError>>()?;
+                Ok(FieldPlan::Union { bindings })
+            }
+            Codec::Union(_, _, UnionMode::Sparse) => Err(ArrowError::NotYetImplemented(
+                "Sparse Arrow unions are not yet supported".to_string(),
+            )),
+            #[cfg(feature = "avro_custom_types")]
+            Codec::RunEndEncoded(values_dt, _width_code) => {
+                let values_field = match arrow_field.data_type() {
+                    DataType::RunEndEncoded(_run_ends_field, values_field) => values_field.as_ref(),
+                    other => {
+                        return Err(ArrowError::SchemaError(format!(
+                            "Avro RunEndEncoded maps to Arrow DataType::RunEndEncoded, found: {other:?}"
+                        )));
+                    }
+                };
+                Ok(FieldPlan::RunEndEncoded {
+                    values_nullability: values_dt.nullability(),
+                    value_plan: Box::new(FieldPlan::build(values_dt.as_ref(), values_field)?),
+                })
+            }
+            _ => Ok(FieldPlan::Scalar),
+        }
+    }
+}
+
+/// Value encoder for a single Arrow array, with one variant per supported
+/// array type / Avro encoding combination.
+///
+/// `Encoder::encode` dispatches to the concrete encoder wrapped by the
+/// variant. Nested encoders (lists, struct, map, union, run-end encoded)
+/// are held in a `Box`.
+enum Encoder<'a> {
+    /// Arrow boolean values.
+    Boolean(BooleanEncoder<'a>),
+    /// `Int32Type` values written as Avro `int`.
+    Int(IntEncoder<'a, Int32Type>),
+    /// `Int64Type` values written as Avro `long`.
+    Long(LongEncoder<'a, Int64Type>),
+    /// Microsecond timestamps written as Avro `long`.
+    TimestampMicros(LongEncoder<'a, TimestampMicrosecondType>),
+    /// Millisecond timestamps written as Avro `long`.
+    TimestampMillis(LongEncoder<'a, TimestampMillisecondType>),
+    /// Nanosecond timestamps written as Avro `long`.
+    TimestampNanos(LongEncoder<'a, TimestampNanosecondType>),
+    /// Second timestamps, scaled by 1000 before being written as Avro `long`.
+    TimestampSecsToMillis(TimestampSecondsToMillisEncoder<'a>),
+    /// `Date32Type` values written as Avro `int`.
+    Date32(IntEncoder<'a, Date32Type>),
+    /// `Time32(Second)` values, scaled by 1000 before being written as Avro `int`.
+    Time32SecsToMillis(Time32SecondsToMillisEncoder<'a>),
+    /// `Time32(Millisecond)` values written as Avro `int`.
+    Time32Millis(IntEncoder<'a, Time32MillisecondType>),
+    /// `Time64(Microsecond)` values written as Avro `long`.
+    Time64Micros(LongEncoder<'a, Time64MicrosecondType>),
+    /// Arrow `Duration(Second)` values written as Avro `long`.
+    DurationSeconds(LongEncoder<'a, DurationSecondType>),
+    /// Arrow `Duration(Millisecond)` values written as Avro `long`.
+    DurationMillis(LongEncoder<'a, DurationMillisecondType>),
+    /// Arrow `Duration(Microsecond)` values written as Avro `long`.
+    DurationMicros(LongEncoder<'a, DurationMicrosecondType>),
+    /// Arrow `Duration(Nanosecond)` values written as Avro `long`.
+    DurationNanos(LongEncoder<'a, DurationNanosecondType>),
+    /// `Float32Array` values (4 little-endian bytes each).
+    Float32(F32Encoder<'a>),
+    /// `Float64Array` values (8 little-endian bytes each).
+    Float64(F64Encoder<'a>),
+    /// Length-prefixed bytes from a binary array with `i32` offsets.
+    Binary(BinaryEncoder<'a, i32>),
+    /// Length-prefixed bytes from a binary array with `i64` offsets.
+    LargeBinary(BinaryEncoder<'a, i64>),
+    /// Length-prefixed UTF-8 from a string array with `i32` offsets.
+    Utf8(Utf8Encoder<'a>),
+    /// Length-prefixed UTF-8 from a string array with `i64` offsets.
+    Utf8Large(Utf8LargeEncoder<'a>),
+    /// Length-prefixed UTF-8 from a `StringViewArray`.
+    Utf8View(Utf8ViewEncoder<'a>),
+    /// Length-prefixed bytes from a `BinaryViewArray`.
+    BinaryView(BinaryViewEncoder<'a>),
+    /// List array with `i32` offsets.
+    List(Box<ListEncoder32<'a>>),
+    /// List array with `i64` offsets.
+    LargeList(Box<ListEncoder64<'a>>),
+    /// List-view array with `i32` offsets/sizes.
+    ListView(Box<ListViewEncoder32<'a>>),
+    /// List-view array with `i64` offsets/sizes.
+    LargeListView(Box<ListViewEncoder64<'a>>),
+    /// Fixed-size list array.
+    FixedSizeList(Box<FixedSizeListEncoder<'a>>),
+    /// Struct array; every child encoder is invoked for each row.
+    Struct(Box<StructEncoder<'a>>),
+    /// Avro `fixed` encoder (raw bytes, no length)
+    Fixed(FixedEncoder<'a>),
+    /// Avro `uuid` logical type encoder (string with RFC‑4122 hyphenated text)
+    Uuid(UuidEncoder<'a>),
+    /// Avro `duration` logical type (Arrow Interval(MonthDayNano)) encoder
+    IntervalMonthDayNano(DurationEncoder<'a, IntervalMonthDayNanoType>),
+    /// Avro `duration` logical type (Arrow Interval(YearMonth)) encoder
+    IntervalYearMonth(DurationEncoder<'a, IntervalYearMonthType>),
+    /// Avro `duration` logical type (Arrow Interval(DayTime)) encoder
+    IntervalDayTime(DurationEncoder<'a, IntervalDayTimeType>),
+    /// 32-bit decimal array (only with the `small_decimals` feature).
+    #[cfg(feature = "small_decimals")]
+    Decimal32(Decimal32Encoder<'a>),
+    /// 64-bit decimal array (only with the `small_decimals` feature).
+    #[cfg(feature = "small_decimals")]
+    Decimal64(Decimal64Encoder<'a>),
+    /// 128-bit decimal array.
+    Decimal128(Decimal128Encoder<'a>),
+    /// 256-bit decimal array.
+    Decimal256(Decimal256Encoder<'a>),
+    /// Avro `enum` encoder: writes the key (int) as the enum index.
+    Enum(EnumEncoder<'a>),
+    /// Map array with string keys.
+    Map(Box<MapEncoder<'a>>),
+    /// Dense union array.
+    Union(Box<UnionEncoder<'a>>),
+    /// Run-end encoded values with specific run-end index widths
+    RunEncoded16(Box<RunEncodedEncoder16<'a>>),
+    RunEncoded32(Box<RunEncodedEncoder32<'a>>),
+    RunEncoded64(Box<RunEncodedEncoder64<'a>>),
+    /// Placeholder variant: `encode` writes nothing and succeeds.
+    Null,
+}
+
+impl<'a> Encoder<'a> {
+    /// Writes the element at row `idx` to `out` by delegating to the concrete
+    /// encoder held in this variant.
+    ///
+    /// The `Null` variant emits no bytes and always returns `Ok(())`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        match self {
+            // Nothing is written for the `Null` variant.
+            Self::Null => Ok(()),
+            // Primitive numeric and boolean encoders.
+            Self::Boolean(enc) => enc.encode(out, idx),
+            Self::Int(enc) => enc.encode(out, idx),
+            Self::Long(enc) => enc.encode(out, idx),
+            Self::Float32(enc) => enc.encode(out, idx),
+            Self::Float64(enc) => enc.encode(out, idx),
+            // Temporal encoders.
+            Self::TimestampMicros(enc) => enc.encode(out, idx),
+            Self::TimestampMillis(enc) => enc.encode(out, idx),
+            Self::TimestampNanos(enc) => enc.encode(out, idx),
+            Self::TimestampSecsToMillis(enc) => enc.encode(out, idx),
+            Self::Date32(enc) => enc.encode(out, idx),
+            Self::Time32SecsToMillis(enc) => enc.encode(out, idx),
+            Self::Time32Millis(enc) => enc.encode(out, idx),
+            Self::Time64Micros(enc) => enc.encode(out, idx),
+            Self::DurationSeconds(enc) => enc.encode(out, idx),
+            Self::DurationMillis(enc) => enc.encode(out, idx),
+            Self::DurationMicros(enc) => enc.encode(out, idx),
+            Self::DurationNanos(enc) => enc.encode(out, idx),
+            Self::IntervalMonthDayNano(enc) => enc.encode(out, idx),
+            Self::IntervalYearMonth(enc) => enc.encode(out, idx),
+            Self::IntervalDayTime(enc) => enc.encode(out, idx),
+            // Byte and string encoders.
+            Self::Binary(enc) => enc.encode(out, idx),
+            Self::LargeBinary(enc) => enc.encode(out, idx),
+            Self::BinaryView(enc) => enc.encode(out, idx),
+            Self::Utf8(enc) => enc.encode(out, idx),
+            Self::Utf8Large(enc) => enc.encode(out, idx),
+            Self::Utf8View(enc) => enc.encode(out, idx),
+            Self::Fixed(enc) => enc.encode(out, idx),
+            Self::Uuid(enc) => enc.encode(out, idx),
+            // Decimal encoders.
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal32(enc) => enc.encode(out, idx),
+            #[cfg(feature = "small_decimals")]
+            Self::Decimal64(enc) => enc.encode(out, idx),
+            Self::Decimal128(enc) => enc.encode(out, idx),
+            Self::Decimal256(enc) => enc.encode(out, idx),
+            // Nested and dictionary-backed encoders.
+            Self::List(enc) => enc.encode(out, idx),
+            Self::LargeList(enc) => enc.encode(out, idx),
+            Self::ListView(enc) => enc.encode(out, idx),
+            Self::LargeListView(enc) => enc.encode(out, idx),
+            Self::FixedSizeList(enc) => enc.encode(out, idx),
+            Self::Struct(enc) => enc.encode(out, idx),
+            Self::Map(enc) => enc.encode(out, idx),
+            Self::Enum(enc) => enc.encode(out, idx),
+            Self::Union(enc) => enc.encode(out, idx),
+            Self::RunEncoded16(enc) => enc.encode(out, idx),
+            Self::RunEncoded32(enc) => enc.encode(out, idx),
+            Self::RunEncoded64(enc) => enc.encode(out, idx),
+        }
+    }
+}
+
+/// Encoder for Arrow boolean arrays; each value is emitted through `write_bool`.
+struct BooleanEncoder<'a>(&'a arrow_array::BooleanArray);
+impl BooleanEncoder<'_> {
+    /// Writes the boolean stored at row `idx` to `out`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let flag = self.0.value(idx);
+        write_bool(out, flag)
+    }
+}
+
+/// Avro `int` encoder over any primitive array whose native value type is `i32`.
+struct IntEncoder<'a, P: ArrowPrimitiveType<Native = i32>>(&'a PrimitiveArray<P>);
+impl<'a, P: ArrowPrimitiveType<Native = i32>> IntEncoder<'a, P> {
+    /// Writes the `i32` stored at row `idx` to `out` via `write_int`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let value = self.0.value(idx);
+        write_int(out, value)
+    }
+}
+
+/// Avro `long` encoder over any primitive array whose native value type is `i64`.
+struct LongEncoder<'a, P: ArrowPrimitiveType<Native = i64>>(&'a PrimitiveArray<P>);
+impl<'a, P: ArrowPrimitiveType<Native = i64>> LongEncoder<'a, P> {
+    /// Writes the `i64` stored at row `idx` to `out` via `write_long`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let value = self.0.value(idx);
+        write_long(out, value)
+    }
+}
+
+/// Converts `Time32(Second)` values to Avro time-millis (`int`) by multiplying
+/// by 1000 with overflow detection.
+struct Time32SecondsToMillisEncoder<'a>(&'a PrimitiveArray<Time32SecondType>);
+impl<'a> Time32SecondsToMillisEncoder<'a> {
+    /// Writes row `idx` as milliseconds.
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when the scaled value does
+    /// not fit in an `i32`.
+    #[inline]
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        match self.0.value(idx).checked_mul(1000) {
+            Some(millis) => write_int(out, millis),
+            None => Err(ArrowError::InvalidArgumentError(
+                "time32(secs) * 1000 overflowed".into(),
+            )),
+        }
+    }
+}
+
+/// Converts `Timestamp(Second)` values to Avro timestamp-millis (`long`) by
+/// multiplying by 1000 with overflow detection.
+struct TimestampSecondsToMillisEncoder<'a>(&'a PrimitiveArray<TimestampSecondType>);
+impl<'a> TimestampSecondsToMillisEncoder<'a> {
+    /// Writes row `idx` as milliseconds.
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when the scaled value does
+    /// not fit in an `i64`.
+    #[inline]
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        match self.0.value(idx).checked_mul(1000) {
+            Some(millis) => write_long(out, millis),
+            None => Err(ArrowError::InvalidArgumentError(
+                "timestamp(secs) * 1000 overflowed".into(),
+            )),
+        }
+    }
+}
+
+/// Binary encoder shared by both offset widths (`i32` and `i64`).
+struct BinaryEncoder<'a, O: OffsetSizeTrait>(&'a GenericBinaryArray<O>);
+impl<'a, O: OffsetSizeTrait> BinaryEncoder<'a, O> {
+    /// Writes the bytes at row `idx` to `out` with a length prefix.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let bytes = self.0.value(idx);
+        write_len_prefixed(out, bytes)
+    }
+}
+
+/// Encoder for `BinaryViewArray` (byte view) columns.
+struct BinaryViewEncoder<'a>(&'a BinaryViewArray);
+impl BinaryViewEncoder<'_> {
+    /// Writes the bytes at row `idx` to `out` with a length prefix.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let bytes = self.0.value(idx);
+        write_len_prefixed(out, bytes)
+    }
+}
+
+/// Encoder for `StringViewArray` columns.
+struct Utf8ViewEncoder<'a>(&'a StringViewArray);
+impl Utf8ViewEncoder<'_> {
+    /// Writes the UTF-8 bytes of the string at row `idx` with a length prefix.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let text = self.0.value(idx);
+        write_len_prefixed(out, text.as_bytes())
+    }
+}
+
+/// Encoder for `Float32Array` columns.
+struct F32Encoder<'a>(&'a arrow_array::Float32Array);
+impl F32Encoder<'_> {
+    /// Writes the value at row `idx` as its 4-byte IEEE-754 little-endian
+    /// representation (the Avro `float` encoding).
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let le_bytes = self.0.value(idx).to_le_bytes();
+        match out.write_all(&le_bytes) {
+            Ok(()) => Ok(()),
+            Err(e) => Err(ArrowError::IoError(format!("write f32: {e}"), e)),
+        }
+    }
+}
+
+/// Encoder for `Float64Array` columns.
+struct F64Encoder<'a>(&'a arrow_array::Float64Array);
+impl F64Encoder<'_> {
+    /// Writes the value at row `idx` as its 8-byte IEEE-754 little-endian
+    /// representation (the Avro `double` encoding).
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let le_bytes = self.0.value(idx).to_le_bytes();
+        match out.write_all(&le_bytes) {
+            Ok(()) => Ok(()),
+            Err(e) => Err(ArrowError::IoError(format!("write f64: {e}"), e)),
+        }
+    }
+}
+
+/// String encoder shared by both offset widths (`i32` and `i64`).
+struct Utf8GenericEncoder<'a, O: OffsetSizeTrait>(&'a GenericStringArray<O>);
+
+impl<'a, O: OffsetSizeTrait> Utf8GenericEncoder<'a, O> {
+    /// Writes the UTF-8 bytes of the string at row `idx` with a length prefix.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let text = self.0.value(idx);
+        write_len_prefixed(out, text.as_bytes())
+    }
+}
+
+/// String encoder for arrays with `i32` offsets.
+type Utf8Encoder<'a> = Utf8GenericEncoder<'a, i32>;
+/// String encoder for arrays with `i64` offsets.
+type Utf8LargeEncoder<'a> = Utf8GenericEncoder<'a, i64>;
+
+/// The concrete string array type backing a map's keys.
+enum KeyKind<'a> {
+    Utf8(&'a GenericStringArray<i32>),
+    LargeUtf8(&'a GenericStringArray<i64>),
+}
+/// Encoder for Arrow `MapArray` columns with string keys.
+struct MapEncoder<'a> {
+    /// The map array being encoded; its offsets delimit each row's entries.
+    map: &'a MapArray,
+    /// Typed view of the key child array.
+    keys: KeyKind<'a>,
+    /// Encoder for the value child array.
+    values: FieldEncoder<'a>,
+    /// `offset()` of the key child, subtracted (saturating) from entry indices.
+    keys_offset: usize,
+    /// `offset()` of the value child, subtracted (saturating) from entry indices.
+    values_offset: usize,
+}
+
+impl<'a> MapEncoder<'a> {
+    /// Builds a map encoder for `map`.
+    ///
+    /// Fails with `ArrowError::SchemaError` when the key array is neither
+    /// `Utf8` nor `LargeUtf8`, and propagates any error from building the
+    /// value encoder.
+    fn try_new(
+        map: &'a MapArray,
+        values_nullability: Option<Nullability>,
+        value_plan: &FieldPlan,
+    ) -> Result<Self, ArrowError> {
+        let key_array = map.keys();
+        let keys = match key_array.data_type() {
+            DataType::Utf8 => KeyKind::Utf8(key_array.as_string::<i32>()),
+            DataType::LargeUtf8 => KeyKind::LargeUtf8(key_array.as_string::<i64>()),
+            other => {
+                return Err(ArrowError::SchemaError(format!(
+                    "Avro map requires string keys; Arrow key type must be Utf8/LargeUtf8, found: {other:?}"
+                )));
+            }
+        };
+        let value_array = map.values();
+        let values =
+            FieldEncoder::make_encoder(value_array.as_ref(), value_plan, values_nullability)?;
+        Ok(Self {
+            map,
+            keys,
+            values,
+            keys_offset: key_array.offset(),
+            values_offset: value_array.offset(),
+        })
+    }
+
+    /// Writes entries `start..end` through `encode_blocked_range`: for each
+    /// entry the length-prefixed key, then whatever `write_item` emits.
+    fn encode_map_entries<W, O>(
+        out: &mut W,
+        keys: &GenericStringArray<O>,
+        keys_offset: usize,
+        start: usize,
+        end: usize,
+        mut write_item: impl FnMut(&mut W, usize) -> Result<(), ArrowError>,
+    ) -> Result<(), ArrowError>
+    where
+        W: Write + ?Sized,
+        O: OffsetSizeTrait,
+    {
+        encode_blocked_range(out, start, end, |out, j| {
+            let key = keys.value(j.saturating_sub(keys_offset));
+            write_len_prefixed(out, key.as_bytes())?;
+            write_item(out, j)
+        })
+    }
+
+    /// Encodes the map stored at row `idx`, using the map offsets to find the
+    /// range of entries belonging to that row.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let offsets = self.map.offsets();
+        let (start, end) = (offsets[idx] as usize, offsets[idx + 1] as usize);
+        let keys_offset = self.keys_offset;
+        let values_offset = self.values_offset;
+        let values = &mut self.values;
+        let write_item =
+            |out: &mut W, j: usize| values.encode(out, j.saturating_sub(values_offset));
+        match self.keys {
+            KeyKind::Utf8(arr) => {
+                Self::encode_map_entries(out, arr, keys_offset, start, end, write_item)
+            }
+            KeyKind::LargeUtf8(arr) => {
+                Self::encode_map_entries(out, arr, keys_offset, start, end, write_item)
+            }
+        }
+    }
+}
+
+/// Avro `enum` encoder backed by the `Int32` keys of a dictionary array.
+///
+/// The Avro spec encodes an enum as an **int** holding the zero-based
+/// position of the symbol in the schema's `symbols` list. This encoder
+/// writes the dictionary key unchanged, so whoever constructs it is
+/// expected to have checked that the dictionary values line up with the
+/// schema symbols.
+struct EnumEncoder<'a> {
+    keys: &'a PrimitiveArray<Int32Type>,
+}
+impl EnumEncoder<'_> {
+    /// Writes the dictionary key at `row` as an Avro `int`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, row: usize) -> Result<(), ArrowError> {
+        let symbol_index = self.keys.value(row);
+        write_int(out, symbol_index)
+    }
+}
+
+/// Encoder for a dense Arrow `UnionArray`.
+///
+/// Each row is written as the branch position (an Avro `int`) followed by the
+/// value produced by that branch's child encoder.
+struct UnionEncoder<'a> {
+    /// One child encoder per union field, in `UnionFields` iteration order.
+    encoders: Vec<FieldEncoder<'a>>,
+    /// The union array being encoded.
+    array: &'a UnionArray,
+    /// Lookup table indexed by Arrow type id; `Some(i)` points into `encoders`.
+    type_id_to_encoder_index: Vec<Option<usize>>,
+}
+
+impl<'a> UnionEncoder<'a> {
+    /// Builds a union encoder for `array` using one binding per union field.
+    ///
+    /// Returns `ArrowError::SchemaError` when `array` is not a dense union,
+    /// when the number of bindings differs from the number of union fields,
+    /// or when a field carries a negative type id. Errors from building the
+    /// child encoders are propagated.
+    fn try_new(array: &'a UnionArray, field_bindings: &[FieldBinding]) -> Result<Self, ArrowError> {
+        let DataType::Union(fields, UnionMode::Dense) = array.data_type() else {
+            return Err(ArrowError::SchemaError("Expected Dense UnionArray".into()));
+        };
+        if fields.len() != field_bindings.len() {
+            return Err(ArrowError::SchemaError(format!(
+                "Mismatched number of union branches between Arrow array ({}) and encoding plan ({})",
+                fields.len(),
+                field_bindings.len()
+            )));
+        }
+        let max_type_id = fields.iter().map(|(tid, _)| tid).max().unwrap_or(0);
+        // Type ids are `i8`, so widen to `usize` BEFORE adding one: computing
+        // `max_type_id + 1` in `i8` overflows when the largest id is 127.
+        let table_len = (max_type_id.max(0) as usize) + 1;
+        let mut type_id_to_encoder_index: Vec<Option<usize>> = vec![None; table_len];
+        let mut encoders = Vec::with_capacity(fields.len());
+        for (i, (type_id, _)) in fields.iter().enumerate() {
+            // A negative id cannot index the lookup table; report it instead
+            // of panicking on an out-of-bounds access.
+            if type_id < 0 {
+                return Err(ArrowError::SchemaError(format!(
+                    "Invalid type_id {type_id}"
+                )));
+            }
+            let binding = field_bindings
+                .get(i)
+                .ok_or_else(|| ArrowError::SchemaError("Binding and field mismatch".to_string()))?;
+            encoders.push(FieldEncoder::make_encoder(
+                array.child(type_id).as_ref(),
+                &binding.plan,
+                binding.nullability,
+            )?);
+            type_id_to_encoder_index[type_id as usize] = Some(i);
+        }
+        Ok(Self {
+            encoders,
+            array,
+            type_id_to_encoder_index,
+        })
+    }
+
+    /// Encodes row `idx`: the branch position as an Avro `int`, then the
+    /// branch value at the row's offset within the selected child.
+    ///
+    /// Returns `ArrowError::SchemaError` when the row's type id has no
+    /// registered encoder.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        // Bounds: the indexing below panics if `idx` is out of range, so
+        // callers must pass a row index smaller than `self.array.len()`.
+        let type_id = self.array.type_ids()[idx];
+        // A negative `type_id` casts to a very large `usize`, which `get`
+        // rejects, so it falls through to the error below.
+        let encoder_index = self
+            .type_id_to_encoder_index
+            .get(type_id as usize)
+            .and_then(|opt| *opt)
+            .ok_or_else(|| ArrowError::SchemaError(format!("Invalid type_id {type_id}")))?;
+        write_int(out, encoder_index as i32)?;
+        let encoder = self.encoders.get_mut(encoder_index).ok_or_else(|| {
+            ArrowError::SchemaError(format!("Invalid encoder index {encoder_index}"))
+        })?;
+        encoder.encode(out, self.array.value_offset(idx))
+    }
+}
+
+/// Encoder for Arrow `StructArray` columns: one child encoder per bound field.
+struct StructEncoder<'a> {
+    encoders: Vec<FieldEncoder<'a>>,
+}
+
+impl<'a> StructEncoder<'a> {
+    /// Builds a child encoder for every binding, looking each column up by
+    /// the binding's `arrow_index`.
+    ///
+    /// Returns `ArrowError::SchemaError` when an index is outside the struct's
+    /// columns, and propagates errors from building child encoders.
+    fn try_new(
+        array: &'a StructArray,
+        field_bindings: &[FieldBinding],
+    ) -> Result<Self, ArrowError> {
+        let columns = array.columns();
+        let encoders = field_bindings
+            .iter()
+            .map(|binding| {
+                let idx = binding.arrow_index;
+                let column = columns.get(idx).ok_or_else(|| {
+                    ArrowError::SchemaError(format!("Struct child index {idx} out of range"))
+                })?;
+                FieldEncoder::make_encoder(column.as_ref(), &binding.plan, binding.nullability)
+            })
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+        Ok(Self { encoders })
+    }
+
+    /// Encodes row `idx` by running every child encoder in order, stopping at
+    /// the first error.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        self.encoders
+            .iter_mut()
+            .try_for_each(|child| child.encode(out, idx))
+    }
+}
+
+/// Writes the items `start..end` using Avro array block framing.
+///
+/// A non-empty range is emitted as a single block: the item count as a
+/// `long`, each item via `write_item`, then a `0` terminator. An empty (or
+/// reversed) range emits only the `0` terminator.
+///
+/// `write_item` receives `(out, index)`, keeping the "out-first" convention.
+fn encode_blocked_range<W: Write + ?Sized, F>(
+    out: &mut W,
+    start: usize,
+    end: usize,
+    mut write_item: F,
+) -> Result<(), ArrowError>
+where
+    F: FnMut(&mut W, usize) -> Result<(), ArrowError>,
+{
+    let count = end.saturating_sub(start);
+    if count > 0 {
+        // One positive-count block holds every item.
+        write_long(out, count as i64)?;
+        (start..end).try_for_each(|row| write_item(out, row))?;
+    }
+    // Zero-length block marks the end, per the Avro spec.
+    write_long(out, 0)
+}
+
+/// Encoder for Arrow list arrays, generic over the offset width.
+struct ListEncoder<'a, O: OffsetSizeTrait> {
+    /// The list array being encoded; its offsets delimit each row's items.
+    list: &'a GenericListArray<O>,
+    /// Encoder for the child values array.
+    values: FieldEncoder<'a>,
+    /// `offset()` of the child values, subtracted (saturating) from item indices.
+    values_offset: usize,
+}
+
+/// List encoder for arrays with `i32` offsets.
+type ListEncoder32<'a> = ListEncoder<'a, i32>;
+/// List encoder for arrays with `i64` offsets.
+type ListEncoder64<'a> = ListEncoder<'a, i64>;
+
+impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> {
+    /// Builds a list encoder, creating the item encoder from `item_plan`.
+    fn try_new(
+        list: &'a GenericListArray<O>,
+        items_nullability: Option<Nullability>,
+        item_plan: &FieldPlan,
+    ) -> Result<Self, ArrowError> {
+        let child = list.values();
+        let values = FieldEncoder::make_encoder(child.as_ref(), item_plan, items_nullability)?;
+        Ok(Self {
+            list,
+            values,
+            values_offset: child.offset(),
+        })
+    }
+
+    /// Writes the items `start..end` through `encode_blocked_range`.
+    fn encode_list_range<W: Write + ?Sized>(
+        &mut self,
+        out: &mut W,
+        start: usize,
+        end: usize,
+    ) -> Result<(), ArrowError> {
+        let base = self.values_offset;
+        let values = &mut self.values;
+        encode_blocked_range(out, start, end, |out, row| {
+            values.encode(out, row.saturating_sub(base))
+        })
+    }
+
+    /// Encodes the list at row `idx`, using the list offsets at `idx` and
+    /// `idx + 1` as the item range.
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when an offset cannot be
+    /// converted to `usize`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let offsets = self.list.offsets();
+        let bound_at = |pos: usize| {
+            offsets[pos].to_usize().ok_or_else(|| {
+                ArrowError::InvalidArgumentError(format!(
+                    "Error converting offset[{pos}] to usize"
+                ))
+            })
+        };
+        let start = bound_at(idx)?;
+        let end = bound_at(idx + 1)?;
+        self.encode_list_range(out, start, end)
+    }
+}
+
+/// Encoder for Arrow list-view arrays, which describe each row with an
+/// `(offset, size)` pair.
+struct ListViewEncoder<'a, O: OffsetSizeTrait> {
+    /// The list-view array being encoded.
+    list: &'a GenericListViewArray<O>,
+    /// Encoder for the child values array.
+    values: FieldEncoder<'a>,
+    /// `offset()` of the child values array.
+    values_offset: usize,
+}
+/// List-view encoder for arrays with `i32` offsets/sizes.
+type ListViewEncoder32<'a> = ListViewEncoder<'a, i32>;
+/// List-view encoder for arrays with `i64` offsets/sizes.
+type ListViewEncoder64<'a> = ListViewEncoder<'a, i64>;
+
+impl<'a, O: OffsetSizeTrait> ListViewEncoder<'a, O> {
+    /// Builds a list-view encoder, creating the item encoder from `item_plan`.
+    fn try_new(
+        list: &'a GenericListViewArray<O>,
+        items_nullability: Option<Nullability>,
+        item_plan: &FieldPlan,
+    ) -> Result<Self, ArrowError> {
+        let child = list.values();
+        let values = FieldEncoder::make_encoder(child.as_ref(), item_plan, items_nullability)?;
+        Ok(Self {
+            list,
+            values,
+            values_offset: child.offset(),
+        })
+    }
+
+    /// Encodes the list at row `idx` from its `(offset, size)` pair.
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when the offset or size
+    /// cannot be converted to `usize`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let Some(rel_start) = self.list.value_offset(idx).to_usize() else {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Error converting value_offset[{idx}] to usize"
+            )));
+        };
+        let Some(len) = self.list.value_size(idx).to_usize() else {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Error converting value_size[{idx}] to usize"
+            )));
+        };
+        // The range is shifted up by the child offset here and shifted back
+        // down (saturating) before each item lookup.
+        let base = self.values_offset;
+        let first = rel_start + base;
+        let last = first + len;
+        let values = &mut self.values;
+        encode_blocked_range(out, first, last, |out, row| {
+            values.encode(out, row.saturating_sub(base))
+        })
+    }
+}
+
+/// Encoder for Arrow `FixedSizeListArray` columns.
+struct FixedSizeListEncoder<'a> {
+    /// The fixed-size list array being encoded.
+    list: &'a FixedSizeListArray,
+    /// Encoder for the child values array.
+    values: FieldEncoder<'a>,
+    /// `offset()` of the child values array.
+    values_offset: usize,
+    /// Number of items in every list (`value_length()` of the array).
+    elem_len: usize,
+}
+
+impl<'a> FixedSizeListEncoder<'a> {
+    /// Builds a fixed-size list encoder, creating the item encoder from
+    /// `item_plan`.
+    fn try_new(
+        list: &'a FixedSizeListArray,
+        items_nullability: Option<Nullability>,
+        item_plan: &FieldPlan,
+    ) -> Result<Self, ArrowError> {
+        let child = list.values();
+        let values = FieldEncoder::make_encoder(child.as_ref(), item_plan, items_nullability)?;
+        Ok(Self {
+            list,
+            values,
+            values_offset: child.offset(),
+            elem_len: list.value_length() as usize,
+        })
+    }
+
+    /// Encodes the `elem_len` items of the list at row `idx`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        // The range is shifted up by the child offset here and shifted back
+        // down (saturating) before each item lookup.
+        let base = self.values_offset;
+        let first = base + self.list.value_offset(idx) as usize;
+        let last = first + self.elem_len;
+        let values = &mut self.values;
+        encode_blocked_range(out, first, last, |out, row| {
+            values.encode(out, row.saturating_sub(base))
+        })
+    }
+}
+
+/// Avro `fixed` encoder over an Arrow `FixedSizeBinaryArray`.
+/// Per the spec, a fixed value is exactly `size` raw bytes with no length prefix.
+struct FixedEncoder<'a>(&'a FixedSizeBinaryArray);
+impl FixedEncoder<'_> {
+    /// Writes the bytes at row `idx` to `out` verbatim.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let bytes = self.0.value(idx);
+        match out.write_all(bytes) {
+            Ok(()) => Ok(()),
+            Err(e) => Err(ArrowError::IoError(format!("write fixed bytes: {e}"), e)),
+        }
+    }
+}
+
+/// Avro `uuid` logical type encoder: turns each `FixedSizeBinaryArray` value
+/// into an Avro string holding the lowercase hyphenated UUID text.
+struct UuidEncoder<'a>(&'a FixedSizeBinaryArray);
+impl UuidEncoder<'_> {
+    /// Writes the UUID at row `idx` as one length byte followed by the
+    /// hyphenated text.
+    ///
+    /// Returns `ArrowError::InvalidArgumentError` when the stored bytes are
+    /// not accepted by `Uuid::from_slice`.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let raw = self.0.value(idx);
+        let parsed = match Uuid::from_slice(raw) {
+            Ok(u) => u,
+            Err(e) => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Invalid UUID bytes: {e}"
+                )));
+            }
+        };
+        let mut framed = [0u8; 1 + uuid::fmt::Hyphenated::LENGTH];
+        // 0x48 is the zig-zag varint for 36, the hyphenated text length.
+        framed[0] = 0x48;
+        let _ = parsed.hyphenated().encode_lower(&mut framed[1..]);
+        out.write_all(&framed)
+            .map_err(|e| ArrowError::IoError(format!("write uuid: {e}"), e))
+    }
+}
+
+/// The three unsigned components of an Avro duration, as produced by
+/// `IntervalToDurationParts::duration_parts`.
+#[derive(Copy, Clone)]
+struct DurationParts {
+    /// Number of months.
+    months: u32,
+    /// Number of days.
+    days: u32,
+    /// Number of milliseconds.
+    millis: u32,
+}
+/// Converts an Arrow interval native value into the `(months, days, millis)`
+/// triple used by the Avro duration encoding.
+trait IntervalToDurationParts: ArrowPrimitiveType {
+    /// Returns the duration components, or `ArrowError::InvalidArgumentError`
+    /// when `native` cannot be represented.
+    fn duration_parts(native: Self::Native) -> Result<DurationParts, ArrowError>;
+}
+impl IntervalToDurationParts for IntervalMonthDayNanoType {
+    /// Rejects negative components, nanoseconds that are not whole
+    /// milliseconds, and millisecond counts above `u32::MAX`.
+    fn duration_parts(native: Self::Native) -> Result<DurationParts, ArrowError> {
+        const NANOS_PER_MILLI: i64 = 1_000_000;
+        let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(native);
+        let all_non_negative = months >= 0 && days >= 0 && nanos >= 0;
+        if !all_non_negative {
+            return Err(ArrowError::InvalidArgumentError(
+                "Avro 'duration' cannot encode negative months/days/nanoseconds".into(),
+            ));
+        }
+        if nanos % NANOS_PER_MILLI != 0 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Avro 'duration' requires whole milliseconds; nanoseconds must be divisible by 1_000_000"
+                    .into(),
+            ));
+        }
+        let millis = nanos / NANOS_PER_MILLI;
+        if millis > i64::from(u32::MAX) {
+            return Err(ArrowError::InvalidArgumentError(
+                "Avro 'duration' milliseconds exceed u32::MAX".into(),
+            ));
+        }
+        Ok(DurationParts {
+            months: months as u32,
+            days: days as u32,
+            millis: millis as u32,
+        })
+    }
+}
+impl IntervalToDurationParts for IntervalYearMonthType {
+    /// Maps the month count to `months`; `days` and `millis` are zero.
+    /// Rejects negative month counts.
+    fn duration_parts(native: Self::Native) -> Result<DurationParts, ArrowError> {
+        if native >= 0 {
+            Ok(DurationParts {
+                months: native as u32,
+                days: 0,
+                millis: 0,
+            })
+        } else {
+            Err(ArrowError::InvalidArgumentError(
+                "Avro 'duration' cannot encode negative months".into(),
+            ))
+        }
+    }
+}
+impl IntervalToDurationParts for IntervalDayTimeType {
+    /// Maps `(days, millis)` directly; `months` is zero. Rejects negative
+    /// components.
+    fn duration_parts(native: Self::Native) -> Result<DurationParts, ArrowError> {
+        let (days, millis) = IntervalDayTimeType::to_parts(native);
+        if days >= 0 && millis >= 0 {
+            Ok(DurationParts {
+                months: 0,
+                days: days as u32,
+                millis: millis as u32,
+            })
+        } else {
+            Err(ArrowError::InvalidArgumentError(
+                "Avro 'duration' cannot encode negative days or milliseconds".into(),
+            ))
+        }
+    }
+}
+
+/// Generic encoder shared by all three Arrow interval units.
+/// Emits the Avro `fixed(12)` duration as three little-endian `u32` values
+/// with a single write.
+struct DurationEncoder<'a, P: ArrowPrimitiveType + IntervalToDurationParts>(&'a PrimitiveArray<P>);
+impl<'a, P: ArrowPrimitiveType + IntervalToDurationParts> DurationEncoder<'a, P> {
+    /// Converts row `idx` to duration parts and writes the 12-byte payload.
+    ///
+    /// Conversion errors from `P::duration_parts` are propagated; I/O
+    /// failures are reported as `ArrowError::IoError`.
+    #[inline(always)]
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let DurationParts {
+            months,
+            days,
+            millis,
+        } = P::duration_parts(self.0.value(idx))?;
+        // Layout: months, days, millis; each a little-endian u32.
+        let mut buf = [0u8; 12];
+        buf[0..4].copy_from_slice(&months.to_le_bytes());
+        buf[4..8].copy_from_slice(&days.to_le_bytes());
+        buf[8..12].copy_from_slice(&millis.to_le_bytes());
+        out.write_all(&buf)
+            .map_err(|e| ArrowError::IoError(format!("write duration: {e}"), e))
+    }
+}
+
+/// Minimal trait to obtain a big-endian fixed-size byte array for a decimal's
+/// unscaled integer value at `idx`.
+///
+/// The const parameter `N` is the length in bytes of the returned array.
+trait DecimalBeBytes<const N: usize> {
+    /// Returns the `N` big-endian bytes of the value stored at position `idx`.
+    fn value_be_bytes(&self, idx: usize) -> [u8; N];
+}
+#[cfg(feature = "small_decimals")]
+impl DecimalBeBytes<4> for Decimal32Array {
+    /// Big-endian bytes of the 4-byte value at `idx`.
+    fn value_be_bytes(&self, idx: usize) -> [u8; 4] {
+        let unscaled = self.value(idx);
+        unscaled.to_be_bytes()
+    }
+}
+#[cfg(feature = "small_decimals")]
+impl DecimalBeBytes<8> for Decimal64Array {
+    /// Big-endian bytes of the 8-byte value at `idx`.
+    fn value_be_bytes(&self, idx: usize) -> [u8; 8] {
+        let unscaled = self.value(idx);
+        unscaled.to_be_bytes()
+    }
+}
+impl DecimalBeBytes<16> for Decimal128Array {
+    /// Big-endian bytes of the 16-byte value at `idx`.
+    fn value_be_bytes(&self, idx: usize) -> [u8; 16] {
+        let unscaled = self.value(idx);
+        unscaled.to_be_bytes()
+    }
+}
+impl DecimalBeBytes<32> for Decimal256Array {
+    /// Big-endian bytes of the 32-byte value at `idx`.
+    fn value_be_bytes(&self, idx: usize) -> [u8; 32] {
+        // The stored 256-bit integer is converted to its big-endian byte form.
+        let unscaled = self.value(idx);
+        unscaled.to_be_bytes()
+    }
+}
+
+/// Avro decimal encoder shared by all Arrow decimal array types.
+///
+/// The output layout is selected by `fixed_size`:
+/// - `None` → Avro `bytes(decimal)`: the minimal two's-complement form of the
+///   value, written with a length prefix.
+/// - `Some(n)` → Avro `fixed(n, decimal)`: the value is handed to
+///   `write_sign_extended` together with the target width `n`.
+struct DecimalEncoder<'a, const N: usize, A: DecimalBeBytes<N>> {
+    // Source array; values are read through `DecimalBeBytes::value_be_bytes`.
+    arr: &'a A,
+    // Target width in bytes for the `fixed` layout, or `None` for `bytes`.
+    fixed_size: Option<usize>,
+}
+
+impl<'a, const N: usize, A: DecimalBeBytes<N>> DecimalEncoder<'a, N, A> {
+    /// Creates an encoder over `arr` using the layout selected by `fixed_size`.
+    fn new(arr: &'a A, fixed_size: Option<usize>) -> Self {
+        DecimalEncoder { arr, fixed_size }
+    }
+
+    /// Encodes the decimal at `idx` into `out`, returning whatever the chosen
+    /// writer helper returns.
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        let unscaled_be = self.arr.value_be_bytes(idx);
+        if let Some(width) = self.fixed_size {
+            return write_sign_extended(out, &unscaled_be, width);
+        }
+        write_len_prefixed(out, minimal_twos_complement(&unscaled_be))
+    }
+}
+
+// Concrete `DecimalEncoder` aliases, one per Arrow decimal array type. The
+// const argument equals the byte width declared by that array's
+// `DecimalBeBytes` implementation. The 32- and 64-bit variants exist only when
+// the `small_decimals` feature is enabled.
+#[cfg(feature = "small_decimals")]
+type Decimal32Encoder<'a> = DecimalEncoder<'a, 4, Decimal32Array>;
+#[cfg(feature = "small_decimals")]
+type Decimal64Encoder<'a> = DecimalEncoder<'a, 8, Decimal64Array>;
+type Decimal128Encoder<'a> = DecimalEncoder<'a, 16, Decimal128Array>;
+type Decimal256Encoder<'a> = DecimalEncoder<'a, 32, Decimal256Array>;
+
+/// Generic encoder for Arrow `RunArray<R>`-based sites (run-end encoded).
+/// Follows the pattern used by other generic encoders (i.e., `ListEncoder<O>`),
+/// avoiding runtime branching on run-end width.
+///
+/// The encoder keeps a cursor (`cur_run`, `cur_end`) that only moves forward,
+/// so rows are expected to be requested in non-decreasing order.
+struct RunEncodedEncoder<'a, R: RunEndIndexType> {
+    // Run-end values as returned by `run_ends().values()`.
+    ends_slice: &'a [<R as ArrowPrimitiveType>::Native],
+    // Starting physical index into `ends_slice`, from `get_start_physical_index()`.
+    base: usize,
+    // Value of `run_ends().len()`; used as the upper bound for `cur_run`.
+    len: usize,
+    // Encoder for the run values; it is invoked with the current run index.
+    values: FieldEncoder<'a>,
+    // Cached run index used for sequential scans of rows [0..n)
+    cur_run: usize,
+    // Cached end (logical index, 1-based per spec) for the current run.
+    cur_end: usize,
+}
+
+// Aliases for the three run-end index widths used with `RunEncodedEncoder`.
+type RunEncodedEncoder16<'a> = RunEncodedEncoder<'a, Int16Type>;
+type RunEncodedEncoder32<'a> = RunEncodedEncoder<'a, Int32Type>;
+type RunEncodedEncoder64<'a> = RunEncodedEncoder<'a, Int64Type>;
+
+impl<'a, R: RunEndIndexType> RunEncodedEncoder<'a, R> {
+    /// Builds an encoder over `arr`, delegating value encoding to `values`.
+    ///
+    /// The cursor starts at run 0; `cur_end` is seeded from the run end at
+    /// `base`, or 0 when `len` is 0 (so the slice is never indexed in that case).
+    fn new(arr: &'a RunArray<R>, values: FieldEncoder<'a>) -> Self {
+        let ends = arr.run_ends();
+        let base = ends.get_start_physical_index();
+        let slice = ends.values();
+        // NOTE(review): `len` is later used as a run count (loop bound in
+        // `advance_to_row`, and reported as "runs" in its error message).
+        // Confirm that `run_ends().len()` is the number of runs rather than
+        // the logical row count, and how this interacts with sliced arrays.
+        let len = ends.len();
+        let cur_end = if len == 0 { 0 } else { slice[base].as_usize() };
+        Self {
+            ends_slice: slice,
+            base,
+            len,
+            values,
+            cur_run: 0,
+            cur_end,
+        }
+    }
+
+    /// Advance `cur_run` so that `idx` is within the run ending at `cur_end`.
+    /// Uses the REE invariant: run ends are strictly increasing, positive, and 1-based.
+    ///
+    /// The cursor only moves forward; it is never rewound for a smaller `idx`.
+    ///
+    /// # Errors
+    /// Returns `ArrowError::InvalidArgumentError` when `idx` is still not below
+    /// `cur_end` after the cursor can no longer advance.
+    #[inline(always)]
+    fn advance_to_row(&mut self, idx: usize) -> Result<(), ArrowError> {
+        // Fast path: the row is still inside the cached run.
+        if idx < self.cur_end {
+            return Ok(());
+        }
+        // Move forward across run boundaries until idx falls within cur_end
+        while self.cur_run + 1 < self.len && idx >= self.cur_end {
+            self.cur_run += 1;
+            self.cur_end = self.ends_slice[self.base + self.cur_run].as_usize();
+        }
+        if idx < self.cur_end {
+            Ok(())
+        } else {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "row index {idx} out of bounds for run-ends ({} runs)",
+                self.len
+            )))
+        }
+    }
+
+    /// Encodes logical row `idx` by positioning the cursor on the run that
+    /// contains it and encoding that run's value.
+    ///
+    /// # Errors
+    /// Propagates errors from `advance_to_row` and from the values encoder.
+    #[inline(always)]
+    fn encode<W: Write + ?Sized>(&mut self, out: &mut W, idx: usize) -> Result<(), ArrowError> {
+        self.advance_to_row(idx)?;
+        // For REE values, the value for any logical row within a run is at
+        // the physical index of that run.
+        self.values.encode(out, self.cur_run)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::types::Int32Type;
+    use arrow_array::{
+        Array, ArrayRef, BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array,
+        Int64Array, LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, NullArray,
+        StringArray,
+    };
+    use arrow_buffer::Buffer;
+    use arrow_schema::{DataType, Field, Fields, UnionFields};
+
+    /// Zig-zag maps a signed 64-bit integer onto an unsigned one, so that
+    /// values of small magnitude (of either sign) map to small results.
+    fn zigzag_i64(v: i64) -> u64 {
+        let doubled = v << 1;
+        // Arithmetic shift: all ones for a negative input, all zeros otherwise.
+        let sign_mask = v >> 63;
+        (doubled ^ sign_mask) as u64
+    }
+
+    /// Encodes `x` as a base-128 varint: seven payload bits per byte, least
+    /// significant group first, with the high bit set on every byte but the last.
+    fn varint(mut x: u64) -> Vec<u8> {
+        let mut bytes = Vec::new();
+        loop {
+            let low7 = (x & 0x7f) as u8;
+            x >>= 7;
+            if x == 0 {
+                // Final group: no continuation bit.
+                bytes.push(low7);
+                return bytes;
+            }
+            bytes.push(low7 | 0x80);
+        }
+    }
+
+    /// Reference encoding of an Avro `long`: zig-zag the value, then varint it.
+    fn avro_long_bytes(v: i64) -> Vec<u8> {
+        let zigzagged = zigzag_i64(v);
+        varint(zigzagged)
+    }
+
+    /// Reference encoding of a length-prefixed payload: the payload length as
+    /// an Avro `long`, followed by the payload bytes unchanged.
+    fn avro_len_prefixed_bytes(payload: &[u8]) -> Vec<u8> {
+        let prefix = avro_long_bytes(payload.len() as i64);
+        [prefix.as_slice(), payload].concat()
+    }
+
+    /// Reference layout of a 12-byte duration: `months`, `days`, `millis`,
+    /// each as a little-endian `u32`, in that order.
+    fn duration_fixed12(months: u32, days: u32, millis: u32) -> [u8; 12] {
+        let mut fixed = [0u8; 12];
+        fixed[..4].copy_from_slice(&months.to_le_bytes());
+        fixed[4..8].copy_from_slice(&days.to_le_bytes());
+        fixed[8..].copy_from_slice(&millis.to_le_bytes());
+        fixed
+    }
+
+    /// Encodes every row of `array` in index order with an encoder built from
+    /// `plan` and `nullability`, returning the concatenated output bytes.
+    ///
+    /// Panics if the encoder cannot be built or if any row fails to encode.
+    fn encode_all(
+        array: &dyn Array,
+        plan: &FieldPlan,
+        nullability: Option<Nullability>,
+    ) -> Vec<u8> {
+        let mut encoder = FieldEncoder::make_encoder(array, plan, nullability).unwrap();
+        let mut bytes: Vec<u8> = Vec::new();
+        (0..array.len()).for_each(|row| encoder.encode(&mut bytes, row).unwrap());
+        bytes
+    }
+
+    /// Panics with a hex dump of both slices when `actual` differs from `expected`.
+    fn assert_bytes_eq(actual: &[u8], expected: &[u8]) {
+        // Renders a slice as space-separated, upper-case, two-digit hex.
+        fn hex(bytes: &[u8]) -> String {
+            let rendered: Vec<String> = bytes.iter().map(|byte| format!("{:02X}", byte)).collect();
+            rendered.join(" ")
+        }
+
+        if actual == expected {
+            return;
+        }
+        panic!(
+            "mismatch\n  expected: [{}]\n    actual: [{}]",
+            hex(expected),
+            hex(actual)
+        );
+    }
+
+    #[test]
+    fn binary_encoder() {
+        // Each payload is expected as its length prefix followed by the raw bytes.
+        let payloads: Vec<&[u8]> = vec![b"", b"ab", b"\x00\xFF"];
+        let expected: Vec<u8> = payloads
+            .iter()
+            .flat_map(|payload| avro_len_prefixed_bytes(payload))
+            .collect();
+        let arr = BinaryArray::from_vec(payloads);
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn large_binary_encoder() {
+        // Same expectation as the 32-bit-offset variant: length prefix, then bytes.
+        let payloads: Vec<&[u8]> = vec![b"xyz", b""];
+        let expected: Vec<u8> = payloads
+            .iter()
+            .flat_map(|payload| avro_len_prefixed_bytes(payload))
+            .collect();
+        let arr = LargeBinaryArray::from_vec(payloads);
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn utf8_encoder() {
+        // Strings are expected as a length prefix followed by their UTF-8 bytes.
+        let strings = ["", "A", "BC"];
+        let expected: Vec<u8> = strings
+            .iter()
+            .flat_map(|s| avro_len_prefixed_bytes(s.as_bytes()))
+            .collect();
+        let arr = StringArray::from(strings.to_vec());
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn large_utf8_encoder() {
+        // Strings are expected as a length prefix followed by their UTF-8 bytes.
+        let strings = ["hello", ""];
+        let expected: Vec<u8> = strings
+            .iter()
+            .flat_map(|s| avro_len_prefixed_bytes(s.as_bytes()))
+            .collect();
+        let arr = LargeStringArray::from(strings.to_vec());
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn list_encoder_int32() {
+        // ListArray with rows [[1,2], [], [3]]
+        let item_values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let row_offsets = arrow_buffer::OffsetBuffer::new(vec![0i32, 2, 2, 3].into());
+        let list = ListArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            row_offsets,
+            item_values,
+            None,
+        );
+        let plan = FieldPlan::List {
+            items_nullability: None,
+            item_plan: Box::new(FieldPlan::Scalar),
+        };
+
+        // Per row: a block count, the items, then a terminating 0.
+        // An empty row is just the terminating 0.
+        let mut expected = Vec::new();
+        let rows: [&[i64]; 3] = [
+            &[2, 1, 2, 0], // row 0: count 2, items 1 and 2, end
+            &[0],          // row 1: empty
+            &[1, 3, 0],    // row 2: count 1, item 3, end
+        ];
+        for row in rows {
+            for &long in row {
+                expected.extend(avro_long_bytes(long));
+            }
+        }
+
+        let got = encode_all(&list, &plan, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn struct_encoder_two_fields() {
+        // Struct { a: Int32, b: Utf8 } with rows (1, "x") and (2, "y").
+        let rows = [(1i32, "x"), (2i32, "y")];
+        let col_a: ArrayRef = Arc::new(Int32Array::from(vec![rows[0].0, rows[1].0]));
+        let col_b: ArrayRef = Arc::new(StringArray::from(vec![rows[0].1, rows[1].1]));
+        let struct_arr = StructArray::new(
+            Fields::from(vec![
+                Field::new("a", DataType::Int32, true),
+                Field::new("b", DataType::Utf8, true),
+            ]),
+            vec![col_a, col_b],
+            None,
+        );
+        // Bind both Arrow columns, in order, as plain scalars.
+        let plan = FieldPlan::Struct {
+            bindings: (0..2)
+                .map(|arrow_index| FieldBinding {
+                    arrow_index,
+                    nullability: None,
+                    plan: FieldPlan::Scalar,
+                })
+                .collect(),
+        };
+        let got = encode_all(&struct_arr, &plan, None);
+        // Expected: rows concatenated, each row being a then b.
+        let mut expected = Vec::new();
+        for (a, b) in rows {
+            expected.extend(avro_long_bytes(i64::from(a)));
+            expected.extend(avro_len_prefixed_bytes(b.as_bytes()));
+        }
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn enum_encoder_dictionary() {
+        // Dictionary values double as the enum symbols: ["A","B","C"].
+        let labels = ["A", "B", "C"];
+        let key_values = [2, 0, 1];
+        let dict_values: ArrayRef = Arc::new(StringArray::from(labels.to_vec()));
+        let dict = DictionaryArray::<Int32Type>::try_new(
+            Int32Array::from(key_values.to_vec()),
+            dict_values,
+        )
+        .unwrap();
+        let symbols: Arc<[String]> = labels.iter().map(|label| label.to_string()).collect();
+        let plan = FieldPlan::Enum { symbols };
+        let got = encode_all(&dict, &plan, None);
+        // Each row is expected as its dictionary key written as an Avro long.
+        let expected: Vec<u8> = key_values
+            .iter()
+            .flat_map(|&key| avro_long_bytes(i64::from(key)))
+            .collect();
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn decimal_bytes_and_fixed() {
+        // Small positive, negative and zero unscaled values in a Decimal128 column.
+        let unscaled = [1i128, -1i128, 0i128];
+        let dec = Decimal128Array::from(unscaled.to_vec())
+            .with_precision_and_scale(20, 0)
+            .unwrap();
+
+        // bytes(decimal): length-prefixed minimal two's complement.
+        // 1 -> 0x01; -1 -> 0xFF; 0 -> 0x00
+        let expected_bytes: Vec<u8> = [0x01u8, 0xFF, 0x00]
+            .iter()
+            .flat_map(|&byte| avro_len_prefixed_bytes(&[byte]))
+            .collect();
+        let got_bytes = encode_all(&dec, &FieldPlan::Decimal { size: None }, None);
+        assert_bytes_eq(&got_bytes, &expected_bytes);
+
+        // fixed(16): each value is expected in its full 16-byte big-endian form.
+        let expected_fixed: Vec<u8> = unscaled.iter().flat_map(|v| v.to_be_bytes()).collect();
+        let got_fixed = encode_all(&dec, &FieldPlan::Decimal { size: Some(16) }, None);
+        assert_bytes_eq(&got_fixed, &expected_fixed);
+    }
+
+    #[test]
+    fn decimal_bytes_256() {
+        use arrow_buffer::i256;
+        // Small positive, negative and zero unscaled values in a Decimal256 column.
+        let unscaled = [1i128, -1, 0].map(i256::from_i128);
+        let dec = Decimal256Array::from(unscaled.to_vec())
+            .with_precision_and_scale(76, 0)
+            .unwrap();
+
+        // bytes(decimal): length-prefixed minimal two's complement.
+        // 1 -> 0x01; -1 -> 0xFF; 0 -> 0x00
+        let expected_bytes: Vec<u8> = [0x01u8, 0xFF, 0x00]
+            .iter()
+            .flat_map(|&byte| avro_len_prefixed_bytes(&[byte]))
+            .collect();
+        let got_bytes = encode_all(&dec, &FieldPlan::Decimal { size: None }, None);
+        assert_bytes_eq(&got_bytes, &expected_bytes);
+
+        // fixed(32): 32-byte big-endian two's complement per value.
+        let expected_fixed: Vec<u8> = unscaled.iter().flat_map(|v| v.to_be_bytes()).collect();
+        let got_fixed = encode_all(&dec, &FieldPlan::Decimal { size: Some(32) }, None);
+        assert_bytes_eq(&got_fixed, &expected_fixed);
+    }
+
+    #[cfg(feature = "small_decimals")]
+    #[test]
+    fn decimal_bytes_and_fixed_32() {
+        // Small positive, negative and zero unscaled values in a Decimal32 column.
+        let unscaled = [1i32, -1i32, 0i32];
+        let dec = Decimal32Array::from(unscaled.to_vec())
+            .with_precision_and_scale(9, 0)
+            .unwrap();
+
+        // bytes(decimal): one length-prefixed byte per value.
+        let expected_bytes: Vec<u8> = [0x01u8, 0xFF, 0x00]
+            .iter()
+            .flat_map(|&byte| avro_len_prefixed_bytes(&[byte]))
+            .collect();
+        let got_bytes = encode_all(&dec, &FieldPlan::Decimal { size: None }, None);
+        assert_bytes_eq(&got_bytes, &expected_bytes);
+
+        // fixed(4): each value in its 4-byte big-endian form.
+        let expected_fixed: Vec<u8> = unscaled.iter().flat_map(|v| v.to_be_bytes()).collect();
+        let got_fixed = encode_all(&dec, &FieldPlan::Decimal { size: Some(4) }, None);
+        assert_bytes_eq(&got_fixed, &expected_fixed);
+    }
+
+    #[cfg(feature = "small_decimals")]
+    #[test]
+    fn decimal_bytes_and_fixed_64() {
+        // Small positive, negative and zero unscaled values in a Decimal64 column.
+        let unscaled = [1i64, -1i64, 0i64];
+        let dec = Decimal64Array::from(unscaled.to_vec())
+            .with_precision_and_scale(18, 0)
+            .unwrap();
+
+        // bytes(decimal): one length-prefixed byte per value.
+        let expected_bytes: Vec<u8> = [0x01u8, 0xFF, 0x00]
+            .iter()
+            .flat_map(|&byte| avro_len_prefixed_bytes(&[byte]))
+            .collect();
+        let got_bytes = encode_all(&dec, &FieldPlan::Decimal { size: None }, None);
+        assert_bytes_eq(&got_bytes, &expected_bytes);
+
+        // fixed(8): each value in its 8-byte big-endian form.
+        let expected_fixed: Vec<u8> = unscaled.iter().flat_map(|v| v.to_be_bytes()).collect();
+        let got_fixed = encode_all(&dec, &FieldPlan::Decimal { size: Some(8) }, None);
+        assert_bytes_eq(&got_fixed, &expected_fixed);
+    }
+
+    #[test]
+    fn float32_and_float64_encoders() {
+        // Floats are expected as their raw bit patterns in little-endian order.
+        // The f32 inputs include a quiet NaN bit pattern.
+        let f32_values = [0.0f32, -1.5f32, f32::from_bits(0x7fc00000)];
+        let f32a = Float32Array::from(f32_values.to_vec());
+        let expected32: Vec<u8> = f32_values
+            .iter()
+            .flat_map(|v| v.to_bits().to_le_bytes())
+            .collect();
+        let got32 = encode_all(&f32a, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got32, &expected32);
+
+        let f64_values = [0.0f64, -2.25f64];
+        let f64a = Float64Array::from(f64_values.to_vec());
+        let expected64: Vec<u8> = f64_values
+            .iter()
+            .flat_map(|v| v.to_bits().to_le_bytes())
+            .collect();
+        let got64 = encode_all(&f64a, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got64, &expected64);
+    }
+
+    #[test]
+    fn long_encoder_int64() {
+        // Zero, small values of both signs, and a value near the i64 minimum.
+        let longs = [0i64, 1i64, -1i64, 2i64, -2i64, i64::MIN + 1];
+        let arr = Int64Array::from(longs.to_vec());
+        let expected: Vec<u8> = longs.iter().flat_map(|&v| avro_long_bytes(v)).collect();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn fixed_encoder_plain() {
+        // Two values of width 4; the expected output is the rows back to back.
+        let rows: [[u8; 4]; 2] = [[0xDE, 0xAD, 0xBE, 0xEF], [0x00, 0x01, 0x02, 0x03]];
+        let arr =
+            FixedSizeBinaryArray::try_from_iter(rows.iter().map(|row| row.to_vec())).unwrap();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        let expected: Vec<u8> = rows.concat();
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn uuid_encoder_test() {
+        // Happy path: 16 raw bytes in, hyphenated text out.
+        let u = Uuid::parse_str("00112233-4455-6677-8899-aabbccddeeff").unwrap();
+        let raw = u.as_bytes().to_vec();
+        let arr_ok = FixedSizeBinaryArray::try_from_iter(std::iter::once(raw)).unwrap();
+        // Expected: length 36 (0x48) followed by hyphenated lowercase text
+        let text = u.hyphenated().to_string();
+        let mut expected = vec![0x48u8];
+        expected.extend_from_slice(text.as_bytes());
+        let got = encode_all(&arr_ok, &FieldPlan::Uuid, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn uuid_encoder_error() {
+        // Invalid UUID bytes: a single value of width 10 instead of 16.
+        let zeros = arrow_buffer::Buffer::from(vec![0u8; 10]);
+        let arr = FixedSizeBinaryArray::try_new(10, zeros, None).unwrap();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Uuid, None).unwrap();
+        let mut out: Vec<u8> = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err();
+        if let ArrowError::InvalidArgumentError(msg) = &err {
+            assert!(msg.contains("Invalid UUID bytes"))
+        } else {
+            panic!("expected InvalidArgumentError, got {err:?}")
+        }
+    }
+
+    /// Shared check for primitive types whose native value widens to `i64`.
+    ///
+    /// Encodes `non_nullable_data` without a nullability wrapper and
+    /// `nullable_data` with `Nullability::NullFirst`, then compares both
+    /// outputs with byte sequences assembled from `avro_long_bytes`.
+    fn test_scalar_primitive_encoding<T>(
+        non_nullable_data: &[T::Native],
+        nullable_data: &[Option<T::Native>],
+    ) where
+        T: ArrowPrimitiveType,
+        T::Native: Into<i64> + Copy,
+        PrimitiveArray<T>: From<Vec<<T as ArrowPrimitiveType>::Native>>,
+    {
+        let plan = FieldPlan::Scalar;
+
+        // Non-nullable column: every value is expected as a bare long.
+        let dense = PrimitiveArray::<T>::from(non_nullable_data.to_vec());
+        let got_dense = encode_all(&dense, &plan, None);
+        let expected_dense: Vec<u8> = non_nullable_data
+            .iter()
+            .flat_map(|&value| avro_long_bytes(value.into()))
+            .collect();
+        assert_bytes_eq(&got_dense, &expected_dense);
+
+        // Nullable column: a union branch index precedes each slot.
+        let sparse: PrimitiveArray<T> = nullable_data.iter().copied().collect();
+        let got_sparse = encode_all(&sparse, &plan, Some(Nullability::NullFirst));
+        let mut expected_sparse = Vec::new();
+        for slot in nullable_data {
+            match *slot {
+                // Union index 0 for the null
+                None => expected_sparse.extend(avro_long_bytes(0)),
+                // Union index 1 for the value, then the value itself
+                Some(value) => {
+                    expected_sparse.extend(avro_long_bytes(1));
+                    expected_sparse.extend(avro_long_bytes(value.into()));
+                }
+            }
+        }
+        assert_bytes_eq(&got_sparse, &expected_sparse);
+    }
+
+    #[test]
+    fn date32_encoder() {
+        let plain: [i32; 3] = [
+            19345, // 2022-12-20
+            0,     // 1970-01-01 (epoch)
+            -1,    // 1969-12-31 (pre-epoch)
+        ];
+        let with_null: [Option<i32>; 2] = [Some(19345), None];
+        test_scalar_primitive_encoding::<Date32Type>(&plain, &with_null);
+    }
+
+    #[test]
+    fn time32_millis_encoder() {
+        let plain: [i32; 3] = [
+            0,        // Midnight
+            49530123, // 13:45:30.123
+            86399999, // 23:59:59.999
+        ];
+        let with_null: [Option<i32>; 2] = [None, Some(49530123)];
+        test_scalar_primitive_encoding::<Time32MillisecondType>(&plain, &with_null);
+    }
+
+    #[test]
+    fn time64_micros_encoder() {
+        let plain: [i64; 2] = [
+            0,           // Midnight
+            86399999999, // 23:59:59.999999
+        ];
+        let with_null: [Option<i64>; 2] = [Some(86399999999), None];
+        test_scalar_primitive_encoding::<Time64MicrosecondType>(&plain, &with_null);
+    }
+
+    #[test]
+    fn timestamp_millis_encoder() {
+        let plain: [i64; 3] = [
+            1704067200000, // 2024-01-01T00:00:00Z
+            0,             // 1970-01-01T00:00:00Z (epoch)
+            -123456789,    // Pre-epoch timestamp
+        ];
+        let with_null: [Option<i64>; 2] = [None, Some(1704067200000)];
+        test_scalar_primitive_encoding::<TimestampMillisecondType>(&plain, &with_null);
+    }
+
+    #[test]
+    fn map_encoder_string_keys_int_values() {
+        // Two map rows: row 0 is {"k1": 1, "k2": 2}, row 1 is {}.
+        let key_col: ArrayRef = Arc::new(StringArray::from(vec!["k1", "k2"]));
+        let value_col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
+        let entries = StructArray::new(
+            Fields::from(vec![
+                Field::new("key", DataType::Utf8, false),
+                Field::new("value", DataType::Int32, true),
+            ]),
+            vec![key_col, value_col],
+            None,
+        );
+        // The field must be derived before `entries` is moved into the map.
+        let entries_field = Arc::new(Field::new("entries", entries.data_type().clone(), false));
+        let row_offsets = arrow_buffer::OffsetBuffer::new(vec![0i32, 2, 2].into());
+        let map = MapArray::new(entries_field, row_offsets, entries, None, false);
+        let plan = FieldPlan::Map {
+            values_nullability: None,
+            value_plan: Box::new(FieldPlan::Scalar),
+        };
+        let got = encode_all(&map, &plan, None);
+        let expected: Vec<u8> = [
+            // Row 0: block of 2, then key/value pairs, then the terminating 0
+            avro_long_bytes(2),
+            avro_len_prefixed_bytes(b"k1"),
+            avro_long_bytes(1),
+            avro_len_prefixed_bytes(b"k2"),
+            avro_long_bytes(2),
+            avro_long_bytes(0),
+            // Row 1: empty
+            avro_long_bytes(0),
+        ]
+        .concat();
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn union_encoder_string_int() {
+        // Dense union over (Utf8, Int32) with five rows:
+        // "hello", 10, 20, "world", 30.
+        let union_fields = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new("v_str", DataType::Utf8, true),
+                Field::new("v_int", DataType::Int32, true),
+            ],
+        )
+        .unwrap();
+
+        let children: Vec<ArrayRef> = vec![
+            Arc::new(StringArray::from(vec!["hello", "world"])),
+            Arc::new(Int32Array::from(vec![10, 20, 30])),
+        ];
+        // Per row: which child it belongs to, and its position within that child.
+        let type_ids = Buffer::from_slice_ref([0_i8, 1, 1, 0, 1]);
+        let offsets = Buffer::from_slice_ref([0_i32, 0, 1, 1, 2]);
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids.into(),
+            Some(offsets.into()),
+            children,
+        )
+        .unwrap();
+
+        let plan = FieldPlan::Union {
+            bindings: (0..2)
+                .map(|arrow_index| FieldBinding {
+                    arrow_index,
+                    nullability: None,
+                    plan: FieldPlan::Scalar,
+                })
+                .collect(),
+        };
+
+        let got = encode_all(&union_array, &plan, None);
+
+        // Each row: the branch index as a long, then the branch's value.
+        let expected: Vec<u8> = [
+            avro_long_bytes(0),
+            avro_len_prefixed_bytes(b"hello"),
+            avro_long_bytes(1),
+            avro_long_bytes(10),
+            avro_long_bytes(1),
+            avro_long_bytes(20),
+            avro_long_bytes(0),
+            avro_len_prefixed_bytes(b"world"),
+            avro_long_bytes(1),
+            avro_long_bytes(30),
+        ]
+        .concat();
+
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn union_encoder_null_string_int() {
+        // Dense union over (Null, Utf8, Int32) with one row per branch.
+        let union_fields = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new("v_null", DataType::Null, true),
+                Field::new("v_str", DataType::Utf8, true),
+                Field::new("v_int", DataType::Int32, true),
+            ],
+        )
+        .unwrap();
+
+        let children: Vec<ArrayRef> = vec![
+            Arc::new(NullArray::new(1)),
+            Arc::new(StringArray::from(vec!["hello"])),
+            Arc::new(Int32Array::from(vec![10])),
+        ];
+        let type_ids = Buffer::from_slice_ref([0_i8, 1, 2]);
+        // Every child holds exactly one element, so each row's offset into its
+        // own child is 0.
+        let offsets = Buffer::from_slice_ref([0_i32, 0, 0]);
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids.into(),
+            Some(offsets.into()),
+            children,
+        )
+        .unwrap();
+
+        let plan = FieldPlan::Union {
+            bindings: (0..3)
+                .map(|arrow_index| FieldBinding {
+                    arrow_index,
+                    nullability: None,
+                    plan: FieldPlan::Scalar,
+                })
+                .collect(),
+        };
+
+        let got = encode_all(&union_array, &plan, None);
+
+        let expected: Vec<u8> = [
+            // Null branch: only the branch index is expected.
+            avro_long_bytes(0),
+            // String branch: index, then the length-prefixed text.
+            avro_long_bytes(1),
+            avro_len_prefixed_bytes(b"hello"),
+            // Int branch: index, then the value.
+            avro_long_bytes(2),
+            avro_long_bytes(10),
+        ]
+        .concat();
+
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn list64_encoder_int32() {
+        // LargeList with rows [[1,2,3], []]
+        let item_values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let row_offsets = arrow_buffer::OffsetBuffer::new(vec![0i64, 3, 3].into());
+        let list = LargeListArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            row_offsets,
+            item_values,
+            None,
+        );
+        let plan = FieldPlan::List {
+            items_nullability: None,
+            item_plan: Box::new(FieldPlan::Scalar),
+        };
+        let got = encode_all(&list, &plan, None);
+        // Row 0: one block of 3 items, then the terminating 0.
+        // Row 1: empty, so only the terminating 0.
+        let expected: Vec<u8> = [3i64, 1, 2, 3, 0, 0]
+            .iter()
+            .flat_map(|&long| avro_long_bytes(long))
+            .collect();
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn int_encoder_test() {
+        // 32-bit ints are expected to encode like the same value widened to a long.
+        let values = [0i32, -1, 2];
+        let ints = Int32Array::from(values.to_vec());
+        let expected_i: Vec<u8> = values
+            .iter()
+            .flat_map(|&v| avro_long_bytes(i64::from(v)))
+            .collect();
+        let got_i = encode_all(&ints, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got_i, &expected_i);
+    }
+
+    #[test]
+    fn boolean_encoder_test() {
+        // One byte per value: 1 for true, 0 for false.
+        let bools = BooleanArray::from(vec![true, false]);
+        let expected_b: Vec<u8> = vec![1, 0];
+        let got_b = encode_all(&bools, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got_b, &expected_b);
+    }
+
+    #[test]
+    #[cfg(feature = "avro_custom_types")]
+    fn duration_encoding_seconds() {
+        // Each value is expected as a plain Avro long, negatives included.
+        let values = [0i64, -1, 2];
+        let arr = PrimitiveArray::<DurationSecondType>::from(values.to_vec());
+        let expected: Vec<u8> = values.iter().flat_map(|&v| avro_long_bytes(v)).collect();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    #[cfg(feature = "avro_custom_types")]
+    fn duration_encoding_milliseconds() {
+        // Each value is expected as a plain Avro long, negatives included.
+        let values = [1i64, 0, -2];
+        let arr = PrimitiveArray::<DurationMillisecondType>::from(values.to_vec());
+        let expected: Vec<u8> = values.iter().flat_map(|&v| avro_long_bytes(v)).collect();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    #[cfg(feature = "avro_custom_types")]
+    fn duration_encoding_microseconds() {
+        let arr: PrimitiveArray<DurationMicrosecondType> = vec![5i64, -6, 7].into();
+        let mut expected = Vec::new();
+        for v in [5i64, -6, 7] {
+            expected.extend_from_slice(&avro_long_bytes(v)); // raw i64 value, no unit scaling
+        }
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected); // covers positive and negative microseconds
+    }
+
+    #[test]
+    #[cfg(feature = "avro_custom_types")]
+    fn duration_encoding_nanoseconds() {
+        let arr: PrimitiveArray<DurationNanosecondType> = vec![8i64, 9, -10].into();
+        let mut expected = Vec::new();
+        for v in [8i64, 9, -10] {
+            expected.extend_from_slice(&avro_long_bytes(v)); // raw i64 value, no unit scaling
+        }
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected); // covers positive and negative nanoseconds
+    }
+
+    #[test]
+    fn duration_encoder_year_month_happy_path() {
+        let arr: PrimitiveArray<IntervalYearMonthType> = vec![0i32, 1i32, 25i32].into();
+        let mut expected = Vec::new();
+        for m in [0u32, 1u32, 25u32] {
+            expected.extend_from_slice(&duration_fixed12(m, 0, 0)); // months only; rest is 0
+        }
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected); // non-negative month counts must encode cleanly
+    }
+
+    #[test]
+    fn duration_encoder_year_month_rejects_negative() {
+        let arr: PrimitiveArray<IntervalYearMonthType> = vec![-1i32].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err(); // the -1 month value must be refused
+        match err {
+            ArrowError::InvalidArgumentError(msg) => {
+                assert!(msg.contains("cannot encode negative months"))
+            }
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn duration_encoder_day_time_happy_path() {
+        let v0 = IntervalDayTimeType::make_value(2, 500); // days=2, millis=500
+        let v1 = IntervalDayTimeType::make_value(0, 0);
+        let arr: PrimitiveArray<IntervalDayTimeType> = vec![v0, v1].into();
+        let mut expected = Vec::new();
+        expected.extend_from_slice(&duration_fixed12(0, 2, 500)); // months slot is 0 for v0
+        expected.extend_from_slice(&duration_fixed12(0, 0, 0)); // v1 is the all-zero interval
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn duration_encoder_day_time_rejects_negative() {
+        let bad = IntervalDayTimeType::make_value(-1, 0); // days = -1, millis = 0
+        let arr: PrimitiveArray<IntervalDayTimeType> = vec![bad].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err(); // the negative day count must be refused
+        match err {
+            ArrowError::InvalidArgumentError(msg) => {
+                assert!(msg.contains("cannot encode negative days"))
+            }
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn duration_encoder_month_day_nano_happy_path() {
+        let v0 = IntervalMonthDayNanoType::make_value(1, 2, 3_000_000); // 3_000_000 ns -> millis = 3
+        let v1 = IntervalMonthDayNanoType::make_value(0, 0, 0);
+        let arr: PrimitiveArray<IntervalMonthDayNanoType> = vec![v0, v1].into();
+        let mut expected = Vec::new();
+        expected.extend_from_slice(&duration_fixed12(1, 2, 3)); // months=1, days=2, millis=3
+        expected.extend_from_slice(&duration_fixed12(0, 0, 0)); // v1 is the all-zero interval
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn duration_encoder_month_day_nano_rejects_non_ms_multiple() {
+        let bad = IntervalMonthDayNanoType::make_value(0, 0, 1); // 1 ns is not a whole millisecond
+        let arr: PrimitiveArray<IntervalMonthDayNanoType> = vec![bad].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err();
+        match err {
+            ArrowError::InvalidArgumentError(msg) => {
+                assert!(msg.contains("requires whole milliseconds") || msg.contains("divisible"))
+            }
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn minimal_twos_complement_test() {
+        let pos = [0x00, 0x00, 0x01]; // expected result keeps only the final 0x01
+        assert_eq!(minimal_twos_complement(&pos), &pos[2..]);
+        let neg = [0xFF, 0xFF, 0x80]; // negative minimal is 0x80
+        assert_eq!(minimal_twos_complement(&neg), &neg[2..]);
+        let zero = [0x00, 0x00, 0x00]; // all-zero input is expected to keep one 0x00 byte
+        assert_eq!(minimal_twos_complement(&zero), &zero[2..]);
+    }
+
+    #[test]
+    fn write_sign_extend_test() {
+        let mut out = Vec::new();
+        write_sign_extended(&mut out, &[0x01], 4).unwrap(); // positive: padded with 0x00
+        assert_eq!(out, vec![0x00, 0x00, 0x00, 0x01]);
+        out.clear();
+        write_sign_extended(&mut out, &[0xFF], 4).unwrap(); // negative: padded with 0xFF
+        assert_eq!(out, vec![0xFF, 0xFF, 0xFF, 0xFF]);
+        out.clear();
+        // truncation success (sign bytes only removed)
+        write_sign_extended(&mut out, &[0xFF, 0xFF, 0x80], 2).unwrap();
+        assert_eq!(out, vec![0xFF, 0x80]);
+        out.clear();
+        // truncation overflow: the two-byte input [0x01, 0x00] is requested as one byte
+        let err = write_sign_extended(&mut out, &[0x01, 0x00], 1).unwrap_err();
+        match err {
+            ArrowError::InvalidArgumentError(_) => {}
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn duration_month_day_nano_overflow_millis() {
+        // Whole-millisecond nanos whose millisecond count is u32::MAX + 1 (one past the limit).
+        let nanos = ((u64::from(u32::MAX) + 1) * 1_000_000) as i64;
+        let v = IntervalMonthDayNanoType::make_value(0, 0, nanos);
+        let arr: PrimitiveArray<IntervalMonthDayNanoType> = vec![v].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err();
+        match err {
+            ArrowError::InvalidArgumentError(msg) => assert!(msg.contains("exceed u32::MAX")),
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn fieldplan_decimal_precision_scale_mismatch_errors() {
+        // Avro expects (10,2), Arrow has (12,2): building the plan must fail with a SchemaError.
+        use crate::codec::Codec;
+        use std::collections::HashMap;
+        let arrow_field = Field::new("d", DataType::Decimal128(12, 2), true);
+        let avro_dt = AvroDataType::new(Codec::Decimal(10, Some(2), None), HashMap::new(), None);
+        let err = FieldPlan::build(&avro_dt, &arrow_field).unwrap_err();
+        match err {
+            ArrowError::SchemaError(msg) => {
+                assert!(msg.contains("Decimal precision/scale mismatch"))
+            }
+            other => panic!("expected SchemaError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn timestamp_micros_encoder() {
+        // Mirrors the style used by `timestamp_millis_encoder`
+        test_scalar_primitive_encoding::<TimestampMicrosecondType>(
+            &[
+                1_704_067_200_000_000, // 2024-01-01T00:00:00Z in micros
+                0,                     // epoch
+                -123_456_789,          // pre-epoch
+            ],
+            &[None, Some(1_704_067_200_000_000)], // second data set includes a null entry
+        );
+    }
+
+    #[test]
+    fn list_encoder_nullable_items_null_first() {
+        // One List row with three elements: [Some(1), None, Some(2)]
+        let values = Int32Array::from(vec![Some(1), None, Some(2)]);
+        let offsets = arrow_buffer::OffsetBuffer::new(vec![0i32, 3].into()); // one row: items 0..3
+        let list = ListArray::new(
+            Field::new("item", DataType::Int32, true).into(),
+            offsets,
+            Arc::new(values) as ArrayRef,
+            None,
+        );
+
+        let plan = FieldPlan::List {
+            items_nullability: Some(Nullability::NullFirst),
+            item_plan: Box::new(FieldPlan::Scalar),
+        };
+
+        // Avro array encoding per row: one positive block, then 0 terminator.
+        // For NullFirst: Some(v) => branch 1 (0x02) then the value; None => branch 0 (0x00)
+        let mut expected = Vec::new();
+        expected.extend(avro_long_bytes(3)); // block of 3
+        expected.extend(avro_long_bytes(1)); // union branch=1 (value)
+        expected.extend(avro_long_bytes(1)); // value 1
+        expected.extend(avro_long_bytes(0)); // union branch=0 (null)
+        expected.extend(avro_long_bytes(1)); // union branch=1 (value)
+        expected.extend(avro_long_bytes(2)); // value 2
+        expected.extend(avro_long_bytes(0)); // block terminator
+
+        let got = encode_all(&list, &plan, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn large_list_encoder_nullable_items_null_first() {
+        // LargeList single row: [Some(10), None]
+        let values = Int32Array::from(vec![Some(10), None]);
+        let offsets = arrow_buffer::OffsetBuffer::new(vec![0i64, 2].into()); // i64 offsets, one row
+        let list = LargeListArray::new(
+            Field::new("item", DataType::Int32, true).into(),
+            offsets,
+            Arc::new(values) as ArrayRef,
+            None,
+        );
+
+        let plan = FieldPlan::List {
+            items_nullability: Some(Nullability::NullFirst),
+            item_plan: Box::new(FieldPlan::Scalar),
+        };
+
+        let mut expected = Vec::new();
+        expected.extend(avro_long_bytes(2)); // block of 2
+        expected.extend(avro_long_bytes(1)); // union branch=1 (value)
+        expected.extend(avro_long_bytes(10)); // value 10
+        expected.extend(avro_long_bytes(0)); // union branch=0 (null)
+        expected.extend(avro_long_bytes(0)); // block terminator
+
+        let got = encode_all(&list, &plan, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn map_encoder_string_keys_nullable_int_values_null_first() {
+        // One map row: {"k1": Some(7), "k2": None}
+        let keys = StringArray::from(vec!["k1", "k2"]);
+        let values = Int32Array::from(vec![Some(7), None]);
+
+        let entries_fields = Fields::from(vec![
+            Field::new("key", DataType::Utf8, false), // keys are declared non-nullable
+            Field::new("value", DataType::Int32, true), // values are declared nullable
+        ]);
+        let entries = StructArray::new(
+            entries_fields,
+            vec![Arc::new(keys) as ArrayRef, Arc::new(values) as ArrayRef],
+            None,
+        );
+
+        // Single row -> offsets [0, 2]
+        let offsets = arrow_buffer::OffsetBuffer::new(vec![0i32, 2].into());
+        let map = MapArray::new(
+            Field::new("entries", entries.data_type().clone(), false).into(),
+            offsets,
+            entries,
+            None,
+            false,
+        );
+
+        let plan = FieldPlan::Map {
+            values_nullability: Some(Nullability::NullFirst), // values get a union branch marker
+            value_plan: Box::new(FieldPlan::Scalar),
+        };
+
+        // Expected:
+        // - one positive block (len=2)
+        // - "k1", branch=1 + value=7
+        // - "k2", branch=0 (null)
+        // - end-of-block marker 0
+        let mut expected = Vec::new();
+        expected.extend(avro_long_bytes(2)); // block length 2
+        expected.extend(avro_len_prefixed_bytes(b"k1")); // key "k1"
+        expected.extend(avro_long_bytes(1)); // union branch 1 (value)
+        expected.extend(avro_long_bytes(7)); // value 7
+        expected.extend(avro_len_prefixed_bytes(b"k2")); // key "k2"
+        expected.extend(avro_long_bytes(0)); // union branch 0 (null)
+        expected.extend(avro_long_bytes(0)); // block terminator
+
+        let got = encode_all(&map, &plan, None);
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn time32_seconds_to_millis_encoder() {
+        // Time32(Second) must encode as Avro time-millis (ms since midnight).
+        // A negative input is included so the sign is checked through the scaling too.
+        let arr: PrimitiveArray<Time32SecondType> = vec![0i32, 1, -2, 12_345].into();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        let mut expected = Vec::new();
+        for secs in [0i32, 1, -2, 12_345] {
+            let millis = (secs as i64) * 1000;
+            expected.extend_from_slice(&avro_long_bytes(millis));
+        }
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn time32_seconds_to_millis_overflow() {
+        // Choose a value that will overflow i32 when multiplied by 1000.
+        let overflow_secs: i32 = i32::MAX / 1000 + 1;
+        let arr: PrimitiveArray<Time32SecondType> = vec![overflow_secs].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err();
+        match err {
+            ArrowError::InvalidArgumentError(msg) => {
+                // "overflowed" contains "overflow", so one substring check accepts both
+                // wordings. The message is echoed on failure so that a changed error
+                // text is easy to spot.
+                assert!(msg.contains("overflow"), "unexpected message: {msg}")
+            }
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn timestamp_seconds_to_millis_encoder() {
+        // Timestamp(Second) must encode as Avro timestamp-millis (ms since epoch).
+        let arr: PrimitiveArray<TimestampSecondType> = vec![0i64, 1, -1, 1_234_567_890].into();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        let mut expected = Vec::new();
+        for secs in [0i64, 1, -1, 1_234_567_890] {
+            let millis = secs * 1000; // expected value is the input scaled from s to ms
+            expected.extend_from_slice(&avro_long_bytes(millis));
+        }
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn timestamp_seconds_to_millis_overflow() {
+        // Overflow i64 when multiplied by 1000.
+        let overflow_secs: i64 = i64::MAX / 1000 + 1;
+        let arr: PrimitiveArray<TimestampSecondType> = vec![overflow_secs].into();
+        let mut enc = FieldEncoder::make_encoder(&arr, &FieldPlan::Scalar, None).unwrap();
+        let mut out = Vec::new();
+        let err = enc.encode(&mut out, 0).unwrap_err();
+        match err {
+            ArrowError::InvalidArgumentError(msg) => {
+                // "overflowed" contains "overflow", so one substring check accepts both
+                // wordings. The message is echoed on failure so that a changed error
+                // text is easy to spot.
+                assert!(msg.contains("overflow"), "unexpected message: {msg}")
+            }
+            other => panic!("expected InvalidArgumentError, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn timestamp_nanos_encoder() {
+        let arr: PrimitiveArray<TimestampNanosecondType> = vec![0i64, 1, -1, 123].into();
+        let got = encode_all(&arr, &FieldPlan::Scalar, None);
+        let mut expected = Vec::new();
+        for ns in [0i64, 1, -1, 123] {
+            expected.extend_from_slice(&avro_long_bytes(ns)); // nanosecond value, unscaled
+        }
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn union_encoder_string_int_nonzero_type_ids() {
+        let strings = StringArray::from(vec!["hello", "world"]);
+        let ints = Int32Array::from(vec![10, 20, 30]);
+        let union_fields = UnionFields::try_new(
+            vec![2, 5], // type ids deliberately differ from the child positions 0 and 1
+            vec![
+                Field::new("v_str", DataType::Utf8, true),
+                Field::new("v_int", DataType::Int32, true),
+            ],
+        )
+        .unwrap();
+        let type_ids = Buffer::from_slice_ref([2_i8, 5, 5, 2, 5]); // rows: str, int, int, str, int
+        let offsets = Buffer::from_slice_ref([0_i32, 0, 1, 1, 2]); // row's index within its child
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids.into(),
+            Some(offsets.into()),
+            vec![Arc::new(strings), Arc::new(ints)],
+        )
+        .unwrap();
+        let plan = FieldPlan::Union {
+            bindings: vec![
+                FieldBinding {
+                    arrow_index: 0,
+                    nullability: None,
+                    plan: FieldPlan::Scalar,
+                },
+                FieldBinding {
+                    arrow_index: 1,
+                    nullability: None,
+                    plan: FieldPlan::Scalar,
+                },
+            ],
+        };
+        let got = encode_all(&union_array, &plan, None);
+        let mut expected = Vec::new();
+        expected.extend(avro_long_bytes(0)); // branch 0 is expected for type id 2 (string)
+        expected.extend(avro_len_prefixed_bytes(b"hello"));
+        expected.extend(avro_long_bytes(1)); // branch 1 is expected for type id 5 (int)
+        expected.extend(avro_long_bytes(10));
+        expected.extend(avro_long_bytes(1));
+        expected.extend(avro_long_bytes(20));
+        expected.extend(avro_long_bytes(0));
+        expected.extend(avro_len_prefixed_bytes(b"world"));
+        expected.extend(avro_long_bytes(1));
+        expected.extend(avro_long_bytes(30));
+        assert_bytes_eq(&got, &expected);
+    }
+
+    #[test]
+    fn nullable_state_with_null_buffer_and_zero_nulls() {
+        let values = vec![1i32, 2, 3];
+        let arr = Int32Array::from_iter_values_with_nulls(values, Some(NullBuffer::new_valid(3)));
+        assert_eq!(arr.null_count(), 0);
+        assert!(arr.nulls().is_some()); // a null buffer is present although nothing is null
+        let plan = FieldPlan::Scalar;
+        let enc = FieldEncoder::make_encoder(&arr, &plan, Some(Nullability::NullFirst)).unwrap();
+        match enc.null_state {
+            NullState::NullableNoNulls { union_value_byte } => {
+                assert_eq!(
+                    union_value_byte,
+                    union_value_branch_byte(Nullability::NullFirst, false)
+                );
+            }
+            other => panic!("expected NullableNoNulls, got {other:?}"),
+        }
+    }
+}
diff --git a/arrow-avro/src/writer/format.rs b/arrow-avro/src/writer/format.rs
new file mode 100644
index 000000000000..ba2a0b8564b2
--- /dev/null
+++ b/arrow-avro/src/writer/format.rs
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Avro Writer Formats for Arrow.
+
+use crate::compression::{CODEC_METADATA_KEY, CompressionCodec};
+use crate::schema::{AvroSchema, AvroSchemaOptions, SCHEMA_METADATA_KEY};
+use crate::writer::encoder::write_long;
+use arrow_schema::{ArrowError, Schema};
+use rand::RngCore;
+use std::fmt::Debug;
+use std::io::Write;
+
+/// Format abstraction implemented by each container‐level writer.
+pub trait AvroFormat: Debug + Default {
+    /// If `true`, `WriterBuilder::build` resolves a schema fingerprint and hands it
+    /// to the record encoder for per-record prefixing. If `false`, no fingerprint
+    /// is computed. Set to `true` by `AvroSoeFormat` and `false` by `AvroOcfFormat`.
+    const NEEDS_PREFIX: bool;
+
+    /// Write any bytes required at the very beginning of the output stream
+    /// (file header, etc.).
+    /// Implementations **must not** write any record data.
+    fn start_stream<W: Write>(
+        &mut self,
+        writer: &mut W,
+        schema: &Schema,
+        compression: Option<CompressionCodec>,
+    ) -> Result<(), ArrowError>;
+
+    /// Return the 16‑byte sync marker (OCF) or `None` (binary stream).
+    fn sync_marker(&self) -> Option<&[u8; 16]>;
+}
+
+/// Avro Object Container File (OCF) format writer.
+#[derive(Debug, Default)]
+pub struct AvroOcfFormat {
+    sync_marker: [u8; 16], // all zeros from `Default` until `start_stream` randomises it
+}
+
+impl AvroFormat for AvroOcfFormat {
+    const NEEDS_PREFIX: bool = false;
+    fn start_stream<W: Write>(
+        &mut self,
+        writer: &mut W,
+        schema: &Schema,
+        compression: Option<CompressionCodec>,
+    ) -> Result<(), ArrowError> {
+        let mut rng = rand::rng();
+        rng.fill_bytes(&mut self.sync_marker);
+        // Build the Avro schema JSON that the file header will advertise.
+        // NOTE(review): `strip_metadata: true` is set, and the `crate::writer` docs say an
+        // embedded `avro.schema` is not used verbatim in that case; confirm this is intended.
+        let avro_schema = AvroSchema::from_arrow_with_options(
+            schema,
+            Some(AvroSchemaOptions {
+                null_order: None,
+                strip_metadata: true,
+            }),
+        )?;
+        // Magic
+        writer
+            .write_all(b"Obj\x01")
+            .map_err(|e| ArrowError::IoError(format!("write OCF magic: {e}"), e))?;
+        // File metadata map: { "avro.schema": <json>, "avro.codec": <codec> }
+        let codec_str = match compression {
+            Some(CompressionCodec::Deflate) => "deflate",
+            Some(CompressionCodec::Snappy) => "snappy",
+            Some(CompressionCodec::ZStandard) => "zstandard",
+            Some(CompressionCodec::Bzip2) => "bzip2",
+            Some(CompressionCodec::Xz) => "xz",
+            None => "null", // no compression is advertised as the codec name "null"
+        };
+        // Map block: count=2, then key/value pairs, then terminating count=0
+        write_long(writer, 2)?;
+        write_string(writer, SCHEMA_METADATA_KEY)?;
+        write_bytes(writer, avro_schema.json_string.as_bytes())?;
+        write_string(writer, CODEC_METADATA_KEY)?;
+        write_bytes(writer, codec_str.as_bytes())?;
+        write_long(writer, 0)?;
+        // Sync marker (16 bytes)
+        writer
+            .write_all(&self.sync_marker)
+            .map_err(|e| ArrowError::IoError(format!("write OCF sync marker: {e}"), e))?;
+        Ok(())
+    }
+
+    fn sync_marker(&self) -> Option<&[u8; 16]> {
+        Some(&self.sync_marker) // same marker that `start_stream` wrote after the header
+    }
+}
+
+/// Raw Avro binary streaming format using **Single-Object Encoding** per record.
+///
+/// Each record written by the stream writer is framed with a prefix determined
+/// by the schema fingerprinting algorithm.
+///
+/// See: <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+/// See: <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+#[derive(Debug, Default)]
+pub struct AvroSoeFormat {}
+
+impl AvroFormat for AvroSoeFormat {
+    const NEEDS_PREFIX: bool = true;
+    fn start_stream<W: Write>(
+        &mut self,
+        _writer: &mut W, // unused: this format writes nothing at stream start
+        _schema: &Schema, // unused here
+        compression: Option<CompressionCodec>,
+    ) -> Result<(), ArrowError> {
+        if compression.is_some() {
+            return Err(ArrowError::InvalidArgumentError(
+                "Compression not supported for Avro SOE streaming".to_string(),
+            ));
+        }
+        Ok(())
+    }
+
+    fn sync_marker(&self) -> Option<&[u8; 16]> {
+        None // this format has no sync marker
+    }
+}
+
+#[inline]
+fn write_string<W: Write>(writer: &mut W, s: &str) -> Result<(), ArrowError> {
+    write_bytes(writer, s.as_bytes()) // strings use the `write_bytes` framing on their UTF-8
+}
+
+#[inline]
+fn write_bytes<W: Write>(writer: &mut W, bytes: &[u8]) -> Result<(), ArrowError> {
+    write_long(writer, bytes.len() as i64)?; // length prefix first, then the raw bytes
+    writer
+        .write_all(bytes)
+        .map_err(|e| ArrowError::IoError(format!("write bytes: {e}"), e))
+}
diff --git a/arrow-avro/src/writer/mod.rs b/arrow-avro/src/writer/mod.rs
new file mode 100644
index 000000000000..f4a2e60ed57f
--- /dev/null
+++ b/arrow-avro/src/writer/mod.rs
@@ -0,0 +1,2413 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Avro writer implementation for the `arrow-avro` crate.
+//!
+//! # Overview
+//!
+//! Use this module to serialize Arrow `RecordBatch` values into Avro. Two output
+//! formats are supported:
+//!
+//! * **[`AvroWriter`](crate::writer::AvroWriter)** — writes an **Object Container File (OCF)**: a self‑describing
+//!   file with header (schema JSON + metadata), optional compression, data blocks, and
+//!   sync markers. See Avro 1.11.1 “Object Container Files.”
+//!   <https://avro.apache.org/docs/1.11.1/specification/#object-container-files>
+//! * **[`AvroStreamWriter`](crate::writer::AvroStreamWriter)** — writes a **Single Object Encoding (SOE) Stream** (“datum” bytes) without
+//!   any container framing. This is useful when the schema is known out‑of‑band (e.g.,
+//!   via a registry) and you want minimal overhead.
+//!
+//! ## Which format should you use?
+//!
+//! * Use **OCF** when you need a portable, self‑contained file. The schema travels with
+//!   the data, making it easy to read elsewhere.
+//! * Use the **SOE stream** when your surrounding protocol supplies schema information
+//!   (e.g., a schema registry). The writer automatically adds the per‑record prefix:
+//!   - **SOE**: Each record is prefixed with the 2-byte header (`0xC3 0x01`) followed by
+//!     an 8‑byte little‑endian CRC‑64‑AVRO fingerprint, then the Avro body.
+//!     See Avro 1.11.1 "Single object encoding".
+//!     <https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding>
+//!   - **Confluent wire format**: Each record is prefixed with magic byte `0x00` followed by
+//!     a **big‑endian** 4‑byte schema ID, then the Avro body. Use `FingerprintStrategy::Id(schema_id)`.
+//!     <https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format>
+//!   - **Apicurio wire format**: Each record is prefixed with magic byte `0x00` followed by
+//!     a **big‑endian** 8‑byte schema ID, then the Avro body. Use `FingerprintStrategy::Id64(schema_id)`.
+//!     <https://www.apicur.io/registry/docs/apicurio-registry/1.3.3.Final/getting-started/assembly-using-kafka-client-serdes.html#registry-serdes-types-avro-registry>
+//!
+//! ## Choosing the Avro schema
+//!
+//! By default, the writer converts your Arrow schema to Avro (including a top‑level record
+//! name). If you already have an Avro schema JSON you want to use verbatim, put it into the
+//! Arrow schema metadata under the `avro.schema` key before constructing the writer. The
+//! builder will use that schema instead of generating a new one (unless `strip_metadata` is
+//! set to true in the options).
+//!
+//! ## Compression
+//!
+//! For OCF, you may enable a compression codec via `WriterBuilder::with_compression`. The
+//! chosen codec is written into the file header and used for subsequent blocks. SOE stream
+//! writing doesn’t apply container‑level compression.
+//!
+//! ---
+use crate::codec::AvroFieldBuilder;
+use crate::compression::CompressionCodec;
+use crate::schema::{
+    AvroSchema, Fingerprint, FingerprintAlgorithm, FingerprintStrategy, SCHEMA_METADATA_KEY,
+};
+use crate::writer::encoder::{RecordEncoder, RecordEncoderBuilder, write_long};
+use crate::writer::format::{AvroFormat, AvroOcfFormat, AvroSoeFormat};
+use arrow_array::RecordBatch;
+use arrow_schema::{ArrowError, Schema};
+use std::io::Write;
+use std::sync::Arc;
+
+/// Encodes `RecordBatch` into the Avro binary format.
+mod encoder;
+/// Logic for different Avro container file formats.
+pub mod format;
+
+/// Builder to configure and create a `Writer`.
+#[derive(Debug, Clone)]
+pub struct WriterBuilder {
+    schema: Schema, // Arrow schema; an `avro.schema` metadata entry is used as-is by `build`
+    codec: Option<CompressionCodec>, // passed to `start_stream`; defaults to `None`
+    capacity: usize, // copied onto the built `Writer`; defaults to 1024
+    fingerprint_strategy: Option<FingerprintStrategy>, // only read when `F::NEEDS_PREFIX`
+}
+
+impl WriterBuilder {
+    /// Create a new builder with default settings.
+    ///
+    /// The Avro schema used for writing is determined as follows:
+    /// 1) If the Arrow schema metadata contains `avro.schema` (see `SCHEMA_METADATA_KEY`),
+    ///    that JSON is used verbatim.
+    /// 2) Otherwise, the Arrow schema is converted to an Avro record schema.
+    pub fn new(schema: Schema) -> Self {
+        Self {
+            schema,
+            codec: None,
+            capacity: 1024,
+            fingerprint_strategy: None,
+        }
+    }
+
+    /// Set the fingerprinting strategy for the stream writer.
+    /// This determines the per-record prefix format.
+    pub fn with_fingerprint_strategy(mut self, strategy: FingerprintStrategy) -> Self {
+        self.fingerprint_strategy = Some(strategy);
+        self
+    }
+
+    /// Change the compression codec.
+    pub fn with_compression(mut self, codec: Option<CompressionCodec>) -> Self {
+        self.codec = codec;
+        self
+    }
+
+    /// Sets the `capacity` value handed to the built `Writer` (1024 unless overridden).
+    pub fn with_capacity(mut self, capacity: usize) -> Self {
+        self.capacity = capacity;
+        self
+    }
+
+    /// Create a new `Writer` with specified `AvroFormat` and builder options.
+    /// Performs one‑time startup (header/stream init, encoder plan).
+    pub fn build<W, F>(self, mut writer: W) -> Result<Writer<W, F>, ArrowError>
+    where
+        W: Write,
+        F: AvroFormat,
+    {
+        let mut format = F::default();
+        let avro_schema = match self.schema.metadata.get(SCHEMA_METADATA_KEY) {
+            Some(json) => AvroSchema::new(json.clone()),
+            None => AvroSchema::try_from(&self.schema)?,
+        };
+        let maybe_fingerprint = if F::NEEDS_PREFIX {
+            match self.fingerprint_strategy {
+                Some(FingerprintStrategy::Id(id)) => Some(Fingerprint::Id(id)),
+                Some(FingerprintStrategy::Id64(id)) => Some(Fingerprint::Id64(id)),
+                Some(strategy) => {
+                    Some(avro_schema.fingerprint(FingerprintAlgorithm::from(strategy))?)
+                }
+                None => Some(
+                    avro_schema
+                        .fingerprint(FingerprintAlgorithm::from(FingerprintStrategy::Rabin))?,
+                ),
+            }
+        } else {
+            None
+        };
+        let mut md = self.schema.metadata().clone();
+        md.insert(
+            SCHEMA_METADATA_KEY.to_string(),
+            avro_schema.json_string.clone(),
+        );
+        let schema = Arc::new(Schema::new_with_metadata(self.schema.fields().clone(), md));
+        format.start_stream(&mut writer, &schema, self.codec)?;
+        let avro_root = AvroFieldBuilder::new(&avro_schema.schema()?).build()?;
+        let encoder = RecordEncoderBuilder::new(&avro_root, schema.as_ref())
+            .with_fingerprint(maybe_fingerprint)
+            .build()?;
+        Ok(Writer {
+            writer,
+            schema,
+            format,
+            compression: self.codec,
+            capacity: self.capacity,
+            encoder,
+        })
+    }
+}
+
+/// Generic Avro writer.
+///
+/// This type is generic over the output Write sink (`W`) and the Avro format (`F`).
+/// You'll usually use the concrete aliases:
+///
+/// * **[`AvroWriter`]** for **OCF** (self-describing container file)
+/// * **[`AvroStreamWriter`]** for **SOE** Avro streams
+#[derive(Debug)]
+pub struct Writer<W: Write, F: AvroFormat> {
+    /// Output sink; returned to the caller by `into_inner`.
+    writer: W,
+    /// Arrow schema that the fields of every incoming batch are compared against in `write`.
+    schema: Arc<Schema>,
+    /// Format state; its `sync_marker()` decides between OCF block and stream output.
+    format: F,
+    /// Optional codec applied to each encoded OCF block.
+    compression: Option<CompressionCodec>,
+    /// Initial capacity of the scratch buffer allocated for each OCF block.
+    capacity: usize,
+    /// Encoder that serializes a `RecordBatch` into Avro bytes.
+    encoder: RecordEncoder,
+}
+
+/// Alias for an Avro **Object Container File** writer.
+///
+/// Each call to `write` appends one data block (row count, byte length, encoded and
+/// optionally compressed rows) followed by the file's 16-byte sync marker.
+///
+/// ### Quickstart (runnable)
+///
+/// ```
+/// use std::io::Cursor;
+/// use std::sync::Arc;
+/// use arrow_array::{ArrayRef, Int64Array, StringArray, RecordBatch};
+/// use arrow_schema::{DataType, Field, Schema};
+/// use arrow_avro::writer::AvroWriter;
+/// use arrow_avro::reader::ReaderBuilder;
+///
+/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// // Writer schema: { id: long, name: string }
+/// let writer_schema = Schema::new(vec![
+///     Field::new("id", DataType::Int64, false),
+///     Field::new("name", DataType::Utf8, false),
+/// ]);
+///
+/// // Build a RecordBatch with two rows
+/// let batch = RecordBatch::try_new(
+///     Arc::new(writer_schema.clone()),
+///     vec![
+///         Arc::new(Int64Array::from(vec![1, 2])) as ArrayRef,
+///         Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
+///     ],
+/// )?;
+///
+/// // Write an Avro **Object Container File** (OCF) to memory
+/// let mut w = AvroWriter::new(Vec::<u8>::new(), writer_schema.clone())?;
+/// w.write(&batch)?;
+/// w.finish()?;
+/// let bytes = w.into_inner();
+///
+/// // Build a Reader and decode the batch back
+/// let mut r = ReaderBuilder::new().build(Cursor::new(bytes))?;
+/// let out = r.next().unwrap()?;
+/// assert_eq!(out.num_rows(), 2);
+/// # Ok(()) }
+/// ```
+pub type AvroWriter<W> = Writer<W, AvroOcfFormat>;
+
+/// Alias for an Avro **Single Object Encoding** stream writer.
+///
+/// This writer automatically adds the appropriate per-record prefix (based on the
+/// fingerprint strategy) before the Avro body of each record. The default is Single
+/// Object Encoding (SOE) with a Rabin fingerprint.
+///
+/// ### Example
+///
+/// ```
+/// use std::sync::Arc;
+/// use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+/// use arrow_schema::{DataType, Field, Schema};
+/// use arrow_avro::writer::AvroStreamWriter;
+///
+/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// // One‑column Arrow batch
+/// let schema = Schema::new(vec![Field::new("x", DataType::Int64, false)]);
+/// let batch = RecordBatch::try_new(
+///     Arc::new(schema.clone()),
+///     vec![Arc::new(Int64Array::from(vec![10, 20])) as ArrayRef],
+/// )?;
+///
+/// // Write an Avro Single Object Encoding stream to a Vec<u8>
+/// let sink: Vec<u8> = Vec::new();
+/// let mut w = AvroStreamWriter::new(sink, schema)?;
+/// w.write(&batch)?;
+/// w.finish()?;
+/// let bytes = w.into_inner();
+/// assert!(!bytes.is_empty());
+/// # Ok(()) }
+/// ```
+pub type AvroStreamWriter<W> = Writer<W, AvroSoeFormat>;
+
+impl<W: Write> Writer<W, AvroOcfFormat> {
+    /// Build an OCF writer over `writer` using default [`WriterBuilder`] options.
+    ///
+    /// Shorthand for `WriterBuilder::new(schema).build::<W, AvroOcfFormat>(writer)`.
+    ///
+    /// ### Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// use arrow_avro::writer::AvroWriter;
+    ///
+    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+    /// let batch = RecordBatch::try_new(
+    ///     Arc::new(schema.clone()),
+    ///     vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
+    /// )?;
+    ///
+    /// let buf: Vec<u8> = Vec::new();
+    /// let mut w = AvroWriter::new(buf, schema)?;
+    /// w.write(&batch)?;
+    /// w.finish()?;
+    /// let bytes = w.into_inner();
+    /// assert!(!bytes.is_empty());
+    /// # Ok(()) }
+    /// ```
+    pub fn new(writer: W, schema: Schema) -> Result<Self, ArrowError> {
+        let builder = WriterBuilder::new(schema);
+        builder.build::<W, AvroOcfFormat>(writer)
+    }
+
+    /// Borrow the 16-byte sync marker reported by this writer's OCF format state.
+    pub fn sync_marker(&self) -> Option<&[u8; 16]> {
+        let format = &self.format;
+        format.sync_marker()
+    }
+}
+
+impl<W: Write> Writer<W, AvroSoeFormat> {
+    /// Build an [`AvroStreamWriter`] over `writer` using default [`WriterBuilder`] options.
+    ///
+    /// The stream produced consists of **Single Object Encodings** only; it carries
+    /// no OCF header and no sync markers.
+    ///
+    /// ### Example
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// use arrow_avro::writer::AvroStreamWriter;
+    ///
+    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// let schema = Schema::new(vec![Field::new("x", DataType::Int64, false)]);
+    /// let batch = RecordBatch::try_new(
+    ///     Arc::new(schema.clone()),
+    ///     vec![Arc::new(Int64Array::from(vec![10, 20])) as ArrayRef],
+    /// )?;
+    ///
+    /// let sink: Vec<u8> = Vec::new();
+    /// let mut w = AvroStreamWriter::new(sink, schema)?;
+    /// w.write(&batch)?;
+    /// w.finish()?;
+    /// let bytes = w.into_inner();
+    /// assert!(!bytes.is_empty());
+    /// # Ok(()) }
+    /// ```
+    pub fn new(writer: W, schema: Schema) -> Result<Self, ArrowError> {
+        let builder = WriterBuilder::new(schema);
+        builder.build::<W, AvroSoeFormat>(writer)
+    }
+}
+
+impl<W: Write, F: AvroFormat> Writer<W, F> {
+    /// Serialize one [`RecordBatch`] to the output.
+    ///
+    /// Returns `ArrowError::SchemaError` when the batch's fields differ from the
+    /// writer's fields. Otherwise the batch is written as an OCF block when the
+    /// format reports a sync marker, and as a plain record stream when it does not.
+    pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
+        let fields_match = batch.schema().fields() == self.schema.fields();
+        if !fields_match {
+            return Err(ArrowError::SchemaError(
+                "Schema of RecordBatch differs from Writer schema".to_string(),
+            ));
+        }
+        // Copy the marker out so `self` can be borrowed mutably by the block writer.
+        let marker = self.format.sync_marker().copied();
+        match marker {
+            Some(sync) => self.write_ocf_block(batch, &sync),
+            None => self.write_stream(batch),
+        }
+    }
+
+    /// Write every batch in `batches`, in order, stopping at the first error.
+    ///
+    /// Equivalent to calling `write` once per element of the slice.
+    pub fn write_batches(&mut self, batches: &[&RecordBatch]) -> Result<(), ArrowError> {
+        batches.iter().try_for_each(|batch| self.write(batch))
+    }
+
+    /// Flush the underlying writer, mapping any I/O failure to `ArrowError::IoError`.
+    pub fn finish(&mut self) -> Result<(), ArrowError> {
+        match self.writer.flush() {
+            Ok(()) => Ok(()),
+            Err(e) => Err(ArrowError::IoError(
+                format!("Error flushing writer: {e}"),
+                e,
+            )),
+        }
+    }
+
+    /// Consume the writer, returning the underlying output object.
+    pub fn into_inner(self) -> W {
+        let Self { writer, .. } = self;
+        writer
+    }
+
+    /// Emit one OCF data block: row count, payload byte length, the (optionally
+    /// compressed) payload, and finally the sync marker.
+    fn write_ocf_block(&mut self, batch: &RecordBatch, sync: &[u8; 16]) -> Result<(), ArrowError> {
+        // Encode into a scratch buffer first; the block header needs the final length.
+        let mut raw = Vec::<u8>::with_capacity(self.capacity);
+        self.encoder.encode(&mut raw, batch)?;
+        let payload = if let Some(codec) = self.compression {
+            codec.compress(&raw)?
+        } else {
+            raw
+        };
+        write_long(&mut self.writer, batch.num_rows() as i64)?;
+        write_long(&mut self.writer, payload.len() as i64)?;
+        if let Err(e) = self.writer.write_all(&payload) {
+            return Err(ArrowError::IoError(
+                format!("Error writing Avro block: {e}"),
+                e,
+            ));
+        }
+        if let Err(e) = self.writer.write_all(sync) {
+            return Err(ArrowError::IoError(
+                format!("Error writing Avro sync: {e}"),
+                e,
+            ));
+        }
+        Ok(())
+    }
+
+    /// Encode the batch straight into the sink with no block framing.
+    fn write_stream(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
+        let sink = &mut self.writer;
+        self.encoder.encode(sink, batch)?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::compression::CompressionCodec;
+    use crate::reader::ReaderBuilder;
+    use crate::schema::{AvroSchema, SchemaStore};
+    use crate::test_util::arrow_test_data;
+    use arrow::datatypes::TimeUnit;
+    #[cfg(feature = "avro_custom_types")]
+    use arrow_array::types::{Int16Type, Int32Type, Int64Type};
+    use arrow_array::types::{
+        Time32MillisecondType, Time64MicrosecondType, TimestampMicrosecondType,
+        TimestampMillisecondType, TimestampNanosecondType,
+    };
+    use arrow_array::{
+        Array, ArrayRef, BinaryArray, Date32Array, Int32Array, PrimitiveArray, RecordBatch,
+        StringArray, StructArray, UnionArray,
+    };
+    #[cfg(feature = "avro_custom_types")]
+    use arrow_array::{Int16Array, Int64Array, RunArray};
+    use arrow_schema::UnionMode;
+    #[cfg(not(feature = "avro_custom_types"))]
+    use arrow_schema::{DataType, Field, Schema};
+    #[cfg(feature = "avro_custom_types")]
+    use arrow_schema::{DataType, Field, Schema};
+    use std::collections::HashMap;
+    use std::collections::HashSet;
+    use std::fs::File;
+    use std::io::{BufReader, Cursor};
+    use std::path::PathBuf;
+    use std::sync::Arc;
+    use tempfile::NamedTempFile;
+
+    fn files() -> impl Iterator<Item = &'static str> {
+        // Relative fixture paths; each entry is compiled in only when the cargo
+        // feature named in its `cfg` attribute is enabled.
+        const FIXTURES: &[&str] = &[
+            // TODO: avoid requiring snappy for this file
+            #[cfg(feature = "snappy")]
+            "avro/alltypes_plain.avro",
+            #[cfg(feature = "snappy")]
+            "avro/alltypes_plain.snappy.avro",
+            #[cfg(feature = "zstd")]
+            "avro/alltypes_plain.zstandard.avro",
+            #[cfg(feature = "bzip2")]
+            "avro/alltypes_plain.bzip2.avro",
+            #[cfg(feature = "xz")]
+            "avro/alltypes_plain.xz.avro",
+        ];
+        FIXTURES.iter().copied()
+    }
+
+    fn make_schema() -> Schema {
+        // Two required columns: an Int32 `id` and a Binary `name`.
+        let id = Field::new("id", DataType::Int32, false);
+        let name = Field::new("name", DataType::Binary, false);
+        Schema::new(vec![id, name])
+    }
+
+    fn make_batch() -> RecordBatch {
+        // Three rows matching `make_schema()`: ids 1..=3 and names "a", "b", "c".
+        let id_col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let name_col: ArrayRef = Arc::new(BinaryArray::from_vec(vec![
+            &b"a"[..],
+            &b"b"[..],
+            &b"c"[..],
+        ]));
+        let schema = Arc::new(make_schema());
+        RecordBatch::try_new(schema, vec![id_col, name_col])
+            .expect("failed to build test RecordBatch")
+    }
+
+    #[test]
+    fn test_stream_writer_writes_prefix_per_row_rt() -> Result<(), ArrowError> {
+        // Encode a two-row batch with the default stream writer.
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![10, 20]));
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![values])?;
+        let mut writer = AvroStreamWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        let encoded = writer.into_inner();
+
+        // Register the same schema in a default store so the decoder can resolve
+        // the per-record prefix.
+        let mut store = SchemaStore::new(); // Rabin by default
+        let _fingerprint = store.register(AvroSchema::try_from(&schema)?)?;
+        let builder = ReaderBuilder::new().with_writer_schema_store(store);
+        let mut decoder = builder.build_decoder()?;
+        let _consumed = decoder.decode(&encoded)?;
+        let decoded = decoder
+            .flush()?
+            .expect("expected at least one batch from decoder");
+
+        // The decoded batch must carry the same single column and values.
+        assert_eq!(decoded.num_columns(), 1);
+        assert_eq!(decoded.num_rows(), 2);
+        let expected = Int32Array::from(vec![10, 20]);
+        let actual = decoded
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("int column");
+        assert_eq!(actual, &expected);
+        Ok(())
+    }
+
+    // Encodes, one sliced row at a time, a batch whose nullable struct columns
+    // contain a non-nullable child field, and checks that every row is accepted
+    // by the stream writer.
+    #[test]
+    fn test_nullable_struct_with_nonnullable_field_sliced_encoding() {
+        use arrow_array::{ArrayRef, Int32Array, StringArray, StructArray};
+        use arrow_buffer::NullBuffer;
+        use arrow_schema::{DataType, Field, Fields, Schema};
+        use std::sync::Arc;
+        let inner_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, false), // non-nullable
+            Field::new("name", DataType::Utf8, true), // nullable
+        ]);
+        let inner_struct_type = DataType::Struct(inner_fields.clone());
+        let schema = Schema::new(vec![
+            Field::new("before", inner_struct_type.clone(), true), // nullable struct
+            Field::new("after", inner_struct_type.clone(), true),  // nullable struct
+            Field::new("op", DataType::Utf8, false),               // non-nullable
+        ]);
+        // "before": every struct slot is null, so the child values are all null too.
+        let before_ids = Int32Array::from(vec![None, None]);
+        let before_names = StringArray::from(vec![None::<&str>, None]);
+        let before_struct = StructArray::new(
+            inner_fields.clone(),
+            vec![
+                Arc::new(before_ids) as ArrayRef,
+                Arc::new(before_names) as ArrayRef,
+            ],
+            Some(NullBuffer::from(vec![false, false])),
+        );
+        // "after": every struct slot is valid and carries real child values.
+        let after_ids = Int32Array::from(vec![1, 2]); // non-nullable, no nulls
+        let after_names = StringArray::from(vec![Some("Alice"), Some("Bob")]);
+        let after_struct = StructArray::new(
+            inner_fields.clone(),
+            vec![
+                Arc::new(after_ids) as ArrayRef,
+                Arc::new(after_names) as ArrayRef,
+            ],
+            Some(NullBuffer::from(vec![true, true])),
+        );
+        let op_col = StringArray::from(vec!["r", "r"]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(before_struct) as ArrayRef,
+                Arc::new(after_struct) as ArrayRef,
+                Arc::new(op_col) as ArrayRef,
+            ],
+        )
+        .expect("failed to create test batch");
+        let mut sink = Vec::new();
+        let mut writer = WriterBuilder::new(schema)
+            .with_fingerprint_strategy(FingerprintStrategy::Id(1))
+            .build::<_, AvroSoeFormat>(&mut sink)
+            .expect("failed to create writer");
+        // Write each row as its own one-row slice of the batch.
+        for row_idx in 0..batch.num_rows() {
+            let single_row = batch.slice(row_idx, 1);
+            let after_col = single_row.column(1);
+            assert_eq!(
+                after_col.null_count(),
+                0,
+                "after column should have no nulls in sliced row"
+            );
+            writer
+                .write(&single_row)
+                .unwrap_or_else(|e| panic!("Failed to encode row {row_idx}: {e}"));
+        }
+        writer.finish().expect("failed to finish writer");
+        assert!(!sink.is_empty(), "encoded output should not be empty");
+    }
+
+    // Same row-by-row slicing scenario as the simpler nullable-struct test, but
+    // the struct also holds decimal and timestamp children.
+    #[test]
+    fn test_nullable_struct_with_decimal_and_timestamp_sliced() {
+        use arrow_array::{
+            ArrayRef, Decimal128Array, Int32Array, StringArray, StructArray,
+            TimestampMicrosecondArray,
+        };
+        use arrow_buffer::NullBuffer;
+        use arrow_schema::{DataType, Field, Fields, Schema};
+        use std::sync::Arc;
+        // Only `id` is non-nullable; every other child field is nullable.
+        let row_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+            Field::new("category", DataType::Utf8, true),
+            Field::new("price", DataType::Decimal128(10, 2), true),
+            Field::new("stock_quantity", DataType::Int32, true),
+            Field::new(
+                "created_at",
+                DataType::Timestamp(TimeUnit::Microsecond, None),
+                true,
+            ),
+        ]);
+        let row_struct_type = DataType::Struct(row_fields.clone());
+        let schema = Schema::new(vec![
+            Field::new("before", row_struct_type.clone(), true),
+            Field::new("after", row_struct_type.clone(), true),
+            Field::new("op", DataType::Utf8, false),
+        ]);
+        // "before" is an all-null struct column with two rows.
+        let before_struct = StructArray::new_null(row_fields.clone(), 2);
+        let ids = Int32Array::from(vec![1, 2]);
+        let names = StringArray::from(vec![Some("Widget"), Some("Gadget")]);
+        let categories = StringArray::from(vec![Some("Electronics"), Some("Electronics")]);
+        let prices = Decimal128Array::from(vec![Some(1999), Some(2999)])
+            .with_precision_and_scale(10, 2)
+            .unwrap();
+        let quantities = Int32Array::from(vec![Some(100), Some(50)]);
+        let timestamps = TimestampMicrosecondArray::from(vec![
+            Some(1700000000000000i64),
+            Some(1700000001000000i64),
+        ]);
+        // "after" is fully valid: both struct slots are marked non-null.
+        let after_struct = StructArray::new(
+            row_fields.clone(),
+            vec![
+                Arc::new(ids) as ArrayRef,
+                Arc::new(names) as ArrayRef,
+                Arc::new(categories) as ArrayRef,
+                Arc::new(prices) as ArrayRef,
+                Arc::new(quantities) as ArrayRef,
+                Arc::new(timestamps) as ArrayRef,
+            ],
+            Some(NullBuffer::from(vec![true, true])),
+        );
+        let op_col = StringArray::from(vec!["r", "r"]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(before_struct) as ArrayRef,
+                Arc::new(after_struct) as ArrayRef,
+                Arc::new(op_col) as ArrayRef,
+            ],
+        )
+        .expect("failed to create products batch");
+        let mut sink = Vec::new();
+        let mut writer = WriterBuilder::new(schema)
+            .with_fingerprint_strategy(FingerprintStrategy::Id(1))
+            .build::<_, AvroSoeFormat>(&mut sink)
+            .expect("failed to create writer");
+        // Encode row by row
+        for row_idx in 0..batch.num_rows() {
+            let single_row = batch.slice(row_idx, 1);
+            writer
+                .write(&single_row)
+                .unwrap_or_else(|e| panic!("Failed to encode product row {row_idx}: {e}"));
+        }
+        writer.finish().expect("failed to finish writer");
+        assert!(!sink.is_empty());
+    }
+
+    #[test]
+    fn non_nullable_child_in_nullable_struct_should_encode_per_row() {
+        use arrow_array::{
+            ArrayRef, Int32Array, Int64Array, RecordBatch, StringArray, StructArray,
+        };
+        use arrow_schema::{DataType, Field, Fields, Schema};
+        use std::sync::Arc;
+
+        // Struct layout shared by the `before` and `after` columns.
+        let row_fields = Fields::from(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+        ]);
+        let row_struct_dt = DataType::Struct(row_fields.clone());
+
+        // One row: `before` is a null struct, `after` has no validity buffer.
+        let before: ArrayRef = Arc::new(StructArray::new_null(row_fields.clone(), 1));
+        let after_children: Vec<ArrayRef> = vec![
+            Arc::new(Int32Array::from(vec![1])),
+            Arc::new(StringArray::from(vec![None::<&str>])),
+        ];
+        let after: ArrayRef = Arc::new(StructArray::new(
+            row_fields.clone(),
+            after_children,
+            None,
+        ));
+        let op: ArrayRef = Arc::new(StringArray::from(vec!["r"]));
+        let ts_ms: ArrayRef = Arc::new(Int64Array::from(vec![1732900000000_i64]));
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("before", row_struct_dt.clone(), true),
+            Field::new("after", row_struct_dt, true),
+            Field::new("op", DataType::Utf8, false),
+            Field::new("ts_ms", DataType::Int64, false),
+        ]));
+        let batch = RecordBatch::try_new(schema.clone(), vec![before, after, op, ts_ms]).unwrap();
+
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new(schema.as_ref().clone());
+        let mut writer = builder.build::<_, AvroSoeFormat>(&mut buf).unwrap();
+
+        // Writing the single-row slice must succeed.
+        let outcome = writer.write(&batch.slice(0, 1));
+        assert!(
+            outcome.is_ok(),
+            "expected to encode successfully, got: {:?}",
+            outcome.err()
+        );
+    }
+
+    // Writes a dense union whose type ids (2 and 5) are not zero-based and
+    // checks that both writing and finishing the OCF writer succeed.
+    #[test]
+    fn test_union_nonzero_type_ids() -> Result<(), ArrowError> {
+        use arrow_array::UnionArray;
+        use arrow_buffer::Buffer;
+        use arrow_schema::UnionFields;
+        let union_fields = UnionFields::try_new(
+            vec![2, 5],
+            vec![
+                Field::new("v_str", DataType::Utf8, true),
+                Field::new("v_int", DataType::Int32, true),
+            ],
+        )
+        .unwrap();
+        let strings = StringArray::from(vec!["hello", "world"]);
+        let ints = Int32Array::from(vec![10, 20, 30]);
+        // Five rows: each type id selects a child array, each offset a slot in it.
+        let type_ids = Buffer::from_slice_ref([2_i8, 5, 5, 2, 5]);
+        let offsets = Buffer::from_slice_ref([0_i32, 0, 1, 1, 2]);
+        let union_array = UnionArray::try_new(
+            union_fields.clone(),
+            type_ids.into(),
+            Some(offsets.into()),
+            vec![Arc::new(strings) as ArrayRef, Arc::new(ints) as ArrayRef],
+        )?;
+        let schema = Schema::new(vec![Field::new(
+            "union_col",
+            DataType::Union(union_fields, UnionMode::Dense),
+            false,
+        )]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![Arc::new(union_array) as ArrayRef],
+        )?;
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        assert!(
+            writer.write(&batch).is_ok(),
+            "Expected no error from writing"
+        );
+        // Finish exactly once, so this assertion is the one that observes a failure.
+        assert!(
+            writer.finish().is_ok(),
+            "Expected no error from finishing writer"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_stream_writer_with_id_fingerprint_rt() -> Result<(), ArrowError> {
+        let schema_id: u32 = 42;
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![values])?;
+
+        // Encode with an explicit 32-bit schema id as the record prefix.
+        let builder = WriterBuilder::new(schema.clone())
+            .with_fingerprint_strategy(FingerprintStrategy::Id(schema_id));
+        let mut writer = builder.build::<_, AvroSoeFormat>(Vec::<u8>::new())?;
+        writer.write(&batch)?;
+        let encoded = writer.into_inner();
+
+        // Decode through a store that maps the same id to the same schema.
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id);
+        let _ = store.set(Fingerprint::Id(schema_id), AvroSchema::try_from(&schema)?)?;
+        let reader_builder = ReaderBuilder::new().with_writer_schema_store(store);
+        let mut decoder = reader_builder.build_decoder()?;
+        let _ = decoder.decode(&encoded)?;
+        let decoded = decoder
+            .flush()?
+            .expect("expected at least one batch from decoder");
+
+        assert_eq!(decoded.num_columns(), 1);
+        assert_eq!(decoded.num_rows(), 3);
+        let expected = Int32Array::from(vec![1, 2, 3]);
+        let actual = decoded
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("int column");
+        assert_eq!(actual, &expected);
+        Ok(())
+    }
+
+    #[test]
+    fn test_stream_writer_with_id64_fingerprint_rt() -> Result<(), ArrowError> {
+        let schema_id: u64 = 42;
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![values])?;
+
+        // Encode with an explicit 64-bit schema id as the record prefix.
+        let builder = WriterBuilder::new(schema.clone())
+            .with_fingerprint_strategy(FingerprintStrategy::Id64(schema_id));
+        let mut writer = builder.build::<_, AvroSoeFormat>(Vec::<u8>::new())?;
+        writer.write(&batch)?;
+        let encoded = writer.into_inner();
+
+        // Decode through a store that maps the same id to the same schema.
+        let mut store = SchemaStore::new_with_type(FingerprintAlgorithm::Id64);
+        let _ = store.set(Fingerprint::Id64(schema_id), AvroSchema::try_from(&schema)?)?;
+        let reader_builder = ReaderBuilder::new().with_writer_schema_store(store);
+        let mut decoder = reader_builder.build_decoder()?;
+        let _ = decoder.decode(&encoded)?;
+        let decoded = decoder
+            .flush()?
+            .expect("expected at least one batch from decoder");
+
+        assert_eq!(decoded.num_columns(), 1);
+        assert_eq!(decoded.num_rows(), 3);
+        let expected = Int32Array::from(vec![1, 2, 3]);
+        let actual = decoded
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("int column");
+        assert_eq!(actual, &expected);
+        Ok(())
+    }
+
+    // Checks that an OCF file starts with the Avro magic bytes and that the
+    // single block written ends with the writer's own sync marker.
+    #[test]
+    fn test_ocf_writer_generates_header_and_sync() -> Result<(), ArrowError> {
+        let batch = make_batch();
+        let buffer: Vec<u8> = Vec::new();
+        let mut writer = AvroWriter::new(buffer, make_schema())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        // Copy the marker out before `into_inner` consumes the writer.
+        let sync = *writer
+            .sync_marker()
+            .expect("OCF writer should expose a sync marker");
+        let out = writer.into_inner();
+        assert_eq!(&out[..4], b"Obj\x01", "OCF magic bytes missing/incorrect");
+        assert!(
+            out.len() >= 4 + sync.len(),
+            "output too short to hold magic bytes and a sync marker"
+        );
+        // Comparing contents, not just length: a 16-byte slice is always 16 bytes long.
+        let trailer = &out[out.len() - sync.len()..];
+        assert_eq!(
+            trailer,
+            &sync[..],
+            "file should end with the writer's 16-byte sync marker"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_schema_mismatch_yields_error() {
+        // The writer is built for a single-column schema that `make_batch()` does not match.
+        let mismatched = Schema::new(vec![Field::new("x", DataType::Int32, false)]);
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), mismatched).unwrap();
+        let batch = make_batch();
+        let outcome = writer.write(&batch);
+        assert!(matches!(outcome.unwrap_err(), ArrowError::SchemaError(_)));
+    }
+
+    // Writes two batches with `write_batches` and reads the file back to confirm
+    // that the rows of both batches were stored.
+    #[test]
+    fn test_write_batches_accumulates_multiple() -> Result<(), ArrowError> {
+        let batch1 = make_batch();
+        let batch2 = make_batch();
+        let expected_rows = batch1.num_rows() + batch2.num_rows();
+        let buffer = Vec::<u8>::new();
+        let mut writer = AvroWriter::new(buffer, make_schema())?;
+        writer.write_batches(&[&batch1, &batch2])?;
+        writer.finish()?;
+        let out = writer.into_inner();
+        assert!(out.len() > 4, "combined batches produced tiny file");
+        // A size check alone would pass on a header-only file, so decode the
+        // output and count the rows that actually made it in.
+        let reader = ReaderBuilder::new().build(Cursor::new(out))?;
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let actual_rows: usize = decoded.iter().map(|b| b.num_rows()).sum();
+        assert_eq!(
+            actual_rows, expected_rows,
+            "file should contain the rows of both batches"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_finish_without_write_adds_header() -> Result<(), ArrowError> {
+        // No batch is written; only the bytes present after `finish` are inspected.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), make_schema())?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let magic = &bytes[..4];
+        assert_eq!(magic, b"Obj\x01", "finish() should emit OCF header");
+        Ok(())
+    }
+
+    // Pins the zig-zag varint encoding of small values and of a value that
+    // needs a multi-byte varint.
+    #[test]
+    fn test_write_long_encodes_zigzag_varint() -> Result<(), ArrowError> {
+        let mut buf = Vec::new();
+        write_long(&mut buf, 0)?;
+        write_long(&mut buf, -1)?;
+        write_long(&mut buf, 1)?;
+        write_long(&mut buf, -2)?;
+        write_long(&mut buf, 2147483647)?;
+        // 0, -1, 1, -2 zig-zag to 0, 1, 2, 3 (one byte each). 2147483647 zig-zags
+        // to 0xFFFF_FFFE, which takes five 7-bit groups, least significant first.
+        let expected: [u8; 9] = [0x00, 0x01, 0x02, 0x03, 0xFE, 0xFF, 0xFF, 0xFF, 0x0F];
+        assert!(
+            buf == expected,
+            "zig‑zag varint encodings incorrect: {buf:?}"
+        );
+        Ok(())
+    }
+
+    // For every fixture returned by `files()`: read it, rewrite it as OCF with the
+    // codec implied by the file name, read the rewritten file back, and require
+    // the result to equal the original batch.
+    #[test]
+    fn test_roundtrip_alltypes_roundtrip_writer() -> Result<(), ArrowError> {
+        for rel in files() {
+            let path = arrow_test_data(rel);
+            let rdr_file = File::open(&path).expect("open input avro");
+            let reader = ReaderBuilder::new()
+                .build(BufReader::new(rdr_file))
+                .expect("build reader");
+            let schema = reader.schema();
+            let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+            let original =
+                arrow::compute::concat_batches(&schema, &input_batches).expect("concat input");
+            let tmp = NamedTempFile::new().expect("create temp file");
+            let out_path = tmp.into_temp_path();
+            let out_file = File::create(&out_path).expect("create temp avro");
+            // Pick the codec from the marker embedded in the fixture's file name.
+            let codec = if rel.contains(".snappy.") {
+                Some(CompressionCodec::Snappy)
+            } else if rel.contains(".zstandard.") {
+                Some(CompressionCodec::ZStandard)
+            } else if rel.contains(".bzip2.") {
+                Some(CompressionCodec::Bzip2)
+            } else if rel.contains(".xz.") {
+                Some(CompressionCodec::Xz)
+            } else {
+                None
+            };
+            let mut writer = WriterBuilder::new(original.schema().as_ref().clone())
+                .with_compression(codec)
+                .build::<_, AvroOcfFormat>(out_file)?;
+            writer.write(&original)?;
+            writer.finish()?;
+            // Drop the writer (and the file handle it owns) before reopening the path.
+            drop(writer);
+            let rt_file = File::open(&out_path).expect("open roundtrip avro");
+            let rt_reader = ReaderBuilder::new()
+                .build(BufReader::new(rt_file))
+                .expect("build roundtrip reader");
+            let rt_schema = rt_reader.schema();
+            let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+            let roundtrip =
+                arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+            assert_eq!(
+                roundtrip, original,
+                "Round-trip batch mismatch for file: {}",
+                rel
+            );
+        }
+        Ok(())
+    }
+
+    // Reads `avro/nested_records.avro`, rewrites it with the default OCF writer,
+    // and requires the re-read batch to equal the original.
+    #[test]
+    fn test_roundtrip_nested_records_writer() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/nested_records.avro");
+        let rdr_file = File::open(&path).expect("open nested_records.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for nested_records.avro");
+        let schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&schema, &batches).expect("concat original");
+        let tmp = NamedTempFile::new().expect("create temp file");
+        let out_path = tmp.into_temp_path();
+        // Scope the writer so it and its file handle are dropped before the re-read.
+        {
+            let out_file = File::create(&out_path).expect("create output avro");
+            let mut writer = AvroWriter::new(out_file, original.schema().as_ref().clone())?;
+            writer.write(&original)?;
+            writer.finish()?;
+        }
+        let rt_file = File::open(&out_path).expect("open round_trip avro");
+        let rt_reader = ReaderBuilder::new()
+            .build(BufReader::new(rt_file))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, original,
+            "Round-trip batch mismatch for nested_records.avro"
+        );
+        Ok(())
+    }
+
+    // Reads `avro/nested_lists.snappy.avro`, rewrites it as Snappy-compressed OCF,
+    // and requires the re-read batch to equal the original. Compiled only when
+    // the `snappy` feature is enabled.
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_roundtrip_nested_lists_writer() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/nested_lists.snappy.avro");
+        let rdr_file = File::open(&path).expect("open nested_lists.snappy.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for nested_lists.snappy.avro");
+        let schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&schema, &batches).expect("concat original");
+        let tmp = NamedTempFile::new().expect("create temp file");
+        let out_path = tmp.into_temp_path();
+        // Scope the writer so it and its file handle are dropped before the re-read.
+        {
+            let out_file = File::create(&out_path).expect("create output avro");
+            let mut writer = WriterBuilder::new(original.schema().as_ref().clone())
+                .with_compression(Some(CompressionCodec::Snappy))
+                .build::<_, AvroOcfFormat>(out_file)?;
+            writer.write(&original)?;
+            writer.finish()?;
+        }
+        let rt_file = File::open(&out_path).expect("open round_trip avro");
+        let rt_reader = ReaderBuilder::new()
+            .build(BufReader::new(rt_file))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, original,
+            "Round-trip batch mismatch for nested_lists.snappy.avro"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_round_trip_simple_fixed_ocf() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/simple_fixed.avro");
+        let rdr_file = File::open(&path).expect("open avro/simple_fixed.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build avro reader");
+        let schema = reader.schema();
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original =
+            arrow::compute::concat_batches(&schema, &input_batches).expect("concat input");
+        let tmp = NamedTempFile::new().expect("create temp file");
+        let out_file = File::create(tmp.path()).expect("create temp avro");
+        let mut writer = AvroWriter::new(out_file, original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        drop(writer);
+        let rt_file = File::open(tmp.path()).expect("open round_trip avro");
+        let rt_reader = ReaderBuilder::new()
+            .build(BufReader::new(rt_file))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(round_trip, original);
+        Ok(())
+    }
+
+    // Strict equality (schema + values) only when canonical extension types are enabled
+    #[test]
+    #[cfg(feature = "canonical_extension_types")]
+    fn test_round_trip_duration_and_uuid_ocf() -> Result<(), ArrowError> {
+        use arrow_schema::{DataType, IntervalUnit};
+        let in_file =
+            File::open("test/data/duration_uuid.avro").expect("open test/data/duration_uuid.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(in_file))
+            .expect("build reader for duration_uuid.avro");
+        let in_schema = reader.schema();
+        let has_mdn = in_schema.fields().iter().any(|f| {
+            matches!(
+                f.data_type(),
+                DataType::Interval(IntervalUnit::MonthDayNano)
+            )
+        });
+        assert!(
+            has_mdn,
+            "expected at least one Interval(MonthDayNano) field in duration_uuid.avro"
+        );
+        let has_uuid_fixed = in_schema
+            .fields()
+            .iter()
+            .any(|f| matches!(f.data_type(), DataType::FixedSizeBinary(16)));
+        assert!(
+            has_uuid_fixed,
+            "expected at least one FixedSizeBinary(16) (uuid) field in duration_uuid.avro"
+        );
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let input =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+        // Write to an in‑memory OCF and read back
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), in_schema.as_ref().clone())?;
+        writer.write(&input)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(round_trip, input);
+        Ok(())
+    }
+
+    // Feature OFF: only values are asserted equal; schema may legitimately differ (uuid as fixed(16))
+    #[test]
+    #[cfg(not(feature = "canonical_extension_types"))]
+    fn test_duration_and_uuid_ocf_without_extensions_round_trips_values() -> Result<(), ArrowError>
+    {
+        use arrow::datatypes::{DataType, IntervalUnit};
+        use std::io::BufReader;
+
+        // Read input Avro (duration + uuid)
+        let in_file =
+            File::open("test/data/duration_uuid.avro").expect("open test/data/duration_uuid.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(in_file))
+            .expect("build reader for duration_uuid.avro");
+        let in_schema = reader.schema();
+
+        // Sanity checks: has MonthDayNano and a FixedSizeBinary(16)
+        assert!(
+            in_schema.fields().iter().any(|f| {
+                matches!(
+                    f.data_type(),
+                    DataType::Interval(IntervalUnit::MonthDayNano)
+                )
+            }),
+            "expected at least one Interval(MonthDayNano) field"
+        );
+        assert!(
+            in_schema
+                .fields()
+                .iter()
+                .any(|f| matches!(f.data_type(), DataType::FixedSizeBinary(16))),
+            "expected a FixedSizeBinary(16) field (uuid)"
+        );
+
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let input =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+
+        // Write to an in-memory OCF and read back
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), in_schema.as_ref().clone())?;
+        writer.write(&input)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+
+        // 1) Values must round-trip for both columns
+        assert_eq!(
+            round_trip.column(0),
+            input.column(0),
+            "duration column values differ"
+        );
+        assert_eq!(round_trip.column(1), input.column(1), "uuid bytes differ");
+
+        // 2) Schema expectation without extensions: uuid is written as named fixed(16);
+        //    the field read back must be FixedSizeBinary(16) carrying `logicalType = "uuid"` metadata
+        let uuid_rt = rt_schema.field_with_name("uuid_field")?;
+        assert_eq!(uuid_rt.data_type(), &DataType::FixedSizeBinary(16));
+        assert_eq!(
+            uuid_rt.metadata().get("logicalType").map(|s| s.as_str()),
+            Some("uuid"),
+            "expected `logicalType = \"uuid\"` on round-tripped field metadata"
+        );
+
+        // 3) Duration remains Interval(MonthDayNano)
+        let dur_rt = rt_schema.field_with_name("duration_field")?;
+        assert!(matches!(
+            dur_rt.data_type(),
+            DataType::Interval(IntervalUnit::MonthDayNano)
+        ));
+
+        Ok(())
+    }
+
+    // This test reads the same 'nonnullable.impala.avro' used by the reader tests,
+    // writes it back out with the writer (hitting Map encoding paths), then reads it
+    // again and asserts exact Arrow equivalence.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_nonnullable_impala_roundtrip_writer() -> Result<(), ArrowError> {
+        // Load source Avro with Map fields
+        let path = arrow_test_data("avro/nonnullable.impala.avro");
+        let rdr_file = File::open(&path).expect("open avro/nonnullable.impala.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for nonnullable.impala.avro");
+        // Collect all input batches and concatenate to a single RecordBatch
+        let in_schema = reader.schema();
+        // Sanity: ensure the file actually contains at least one Map field
+        let has_map = in_schema
+            .fields()
+            .iter()
+            .any(|f| matches!(f.data_type(), DataType::Map(_, _)));
+        assert!(
+            has_map,
+            "expected at least one Map field in avro/nonnullable.impala.avro"
+        );
+
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+        // Write out using the OCF writer into an in-memory Vec<u8>
+        let buffer = Vec::<u8>::new();
+        let mut writer = AvroWriter::new(buffer, in_schema.as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let out_bytes = writer.into_inner();
+        // Read the produced bytes back with the Reader
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(out_bytes))
+            .expect("build reader for round-tripped in-memory OCF");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        // Exact value fidelity (schema + data)
+        assert_eq!(
+            roundtrip, original,
+            "Round-trip Avro map data mismatch for nonnullable.impala.avro"
+        );
+        Ok(())
+    }
+
+    #[test]
+    // TODO: avoid requiring snappy for these files
+    #[cfg(feature = "snappy")]
+    fn test_roundtrip_decimals_via_writer() -> Result<(), ArrowError> {
+        // (file, resolve via ARROW_TEST_DATA?)
+        let files: [(&str, bool); 8] = [
+            ("avro/fixed_length_decimal.avro", true), // fixed-backed -> Decimal128(25,2)
+            ("avro/fixed_length_decimal_legacy.avro", true), // legacy fixed[8] -> Decimal64(13,2)
+            ("avro/int32_decimal.avro", true),        // bytes-backed -> Decimal32(4,2)
+            ("avro/int64_decimal.avro", true),        // bytes-backed -> Decimal64(10,2)
+            ("test/data/int256_decimal.avro", false), // bytes-backed -> Decimal256(76,2)
+            ("test/data/fixed256_decimal.avro", false), // fixed[32]-backed -> Decimal256(76,10)
+            ("test/data/fixed_length_decimal_legacy_32.avro", false), // legacy fixed[4] -> Decimal32(9,2)
+            ("test/data/int128_decimal.avro", false), // bytes-backed -> Decimal128(38,2)
+        ];
+        for (rel, in_test_data_dir) in files {
+            // Resolve path the same way as reader::test_decimal
+            let path: String = if in_test_data_dir {
+                arrow_test_data(rel)
+            } else {
+                PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+                    .join(rel)
+                    .to_string_lossy()
+                    .into_owned()
+            };
+            // Read original file into a single RecordBatch for comparison
+            let f_in = File::open(&path).expect("open input avro");
+            let rdr = ReaderBuilder::new().build(BufReader::new(f_in))?;
+            let in_schema = rdr.schema();
+            let in_batches = rdr.collect::<Result<Vec<_>, _>>()?;
+            let original =
+                arrow::compute::concat_batches(&in_schema, &in_batches).expect("concat input");
+            // Write it out with the OCF writer (no special compression)
+            let tmp = NamedTempFile::new().expect("create temp file");
+            let out_path = tmp.into_temp_path();
+            let out_file = File::create(&out_path).expect("create temp avro");
+            let mut writer = AvroWriter::new(out_file, original.schema().as_ref().clone())?;
+            writer.write(&original)?;
+            writer.finish()?;
+            // Read back the file we just wrote and compare equality (schema + data)
+            let f_rt = File::open(&out_path).expect("open roundtrip avro");
+            let rt_rdr = ReaderBuilder::new().build(BufReader::new(f_rt))?;
+            let rt_schema = rt_rdr.schema();
+            let rt_batches = rt_rdr.collect::<Result<Vec<_>, _>>()?;
+            let roundtrip =
+                arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat rt");
+            assert_eq!(roundtrip, original, "decimal round-trip mismatch for {rel}");
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_named_types_complex_roundtrip() -> Result<(), ArrowError> {
+        // 1. Read the new, more complex named references file.
+        let path =
+            PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/data/named_types_complex.avro");
+        let rdr_file = File::open(&path).expect("open avro/named_types_complex.avro");
+
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for named_types_complex.avro");
+
+        // 2. Concatenate all batches to one RecordBatch.
+        let in_schema = reader.schema();
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+
+        // 3. Sanity Checks: Validate that all named types were reused correctly.
+        {
+            let arrow_schema = original.schema();
+
+            // --- A. Validate 'User' record reuse ---
+            let author_field = arrow_schema.field_with_name("author")?;
+            let author_type = author_field.data_type();
+            let editors_field = arrow_schema.field_with_name("editors")?;
+            let editors_item_type = match editors_field.data_type() {
+                DataType::List(item_field) => item_field.data_type(),
+                other => panic!("Editors field should be a List, but was {:?}", other),
+            };
+            assert_eq!(
+                author_type, editors_item_type,
+                "The DataType for the 'author' struct and the 'editors' list items must be identical"
+            );
+
+            // --- B. Validate 'PostStatus' enum reuse ---
+            let status_field = arrow_schema.field_with_name("status")?;
+            let status_type = status_field.data_type();
+            assert!(
+                matches!(status_type, DataType::Dictionary(_, _)),
+                "Status field should be a Dictionary (Enum)"
+            );
+
+            let prev_status_field = arrow_schema.field_with_name("previous_status")?;
+            let prev_status_type = prev_status_field.data_type();
+            assert_eq!(
+                status_type, prev_status_type,
+                "The DataType for 'status' and 'previous_status' enums must be identical"
+            );
+
+            // --- C. Validate 'MD5' fixed reuse ---
+            let content_hash_field = arrow_schema.field_with_name("content_hash")?;
+            let content_hash_type = content_hash_field.data_type();
+            assert!(
+                matches!(content_hash_type, DataType::FixedSizeBinary(16)),
+                "Content hash should be FixedSizeBinary(16)"
+            );
+
+            let thumb_hash_field = arrow_schema.field_with_name("thumbnail_hash")?;
+            let thumb_hash_type = thumb_hash_field.data_type();
+            assert_eq!(
+                content_hash_type, thumb_hash_type,
+                "The DataType for 'content_hash' and 'thumbnail_hash' fixed types must be identical"
+            );
+        }
+
+        // 4. Write the data to an in-memory buffer.
+        let buffer: Vec<u8> = Vec::new();
+        let mut writer = AvroWriter::new(buffer, original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+
+        // 5. Read the data back and compare for exact equality.
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build reader for round-trip");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+
+        assert_eq!(
+            roundtrip, original,
+            "Avro complex named types round-trip mismatch"
+        );
+
+        Ok(())
+    }
+
+    // Union Roundtrip Test Helpers
+
+    // Asserts that the `actual` schema is a semantically equivalent superset of the `expected` one.
+    // This allows the `actual` schema to contain additional metadata keys
+    // (`arrowUnionMode`, `arrowUnionTypeIds`, `avro.name`) that are added during an Arrow-to-Avro-to-Arrow
+    // roundtrip, while ensuring no other information was lost or changed.
+    fn assert_schema_is_semantically_equivalent(expected: &Schema, actual: &Schema) {
+        // Compare top-level schema metadata using the same superset logic.
+        assert_metadata_is_superset(expected.metadata(), actual.metadata(), "Schema");
+
+        // Compare fields.
+        assert_eq!(
+            expected.fields().len(),
+            actual.fields().len(),
+            "Schema must have the same number of fields"
+        );
+
+        for (expected_field, actual_field) in expected.fields().iter().zip(actual.fields().iter()) {
+            assert_field_is_semantically_equivalent(expected_field, actual_field);
+        }
+    }
+
+    fn assert_field_is_semantically_equivalent(expected: &Field, actual: &Field) {
+        let context = format!("Field '{}'", expected.name());
+
+        assert_eq!(
+            expected.name(),
+            actual.name(),
+            "{context}: names must match"
+        );
+        assert_eq!(
+            expected.is_nullable(),
+            actual.is_nullable(),
+            "{context}: nullability must match"
+        );
+
+        // Recursively check the data types.
+        assert_datatype_is_semantically_equivalent(
+            expected.data_type(),
+            actual.data_type(),
+            &context,
+        );
+
+        // Check that metadata is a valid superset.
+        assert_metadata_is_superset(expected.metadata(), actual.metadata(), &context);
+    }
+
+    fn assert_datatype_is_semantically_equivalent(
+        expected: &DataType,
+        actual: &DataType,
+        context: &str,
+    ) {
+        match (expected, actual) {
+            (DataType::List(expected_field), DataType::List(actual_field))
+            | (DataType::LargeList(expected_field), DataType::LargeList(actual_field))
+            | (DataType::Map(expected_field, _), DataType::Map(actual_field, _)) => {
+                assert_field_is_semantically_equivalent(expected_field, actual_field);
+            }
+            (DataType::Struct(expected_fields), DataType::Struct(actual_fields)) => {
+                assert_eq!(
+                    expected_fields.len(),
+                    actual_fields.len(),
+                    "{context}: struct must have same number of fields"
+                );
+                for (ef, af) in expected_fields.iter().zip(actual_fields.iter()) {
+                    assert_field_is_semantically_equivalent(ef, af);
+                }
+            }
+            (
+                DataType::Union(expected_fields, expected_mode),
+                DataType::Union(actual_fields, actual_mode),
+            ) => {
+                assert_eq!(
+                    expected_mode, actual_mode,
+                    "{context}: union mode must match"
+                );
+                assert_eq!(
+                    expected_fields.len(),
+                    actual_fields.len(),
+                    "{context}: union must have same number of variants"
+                );
+                for ((exp_id, exp_field), (act_id, act_field)) in
+                    expected_fields.iter().zip(actual_fields.iter())
+                {
+                    assert_eq!(exp_id, act_id, "{context}: union type ids must match");
+                    assert_field_is_semantically_equivalent(exp_field, act_field);
+                }
+            }
+            _ => {
+                assert_eq!(expected, actual, "{context}: data types must be identical");
+            }
+        }
+    }
+
+    fn assert_batch_data_is_identical(expected: &RecordBatch, actual: &RecordBatch) {
+        assert_eq!(
+            expected.num_columns(),
+            actual.num_columns(),
+            "RecordBatches must have the same number of columns"
+        );
+        assert_eq!(
+            expected.num_rows(),
+            actual.num_rows(),
+            "RecordBatches must have the same number of rows"
+        );
+
+        for i in 0..expected.num_columns() {
+            let context = format!("Column {i}");
+            let expected_col = expected.column(i);
+            let actual_col = actual.column(i);
+            assert_array_data_is_identical(expected_col, actual_col, &context);
+        }
+    }
+
+    /// Asserts that two Arrays hold identical data, recursing into Union and Struct children.
+    fn assert_array_data_is_identical(expected: &dyn Array, actual: &dyn Array, context: &str) {
+        assert_eq!(
+            expected.nulls(),
+            actual.nulls(),
+            "{context}: null buffers must match"
+        );
+        assert_eq!(
+            expected.len(),
+            actual.len(),
+            "{context}: array lengths must match"
+        );
+
+        match (expected.data_type(), actual.data_type()) {
+            (DataType::Union(expected_fields, _), DataType::Union(..)) => {
+                let expected_union = expected.as_any().downcast_ref::<UnionArray>().unwrap();
+                let actual_union = actual.as_any().downcast_ref::<UnionArray>().unwrap();
+
+                // Compare the type_ids buffer (always the first buffer).
+                assert_eq!(
+                    &expected.to_data().buffers()[0],
+                    &actual.to_data().buffers()[0],
+                    "{context}: union type_ids buffer mismatch"
+                );
+
+                // For dense unions, compare the value_offsets buffer (the second buffer).
+                if expected.to_data().buffers().len() > 1 {
+                    assert_eq!(
+                        &expected.to_data().buffers()[1],
+                        &actual.to_data().buffers()[1],
+                        "{context}: union value_offsets buffer mismatch"
+                    );
+                }
+
+                // Recursively compare children based on the fields in the DataType.
+                for (type_id, _) in expected_fields.iter() {
+                    let child_context = format!("{context} -> child variant {type_id}");
+                    assert_array_data_is_identical(
+                        expected_union.child(type_id),
+                        actual_union.child(type_id),
+                        &child_context,
+                    );
+                }
+            }
+            (DataType::Struct(_), DataType::Struct(_)) => {
+                let expected_struct = expected.as_any().downcast_ref::<StructArray>().unwrap();
+                let actual_struct = actual.as_any().downcast_ref::<StructArray>().unwrap();
+                for i in 0..expected_struct.num_columns() {
+                    let child_context = format!("{context} -> struct child {i}");
+                    assert_array_data_is_identical(
+                        expected_struct.column(i),
+                        actual_struct.column(i),
+                        &child_context,
+                    );
+                }
+            }
+            // Fallback for primitive and all other types: only the arrays' own buffers are compared (child data is not visited).
+            _ => {
+                assert_eq!(
+                    expected.to_data().buffers(),
+                    actual.to_data().buffers(),
+                    "{context}: data buffers must match"
+                );
+            }
+        }
+    }
+
+    /// Checks that `actual_meta` contains all of `expected_meta`, and any additional
+    /// keys in `actual_meta` are from a permitted set.
+    fn assert_metadata_is_superset(
+        expected_meta: &HashMap<String, String>,
+        actual_meta: &HashMap<String, String>,
+        context: &str,
+    ) {
+        let allowed_additions: HashSet<&str> =
+            vec!["arrowUnionMode", "arrowUnionTypeIds", "avro.name"]
+                .into_iter()
+                .collect();
+        for (key, expected_value) in expected_meta {
+            match actual_meta.get(key) {
+                Some(actual_value) => assert_eq!(
+                    expected_value, actual_value,
+                    "{context}: preserved metadata for key '{key}' must have the same value"
+                ),
+                None => panic!("{context}: metadata key '{key}' was lost during roundtrip"),
+            }
+        }
+        for key in actual_meta.keys() {
+            if !expected_meta.contains_key(key) && !allowed_additions.contains(key.as_str()) {
+                panic!("{context}: unexpected metadata key '{key}' was added during roundtrip");
+            }
+        }
+    }
+
+    #[test]
+    fn test_union_roundtrip() -> Result<(), ArrowError> {
+        let file_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test/data/union_fields.avro")
+            .to_string_lossy()
+            .into_owned();
+        let rdr_file = File::open(&file_path).expect("open avro/union_fields.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for union_fields.avro");
+        let schema = reader.schema();
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original =
+            arrow::compute::concat_batches(&schema, &input_batches).expect("concat input");
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+
+        // The Arrow -> Avro -> Arrow roundtrip appends metadata keys, so the schemas cannot be
+        // compared directly. Instead, we semantically compare the schemas and the data.
+        assert_schema_is_semantically_equivalent(&original.schema(), &round_trip.schema());
+
+        assert_batch_data_is_identical(&original, &round_trip);
+        Ok(())
+    }
+
+    #[test]
+    fn test_enum_roundtrip_uses_reader_fixture() -> Result<(), ArrowError> {
+        // Read the known-good enum file (same as reader::test_simple)
+        let path = arrow_test_data("avro/simple_enum.avro");
+        let rdr_file = File::open(&path).expect("open avro/simple_enum.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(rdr_file))
+            .expect("build reader for simple_enum.avro");
+        // Concatenate all batches to one RecordBatch for a clean equality check
+        let in_schema = reader.schema();
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+        // Sanity: expect at least one Dictionary(Int32, Utf8) column (enum)
+        let has_enum_dict = in_schema.fields().iter().any(|f| {
+            matches!(
+                f.data_type(),
+                DataType::Dictionary(k, v) if **k == DataType::Int32 && **v == DataType::Utf8
+            )
+        });
+        assert!(
+            has_enum_dict,
+            "Expected at least one enum-mapped Dictionary<Int32, Utf8> field"
+        );
+        // Write with OCF writer into memory using the reader-provided Arrow schema.
+        // The writer will embed the Avro JSON from `avro.schema` metadata if present.
+        let buffer: Vec<u8> = Vec::new();
+        let mut writer = AvroWriter::new(buffer, in_schema.as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        // Read back and compare for exact equality (schema + data)
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("reader for round-trip");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        assert_eq!(roundtrip, original, "Avro enum round-trip mismatch");
+        Ok(())
+    }
+
+    #[test]
+    fn test_builder_propagates_capacity_to_writer() -> Result<(), ArrowError> {
+        let cap = 64 * 1024;
+        let buffer = Vec::<u8>::new();
+        let mut writer = WriterBuilder::new(make_schema())
+            .with_capacity(cap)
+            .build::<_, AvroOcfFormat>(buffer)?;
+        assert_eq!(writer.capacity, cap, "builder capacity not propagated");
+        let batch = make_batch();
+        writer.write(&batch)?;
+        writer.finish()?;
+        let out = writer.into_inner();
+        assert_eq!(&out[..4], b"Obj\x01", "OCF magic missing/incorrect");
+        Ok(())
+    }
+
+    #[test]
+    fn test_stream_writer_stores_capacity_direct_writes() -> Result<(), ArrowError> {
+        use arrow_array::{ArrayRef, Int32Array};
+        use arrow_schema::{DataType, Field, Schema};
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
+        )?;
+        let cap = 8192;
+        let mut writer = WriterBuilder::new(schema)
+            .with_capacity(cap)
+            .build::<_, AvroSoeFormat>(Vec::new())?;
+        assert_eq!(writer.capacity, cap);
+        writer.write(&batch)?;
+        let _bytes = writer.into_inner();
+        Ok(())
+    }
+
+    /// Reads the crate-local `duration_logical_types.avro` fixture, checks that its schema
+    /// exposes all four Duration time units, then round-trips it through a temporary file.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_roundtrip_duration_logical_types_ocf() -> Result<(), ArrowError> {
+        // The fixture path is resolved relative to this crate's manifest directory.
+        let file_path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test/data/duration_logical_types.avro")
+            .to_string_lossy()
+            .into_owned();
+        let source = match File::open(&file_path) {
+            Ok(file) => file,
+            Err(_) => panic!("Failed to open test file: {}", file_path),
+        };
+
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for duration_logical_types.avro");
+        let in_schema = reader.schema();
+
+        // Collect every Duration unit present in the decoded schema.
+        let expected_units = HashSet::from([
+            TimeUnit::Nanosecond,
+            TimeUnit::Microsecond,
+            TimeUnit::Millisecond,
+            TimeUnit::Second,
+        ]);
+        let mut found_units: HashSet<TimeUnit> = HashSet::new();
+        for field in in_schema.fields().iter() {
+            if let DataType::Duration(unit) = field.data_type() {
+                found_units.insert(*unit);
+            }
+        }
+        assert_eq!(
+            found_units, expected_units,
+            "Expected to find all four Duration TimeUnits in the schema from the initial read"
+        );
+
+        // Drain the reader and merge everything into a single batch for comparison.
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let input = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+
+        // Write through a real temp file; the inner scope ends before the file is re-opened.
+        let tmp = NamedTempFile::new().expect("create temp file");
+        {
+            let sink = File::create(tmp.path()).expect("create temp avro");
+            let mut writer = AvroWriter::new(sink, in_schema.as_ref().clone())?;
+            writer.write(&input)?;
+            writer.finish()?;
+        }
+
+        let rt_file = File::open(tmp.path()).expect("open round_trip avro");
+        let rt_reader = ReaderBuilder::new()
+            .build(BufReader::new(rt_file))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+
+        assert_eq!(round_trip, input);
+        Ok(())
+    }
+
+    /// Writes an Int32/Int32 run-end encoded column with `AvroWriter`, reads it back, and
+    /// checks that both the REE data type and the array contents survive.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_roundtrip_writer() -> Result<(), ArrowError> {
+        let ends = Int32Array::from(vec![3, 5, 7, 8]);
+        let vals = Int32Array::from(vec![Some(1), Some(2), None, Some(3)]);
+        let ree = RunArray::<Int32Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("x", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree.clone()) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 8);
+        let rt_schema = out.schema();
+        // Bail out early unless the column came back as RunEndEncoded.
+        let (ends_field, vals_field) = match rt_schema.field(0).data_type() {
+            DataType::RunEndEncoded(ends_field, vals_field) => (ends_field, vals_field),
+            other => panic!(
+                "Unexpected DataType for round-tripped RunEndEncoded column: {:?}",
+                other
+            ),
+        };
+        assert_eq!(ends_field.name(), "run_ends");
+        assert_eq!(ends_field.data_type(), &DataType::Int32);
+        assert_eq!(vals_field.name(), "values");
+        assert_eq!(vals_field.data_type(), &DataType::Int32);
+        assert!(vals_field.is_nullable());
+        let got_ree = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("RunArray<Int32Type>");
+        assert_eq!(got_ree, &ree);
+        Ok(())
+    }
+
+    /// Round-trips a run-end encoded column with Int16 run ends and nullable Utf8 values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_string_values_int16_run_ends_roundtrip_writer() -> Result<(), ArrowError>
+    {
+        // Run ends are cumulative end indices, so the last one (7) is the logical row count.
+        let ends = Int16Array::from(vec![2, 5, 7]);
+        let vals = StringArray::from(vec![Some("a"), None, Some("c")]);
+        let ree = RunArray::<Int16Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("s", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree.clone()) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 7);
+        let rt_schema = out.schema();
+        // Bail out early unless the column came back as RunEndEncoded.
+        let (ends_field, vals_field) = match rt_schema.field(0).data_type() {
+            DataType::RunEndEncoded(ends_field, vals_field) => (ends_field, vals_field),
+            other => panic!("Unexpected DataType: {:?}", other),
+        };
+        assert_eq!(ends_field.data_type(), &DataType::Int16);
+        assert_eq!(vals_field.data_type(), &DataType::Utf8);
+        assert!(
+            vals_field.is_nullable(),
+            "REE 'values' child should be nullable"
+        );
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<RunArray<Int16Type>>()
+            .expect("RunArray<Int16Type>");
+        assert_eq!(got, &ree);
+        Ok(())
+    }
+
+    /// Round-trips a run-end encoded column with Int64 run ends and Int32 values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_int64_run_ends_numeric_values_roundtrip_writer()
+    -> Result<(), ArrowError> {
+        let ends = Int64Array::from(vec![4_i64, 8_i64]);
+        let vals = Int32Array::from(vec![Some(999), Some(-5)]);
+        let ree = RunArray::<Int64Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("y", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree.clone()) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 8);
+        let rt_schema = out.schema();
+        // Bail out early unless the column came back as RunEndEncoded.
+        let (ends_field, vals_field) = match rt_schema.field(0).data_type() {
+            DataType::RunEndEncoded(ends_field, vals_field) => (ends_field, vals_field),
+            other => panic!("Unexpected DataType for REE column: {:?}", other),
+        };
+        assert_eq!(ends_field.data_type(), &DataType::Int64);
+        assert_eq!(vals_field.data_type(), &DataType::Int32);
+        assert!(vals_field.is_nullable());
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<RunArray<Int64Type>>()
+            .expect("RunArray<Int64Type>");
+        assert_eq!(got, &ree);
+        Ok(())
+    }
+
+    /// Round-trips the logical window `[offset, offset + length)` of a run-end encoded array.
+    /// The window is re-encoded into an owned `RunArray` before writing, and the check after
+    /// reading back is made on the expanded logical values.
+    #[cfg(feature = "avro_custom_types")]
+    #[test]
+    fn test_run_end_encoded_sliced_roundtrip_writer() -> Result<(), ArrowError> {
+        /// Run-length compresses `vals` into `(run_ends, run_values)` arrays.
+        fn compress_run_ends_i32(vals: &[Option<i32>]) -> (Int32Array, Int32Array) {
+            // An empty input has no runs at all.
+            if vals.is_empty() {
+                return (Int32Array::new_null(0), Int32Array::new_null(0));
+            }
+            let mut ends: Vec<i32> = Vec::new();
+            let mut run_vals: Vec<Option<i32>> = Vec::new();
+            // `seen` counts consumed elements, i.e. the end index of the run in progress.
+            let mut seen = 0i32;
+            let mut cur = vals[0];
+            for v in vals {
+                if *v != cur {
+                    ends.push(seen);
+                    run_vals.push(cur);
+                    cur = *v;
+                }
+                seen += 1;
+            }
+            ends.push(seen);
+            run_vals.push(cur);
+            (Int32Array::from(ends), Int32Array::from(run_vals))
+        }
+
+        /// Expands `a` into one `Option<i32>` per logical row.
+        fn expand_ree_to_int32(a: &RunArray<Int32Type>) -> Int32Array {
+            let vals = a
+                .values()
+                .as_any()
+                .downcast_ref::<Int32Array>()
+                .expect("REE values as Int32Array");
+            let logical: Vec<Option<i32>> = (0..a.len())
+                .map(|i| {
+                    let phys = a.get_physical_index(i);
+                    (!vals.is_null(phys)).then(|| vals.value(phys))
+                })
+                .collect();
+            Int32Array::from(logical)
+        }
+
+        let run_ends = Int32Array::from(vec![3, 5, 7, 8]);
+        let run_values = Int32Array::from(vec![Some(1), Some(2), None, Some(3)]);
+        let base = RunArray::<Int32Type>::try_new(&run_ends, &run_values)?;
+        let offset = 1usize;
+        let length = 6usize;
+        let base_values = base
+            .values()
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("REE values as Int32Array");
+        // Materialize the logical values covered by the window via physical-index lookups.
+        let logical_window: Vec<Option<i32>> = (offset..offset + length)
+            .map(|i| {
+                let phys = base.get_physical_index(i);
+                (!base_values.is_null(phys)).then(|| base_values.value(phys))
+            })
+            .collect();
+
+        // Re-encode the window as a standalone REE array and wrap it in a single-column batch.
+        let (owned_ends, owned_vals) = compress_run_ends_i32(&logical_window);
+        let owned_slice = RunArray::<Int32Type>::try_new(&owned_ends, &owned_vals)?;
+        let field = Field::new("x", owned_slice.data_type().clone(), true);
+        let schema = Schema::new(vec![field]);
+        let columns = vec![Arc::new(owned_slice.clone()) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), length);
+
+        let rt_schema = out.schema();
+        // Bail out early unless the column came back as RunEndEncoded.
+        let (ends_field, vals_field) = match rt_schema.field(0).data_type() {
+            DataType::RunEndEncoded(ends_field, vals_field) => (ends_field, vals_field),
+            other => panic!("Unexpected DataType for REE column: {:?}", other),
+        };
+        assert_eq!(ends_field.data_type(), &DataType::Int32);
+        assert_eq!(vals_field.data_type(), &DataType::Int32);
+        assert!(vals_field.is_nullable());
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .expect("RunArray<Int32Type>");
+        // Compare expanded logical values rather than the physical run layout.
+        let got_logical = expand_ree_to_int32(got);
+        let expected_logical = Int32Array::from(logical_window);
+        assert_eq!(
+            got_logical, expected_logical,
+            "Logical values differ after REE slice round-trip"
+        );
+        Ok(())
+    }
+
+    /// Without `avro_custom_types`, a run-end encoded Int32 column is expected to come back as
+    /// a plain Int32 column holding the expanded logical values.
+    #[cfg(not(feature = "avro_custom_types"))]
+    #[test]
+    fn test_run_end_encoded_roundtrip_writer_feature_off() -> Result<(), ArrowError> {
+        use arrow_schema::{DataType, Field, Schema};
+        let ends = arrow_array::Int32Array::from(vec![3, 5, 7, 8]);
+        let vals = arrow_array::Int32Array::from(vec![Some(1), Some(2), None, Some(3)]);
+        let ree = arrow_array::RunArray::<arrow_array::types::Int32Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("x", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 8);
+        assert_eq!(out.schema().field(0).data_type(), &DataType::Int32);
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Int32Array");
+        // Runs end at 3, 5, 7 and 8, so the values repeat 3, 2, 2 and 1 times.
+        let expected = Int32Array::from(vec![
+            Some(1),
+            Some(1),
+            Some(1),
+            Some(2),
+            Some(2),
+            None,
+            None,
+            Some(3),
+        ]);
+        assert_eq!(got, &expected);
+        Ok(())
+    }
+
+    /// Without `avro_custom_types`, an REE column with Int16 run ends and string values is
+    /// expected to come back as a plain Utf8 column holding the expanded logical values.
+    #[cfg(not(feature = "avro_custom_types"))]
+    #[test]
+    fn test_run_end_encoded_string_values_int16_run_ends_roundtrip_writer_feature_off()
+    -> Result<(), ArrowError> {
+        use arrow_schema::{DataType, Field, Schema};
+        let ends = arrow_array::Int16Array::from(vec![2, 5, 7]);
+        let vals = arrow_array::StringArray::from(vec![Some("a"), None, Some("c")]);
+        let ree = arrow_array::RunArray::<arrow_array::types::Int16Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("s", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 7);
+        assert_eq!(out.schema().field(0).data_type(), &DataType::Utf8);
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<arrow_array::StringArray>()
+            .expect("StringArray");
+        // Runs end at 2, 5 and 7, so the values repeat 2, 3 and 2 times.
+        let expected = arrow_array::StringArray::from(vec![
+            Some("a"),
+            Some("a"),
+            None,
+            None,
+            None,
+            Some("c"),
+            Some("c"),
+        ]);
+        assert_eq!(got, &expected);
+        Ok(())
+    }
+
+    /// Without `avro_custom_types`, an REE column with Int64 run ends and Int32 values is
+    /// expected to come back as a plain Int32 column holding the expanded logical values.
+    #[cfg(not(feature = "avro_custom_types"))]
+    #[test]
+    fn test_run_end_encoded_int64_run_ends_numeric_values_roundtrip_writer_feature_off()
+    -> Result<(), ArrowError> {
+        use arrow_schema::{DataType, Field, Schema};
+        let ends = arrow_array::Int64Array::from(vec![4_i64, 8_i64]);
+        let vals = Int32Array::from(vec![Some(999), Some(-5)]);
+        let ree = arrow_array::RunArray::<arrow_array::types::Int64Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("y", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 8);
+        assert_eq!(out.schema().field(0).data_type(), &DataType::Int32);
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Int32Array");
+        // Runs end at 4 and 8, so each value repeats four times.
+        let expected = Int32Array::from(vec![
+            Some(999),
+            Some(999),
+            Some(999),
+            Some(999),
+            Some(-5),
+            Some(-5),
+            Some(-5),
+            Some(-5),
+        ]);
+        assert_eq!(got, &expected);
+        Ok(())
+    }
+
+    /// Feature-off round trip of a three-run REE column; the reader is expected to return the
+    /// expanded Int32 values. NOTE(review): despite the name, nothing is sliced here.
+    #[cfg(not(feature = "avro_custom_types"))]
+    #[test]
+    fn test_run_end_encoded_sliced_roundtrip_writer_feature_off() -> Result<(), ArrowError> {
+        use arrow_schema::{DataType, Field, Schema};
+        let ends = Int32Array::from(vec![2, 4, 6]);
+        let vals = Int32Array::from(vec![Some(1), Some(2), None]);
+        let ree = arrow_array::RunArray::<arrow_array::types::Int32Type>::try_new(&ends, &vals)?;
+        let schema = Schema::new(vec![Field::new("x", ree.data_type().clone(), true)]);
+        let columns = vec![Arc::new(ree) as ArrayRef];
+        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
+        // Serialize to an in-memory buffer, then decode that buffer again.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let encoded = writer.into_inner();
+        let reader = ReaderBuilder::new().build(Cursor::new(encoded))?;
+        let out_schema = reader.schema();
+        let decoded = reader.collect::<Result<Vec<_>, _>>()?;
+        let out = arrow::compute::concat_batches(&out_schema, &decoded).expect("concat output");
+        assert_eq!(out.num_columns(), 1);
+        assert_eq!(out.num_rows(), 6);
+        assert_eq!(out.schema().field(0).data_type(), &DataType::Int32);
+        let got = out
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .expect("Int32Array");
+        // Runs end at 2, 4 and 6, so each value repeats twice.
+        let expected = Int32Array::from(vec![Some(1), Some(1), Some(2), Some(2), None, None]);
+        assert_eq!(got, &expected);
+        Ok(())
+    }
+
+    /// Reads `avro/nullable.impala.avro`, writes it back to an in-memory buffer and checks
+    /// that decoding that buffer yields an identical batch.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_nullable_impala_roundtrip() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/nullable.impala.avro");
+        let source = BufReader::new(File::open(&path).expect("open avro/nullable.impala.avro"));
+        let reader = ReaderBuilder::new()
+            .build(source)
+            .expect("build reader for nullable.impala.avro");
+        let in_schema = reader.schema();
+        // Sanity check: the input schema must contain at least one nullable field.
+        assert!(
+            in_schema.fields().iter().any(|f| f.is_nullable()),
+            "expected at least one nullable field in avro/nullable.impala.avro"
+        );
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), in_schema.as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(writer.into_inner()))
+            .expect("build reader for round-tripped in-memory OCF");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        assert_eq!(
+            roundtrip, original,
+            "Round-trip Avro data mismatch for nullable.impala.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips `avro/datapage_v2.snappy.avro` through an in-memory `AvroWriter` buffer.
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_datapage_v2_roundtrip() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/datapage_v2.snappy.avro");
+        let source = File::open(&path).expect("open avro/datapage_v2.snappy.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for datapage_v2.snappy.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema reported by the reader, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), in_schema.as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(writer.into_inner()))
+            .expect("build round-trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, original,
+            "Round-trip batch mismatch for datapage_v2.snappy.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips `avro/single_nan.avro` through an in-memory `AvroWriter` buffer.
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_single_nan_roundtrip() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/single_nan.avro");
+        let source = File::open(&path).expect("open avro/single_nan.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for single_nan.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema carried by the concatenated batch, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(writer.into_inner()))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, original,
+            "Round-trip batch mismatch for avro/single_nan.avro"
+        );
+        Ok(())
+    }
+    /// Round-trips `avro/dict-page-offset-zero.avro` through an in-memory `AvroWriter` buffer.
+    #[test]
+    // TODO: avoid requiring snappy for this file
+    #[cfg(feature = "snappy")]
+    fn test_dict_pages_offset_zero_roundtrip() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/dict-page-offset-zero.avro");
+        let source = File::open(&path).expect("open avro/dict-page-offset-zero.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for dict-page-offset-zero.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema carried by the concatenated batch, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build reader for round-trip");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        assert_eq!(
+            roundtrip, original,
+            "Round-trip batch mismatch for avro/dict-page-offset-zero.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips `avro/repeated_no_annotation.avro` through an in-memory `AvroWriter` buffer.
+    #[test]
+    #[cfg(feature = "snappy")]
+    fn test_repeated_no_annotation_roundtrip() -> Result<(), ArrowError> {
+        let path = arrow_test_data("avro/repeated_no_annotation.avro");
+        let source = File::open(&path).expect("open avro/repeated_no_annotation.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for repeated_no_annotation.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema carried by the concatenated batch, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(writer.into_inner()))
+            .expect("build reader for round-trip buffer");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round-trip");
+        assert_eq!(
+            round_trip, original,
+            "Round-trip batch mismatch for avro/repeated_no_annotation.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips the crate-local `test/data/nested_record_reuse.avro` fixture via `AvroWriter`.
+    #[test]
+    fn test_nested_record_type_reuse_roundtrip() -> Result<(), ArrowError> {
+        // Keep the path as a `PathBuf`; `File::open` accepts it without a lossy string detour.
+        let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test/data/nested_record_reuse.avro");
+        let in_file = File::open(&path).expect("open test/data/nested_record_reuse.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(in_file))
+            .expect("build reader for nested_record_reuse.avro");
+        let in_schema = reader.schema();
+        let input_batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let input =
+            arrow::compute::concat_batches(&in_schema, &input_batches).expect("concat input");
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), in_schema.as_ref().clone())?;
+        writer.write(&input)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, input,
+            "Round-trip batch mismatch for nested_record_reuse.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips the crate-local `test/data/enum_reuse.avro` fixture via `AvroWriter`.
+    #[test]
+    fn test_enum_type_reuse_roundtrip() -> Result<(), ArrowError> {
+        let path =
+            std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/data/enum_reuse.avro");
+        let source = std::fs::File::open(&path).expect("open test/data/enum_reuse.avro");
+        let reader = ReaderBuilder::new()
+            .build(std::io::BufReader::new(source))
+            .expect("build reader for enum_reuse.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema carried by the concatenated batch, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let rt_reader = ReaderBuilder::new()
+            .build(std::io::Cursor::new(writer.into_inner()))
+            .expect("build round_trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let round_trip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat round_trip");
+        assert_eq!(
+            round_trip, original,
+            "Avro enum type reuse round-trip mismatch"
+        );
+        Ok(())
+    }
+
+    /// Round-trips the crate-local `test/data/comprehensive_e2e.avro` fixture via `AvroWriter`.
+    #[test]
+    fn comprehensive_e2e_test_roundtrip() -> Result<(), ArrowError> {
+        let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test/data/comprehensive_e2e.avro");
+        let source = File::open(&path).expect("open test/data/comprehensive_e2e.avro");
+        let reader = ReaderBuilder::new()
+            .build(BufReader::new(source))
+            .expect("build reader for comprehensive_e2e.avro");
+        let in_schema = reader.schema();
+        let batches = reader.collect::<Result<Vec<_>, _>>()?;
+        let original = arrow::compute::concat_batches(&in_schema, &batches).expect("concat input");
+        // Re-encode with the schema carried by the concatenated batch, then decode the result.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), original.schema().as_ref().clone())?;
+        writer.write(&original)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+        let rt_reader = ReaderBuilder::new()
+            .build(Cursor::new(bytes))
+            .expect("build round-trip reader");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        assert_eq!(
+            roundtrip, original,
+            "Round-trip batch mismatch for comprehensive_e2e.avro"
+        );
+        Ok(())
+    }
+
+    /// Round-trips non-nullable Date32, Time32(ms), Time64(us) and timezone-less
+    /// Timestamp(ms/us/ns) columns through an in-memory `AvroWriter` buffer.
+    #[test]
+    fn test_roundtrip_new_time_encoders_writer() -> Result<(), ArrowError> {
+        // All three timestamp columns are timezone-less, so build them through one helper.
+        let timestamp = |unit: TimeUnit| DataType::Timestamp(unit, None);
+        let schema = Schema::new(vec![
+            Field::new("d32", DataType::Date32, false),
+            Field::new("t32_ms", DataType::Time32(TimeUnit::Millisecond), false),
+            Field::new("t64_us", DataType::Time64(TimeUnit::Microsecond), false),
+            Field::new("ts_ms", timestamp(TimeUnit::Millisecond), false),
+            Field::new("ts_us", timestamp(TimeUnit::Microsecond), false),
+            Field::new("ts_ns", timestamp(TimeUnit::Nanosecond), false),
+        ]);
+
+        // Sample values for each column, three rows apiece.
+        let d32 = Date32Array::from(vec![0, 1, -1]);
+        let t32_ms: PrimitiveArray<Time32MillisecondType> =
+            vec![0_i32, 12_345_i32, 86_399_999_i32].into();
+        let t64_us: PrimitiveArray<Time64MicrosecondType> =
+            vec![0_i64, 1_234_567_i64, 86_399_999_999_i64].into();
+        let ts_ms: PrimitiveArray<TimestampMillisecondType> =
+            vec![0_i64, -1_i64, 1_700_000_000_000_i64].into();
+        let ts_us: PrimitiveArray<TimestampMicrosecondType> = vec![0_i64, 1_i64, -1_i64].into();
+        let ts_ns: PrimitiveArray<TimestampNanosecondType> = vec![0_i64, 1_i64, -1_i64].into();
+
+        // Columns are listed in the same order as the schema fields.
+        let batch = RecordBatch::try_new(
+            Arc::new(schema.clone()),
+            vec![
+                Arc::new(d32) as ArrayRef,
+                Arc::new(t32_ms) as ArrayRef,
+                Arc::new(t64_us) as ArrayRef,
+                Arc::new(ts_ms) as ArrayRef,
+                Arc::new(ts_us) as ArrayRef,
+                Arc::new(ts_ns) as ArrayRef,
+            ],
+        )?;
+
+        // Serialize to an in-memory buffer.
+        let mut writer = AvroWriter::new(Vec::<u8>::new(), schema.clone())?;
+        writer.write(&batch)?;
+        writer.finish()?;
+        let bytes = writer.into_inner();
+
+        // Decode the buffer and compare against the batch that was written.
+        let rt_reader = ReaderBuilder::new()
+            .build(std::io::Cursor::new(bytes))
+            .expect("build reader for round-trip of new time encoders");
+        let rt_schema = rt_reader.schema();
+        let rt_batches = rt_reader.collect::<Result<Vec<_>, _>>()?;
+        let roundtrip =
+            arrow::compute::concat_batches(&rt_schema, &rt_batches).expect("concat roundtrip");
+        assert_eq!(roundtrip, batch);
+        Ok(())
+    }
+}
diff --git a/arrow-avro/test/data/README.md b/arrow-avro/test/data/README.md
new file mode 100644
index 000000000000..226e0700fb94
--- /dev/null
+++ b/arrow-avro/test/data/README.md
@@ -0,0 +1,359 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Avro test files for `arrow-avro`
+
+This directory contains small Avro Object Container Files (OCF) used by
+`arrow-avro` tests to validate the `Reader` implementation. These files are generated by
+a set of Python scripts and will gradually be removed from this directory as they are merged into `arrow-testing`.
+
+## Decimal Files
+
+This directory contains OCF files used to exercise decoding of Avro’s `decimal` logical type 
+across both `bytes` and `fixed` encodings, and to cover Arrow decimal widths ranging
+from `Decimal32` up through `Decimal256`. The files were generated from a
+script (see **How these files were created** below).
+
+> **Avro decimal recap.** Avro’s `decimal` logical type annotates either a
+> `bytes` or `fixed` primitive and stores the **two’s‑complement big‑endian
+> representation of the unscaled integer** (value × 10^scale). Implementations
+> should reject invalid combinations such as `scale > precision`.
+
+> **Arrow decimal recap.** Arrow defines `Decimal32`, `Decimal64`, `Decimal128`,
+> and `Decimal256` types with maximum precisions of 9, 18, 38, and 76 digits,
+> respectively. Tests here validate that the Avro reader selects compatible
+> Arrow decimal widths given the Avro decimal’s precision and storage.
+
+---
+
+All files are one‑column Avro OCFs with a field named `value`. Each contains 24
+rows with the sequence `1 … 24` rendered at the file’s declared `scale`
+(e.g., at scale 10: `1.0000000000`, `2.0000000000`, …).
+
+| File | Avro storage | Decimal (precision, scale) | Intended Arrow width |
+|---|---|---|---|
+| `int256_decimal.avro` | `bytes` + `logicalType: decimal` | (76, 10) | `Decimal256` |
+| `fixed256_decimal.avro` | `fixed[32]` + `logicalType: decimal` | (76, 10) | `Decimal256` |
+| `fixed_length_decimal_legacy_32.avro` | `fixed[4]` + `logicalType: decimal` | (9, 2) | `Decimal32` (legacy fixed‑width path) |
+| `int128_decimal.avro` | `bytes` + `logicalType: decimal` | (38, 2) | `Decimal128` |
+
+### Schemas (for reference)
+
+#### int256_decimal.avro
+
+```json
+{
+  "type": "record",
+  "name": "OneColDecimal256Bytes",
+  "fields": [{
+    "name": "value",
+    "type": { "type": "bytes", "logicalType": "decimal", "precision": 76, "scale": 10 }
+  }]
+}
+```
+
+#### fixed256_decimal.avro
+
+```json
+{
+  "type": "record",
+  "name": "OneColDecimal256Fixed",
+  "fields": [{
+    "name": "value",
+    "type": {
+      "type": "fixed", "name": "Decimal256Fixed", "size": 32,
+      "logicalType": "decimal", "precision": 76, "scale": 10
+    }
+  }]
+}
+```
+
+#### fixed_length_decimal_legacy_32.avro
+
+```json
+{
+  "type": "record",
+  "name": "OneColDecimal32FixedLegacy",
+  "fields": [{
+    "name": "value",
+    "type": {
+      "type": "fixed", "name": "Decimal32FixedLegacy", "size": 4,
+      "logicalType": "decimal", "precision": 9, "scale": 2
+    }
+  }]
+}
+```
+
+#### int128_decimal.avro
+
+```json
+{
+  "type": "record",
+  "name": "OneColDecimal128Bytes",
+  "fields": [{
+    "name": "value",
+    "type": { "type": "bytes", "logicalType": "decimal", "precision": 38, "scale": 2 }
+  }]
+}
+```
+
+### How these files were created
+
+All four files were generated by the Python script
+`create_avro_decimal_files.py` authored for this purpose. The script uses
+`fastavro` to write OCFs and encodes decimal values as required by the Avro
+spec (two’s‑complement big‑endian of the unscaled integer).
+
+#### Re‑generation
+
+From the repository root (defaults write into arrow-avro/test/data):
+
+```bash
+# 1) Ensure Python 3 is available, then install fastavro
+python -m pip install --upgrade fastavro
+
+# 2) Fetch the script
+curl -L -o create_avro_decimal_files.py \
+https://gist.githubusercontent.com/jecsand838/3890349bdb33082a3e8fdcae3257eef7/raw/create_avro_decimal_files.py
+
+# 3) Generate the files (prints a verification dump by default)
+python create_avro_decimal_files.py -o arrow-avro/test/data
+```
+
+Options:
+* --num-rows (default 24) — number of rows to emit per file
+* --scale (default 10) — the decimal scale used for the two `Decimal256` files (`int256_decimal.avro` and `fixed256_decimal.avro`)
+* --no-verify — skip reading the files back for printed verification
+
+## Duration Logical Types File
+
+This directory contains an OCF file used to test the decoding of Avro `long` values annotated with custom `logicalType` values. These annotations map directly to Arrow `Duration` types with the corresponding time units.
+
+#### duration_logical_types.avro
+
+```json
+{
+   "type": "record",
+   "name": "DurationLogicalTypes",
+   "fields": [
+      {
+         "name": "duration_time_nanos",
+         "type": {
+            "type": "long",
+            "logicalType": "arrow.duration-nanos"
+         }
+      },
+      {
+         "name": "duration_time_micros",
+         "type": {
+            "type": "long",
+            "logicalType": "arrow.duration-micros"
+         }
+      },
+      {
+         "name": "duration_time_millis",
+         "type": {
+            "type": "long",
+            "logicalType": "arrow.duration-millis"
+         }
+      },
+      {
+         "name": "duration_time_seconds",
+         "type": {
+            "type": "long",
+            "logicalType": "arrow.duration-seconds"
+         }
+      }
+   ]
+}
+```
+
+This file contains 24 rows of random long values across four fields, each annotated with a different custom logical type corresponding to an Arrow Duration unit.
+
+
+#### How this file was created
+
+The file was generated by the Python script `generate_duration_avro.py`. The script uses `fastavro` to write an OCF with the schema and random data described above.
+
+#### Re‑generation
+From the repository root (defaults write into arrow-avro/test/data):
+
+```bash
+
+# 1) Ensure Python 3 is available, then install fastavro
+python3 -m pip install --upgrade fastavro
+
+# 2) Fetch the script
+curl -L -o generate_duration_avro.py \
+https://gist.githubusercontent.com/nathaniel-d-ef/c253cb180b041023e3ccfe9df20ccef7/raw/06c8ca1321efcd8e1c8746fd65aa013e1a566944/generate_duration_avro.py
+
+# 3) Run the generation script
+python3 generate_duration_avro.py -o arrow-avro/test/data
+```
+
+Options:
+
+* --num-rows (default 24) — number of rows to emit
+
+* --no-verify — skip reading the file back for printed verification
+
+## Union File
+
+**Purpose:** Exercise a wide variety of Avro **union** shapes (including nullable unions, unions of ambiguous scalar types, unions of named types, and unions inside arrays, maps, and nested records) to validate `arrow-avro` union decoding and schema‑resolution paths.
+
+**Format:** Avro Object Container File (OCF) written by `fastavro.writer` with embedded writer schema.
+
+**Record count:** four rows. Each row selects different branches across the unions to ensure coverage (e.g., toggling between bytes vs. string, fixed vs. duration vs. decimal, enum vs. record alternatives, etc.).
+
+**How this file was created:**
+
+1. Script: [`create_avro_union_file.py`](https://gist.github.com/jecsand838/f4bf85ad597ab34575219df515156444)  
+   Runs with Python 3 and uses **fastavro** to emit `union_fields.avro` in the working directory.
+2. Quick reproduce:
+   ```bash
+   pip install fastavro
+   python3 create_avro_union_file.py
+   # Outputs: ./union_fields.avro
+   ```
+
+> Note: Avro OCF files include a *sync marker*; `fastavro.writer` generates a random one if not provided, so byte‑for‑byte output may vary between runs even with the same data. This does not affect the embedded schema or logical content.
+
+**Writer schema (overview):** The record is named `UnionTypesRecord` and defines the following fields:
+
+| Field                             | Union branches / details                                                                                                                                                                                                                                                                                                                                                                                                                           |
+|-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `nullable_int_nullfirst`          | `["null","int"]` (tests null‑first ordering)                                                                                                                                                                                                                                                                                                                                                                                                       |
+| `nullable_string_nullsecond`      | `["string","null"]` (tests null‑second ordering; in Avro, a union field’s default must match the *first* branch)                                                                                                                                                                                                                                                                                                                                   |
+| `union_prim`                      | `["boolean","int","long","float","double","bytes","string"]`                                                                                                                                                                                                                                                                                                                                                                                       |
+| `union_bytes_vs_string`           | `["bytes","string"]` (ambiguous scalar union; script uses fastavro’s tuple notation to disambiguate)                                                                                                                                                                                                                                                                                                                                               |
+| `union_fixed_dur_decfix`          | `["Fx8","Dur12","DecFix16"]` where:<br>• `Fx8` = `fixed`(size=8)<br>• `Dur12` = `fixed`(size=12, `logicalType`=`duration`)<br>• `DecFix16` = `fixed`(size=16, `logicalType`=`decimal`, precision=10, scale=2)<br>**Notes:** Avro `duration` is a `fixed[12]` storing **months, days, millis** as three **little‑endian** 32‑bit integers; Avro `decimal` on `bytes`/`fixed` uses **two’s‑complement big‑endian** encoding of the unscaled integer. |
+| `union_enum_records_array_map`    | `[ColorU, RecA, RecB, array<long>, map<string>]` where:<br>• `ColorU` = `enum` {`RED`,`GREEN`,`BLUE`}<br>• `RecA` = `record` {`a:int`, `b:string`}<br>• `RecB` = `record` {`x:long`, `y:bytes`}                                                                                                                                                                                                                                                    |
+| `union_date_or_fixed4`            | `[int (logicalType=`date`), Fx4]` where `Fx4` = `fixed`(size=4)                                                                                                                                                                                                                                                                                                                                                                                    |
+| `union_time_millis_or_enum`       | `[int (logicalType=`time-millis`), OnOff]` where `OnOff` = `enum` {`ON`,`OFF`}                                                                                                                                                                                                                                                                                                                                                                     |
+| `union_time_micros_or_string`     | `[long (logicalType=`time-micros`), string]`                                                                                                                                                                                                                                                                                                                                                                                                       |
+| `union_ts_millis_utc_or_array`    | `[long (logicalType=`timestamp-millis`), array<int>]`                                                                                                                                                                                                                                                                                                                                                                                              |
+| `union_ts_micros_local_or_bytes`  | `[long (logicalType=`local-timestamp-micros`), bytes]`                                                                                                                                                                                                                                                                                                                                                                                             |
+| `union_uuid_or_fixed10`           | `[string (logicalType=`uuid`), Fx10]` where `Fx10` = `fixed`(size=10)                                                                                                                                                                                                                                                                                                                                                                              |
+| `union_dec_bytes_or_dec_fixed`    | `[bytes (decimal p=10 s=2), DecFix20]` where `DecFix20` = `fixed`(size=20, decimal p=20 s=4) — decimal encoding is big‑endian two’s‑complement.                                                                                                                                                                                                                                                                                                    |
+| `union_null_bytes_string`         | `["null","bytes","string"]`                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| `array_of_union`                  | `array<["long","string"]>`                                                                                                                                                                                                                                                                                                                                                                                                                         |
+| `map_of_union`                    | `map<["null","double"]>`                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| `record_with_union_field`         | `HasUnion` = `record` {`id:int`, `u:["int","string"]`}                                                                                                                                                                                                                                                                                                                                                                                             |
+| `union_ts_micros_utc_or_map`      | `[long (logicalType=`timestamp-micros`), map<long>]`                                                                                                                                                                                                                                                                                                                                                                                               |
+| `union_ts_millis_local_or_string` | `[long (logicalType=`local-timestamp-millis`), string]`                                                                                                                                                                                                                                                                                                                                                                                            |
+| `union_bool_or_string`            | `["boolean","string"]`                                                                                                                                                                                                                                                                                                                                                                                                                             |
+
+**Implementation notes (generation):**
+
+* The script uses **fastavro’s tuple notation** `(branch_name, value)` to select branches in ambiguous unions (e.g., bytes vs. string, multiple named records). See *“Using the tuple notation to specify which branch of a union to take”* in the fastavro docs.
+* Decimal values are pre‑encoded to the required **big‑endian two’s‑complement** byte sequence before writing (for both `bytes` and `fixed` decimal logical types).
+* The `duration` logical type payloads are 12‑byte triples: **months / days / milliseconds**, little‑endian each.
+
+**Source / Repro script:**
+`create_avro_union_file.py` (Gist): contains the full writer schema, record builders covering four rows, and the `fastavro.writer` call which emits `union_fields.avro`.
+
+## Comprehensive E2E Coverage File
+
+**Purpose:** A single OCF that exercises **all decoder paths** used by `arrow-avro` with both **nested and non‑nested** shapes, including **dense unions** (null‑first, null‑second, multi‑branch), **aliases** (type and field), **default values**, **docs** and **namespaces**, and combinations thereof. It’s intended to validate the final `Reader` implementation and to stress schema‑resolution behavior in the tests under `arrow-avro/src/reader/mod.rs`.
+
+**File:** `comprehensive_e2e.avro`
+**Top‑level record (writer schema):** `org.apache.arrow.avrotests.v1.E2eComprehensive`
+**Record count:** four rows (each row selects different union branches and nested shapes)
+
+**Coverage summary (by Arrow / Avro mapping):**
+
+* Primitives: **boolean, int, long, float, double**
+* Binary / Text: **bytes**, **string (UTF‑8)**
+* Logical types: **date**, **time‑millis**, **time‑micros**, **timestamp‑millis (UTC)**, **timestamp‑micros (UTC)**, **local‑timestamp‑millis**, **local‑timestamp‑micros**, **uuid (string)**, **decimal** on **bytes** and **fixed**, **duration** on **fixed(12)**
+* Named types: **fixed**, **enum**, **record**
+* Collections: **array**, **map**
+* Unions: **nullable unions**, **ambiguous scalar unions**, **unions of named types**, and **unions nested inside arrays/maps/records**
+* Schema‑evolution hooks: **type aliases**, **field aliases**, **defaults** (including union defaults on the first branch), **docs**, and **namespaces**
+
+**Writer schema (overview of fields):**
+
+| Field                         | Type / details                                                                                          |
+|-------------------------------|---------------------------------------------------------------------------------------------------------|
+| `id`                          | `long`                                                                                                  |
+| `flag`                        | `boolean` (default `true`)                                                                              |
+| `ratio_f32`                   | `float` (default `0.0`)                                                                                 |
+| `ratio_f64`                   | `double` (default `0.0`)                                                                                |
+| `count_i32`                   | `int` (default `0`)                                                                                     |
+| `count_i64`                   | `long` (default `0`)                                                                                    |
+| `opt_i32_nullfirst`           | `["null","int"]` (default `null`)                                                                       |
+| `opt_str_nullsecond`          | `["string","null"]` (default `""`, alias: `old_opt_str`)                                                |
+| `tri_union_prim`              | `["int","string","boolean"]` (default `0`)                                                              |
+| `str_utf8`                    | `string` (default `"default"`)                                                                          |
+| `raw_bytes`                   | `bytes` (default `""`)                                                                                  |
+| `fx16_plain`                  | `fixed` `types.Fx16` (size 16, alias `Fixed16Old`)                                                      |
+| `dec_bytes_s10_2`             | `bytes` + `logicalType: decimal` (precision 10, scale 2)                                                |
+| `dec_fix_s20_4`               | `fixed` `types.DecFix20` (size 20) + `logicalType: decimal` (precision 20, scale 4)                     |
+| `uuid_str`                    | `string` + `logicalType: uuid`                                                                          |
+| `d_date`                      | `int` + `logicalType: date`                                                                             |
+| `t_millis`                    | `int` + `logicalType: time-millis`                                                                      |
+| `t_micros`                    | `long` + `logicalType: time-micros`                                                                     |
+| `ts_millis_utc`               | `long` + `logicalType: timestamp-millis`                                                                |
+| `ts_micros_utc`               | `long` + `logicalType: timestamp-micros`                                                                |
+| `ts_millis_local`             | `long` + `logicalType: local-timestamp-millis`                                                          |
+| `ts_micros_local`             | `long` + `logicalType: local-timestamp-micros`                                                          |
+| `interval_mdn`                | `fixed` `types.Dur12` (size 12) + `logicalType: duration`                                               |
+| `status`                      | `enum` `types.Status` = {`UNKNOWN`,`NEW`,`PROCESSING`,`DONE`} (alias: `State`)                          |
+| `arr_union`                   | `array<["long","string","null"]>`                                                                       |
+| `map_union`                   | `map<["null","double","string"]>`                                                                       |
+| `address`                     | `record` `types.Address` {`street` (alias: `street_name`), `zip:int`, `country:string`}                 |
+| `maybe_auth`                  | `record` `types.MaybeAuth` {`user:string`, `token:["null","bytes"]` (default `null`)}                   |
+| `union_enum_record_array_map` | `[types.Color enum, types.RecA record, types.RecB record, array<long>, map<string>]`                    |
+| `union_date_or_fixed4`        | `[int (logicalType=date), fixed Fx4 size 4]`                                                            |
+| `union_interval_or_string`    | `[fixed Dur12U size 12 (logicalType=duration), string]`                                                 |
+| `union_uuid_or_fixed10`       | `[string (logicalType=uuid), fixed Fx10 size 10]`                                                       |
+| `array_records_with_union`    | `array<record types.KV { key:string, val:["null","int","long"] }>`                                      |
+| `union_map_or_array_int`      | `[map<int>, array<int>]`                                                                                |
+| `renamed_with_default`        | `int` (default `42`, alias: `old_count`)                                                                |
+| `person`                      | `record` `com.example.v2.PersonV2` (alias: `com.example.Person`) `{ name:string, age:int (default 0) }` |
+
+**How this file was created**
+
+* Script: [`create_comprehensive_avro_file.py`](https://gist.github.com/jecsand838/26f9666da8de22651027d485bd83f4a3)
+  Uses **fastavro** to write `comprehensive_e2e.avro` with the schema above and four records that intentionally vary union branches and nested shapes.
+
+**Re‑generation**
+
+From the repository root:
+
+```bash
+# 1) Ensure Python 3 is available, then install fastavro
+python -m pip install --upgrade fastavro
+
+# 2) Download create_comprehensive_avro_file.py from the gist linked above, then run it (writes ./comprehensive_e2e.avro by default)
+python create_comprehensive_avro_file.py
+
+# 3) Move or copy the file into this directory if needed
+mv comprehensive_e2e.avro arrow-avro/test/data/
+```
+
+**Notes / tips for tests**
+
+* For **unions of named types** (record/enum/fixed), the generator uses fastavro’s **tuple notation** to select the union branch and, where needed, supplies the **fully‑qualified name (FQN)** to avoid ambiguity when namespaces apply.
+* The file contains many **defaults** and **aliases** (type and field) to exercise **schema resolution** code paths.
+* As with all OCFs, a random **sync marker** is embedded in the file header; byte‑for‑byte output may vary across runs without affecting the schema or logical content.
+
+## Other Files
+
+This directory contains other small OCF files used by `arrow-avro` tests. Details on these will be added in
+follow-up PRs.
\ No newline at end of file
diff --git a/arrow-avro/test/data/comprehensive_e2e.avro b/arrow-avro/test/data/comprehensive_e2e.avro
new file mode 100644
index 000000000000..a3e55716c325
Binary files /dev/null and b/arrow-avro/test/data/comprehensive_e2e.avro differ
diff --git a/arrow-avro/test/data/duration_logical_types.avro b/arrow-avro/test/data/duration_logical_types.avro
new file mode 100644
index 000000000000..4d514fa9ba59
Binary files /dev/null and b/arrow-avro/test/data/duration_logical_types.avro differ
diff --git a/arrow-avro/test/data/duration_uuid.avro b/arrow-avro/test/data/duration_uuid.avro
new file mode 100644
index 000000000000..09dd67b7807a
Binary files /dev/null and b/arrow-avro/test/data/duration_uuid.avro differ
diff --git a/arrow-avro/test/data/enum_reuse.avro b/arrow-avro/test/data/enum_reuse.avro
new file mode 100644
index 000000000000..7891870df3c9
Binary files /dev/null and b/arrow-avro/test/data/enum_reuse.avro differ
diff --git a/arrow-avro/test/data/fixed256_decimal.avro b/arrow-avro/test/data/fixed256_decimal.avro
new file mode 100644
index 000000000000..d1fc97dd8c83
Binary files /dev/null and b/arrow-avro/test/data/fixed256_decimal.avro differ
diff --git a/arrow-avro/test/data/fixed_length_decimal_legacy_32.avro b/arrow-avro/test/data/fixed_length_decimal_legacy_32.avro
new file mode 100644
index 000000000000..b746df9619b5
Binary files /dev/null and b/arrow-avro/test/data/fixed_length_decimal_legacy_32.avro differ
diff --git a/arrow-avro/test/data/int128_decimal.avro b/arrow-avro/test/data/int128_decimal.avro
new file mode 100644
index 000000000000..bd54d20ba487
Binary files /dev/null and b/arrow-avro/test/data/int128_decimal.avro differ
diff --git a/arrow-avro/test/data/int256_decimal.avro b/arrow-avro/test/data/int256_decimal.avro
new file mode 100644
index 000000000000..62ad7ea4df08
Binary files /dev/null and b/arrow-avro/test/data/int256_decimal.avro differ
diff --git a/arrow-avro/test/data/named_types_complex.avro b/arrow-avro/test/data/named_types_complex.avro
new file mode 100644
index 000000000000..eae439317e5b
Binary files /dev/null and b/arrow-avro/test/data/named_types_complex.avro differ
diff --git a/arrow-avro/test/data/nested_record_reuse.avro b/arrow-avro/test/data/nested_record_reuse.avro
new file mode 100644
index 000000000000..5e2a9e0328bc
Binary files /dev/null and b/arrow-avro/test/data/nested_record_reuse.avro differ
diff --git a/arrow-avro/test/data/skippable_types.avro b/arrow-avro/test/data/skippable_types.avro
new file mode 100644
index 000000000000..b0518e0056b5
Binary files /dev/null and b/arrow-avro/test/data/skippable_types.avro differ
diff --git a/arrow-avro/test/data/union_fields.avro b/arrow-avro/test/data/union_fields.avro
new file mode 100644
index 000000000000..e0ffb82bd412
Binary files /dev/null and b/arrow-avro/test/data/union_fields.avro differ
diff --git a/arrow-avro/test/data/zero_byte.avro b/arrow-avro/test/data/zero_byte.avro
new file mode 100644
index 000000000000..f7ffd29b6890
Binary files /dev/null and b/arrow-avro/test/data/zero_byte.avro differ
diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml
index d4fa0614e01a..02ea49c37c46 100644
--- a/arrow-buffer/Cargo.toml
+++ b/arrow-buffer/Cargo.toml
@@ -35,13 +35,17 @@ bench = false
 [package.metadata.docs.rs]
 all-features = true
 
+[features]
+pool = []
+
 [dependencies]
 bytes = { version = "1.4" }
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-bigint = { version = "0.4.6", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 half = { version = "2.1", default-features = false }
 
 [dev-dependencies]
-criterion = { version = "0.5", default-features = false }
+criterion = { workspace = true, default-features = false }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 
 [[bench]]
@@ -55,3 +59,8 @@ harness = false
 [[bench]]
 name = "offset"
 harness = false
+
+[[bench]]
+name = "mutable_buffer_repeat_slice"
+harness = false
+
diff --git a/arrow-buffer/benches/bit_mask.rs b/arrow-buffer/benches/bit_mask.rs
index 545528724e5d..0384089e32c5 100644
--- a/arrow-buffer/benches/bit_mask.rs
+++ b/arrow-buffer/benches/bit_mask.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use arrow_buffer::bit_mask::set_bits;
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use std::hint;
 
 fn criterion_benchmark(c: &mut Criterion) {
diff --git a/arrow-buffer/benches/i256.rs b/arrow-buffer/benches/i256.rs
index 7dec226bbc08..2bbb5c0284c2 100644
--- a/arrow-buffer/benches/i256.rs
+++ b/arrow-buffer/benches/i256.rs
@@ -17,6 +17,7 @@
 
 use arrow_buffer::i256;
 use criterion::*;
+use num_traits::cast::ToPrimitive;
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use std::{hint, str::FromStr};
@@ -36,13 +37,19 @@ fn criterion_benchmark(c: &mut Criterion) {
         i256::MAX,
     ];
 
-    for number in numbers {
+    for number in numbers.iter() {
         let t = hint::black_box(number.to_string());
         c.bench_function(&format!("i256_parse({t})"), |b| {
             b.iter(|| i256::from_str(&t).unwrap());
         });
     }
 
+    for number in numbers.iter() {
+        c.bench_function(&format!("i256_to_f64({number})"), |b| {
+            b.iter(|| (*number).to_f64().unwrap())
+        });
+    }
+
     let mut rng = StdRng::seed_from_u64(42);
 
     let numerators: Vec<_> = (0..SIZE)
diff --git a/arrow-buffer/benches/mutable_buffer_repeat_slice.rs b/arrow-buffer/benches/mutable_buffer_repeat_slice.rs
new file mode 100644
index 000000000000..a59c24baef56
--- /dev/null
+++ b/arrow-buffer/benches/mutable_buffer_repeat_slice.rs
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_buffer::Buffer;
+use criterion::*;
+use rand::distr::Alphanumeric;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use std::hint;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let mut group = c.benchmark_group("MutableBuffer repeat slice");
+    let mut rng = StdRng::seed_from_u64(42);
+
+    for slice_length in [3, 20, 100] {
+        let slice_to_repeat: Vec<u8> = hint::black_box(
+            (&mut rng)
+                .sample_iter(&Alphanumeric)
+                .take(slice_length)
+                .collect(),
+        );
+        let slice_to_repeat: &[u8] = slice_to_repeat.as_ref();
+
+        for repeat_count in [3, 64, 1024, 8192] {
+            let parameter_string = format!("slice_len={slice_length} n={repeat_count}");
+
+            group.bench_with_input(
+                BenchmarkId::new("repeat_slice_n_times", &parameter_string),
+                &(repeat_count),
+                |b, &repeat_count| {
+                    b.iter(|| {
+                        let mut mutable_buffer = arrow_buffer::MutableBuffer::with_capacity(0);
+
+                        mutable_buffer.repeat_slice_n_times(slice_to_repeat, repeat_count);
+
+                        Buffer::from(mutable_buffer)
+                    })
+                },
+            );
+            group.bench_with_input(
+                BenchmarkId::new("extend_from_slice loop", &parameter_string),
+                &(repeat_count),
+                |b, &repeat_count| {
+                    b.iter(|| {
+                        let mut mutable_buffer = arrow_buffer::MutableBuffer::with_capacity(
+                            size_of_val(slice_to_repeat) * repeat_count,
+                        );
+
+                        for _ in 0..repeat_count {
+                            mutable_buffer.extend_from_slice(slice_to_repeat);
+                        }
+
+                        Buffer::from(mutable_buffer)
+                    })
+                },
+            );
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs
index 9868ab55cc11..15faed43a130 100644
--- a/arrow-buffer/src/bigint/mod.rs
+++ b/arrow-buffer/src/bigint/mod.rs
@@ -17,8 +17,12 @@
 
 use crate::arith::derive_arith;
 use crate::bigint::div::div_rem;
-use num::cast::AsPrimitive;
-use num::{BigInt, FromPrimitive, ToPrimitive};
+use num_bigint::BigInt;
+use num_traits::{
+    Bounded, CheckedAdd, CheckedDiv, CheckedMul, CheckedNeg, CheckedRem, CheckedSub, FromPrimitive,
+    Num, One, Signed, ToPrimitive, WrappingAdd, WrappingMul, WrappingNeg, WrappingSub, Zero,
+    cast::AsPrimitive,
+};
 use std::cmp::Ordering;
 use std::num::ParseIntError;
 use std::ops::{BitAnd, BitOr, BitXor, Neg, Shl, Shr};
@@ -232,11 +236,7 @@ impl i256 {
     pub fn from_f64(v: f64) -> Option<Self> {
         BigInt::from_f64(v).and_then(|i| {
             let (integer, overflow) = i256::from_bigint_with_overflow(i);
-            if overflow {
-                None
-            } else {
-                Some(integer)
-            }
+            if overflow { None } else { Some(integer) }
         })
     }
 
@@ -304,7 +304,7 @@ impl i256 {
         let v_bytes = v.to_signed_bytes_le();
         match v_bytes.len().cmp(&32) {
             Ordering::Less => {
-                let mut bytes = if num::Signed::is_negative(&v) {
+                let mut bytes = if num_traits::Signed::is_negative(&v) {
                     [255_u8; 32]
                 } else {
                     [0; 32]
@@ -586,6 +586,34 @@ impl i256 {
     pub const fn is_positive(self) -> bool {
         self.high.is_positive() || self.high == 0 && self.low != 0
     }
+
+    /// Returns the number of leading zeros in the binary representation of this [`i256`].
+    pub const fn leading_zeros(&self) -> u32 {
+        match self.high {
+            0 => u128::BITS + self.low.leading_zeros(),
+            _ => self.high.leading_zeros(),
+        }
+    }
+
+    /// Returns the number of trailing zeros in the binary representation of this [`i256`].
+    pub const fn trailing_zeros(&self) -> u32 {
+        match self.low {
+            0 => u128::BITS + self.high.trailing_zeros(),
+            _ => self.low.trailing_zeros(),
+        }
+    }
+
+    fn redundant_leading_sign_bits_i256(n: i256) -> u8 {
+        let mask = n >> 255; // all ones or all zeros
+        ((n ^ mask).leading_zeros() - 1) as u8 // we only need one sign bit
+    }
+
+    fn i256_to_f64(input: i256) -> f64 {
+        let k = i256::redundant_leading_sign_bits_i256(input);
+        let n = input << k; // left-justify (no redundant sign bits)
+        let n = (n.high >> 64) as i64; // throw away the lower 192 bits
+        (n as f64) * f64::powi(2.0, 192 - (k as i32)) // convert to f64 and rescale: we shifted left by k bits and dropped the low 192 bits, so multiply by 2^(192-k)
+    }
 }
 
 /// Temporary workaround due to lack of stable const array slicing
@@ -821,6 +849,15 @@ impl ToPrimitive for i256 {
         }
     }
 
+    fn to_f64(&self) -> Option<f64> {
+        match *self {
+            Self::MIN => Some(-2_f64.powi(255)),
+            Self::ZERO => Some(0f64),
+            Self::ONE => Some(1f64),
+            n => Some(Self::i256_to_f64(n)),
+        }
+    }
+
     fn to_u64(&self) -> Option<u64> {
         let as_i128 = self.low as i128;
 
@@ -836,11 +873,142 @@ impl ToPrimitive for i256 {
     }
 }
 
+// num_traits checked implementations
+
+impl CheckedNeg for i256 {
+    fn checked_neg(&self) -> Option<Self> {
+        (*self).checked_neg()
+    }
+}
+
+impl CheckedAdd for i256 {
+    fn checked_add(&self, v: &i256) -> Option<Self> {
+        (*self).checked_add(*v)
+    }
+}
+
+impl CheckedSub for i256 {
+    fn checked_sub(&self, v: &i256) -> Option<Self> {
+        (*self).checked_sub(*v)
+    }
+}
+
+impl CheckedDiv for i256 {
+    fn checked_div(&self, v: &i256) -> Option<Self> {
+        (*self).checked_div(*v)
+    }
+}
+
+impl CheckedMul for i256 {
+    fn checked_mul(&self, v: &i256) -> Option<Self> {
+        (*self).checked_mul(*v)
+    }
+}
+
+impl CheckedRem for i256 {
+    fn checked_rem(&self, v: &i256) -> Option<Self> {
+        (*self).checked_rem(*v)
+    }
+}
+
+impl WrappingAdd for i256 {
+    fn wrapping_add(&self, v: &Self) -> Self {
+        (*self).wrapping_add(*v)
+    }
+}
+
+impl WrappingSub for i256 {
+    fn wrapping_sub(&self, v: &Self) -> Self {
+        (*self).wrapping_sub(*v)
+    }
+}
+
+impl WrappingMul for i256 {
+    fn wrapping_mul(&self, v: &Self) -> Self {
+        (*self).wrapping_mul(*v)
+    }
+}
+
+impl WrappingNeg for i256 {
+    fn wrapping_neg(&self) -> Self {
+        (*self).wrapping_neg()
+    }
+}
+
+impl Zero for i256 {
+    fn zero() -> Self {
+        i256::ZERO
+    }
+
+    fn is_zero(&self) -> bool {
+        *self == i256::ZERO
+    }
+}
+
+impl One for i256 {
+    fn one() -> Self {
+        i256::ONE
+    }
+
+    fn is_one(&self) -> bool {
+        *self == i256::ONE
+    }
+}
+
+impl Num for i256 {
+    type FromStrRadixErr = ParseI256Error;
+
+    fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
+        if radix == 10 {
+            str.parse()
+        } else {
+            // Parsing from non-10 bases is not supported
+            Err(ParseI256Error {})
+        }
+    }
+}
+
+impl Signed for i256 {
+    fn abs(&self) -> Self {
+        self.wrapping_abs()
+    }
+
+    fn abs_sub(&self, other: &Self) -> Self {
+        if self > other {
+            self.wrapping_sub(other)
+        } else {
+            i256::ZERO
+        }
+    }
+
+    fn signum(&self) -> Self {
+        (*self).signum()
+    }
+
+    fn is_positive(&self) -> bool {
+        (*self).is_positive()
+    }
+
+    fn is_negative(&self) -> bool {
+        (*self).is_negative()
+    }
+}
+
+impl Bounded for i256 {
+    fn min_value() -> Self {
+        i256::MIN
+    }
+
+    fn max_value() -> Self {
+        i256::MAX
+    }
+}
+
 #[cfg(all(test, not(miri)))] // llvm.x86.subborrow.64 not supported by MIRI
 mod tests {
     use super::*;
-    use num::Signed;
-    use rand::{rng, Rng};
+    use num_traits::Signed;
+    use rand::{Rng, rng};
 
     #[test]
     fn test_signed_cmp() {
@@ -1264,4 +1432,152 @@ mod tests {
             }
         }
     }
+
+    #[test]
+    fn test_decimal256_to_f64_typical_values() {
+        let v = i256::from_i128(42_i128);
+        assert_eq!(v.to_f64().unwrap(), 42.0);
+
+        let v = i256::from_i128(-123456789012345678i128);
+        assert_eq!(v.to_f64().unwrap(), -123456789012345678.0);
+
+        let v = i256::from_string("0").unwrap();
+        assert_eq!(v.to_f64().unwrap(), 0.0);
+
+        let v = i256::from_string("1").unwrap();
+        assert_eq!(v.to_f64().unwrap(), 1.0);
+
+        let mut rng = rng();
+        for _ in 0..10 {
+            let f64_value =
+                (rng.random_range(i128::MIN..i128::MAX) as f64) * rng.random_range(0.0..1.0);
+            let big = i256::from_f64(f64_value).unwrap();
+            assert_eq!(big.to_f64().unwrap(), f64_value);
+        }
+    }
+
+    #[test]
+    fn test_decimal256_to_f64_large_positive_value() {
+        let max_f = f64::MAX;
+        let big = i256::from_f64(max_f * 2.0).unwrap_or(i256::MAX);
+        let out = big.to_f64().unwrap();
+        assert!(out.is_finite() && out.is_sign_positive());
+    }
+
+    #[test]
+    fn test_decimal256_to_f64_large_negative_value() {
+        let max_f = f64::MAX;
+        let big_neg = i256::from_f64(-(max_f * 2.0)).unwrap_or(i256::MIN);
+        let out = big_neg.to_f64().unwrap();
+        assert!(out.is_finite() && out.is_sign_negative());
+    }
+
+    #[test]
+    fn test_num_traits() {
+        let value = i256::from_i128(-5);
+        assert_eq!(
+            <i256 as CheckedNeg>::checked_neg(&value),
+            Some(i256::from(5))
+        );
+
+        assert_eq!(
+            <i256 as CheckedAdd>::checked_add(&value, &value),
+            Some(i256::from(-10))
+        );
+
+        assert_eq!(
+            <i256 as CheckedSub>::checked_sub(&value, &value),
+            Some(i256::from(0))
+        );
+
+        assert_eq!(
+            <i256 as CheckedMul>::checked_mul(&value, &value),
+            Some(i256::from(25))
+        );
+
+        assert_eq!(
+            <i256 as CheckedDiv>::checked_div(&value, &value),
+            Some(i256::from(1))
+        );
+
+        assert_eq!(
+            <i256 as CheckedRem>::checked_rem(&value, &value),
+            Some(i256::from(0))
+        );
+
+        assert_eq!(
+            <i256 as WrappingAdd>::wrapping_add(&value, &value),
+            i256::from(-10)
+        );
+
+        assert_eq!(
+            <i256 as WrappingSub>::wrapping_sub(&value, &value),
+            i256::from(0)
+        );
+
+        assert_eq!(
+            <i256 as WrappingMul>::wrapping_mul(&value, &value),
+            i256::from(25)
+        );
+
+        assert_eq!(<i256 as WrappingNeg>::wrapping_neg(&value), i256::from(5));
+
+        // A single check for wrapping behavior, rely on trait implementation for others
+        let result = <i256 as WrappingAdd>::wrapping_add(&i256::MAX, &i256::ONE);
+        assert_eq!(result, i256::MIN);
+
+        assert_eq!(<i256 as Signed>::abs(&value), i256::from(5));
+
+        assert_eq!(<i256 as One>::one(), i256::from(1));
+        assert_eq!(<i256 as Zero>::zero(), i256::from(0));
+
+        assert_eq!(<i256 as Bounded>::min_value(), i256::MIN);
+        assert_eq!(<i256 as Bounded>::max_value(), i256::MAX);
+    }
+
+    #[test]
+    fn test_numtraits_from_str_radix() {
+        assert_eq!(
+            i256::from_str_radix("123456789", 10).expect("parsed"),
+            i256::from(123456789)
+        );
+        assert_eq!(
+            i256::from_str_radix("0", 10).expect("parsed"),
+            i256::from(0)
+        );
+        assert!(i256::from_str_radix("abc", 10).is_err());
+        assert!(i256::from_str_radix("0", 16).is_err());
+    }
+
+    #[test]
+    fn test_leading_zeros() {
+        // Without high part
+        assert_eq!(i256::from(0).leading_zeros(), 256);
+        assert_eq!(i256::from(1).leading_zeros(), 256 - 1);
+        assert_eq!(i256::from(16).leading_zeros(), 256 - 5);
+        assert_eq!(i256::from(17).leading_zeros(), 256 - 5);
+
+        // With high part
+        assert_eq!(i256::from_parts(2, 16).leading_zeros(), 128 - 5);
+        assert_eq!(i256::from_parts(2, i128::MAX).leading_zeros(), 1);
+
+        assert_eq!(i256::MAX.leading_zeros(), 1);
+        assert_eq!(i256::from(-1).leading_zeros(), 0);
+    }
+
+    #[test]
+    fn test_trailing_zeros() {
+        // Without high part
+        assert_eq!(i256::from(0).trailing_zeros(), 256);
+        assert_eq!(i256::from(2).trailing_zeros(), 1);
+        assert_eq!(i256::from(16).trailing_zeros(), 4);
+        assert_eq!(i256::from(17).trailing_zeros(), 0);
+        // With high part
+        assert_eq!(i256::from_parts(0, i128::MAX).trailing_zeros(), 128);
+        assert_eq!(i256::from_parts(0, 16).trailing_zeros(), 128 + 4);
+        assert_eq!(i256::from_parts(2, i128::MAX).trailing_zeros(), 1);
+
+        assert_eq!(i256::MAX.trailing_zeros(), 0);
+        assert_eq!(i256::from(-1).trailing_zeros(), 0);
+    }
 }
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index c8e5144c14cb..ff836bf28729 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -16,33 +16,74 @@
 // under the License.
 
 use crate::bit_chunk_iterator::BitChunks;
-use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
+use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
 use crate::{
-    bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
-    BooleanBufferBuilder, Buffer, MutableBuffer,
+    BooleanBufferBuilder, Buffer, MutableBuffer, bit_util, buffer_bin_and, buffer_bin_or,
+    buffer_bin_xor, buffer_unary_not,
 };
 
 use std::ops::{BitAnd, BitOr, BitXor, Not};
 
 /// A slice-able [`Buffer`] containing bit-packed booleans
 ///
-/// `BooleanBuffer`s can be creating using [`BooleanBufferBuilder`]
+/// This structure represents a sequence of boolean values packed into a
+/// byte-aligned [`Buffer`]. Both the offset and length are represented in bits.
 ///
-/// # See Also
+/// # Layout
+///
+/// The values are represented as little endian bit-packed values, where the
+/// least significant bit of each byte represents the first boolean value and
+/// then proceeding to the most significant bit.
+///
+/// For example, the 10 bit bitmask `0b0111001101` has length 10, and is
+/// represented using 2 bytes with offset 0 like this:
+///
+/// ```text
+///        ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
+///        │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
+///        ││ 1 │ 0 │ 1 │ 1 │ 0 │ 0 │ 1 │ 1 ││    ││ 1 │ 0 │ ? │ ? │ ? │ ? │ ? │ ? ││
+///        │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
+/// bit    └─────────────────────────────────┘    └─────────────────────────────────┘
+/// offset  0             Byte 0             7    0              Byte 1            7
+///
+///         length = 10 bits, offset = 0
+/// ```
+///
+/// The same bitmask with length 10 and offset 3 would be represented using 2
+/// bytes like this:
+///
+/// ```text
+///       ┌─────────────────────────────────┐    ┌─────────────────────────────────┐
+///       │┌───┬───┬───┬───┬───┬───┬───┬───┐│    │┌───┬───┬───┬───┬───┬───┬───┬───┐│
+///       ││ ? │ ? │ ? │ 1 │ 0 │ 1 │ 1 │ 0 ││    ││ 0 │ 1 │ 1 │ 1 │ 0 │ ? │ ? │ ? ││
+///       │└───┴───┴───┴───┴───┴───┴───┴───┘│    │└───┴───┴───┴───┴───┴───┴───┴───┘│
+/// bit   └─────────────────────────────────┘    └─────────────────────────────────┘
+/// offset 0             Byte 0             7    0              Byte 1            7
+///
+///        length = 10 bits, offset = 3
+/// ```
 ///
+/// Note that the bits marked `?` are not logically part of the mask and may
+/// contain either `0` or `1`
+///
+/// # See Also
+/// * [`BooleanBufferBuilder`] for building [`BooleanBuffer`] instances
 /// * [`NullBuffer`] for representing null values in Arrow arrays
 ///
 /// [`NullBuffer`]: crate::NullBuffer
 #[derive(Debug, Clone, Eq)]
 pub struct BooleanBuffer {
+    /// Underlying buffer (byte aligned)
     buffer: Buffer,
-    offset: usize,
-    len: usize,
+    /// Offset in bits (not bytes)
+    bit_offset: usize,
+    /// Length in bits (not bytes)
+    bit_len: usize,
 }
 
 impl PartialEq for BooleanBuffer {
     fn eq(&self, other: &Self) -> bool {
-        if self.len != other.len {
+        if self.bit_len != other.bit_len {
             return false;
         }
 
@@ -53,40 +94,40 @@ impl PartialEq for BooleanBuffer {
 }
 
 impl BooleanBuffer {
-    /// Create a new [`BooleanBuffer`] from a [`Buffer`], an `offset` and `length` in bits
+    /// Create a new [`BooleanBuffer`] from a [`Buffer`], `bit_offset` offset and `bit_len` length
     ///
     /// # Panics
     ///
     /// This method will panic if `buffer` is not large enough
-    pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self {
-        let total_len = offset.saturating_add(len);
+    pub fn new(buffer: Buffer, bit_offset: usize, bit_len: usize) -> Self {
+        let total_len = bit_offset.saturating_add(bit_len);
         let buffer_len = buffer.len();
-        let bit_len = buffer_len.saturating_mul(8);
+        let buffer_bit_len = buffer_len.saturating_mul(8);
         assert!(
-            total_len <= bit_len,
-            "buffer not large enough (offset: {offset}, len: {len}, buffer_len: {buffer_len})"
+            total_len <= buffer_bit_len,
+            "buffer not large enough (bit_offset: {bit_offset}, bit_len: {bit_len}, buffer_len: {buffer_len})"
         );
         Self {
             buffer,
-            offset,
-            len,
+            bit_offset,
+            bit_len,
         }
     }
 
-    /// Create a new [`BooleanBuffer`] of `length` where all values are `true`
+    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `true`
     pub fn new_set(length: usize) -> Self {
         let mut builder = BooleanBufferBuilder::new(length);
         builder.append_n(length, true);
         builder.finish()
     }
 
-    /// Create a new [`BooleanBuffer`] of `length` where all values are `false`
+    /// Create a new [`BooleanBuffer`] of `length` bits (not bytes) where all values are `false`
     pub fn new_unset(length: usize) -> Self {
         let buffer = MutableBuffer::new_null(length).into_buffer();
         Self {
             buffer,
-            offset: 0,
-            len: length,
+            bit_offset: 0,
+            bit_len: length,
         }
     }
 
@@ -96,34 +137,258 @@ impl BooleanBuffer {
         Self::new(buffer.into(), 0, len)
     }
 
+    /// Create a new [`BooleanBuffer`] by copying the relevant bits from an
+    /// input buffer.
+    ///
+    /// # Notes:
+    /// * The new `BooleanBuffer` has zero offset, even if `offset_in_bits` is non-zero
+    ///
+    /// # Example: Create a new [`BooleanBuffer`] copying a bit slice from an input slice
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8];
+    /// // Copy bits 4..16 from input
+    /// let result = BooleanBuffer::from_bits(&input, 4, 12);
+    /// assert_eq!(result.values(), &[0b10101100u8, 0b00001011u8]);
+    pub fn from_bits(src: impl AsRef<[u8]>, offset_in_bits: usize, len_in_bits: usize) -> Self {
+        Self::from_bitwise_unary_op(src, offset_in_bits, len_in_bits, |a| a)
+    }
+
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op`
+    /// to an input buffer.
+    ///
+    /// This function is faster than applying the operation bit by bit as
+    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+    ///
+    /// # Notes:
+    /// * `op` takes a single `u64` input and produces one `u64` output.
+    /// * `op` must only apply bitwise operations
+    ///   on the relevant bits; the input `u64` may contain irrelevant bits
+    ///   and may be processed differently on different endian architectures.
+    /// * `op` may be called with input bits outside the requested range
+    /// * The output always has zero offset
+    ///
+    /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer from a binary operation
+    /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for in-place unary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte slice
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
+    /// // NOT of the first 12 bits
+    /// let result = BooleanBuffer::from_bitwise_unary_op(
+    ///  &input, 0, 12, |a| !a
+    /// );
+    /// assert_eq!(result.values(), &[0b00110011u8, 0b11110101u8]);
+    /// ```
+    pub fn from_bitwise_unary_op<F>(
+        src: impl AsRef<[u8]>,
+        offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64) -> u64,
+    {
+        // try fast path for aligned input
+        if offset_in_bits & 0x7 == 0 {
+            // align to byte boundary
+            let aligned = &src.as_ref()[offset_in_bits / 8..];
+            if let Some(result) =
+                Self::try_from_aligned_bitwise_unary_op(aligned, len_in_bits, &mut op)
+            {
+                return result;
+            }
+        }
+
+        let chunks = BitChunks::new(src.as_ref(), offset_in_bits, len_in_bits);
+        let mut result = MutableBuffer::with_capacity(chunks.num_u64s() * 8);
+        for chunk in chunks.iter() {
+            // SAFETY: reserved enough capacity above, (exactly num_u64s()
+            // items) and we assume `BitChunks` correctly reports upper bound
+            unsafe {
+                result.push_unchecked(op(chunk));
+            }
+        }
+        if chunks.remainder_len() > 0 {
+            debug_assert!(result.capacity() >= result.len() + 8); // should not reallocate
+            // SAFETY: reserved enough capacity above, (exactly num_u64s()
+            // items) and we assume `BitChunks` correctly reports upper bound
+            unsafe {
+                result.push_unchecked(op(chunks.remainder_bits()));
+            }
+            // Just pushed one u64, which may have trailing zeros
+            result.truncate(chunks.num_bytes());
+        }
+
+        BooleanBuffer {
+            buffer: Buffer::from(result),
+            bit_offset: 0,
+            bit_len: len_in_bits,
+        }
+    }
+
+    /// Fast path for [`Self::from_bitwise_unary_op`] when input is aligned to
+    /// 8-byte (64-bit) boundaries
+    ///
+    /// Returns None if the fast path cannot be taken
+    fn try_from_aligned_bitwise_unary_op<F>(
+        src: &[u8],
+        len_in_bits: usize,
+        op: &mut F,
+    ) -> Option<Self>
+    where
+        F: FnMut(u64) -> u64,
+    {
+        // Safety: all valid bytes are valid u64s
+        let (prefix, aligned_u6us, suffix) = unsafe { src.align_to::<u64>() };
+        if !(prefix.is_empty() && suffix.is_empty()) {
+            // Couldn't make this case any faster than the default path, see
+            // https://github.com/apache/arrow-rs/pull/8996/changes#r2620022082
+            return None;
+        }
+        // the buffer is word (64 bit) aligned, so use optimized Vec code.
+        let result_u64s: Vec<u64> = aligned_u6us.iter().map(|l| op(*l)).collect();
+        let buffer = Buffer::from(result_u64s);
+        Some(BooleanBuffer::new(buffer, 0, len_in_bits))
+    }
+
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
+    /// the relevant bits from two input buffers.
+    ///
+    /// This function is faster than applying the operation bit by bit as
+    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+    ///
+    /// # Notes:
+    /// See notes on [Self::from_bitwise_unary_op]
+    ///
+    /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
+    /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
+    /// ```
+    /// # use arrow_buffer::{Buffer, BooleanBuffer};
+    /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
+    /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
+    /// // AND of the first 12 bits
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///   &left, 0, &right, 0, 12, |a, b| a & b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+    /// ```
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let left = [0b11001100u8, 0b10111010u8];
+    /// let right = [0b10101010u8, 0b11011100u8];
+    /// // OR of bits 4..16 from left and bits 0..12 from right
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///  &left, 4, &right, 0, 12, |a, b| a | b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+    /// ```
+    pub fn from_bitwise_binary_op<F>(
+        left: impl AsRef<[u8]>,
+        left_offset_in_bits: usize,
+        right: impl AsRef<[u8]>,
+        right_offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        let left = left.as_ref();
+        let right = right.as_ref();
+        // try fast path for aligned input
+        // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices
+        // to improve performance.
+        if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
+            // align to byte boundary
+            let left = &left[left_offset_in_bits / 8..];
+            let right = &right[right_offset_in_bits / 8..];
+
+            unsafe {
+                let (left_prefix, left_u64s, left_suffix) = left.align_to::<u64>();
+                let (right_prefix, right_u64s, right_suffix) = right.align_to::<u64>();
+                // if there is no prefix or suffix, both buffers are aligned and
+                // we can do the operation directly on u64s.
+                // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88.
+                // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
+                if left_prefix.is_empty()
+                    && right_prefix.is_empty()
+                    && left_suffix.is_empty()
+                    && right_suffix.is_empty()
+                {
+                    let result_u64s = left_u64s
+                        .iter()
+                        .zip(right_u64s.iter())
+                        .map(|(l, r)| op(*l, *r))
+                        .collect::<Vec<u64>>();
+                    // `zip` stops at the shorter input, so validate `len_in_bits`
+                    // via `new` (panics if the result is too small) instead of
+                    // building a `BooleanBuffer` whose length exceeds its buffer.
+                    let buffer = Buffer::from(result_u64s);
+                    return BooleanBuffer::new(buffer, 0, len_in_bits);
+                }
+            }
+        }
+        let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
+        let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);
+
+        let chunks = left_chunks
+            .iter()
+            .zip(right_chunks.iter())
+            .map(|(left, right)| op(left, right));
+        // SAFETY: the zipped `BitChunks` iterators form a trusted length iterator which
+        // correctly reports its upper bound
+        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };
+
+        let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
+        let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
+        // the remainder bits are counted from the least significant bit, so `to_le_bytes` is correct
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+        buffer.extend_from_slice(rem);
+
+        BooleanBuffer {
+            buffer: Buffer::from(buffer),
+            bit_offset: 0,
+            bit_len: len_in_bits,
+        }
+    }
+
     /// Returns the number of set bits in this buffer
     pub fn count_set_bits(&self) -> usize {
-        self.buffer.count_set_bits_offset(self.offset, self.len)
+        self.buffer
+            .count_set_bits_offset(self.bit_offset, self.bit_len)
     }
 
-    /// Returns a `BitChunks` instance which can be used to iterate over
+    /// Returns a [`BitChunks`] instance which can be used to iterate over
     /// this buffer's bits in `u64` chunks
     #[inline]
-    pub fn bit_chunks(&self) -> BitChunks {
-        BitChunks::new(self.values(), self.offset, self.len)
+    pub fn bit_chunks(&self) -> BitChunks<'_> {
+        BitChunks::new(self.values(), self.bit_offset, self.bit_len)
     }
 
-    /// Returns the offset of this [`BooleanBuffer`] in bits
+    /// Returns the offset of this [`BooleanBuffer`] in bits (not bytes)
     #[inline]
     pub fn offset(&self) -> usize {
-        self.offset
+        self.bit_offset
     }
 
-    /// Returns the length of this [`BooleanBuffer`] in bits
+    /// Returns the length of this [`BooleanBuffer`] in bits (not bytes)
     #[inline]
     pub fn len(&self) -> usize {
-        self.len
+        self.bit_len
     }
 
     /// Returns true if this [`BooleanBuffer`] is empty
     #[inline]
     pub fn is_empty(&self) -> bool {
-        self.len == 0
+        self.bit_len == 0
     }
 
     /// Free up unused memory.
@@ -139,7 +404,7 @@ impl BooleanBuffer {
     /// Panics if `i >= self.len()`
     #[inline]
     pub fn value(&self, idx: usize) -> bool {
-        assert!(idx < self.len);
+        assert!(idx < self.bit_len);
         unsafe { self.value_unchecked(idx) }
     }
 
@@ -149,7 +414,7 @@ impl BooleanBuffer {
     /// This doesn't check bounds, the caller must ensure that index < self.len()
     #[inline]
     pub unsafe fn value_unchecked(&self, i: usize) -> bool {
-        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.offset) }
+        unsafe { bit_util::get_bit_raw(self.buffer.as_ptr(), i + self.bit_offset) }
     }
 
     /// Returns the packed values of this [`BooleanBuffer`] not including any offset
@@ -161,13 +426,13 @@ impl BooleanBuffer {
     /// Slices this [`BooleanBuffer`] by the provided `offset` and `length`
     pub fn slice(&self, offset: usize, len: usize) -> Self {
         assert!(
-            offset.saturating_add(len) <= self.len,
+            offset.saturating_add(len) <= self.bit_len,
             "the length + offset of the sliced BooleanBuffer cannot exceed the existing length"
         );
         Self {
             buffer: self.buffer.clone(),
-            offset: self.offset + offset,
-            len,
+            bit_offset: self.bit_offset + offset,
+            bit_len: len,
         }
     }
 
@@ -175,7 +440,7 @@ impl BooleanBuffer {
     ///
     /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
     pub fn sliced(&self) -> Buffer {
-        self.buffer.bit_slice(self.offset, self.len)
+        self.buffer.bit_slice(self.bit_offset, self.bit_len)
     }
 
     /// Returns true if this [`BooleanBuffer`] is equal to `other`, using pointer comparisons
@@ -183,17 +448,21 @@ impl BooleanBuffer {
     /// return false when the arrays are logically equal
     pub fn ptr_eq(&self, other: &Self) -> bool {
         self.buffer.as_ptr() == other.buffer.as_ptr()
-            && self.offset == other.offset
-            && self.len == other.len
+            && self.bit_offset == other.bit_offset
+            && self.bit_len == other.bit_len
     }
 
     /// Returns the inner [`Buffer`]
+    ///
+    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
     #[inline]
     pub fn inner(&self) -> &Buffer {
         &self.buffer
     }
 
     /// Returns the inner [`Buffer`], consuming self
+    ///
+    /// Note: this does not account for offset and length of this [`BooleanBuffer`]
     pub fn into_inner(self) -> Buffer {
         self.buffer
     }
@@ -205,12 +474,17 @@ impl BooleanBuffer {
 
     /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
     pub fn set_indices(&self) -> BitIndexIterator<'_> {
-        BitIndexIterator::new(self.values(), self.offset, self.len)
+        BitIndexIterator::new(self.values(), self.bit_offset, self.bit_len)
+    }
+
+    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
+    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
+        BitIndexU32Iterator::new(self.values(), self.bit_offset, self.bit_len)
     }
 
     /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
     pub fn set_slices(&self) -> BitSliceIterator<'_> {
-        BitSliceIterator::new(self.values(), self.offset, self.len)
+        BitSliceIterator::new(self.values(), self.bit_offset, self.bit_len)
     }
 }
 
@@ -219,9 +493,9 @@ impl Not for &BooleanBuffer {
 
     fn not(self) -> Self::Output {
         BooleanBuffer {
-            buffer: buffer_unary_not(&self.buffer, self.offset, self.len),
-            offset: 0,
-            len: self.len,
+            buffer: buffer_unary_not(&self.buffer, self.bit_offset, self.bit_len),
+            bit_offset: 0,
+            bit_len: self.bit_len,
         }
     }
 }
@@ -230,11 +504,17 @@ impl BitAnd<&BooleanBuffer> for &BooleanBuffer {
     type Output = BooleanBuffer;
 
     fn bitand(self, rhs: &BooleanBuffer) -> Self::Output {
-        assert_eq!(self.len, rhs.len);
+        assert_eq!(self.bit_len, rhs.bit_len);
         BooleanBuffer {
-            buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
-            offset: 0,
-            len: self.len,
+            buffer: buffer_bin_and(
+                &self.buffer,
+                self.bit_offset,
+                &rhs.buffer,
+                rhs.bit_offset,
+                self.bit_len,
+            ),
+            bit_offset: 0,
+            bit_len: self.bit_len,
         }
     }
 }
@@ -243,11 +523,17 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
     type Output = BooleanBuffer;
 
     fn bitor(self, rhs: &BooleanBuffer) -> Self::Output {
-        assert_eq!(self.len, rhs.len);
+        assert_eq!(self.bit_len, rhs.bit_len);
         BooleanBuffer {
-            buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
-            offset: 0,
-            len: self.len,
+            buffer: buffer_bin_or(
+                &self.buffer,
+                self.bit_offset,
+                &rhs.buffer,
+                rhs.bit_offset,
+                self.bit_len,
+            ),
+            bit_offset: 0,
+            bit_len: self.bit_len,
         }
     }
 }
@@ -256,11 +542,17 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer {
     type Output = BooleanBuffer;
 
     fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output {
-        assert_eq!(self.len, rhs.len);
+        assert_eq!(self.bit_len, rhs.bit_len);
         BooleanBuffer {
-            buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len),
-            offset: 0,
-            len: self.len,
+            buffer: buffer_bin_xor(
+                &self.buffer,
+                self.bit_offset,
+                &rhs.buffer,
+                rhs.bit_offset,
+                self.bit_len,
+            ),
+            bit_offset: 0,
+            bit_len: self.bit_len,
         }
     }
 }
@@ -270,7 +562,7 @@ impl<'a> IntoIterator for &'a BooleanBuffer {
     type IntoIter = BitIterator<'a>;
 
     fn into_iter(self) -> Self::IntoIter {
-        BitIterator::new(self.values(), self.offset, self.len)
+        BitIterator::new(self.values(), self.bit_offset, self.bit_len)
     }
 }
 
@@ -358,12 +650,12 @@ mod tests {
         assert_eq!(boolean_slice1.values(), boolean_slice2.values());
 
         assert_eq!(bytes, boolean_slice1.values());
-        assert_eq!(16, boolean_slice1.offset);
-        assert_eq!(16, boolean_slice1.len);
+        assert_eq!(16, boolean_slice1.bit_offset);
+        assert_eq!(16, boolean_slice1.bit_len);
 
         assert_eq!(bytes, boolean_slice2.values());
-        assert_eq!(0, boolean_slice2.offset);
-        assert_eq!(16, boolean_slice2.len);
+        assert_eq!(0, boolean_slice2.bit_offset);
+        assert_eq!(16, boolean_slice2.bit_len);
     }
 
     #[test]
@@ -432,4 +724,103 @@ mod tests {
         assert_eq!(buf.values().len(), 1);
         assert!(buf.value(0));
     }
+
+    #[test]
+    fn test_from_bitwise_unary_op() {
+        // Use 1024 boolean values so that at least some of the tests cover multiple u64 chunks and
+        // perfect alignment
+        let input_bools = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_buffer = BooleanBuffer::from(&input_bools[..]);
+
+        // Note: ensure we test offsets over 100 to cover multiple u64 chunks
+        for offset in 0..1024 {
+            let result = BooleanBuffer::from_bitwise_unary_op(
+                input_buffer.values(),
+                offset,
+                input_buffer.len() - offset,
+                |a| !a,
+            );
+            let expected = input_bools[offset..]
+                .iter()
+                .map(|b| !*b)
+                .collect::<BooleanBuffer>();
+            assert_eq!(result, expected);
+        }
+
+        // Also test when the input doesn't cover the entire buffer
+        for offset in 0..512 {
+            let len = 512 - offset; // fixed length less than total
+            let result =
+                BooleanBuffer::from_bitwise_unary_op(input_buffer.values(), offset, len, |a| !a);
+            let expected = input_bools[offset..]
+                .iter()
+                .take(len)
+                .map(|b| !*b)
+                .collect::<BooleanBuffer>();
+            assert_eq!(result, expected);
+        }
+    }
+
+    #[test]
+    fn test_from_bitwise_binary_op() {
+        // pick random boolean inputs
+        let input_bools_left = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_bools_right = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
+        let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
+
+        for left_offset in 0..200 {
+            for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
+                for len_offset in [0, 1, 44, 100, 256, 300, 512] {
+                    let len = 1024 - len_offset - left_offset.max(right_offset); // ensure we don't go out of bounds
+                    // compute with AND
+                    let result = BooleanBuffer::from_bitwise_binary_op(
+                        input_buffer_left.values(),
+                        left_offset,
+                        input_buffer_right.values(),
+                        right_offset,
+                        len,
+                        |a, b| a & b,
+                    );
+                    // compute directly from bools
+                    let expected = input_bools_left[left_offset..]
+                        .iter()
+                        .zip(&input_bools_right[right_offset..])
+                        .take(len)
+                        .map(|(a, b)| *a & *b)
+                        .collect::<BooleanBuffer>();
+                    assert_eq!(result, expected);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_extend_trusted_len_sets_byte_len() {
+        // Ensures extend_trusted_len keeps the underlying byte length in sync with bit length.
+        let mut builder = BooleanBufferBuilder::new(0);
+        let bools: Vec<_> = (0..10).map(|i| i % 2 == 0).collect();
+        unsafe { builder.extend_trusted_len(bools.into_iter()) };
+        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
+    }
+
+    #[test]
+    fn test_extend_trusted_len_then_append() {
+        // Exercises append after extend_trusted_len to validate byte length and values.
+        let mut builder = BooleanBufferBuilder::new(0);
+        let bools: Vec<_> = (0..9).map(|i| i % 3 == 0).collect();
+        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+        builder.append(true);
+        assert_eq!(builder.as_slice().len(), bit_util::ceil(builder.len(), 8));
+        let finished = builder.finish();
+        for (i, v) in bools.into_iter().chain(std::iter::once(true)).enumerate() {
+            assert_eq!(finished.value(i), v, "at index {}", i);
+        }
+    }
 }
diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs
index 946299d0061b..7bf67503562d 100644
--- a/arrow-buffer/src/buffer/immutable.rs
+++ b/arrow-buffer/src/buffer/immutable.rs
@@ -22,10 +22,12 @@ use std::sync::Arc;
 
 use crate::alloc::{Allocation, Deallocation};
 use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
-use crate::BufferBuilder;
+use crate::{BooleanBuffer, BufferBuilder};
 use crate::{bit_util, bytes::Bytes, native::ArrowNativeType};
 
-use super::ops::bitwise_unary_op_helper;
+#[cfg(feature = "pool")]
+use crate::pool::MemoryPool;
+
 use super::{MutableBuffer, ScalarBuffer};
 
 /// A contiguous memory region that can be shared with other buffers and across
@@ -169,7 +171,7 @@ impl Buffer {
         len: usize,
         owner: Arc<dyn Allocation>,
     ) -> Self {
-        Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
+        unsafe { Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len)) }
     }
 
     /// Auxiliary method to create a new Buffer
@@ -178,7 +180,7 @@ impl Buffer {
         len: usize,
         deallocation: Deallocation,
     ) -> Self {
-        let bytes = Bytes::new(ptr, len, deallocation);
+        let bytes = unsafe { Bytes::new(ptr, len, deallocation) };
         let ptr = bytes.as_ptr();
         Buffer {
             ptr,
@@ -341,13 +343,13 @@ impl Buffer {
             return self.slice_with_length(offset / 8, bit_util::ceil(len, 8));
         }
 
-        bitwise_unary_op_helper(self, offset, len, |a| a)
+        BooleanBuffer::from_bits(self.as_slice(), offset, len).into_inner()
     }
 
     /// Returns a `BitChunks` instance which can be used to iterate over this buffers bits
     /// in larger chunks and starting at arbitrary bit offsets.
     /// Note that both `offset` and `length` are measured in bits.
-    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
+    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks<'_> {
         BitChunks::new(self.as_slice(), offset, len)
     }
 
@@ -361,6 +363,23 @@ impl Buffer {
     /// Returns `Err` if this is shared or its allocation is from an external source or
     /// it is not allocated with alignment [`ALIGNMENT`]
     ///
+    /// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
+    /// ```
+    /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
+    /// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
+    /// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
+    /// // (i.e., there are no other references to it).
+    /// let mut mutable_buffer = match buffer.into_mutable() {
+    ///    Ok(mutable) => mutable,
+    ///    Err(orig_buffer) => {
+    ///      panic!("buffer was not uniquely owned");
+    ///    }
+    /// };
+    /// mutable_buffer.push(5u8);
+    /// let buffer = Buffer::from(mutable_buffer);
+    /// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
+    /// ```
+    ///
     /// [`ALIGNMENT`]: crate::alloc::ALIGNMENT
     pub fn into_mutable(self) -> Result<MutableBuffer, Self> {
         let ptr = self.ptr;
@@ -385,8 +404,8 @@ impl Buffer {
     /// # Errors
     ///
     /// Returns `Err(self)` if
-    /// 1. this buffer does not have the same [`Layout`] as the destination Vec
-    /// 2. contains a non-zero offset
+    /// 1. The buffer does not have the same [`Layout`] as the destination Vec
+    /// 2. The buffer contains a non-zero offset
     /// 3. The buffer is shared
     pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
         let layout = match self.data.deallocation() {
@@ -430,6 +449,17 @@ impl Buffer {
     pub fn ptr_eq(&self, other: &Self) -> bool {
         self.ptr == other.ptr && self.length == other.length
     }
+
+    /// Register this [`Buffer`] with the provided [`MemoryPool`]
+    ///
+    /// This claims the memory used by this buffer in the pool, allowing for
+    /// accurate accounting of memory usage. Any prior reservation will be
+    /// released so this works well when the buffer is being shared among
+    /// multiple arrays.
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn MemoryPool) {
+        self.data.claim(pool)
+    }
 }
 
 /// Note that here we deliberately do not implement
@@ -510,6 +540,12 @@ impl std::ops::Deref for Buffer {
     }
 }
 
+impl AsRef<[u8]> for &Buffer {
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
 impl From<MutableBuffer> for Buffer {
     #[inline]
     fn from(buffer: MutableBuffer) -> Self {
@@ -547,7 +583,7 @@ impl Buffer {
     pub unsafe fn from_trusted_len_iter<T: ArrowNativeType, I: Iterator<Item = T>>(
         iterator: I,
     ) -> Self {
-        MutableBuffer::from_trusted_len_iter(iterator).into()
+        unsafe { MutableBuffer::from_trusted_len_iter(iterator).into() }
     }
 
     /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length or errors
@@ -564,7 +600,7 @@ impl Buffer {
     >(
         iterator: I,
     ) -> Result<Self, E> {
-        Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into())
+        unsafe { Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) }
     }
 }
 
@@ -983,13 +1019,13 @@ mod tests {
     #[should_panic(expected = "capacity overflow")]
     fn test_from_iter_overflow() {
         let iter_len = usize::MAX / std::mem::size_of::<u64>() + 1;
-        let _ = Buffer::from_iter(std::iter::repeat(0_u64).take(iter_len));
+        let _ = Buffer::from_iter(std::iter::repeat_n(0_u64, iter_len));
     }
 
     #[test]
     fn bit_slice_length_preserved() {
         // Create a boring buffer
-        let buf = Buffer::from_iter(std::iter::repeat(true).take(64));
+        let buf = Buffer::from_iter(std::iter::repeat_n(true, 64));
 
         let assert_preserved = |offset: usize, len: usize| {
             let new_buf = buf.bit_slice(offset, len);
@@ -1021,7 +1057,7 @@ mod tests {
 
     #[test]
     fn test_strong_count() {
-        let buffer = Buffer::from_iter(std::iter::repeat(0_u8).take(100));
+        let buffer = Buffer::from_iter(std::iter::repeat_n(0_u8, 100));
         assert_eq!(buffer.strong_count(), 1);
 
         let buffer2 = buffer.clone();
diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs
index 19ca0fef1519..9fc860506194 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -15,41 +15,86 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::alloc::{handle_alloc_error, Layout};
+use std::alloc::{Layout, handle_alloc_error};
 use std::mem;
 use std::ptr::NonNull;
 
-use crate::alloc::{Deallocation, ALIGNMENT};
+use crate::alloc::{ALIGNMENT, Deallocation};
 use crate::{
     bytes::Bytes,
     native::{ArrowNativeType, ToByteSlice},
     util::bit_util,
 };
 
+#[cfg(feature = "pool")]
+use crate::pool::{MemoryPool, MemoryReservation};
+#[cfg(feature = "pool")]
+use std::sync::Mutex;
+
 use super::Buffer;
 
-/// A [`MutableBuffer`] is Arrow's interface to build a [`Buffer`] out of items or slices of items.
+/// A [`MutableBuffer`] is a wrapper over memory regions, used to build
+/// [`Buffer`]s out of items or slices of items.
 ///
-/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to have its pointer aligned
-/// along cache lines and in multiple of 64 bytes.
+/// [`Buffer`]s created from [`MutableBuffer`] (via `into`) are guaranteed to be
+/// aligned along cache lines and in multiples of 64 bytes.
 ///
 /// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
-/// to insert many items, and `into` to convert it to [`Buffer`].
-///
-/// For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
+/// to insert many items, and `into` to convert it to [`Buffer`]. For typed data,
+/// it is often more efficient to use [`Vec`] and convert it to [`Buffer`] rather
+/// than using [`MutableBuffer`] (see examples below).
 ///
-/// Note: this may be deprecated in a future release ([#1176](https://github.com/apache/arrow-rs/issues/1176))
+/// # See Also
+/// * For a safe, strongly typed API consider using [`Vec`] and [`ScalarBuffer`](crate::ScalarBuffer)
+/// * To apply bitwise operations, see [`apply_bitwise_binary_op`] and [`apply_bitwise_unary_op`]
 ///
-/// # Example
+/// [`apply_bitwise_binary_op`]: crate::bit_util::apply_bitwise_binary_op
+/// [`apply_bitwise_unary_op`]: crate::bit_util::apply_bitwise_unary_op
 ///
+/// # Example: Creating a [`Buffer`] from a [`MutableBuffer`]
 /// ```
 /// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
 /// let mut buffer = MutableBuffer::new(0);
 /// buffer.push(256u32);
 /// buffer.extend_from_slice(&[1u32]);
-/// let buffer: Buffer = buffer.into();
+/// let buffer = Buffer::from(buffer);
 /// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0])
 /// ```
+///
+/// The same can be achieved more efficiently by using a `Vec<u32>`
+/// ```
+/// # use arrow_buffer::buffer::Buffer;
+/// let mut vec = Vec::new();
+/// vec.push(256u32);
+/// vec.extend_from_slice(&[1u32]);
+/// let buffer = Buffer::from(vec);
+/// assert_eq!(buffer.as_slice(), &[0u8, 1, 0, 0, 1, 0, 0, 0]);
+/// ```
+///
+/// # Example: Creating a [`MutableBuffer`] from a `Vec<T>`
+/// ```
+/// # use arrow_buffer::buffer::MutableBuffer;
+/// let vec = vec![1u32, 2, 3];
+/// let mutable_buffer = MutableBuffer::from(vec); // reuses the allocation from vec
+/// assert_eq!(mutable_buffer.len(), 12); // 3 * 4 bytes
+/// ```
+///
+/// # Example: Creating a [`MutableBuffer`] from a [`Buffer`]
+/// ```
+/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
+/// let buffer: Buffer = Buffer::from(&[1u8, 2, 3, 4][..]);
+/// // Only possible to convert a Buffer into a MutableBuffer if uniquely owned
+/// // (i.e., there are no other references to it).
+/// let mut mutable_buffer = match buffer.into_mutable() {
+///    Ok(mutable) => mutable,
+///    Err(orig_buffer) => {
+///      panic!("buffer was not uniquely owned");
+///    }
+/// };
+/// mutable_buffer.push(5u8);
+/// let buffer = Buffer::from(mutable_buffer);
+/// assert_eq!(buffer.as_slice(), &[1u8, 2, 3, 4, 5])
+/// ```
 #[derive(Debug)]
 pub struct MutableBuffer {
     // dangling iff capacity = 0
@@ -57,6 +102,10 @@ pub struct MutableBuffer {
     // invariant: len <= capacity
     len: usize,
     layout: Layout,
+
+    /// Memory reservation for tracking memory usage
+    #[cfg(feature = "pool")]
+    reservation: Mutex<Option<Box<dyn MemoryReservation>>>,
 }
 
 impl MutableBuffer {
@@ -91,6 +140,8 @@ impl MutableBuffer {
             data,
             len: 0,
             layout,
+            #[cfg(feature = "pool")]
+            reservation: std::sync::Mutex::new(None),
         }
     }
 
@@ -115,7 +166,13 @@ impl MutableBuffer {
                 NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
             }
         };
-        Self { data, len, layout }
+        Self {
+            data,
+            len,
+            layout,
+            #[cfg(feature = "pool")]
+            reservation: std::sync::Mutex::new(None),
+        }
     }
 
     /// Allocates a new [MutableBuffer] from given `Bytes`.
@@ -127,9 +184,17 @@ impl MutableBuffer {
 
         let len = bytes.len();
         let data = bytes.ptr();
+        #[cfg(feature = "pool")]
+        let reservation = bytes.reservation.lock().unwrap().take();
         mem::forget(bytes);
 
-        Ok(Self { data, len, layout })
+        Ok(Self {
+            data,
+            len,
+            layout,
+            #[cfg(feature = "pool")]
+            reservation: Mutex::new(reservation),
+        })
     }
 
     /// creates a new [MutableBuffer] with capacity and length capable of holding `len` bits.
@@ -197,6 +262,75 @@ impl MutableBuffer {
         }
     }
 
+    /// Appends `slice_to_repeat` to this buffer `repeat_count` times.
+    ///
+    /// Panics with "capacity overflow" if the total byte count overflows `usize`.
+    ///
+    /// # Example: repeat the same string bytes multiple times
+    /// ```
+    /// # use arrow_buffer::buffer::MutableBuffer;
+    /// let mut buffer = MutableBuffer::new(0);
+    /// let bytes_to_repeat = b"ab";
+    /// buffer.repeat_slice_n_times(bytes_to_repeat, 3);
+    /// assert_eq!(buffer.as_slice(), b"ababab");
+    /// ```
+    pub fn repeat_slice_n_times<T: ArrowNativeType>(
+        &mut self,
+        slice_to_repeat: &[T],
+        repeat_count: usize,
+    ) {
+        if repeat_count == 0 || slice_to_repeat.is_empty() {
+            return;
+        }
+
+        let bytes_to_repeat = size_of_val(slice_to_repeat);
+
+        // Ensure capacity; a wrapped product would under-reserve for the unchecked copies below
+        let total_bytes = repeat_count
+            .checked_mul(bytes_to_repeat)
+            .expect("capacity overflow");
+        self.reserve(total_bytes);
+
+        // Save the length before we do all the copies to know where to start from
+        let length_before = self.len;
+
+        // Copy the initial slice once so we can use doubling strategy on it
+        self.extend_from_slice(slice_to_repeat);
+
+        // The number of bytes added so far: exactly one copy of the slice
+        let added_repeats_length = bytes_to_repeat;
+        assert_eq!(
+            self.len - length_before,
+            added_repeats_length,
+            "should copy exactly the same number of bytes"
+        );
+
+        // Number of times the slice was repeated
+        let mut already_repeated_times = 1;
+
+        // We will use doubling strategy to fill the buffer in log(repeat_count) steps
+        while already_repeated_times < repeat_count {
+            // Slices to copy this iteration: double what we have, or just the remaining ones
+            let number_of_slices_to_copy =
+                already_repeated_times.min(repeat_count - already_repeated_times);
+            let number_of_bytes_to_copy = number_of_slices_to_copy * bytes_to_repeat;
+
+            unsafe {
+                // Get to the start of the data before we started copying anything
+                let src = self.data.as_ptr().add(length_before) as *const u8;
+                // Go to the current location to copy to (end of current data)
+                let dst = self.data.as_ptr().add(self.len);
+
+                // SAFETY: no overlap, `dst` is at least `number_of_bytes_to_copy` bytes past `src`
+                std::ptr::copy_nonoverlapping(src, dst, number_of_bytes_to_copy)
+            }
+
+            // Advance the length by the amount of data we just copied (doubled)
+            self.len += number_of_bytes_to_copy;
+            already_repeated_times += number_of_slices_to_copy;
+        }
+    }
+
     #[cold]
     fn reallocate(&mut self, capacity: usize) {
         let new_layout = Layout::from_size_align(capacity, self.layout.align()).unwrap();
@@ -217,6 +351,12 @@ impl MutableBuffer {
         };
         self.data = NonNull::new(data).unwrap_or_else(|| handle_alloc_error(new_layout));
         self.layout = new_layout;
+        #[cfg(feature = "pool")]
+        {
+            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
+                reservation.resize(self.layout.size());
+            }
+        }
     }
 
     /// Truncates this buffer to `len` bytes
@@ -228,6 +368,12 @@ impl MutableBuffer {
             return;
         }
         self.len = len;
+        #[cfg(feature = "pool")]
+        {
+            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
+                reservation.resize(self.len);
+            }
+        }
     }
 
     /// Resizes the buffer, either truncating its contents (with no change in capacity), or
@@ -251,6 +397,12 @@ impl MutableBuffer {
         }
         // this truncates the buffer when new_len < self.len
         self.len = new_len;
+        #[cfg(feature = "pool")]
+        {
+            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
+                reservation.resize(self.len);
+            }
+        }
     }
 
     /// Shrinks the capacity of the buffer as much as possible.
@@ -328,6 +480,11 @@ impl MutableBuffer {
     #[inline]
     pub(super) fn into_buffer(self) -> Buffer {
         let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) };
+        #[cfg(feature = "pool")]
+        {
+            let reservation = self.reservation.lock().unwrap().take();
+            *bytes.reservation.lock().unwrap() = reservation;
+        }
         std::mem::forget(self);
         Buffer::from(bytes)
     }
@@ -412,8 +569,8 @@ impl MutableBuffer {
     pub unsafe fn push_unchecked<T: ToByteSlice>(&mut self, item: T) {
         let additional = std::mem::size_of::<T>();
         let src = item.to_byte_slice().as_ptr();
-        let dst = self.data.as_ptr().add(self.len);
-        std::ptr::copy_nonoverlapping(src, dst, additional);
+        let dst = unsafe { self.data.as_ptr().add(self.len) };
+        unsafe { std::ptr::copy_nonoverlapping(src, dst, additional) };
         self.len += additional;
     }
 
@@ -437,20 +594,19 @@ impl MutableBuffer {
     /// as it eliminates the conditional `Iterator::next`
     #[inline]
     pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
-        let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
+        let mut buffer: Vec<u64> = Vec::with_capacity(bit_util::ceil(len, 64));
 
         let chunks = len / 64;
         let remainder = len % 64;
-        for chunk in 0..chunks {
+        buffer.extend((0..chunks).map(|chunk| {
             let mut packed = 0;
             for bit_idx in 0..64 {
                 let i = bit_idx + chunk * 64;
                 packed |= (f(i) as u64) << bit_idx;
             }
 
-            // SAFETY: Already allocated sufficient capacity
-            unsafe { buffer.push_unchecked(packed) }
-        }
+            packed
+        }));
 
         if remainder != 0 {
             let mut packed = 0;
@@ -459,13 +615,152 @@ impl MutableBuffer {
                 packed |= (f(i) as u64) << bit_idx;
             }
 
-            // SAFETY: Already allocated sufficient capacity
-            unsafe { buffer.push_unchecked(packed) }
+            buffer.push(packed)
         }
 
+        let mut buffer: MutableBuffer = buffer.into();
         buffer.truncate(bit_util::ceil(len, 8));
         buffer
     }
+
+    /// Extends this buffer with boolean values.
+    ///
+    /// This requires `iter` to report an exact size via `size_hint`.
+    /// `offset` indicates the starting offset in bits in this buffer to begin writing to
+    /// and must be less than or equal to the current length of this buffer.
+    /// All bits not written to (but readable due to byte alignment) will be zeroed out.
+    /// # Safety
+    /// Callers must ensure that `iter` reports an exact size via `size_hint`.
+    #[inline]
+    pub unsafe fn extend_bool_trusted_len<I: Iterator<Item = bool>>(
+        &mut self,
+        mut iter: I,
+        offset: usize,
+    ) {
+        let (lower, upper) = iter.size_hint();
+        let len = upper.expect("Iterator must have exact size_hint");
+        assert_eq!(lower, len, "Iterator must have exact size_hint");
+        debug_assert!(
+            offset <= self.len * 8,
+            "offset must be <= buffer length in bits"
+        );
+
+        if len == 0 {
+            return;
+        }
+
+        let start_len = offset;
+        let end_bit = start_len + len;
+
+        // Grow the buffer, if needed, so that it covers every bit up to `end_bit`
+        let new_len_bytes = bit_util::ceil(end_bit, 8);
+        if new_len_bytes > self.len {
+            self.reserve(new_len_bytes - self.len);
+            // SAFETY: capacity was reserved above; every newly exposed byte is written below before it is read
+            unsafe { self.set_len(new_len_bytes) };
+        }
+
+        let slice = self.as_slice_mut();
+
+        let mut bit_idx = start_len;
+
+        // ---- Unaligned prefix: advance to the next 64-bit boundary ----
+        let misalignment = bit_idx & 63;
+        let prefix_bits = if misalignment == 0 {
+            0
+        } else {
+            (64 - misalignment).min(end_bit - bit_idx)
+        };
+
+        if prefix_bits != 0 {
+            let byte_start = bit_idx / 8;
+            let byte_end = bit_util::ceil(bit_idx + prefix_bits, 8);
+            let bit_offset = bit_idx % 8;
+
+            // Clear any newly-visible bits in the existing partial byte
+            if bit_offset != 0 {
+                let keep_mask = (1u8 << bit_offset).wrapping_sub(1);
+                slice[byte_start] &= keep_mask;
+            }
+
+            // Zero any new bytes we will partially fill in this prefix
+            let zero_from = if bit_offset == 0 {
+                byte_start
+            } else {
+                byte_start + 1
+            };
+            if byte_end > zero_from {
+                slice[zero_from..byte_end].fill(0);
+            }
+
+            for _ in 0..prefix_bits {
+                let v = iter.next().unwrap();
+                if v {
+                    let byte_idx = bit_idx / 8;
+                    let bit = bit_idx % 8;
+                    slice[byte_idx] |= 1 << bit;
+                }
+                bit_idx += 1;
+            }
+        }
+
+        if bit_idx < end_bit {
+            // ---- Aligned middle: write u64 chunks ----
+            debug_assert_eq!(bit_idx & 63, 0);
+            let remaining_bits = end_bit - bit_idx;
+            let chunks = remaining_bits / 64;
+
+            let words_start = bit_idx / 8;
+            let words_end = words_start + chunks * 8;
+            for dst in slice[words_start..words_end].chunks_exact_mut(8) {
+                let mut packed: u64 = 0;
+                for i in 0..64 {
+                    packed |= (iter.next().unwrap() as u64) << i;
+                }
+                dst.copy_from_slice(&packed.to_le_bytes());
+                bit_idx += 64;
+            }
+
+            // ---- Unaligned suffix: remaining < 64 bits ----
+            let suffix_bits = end_bit - bit_idx;
+            if suffix_bits != 0 {
+                debug_assert_eq!(bit_idx % 8, 0);
+                let byte_start = bit_idx / 8;
+                let byte_end = bit_util::ceil(end_bit, 8);
+                slice[byte_start..byte_end].fill(0);
+
+                for _ in 0..suffix_bits {
+                    let v = iter.next().unwrap();
+                    if v {
+                        let byte_idx = bit_idx / 8;
+                        let bit = bit_idx % 8;
+                        slice[byte_idx] |= 1 << bit;
+                    }
+                    bit_idx += 1;
+                }
+            }
+        }
+
+        // Clear any unused bits in the last byte
+        let remainder = end_bit % 8;
+        if remainder != 0 {
+            let mask = (1u8 << remainder).wrapping_sub(1);
+            slice[bit_util::ceil(end_bit, 8) - 1] &= mask;
+        }
+
+        debug_assert_eq!(bit_idx, end_bit);
+    }
+
+    /// Register this [`MutableBuffer`] with the provided [`MemoryPool`]
+    ///
+    /// This claims the memory used by this buffer in the pool, allowing for
+    /// accurate accounting of memory usage. Any prior reservation will be
+    /// released so this works well when the buffer is being shared among
+    /// multiple arrays.
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn MemoryPool) {
+        *self.reservation.lock().unwrap() = Some(pool.reserve(self.capacity()));
+    }
 }
 
 /// Creates a non-null pointer with alignment of [`ALIGNMENT`]
@@ -506,7 +801,13 @@ impl<T: ArrowNativeType> From<Vec<T>> for MutableBuffer {
         // This is based on `RawVec::current_memory`
         let layout = unsafe { Layout::array::<T>(value.capacity()).unwrap_unchecked() };
         mem::forget(value);
-        Self { data, len, layout }
+        Self {
+            data,
+            len,
+            layout,
+            #[cfg(feature = "pool")]
+            reservation: std::sync::Mutex::new(None),
+        }
     }
 }
 
@@ -575,11 +876,11 @@ impl MutableBuffer {
         for item in iterator {
             // note how there is no reserve here (compared with `extend_from_iter`)
             let src = item.to_byte_slice().as_ptr();
-            std::ptr::copy_nonoverlapping(src, dst, item_size);
-            dst = dst.add(item_size);
+            unsafe { std::ptr::copy_nonoverlapping(src, dst, item_size) };
+            dst = unsafe { dst.add(item_size) };
         }
         assert_eq!(
-            dst.offset_from(buffer.data.as_ptr()) as usize,
+            unsafe { dst.offset_from(buffer.data.as_ptr()) } as usize,
             len,
             "Trusted iterator length was not accurately reported"
         );
@@ -638,20 +939,22 @@ impl MutableBuffer {
             let item = item?;
             // note how there is no reserve here (compared with `extend_from_iter`)
             let src = item.to_byte_slice().as_ptr();
-            std::ptr::copy_nonoverlapping(src, dst, item_size);
-            dst = dst.add(item_size);
+            unsafe { std::ptr::copy_nonoverlapping(src, dst, item_size) };
+            dst = unsafe { dst.add(item_size) };
         }
         // try_from_trusted_len_iter is instantiated a lot, so we extract part of it into a less
         // generic method to reduce compile time
         unsafe fn finalize_buffer(dst: *mut u8, buffer: &mut MutableBuffer, len: usize) {
-            assert_eq!(
-                dst.offset_from(buffer.data.as_ptr()) as usize,
-                len,
-                "Trusted iterator length was not accurately reported"
-            );
-            buffer.len = len;
-        }
-        finalize_buffer(dst, &mut buffer, len);
+            unsafe {
+                assert_eq!(
+                    dst.offset_from(buffer.data.as_ptr()) as usize,
+                    len,
+                    "Trusted iterator length was not accurately reported"
+                );
+                buffer.len = len;
+            }
+        }
+        unsafe { finalize_buffer(dst, &mut buffer, len) };
         Ok(buffer)
     }
 }
@@ -676,6 +979,12 @@ impl std::ops::DerefMut for MutableBuffer {
     }
 }
 
+impl AsRef<[u8]> for &MutableBuffer {
+    fn as_ref(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
 impl Drop for MutableBuffer {
     fn drop(&mut self) {
         if self.layout.size() != 0 {
@@ -1013,4 +1322,229 @@ mod tests {
         let max_capacity = isize::MAX as usize - (isize::MAX as usize % ALIGNMENT);
         let _ = MutableBuffer::with_capacity(max_capacity + 1);
     }
+
+    #[cfg(feature = "pool")]
+    mod pool_tests {
+        use super::*;
+        use crate::pool::{MemoryPool, TrackingMemoryPool};
+
+        #[test]
+        fn test_reallocate_with_pool() {
+            let pool = TrackingMemoryPool::default();
+            let mut buffer = MutableBuffer::with_capacity(100);
+            buffer.claim(&pool);
+
+            // Initial capacity should be 128 (multiple of 64)
+            assert_eq!(buffer.capacity(), 128);
+            assert_eq!(pool.used(), 128);
+
+            // Reallocate to a larger size
+            buffer.reallocate(200);
+
+            // The capacity is exactly the requested size, not rounded up
+            assert_eq!(buffer.capacity(), 200);
+            assert_eq!(pool.used(), 200);
+
+            // Reallocate to a smaller size
+            buffer.reallocate(50);
+
+            // The capacity is exactly the requested size, not rounded up
+            assert_eq!(buffer.capacity(), 50);
+            assert_eq!(pool.used(), 50);
+        }
+
+        #[test]
+        fn test_truncate_with_pool() {
+            let pool = TrackingMemoryPool::default();
+            let mut buffer = MutableBuffer::with_capacity(100);
+
+            // Fill buffer with some data
+            buffer.resize(80, 1);
+            assert_eq!(buffer.len(), 80);
+
+            buffer.claim(&pool);
+            assert_eq!(pool.used(), 128);
+
+            // Truncate buffer
+            buffer.truncate(40);
+            assert_eq!(buffer.len(), 40);
+            assert_eq!(pool.used(), 40);
+
+            // Truncate to zero
+            buffer.truncate(0);
+            assert_eq!(buffer.len(), 0);
+            assert_eq!(pool.used(), 0);
+        }
+
+        #[test]
+        fn test_resize_with_pool() {
+            let pool = TrackingMemoryPool::default();
+            let mut buffer = MutableBuffer::with_capacity(100);
+            buffer.claim(&pool);
+
+            // Initial state
+            assert_eq!(buffer.len(), 0);
+            assert_eq!(pool.used(), 128);
+
+            // Resize to increase length
+            buffer.resize(50, 1);
+            assert_eq!(buffer.len(), 50);
+            assert_eq!(pool.used(), 50);
+
+            // Resize to increase length beyond capacity
+            buffer.resize(150, 1);
+            assert_eq!(buffer.len(), 150);
+            assert_eq!(buffer.capacity(), 256);
+            assert_eq!(pool.used(), 150);
+
+            // Resize to decrease length
+            buffer.resize(30, 1);
+            assert_eq!(buffer.len(), 30);
+            assert_eq!(pool.used(), 30);
+        }
+
+        #[test]
+        fn test_buffer_lifecycle_with_pool() {
+            let pool = TrackingMemoryPool::default();
+
+            // Create a buffer with memory reservation
+            let mut mutable = MutableBuffer::with_capacity(100);
+            mutable.resize(80, 1);
+            mutable.claim(&pool);
+
+            // Memory reservation is based on capacity when using claim()
+            assert_eq!(pool.used(), 128);
+
+            // Convert to immutable Buffer
+            let buffer = mutable.into_buffer();
+
+            // Memory reservation should be preserved
+            assert_eq!(pool.used(), 128);
+
+            // Drop the buffer and the reservation should be released
+            drop(buffer);
+            assert_eq!(pool.used(), 0);
+        }
+    }
+
+    fn create_expected_repeated_slice<T: ArrowNativeType>(
+        slice_to_repeat: &[T],
+        repeat_count: usize,
+    ) -> Buffer {
+        let mut expected = MutableBuffer::new(size_of_val(slice_to_repeat) * repeat_count);
+        for _ in 0..repeat_count {
+            // Not using repeat_slice_n_times as this is the function under test
+            expected.extend_from_slice(slice_to_repeat);
+        }
+        expected.into()
+    }
+
+    // Helper to test a specific repeat count with various slice sizes
+    fn test_repeat_count<T: ArrowNativeType + PartialEq + std::fmt::Debug>(
+        repeat_count: usize,
+        test_data: &[T],
+    ) {
+        let mut buffer = MutableBuffer::new(0);
+        buffer.repeat_slice_n_times(test_data, repeat_count);
+
+        let expected = create_expected_repeated_slice(test_data, repeat_count);
+        let result: Buffer = buffer.into();
+
+        assert_eq!(
+            result,
+            expected,
+            "Failed for repeat_count={}, slice_len={}",
+            repeat_count,
+            test_data.len()
+        );
+    }
+
+    #[test]
+    fn test_repeat_slice_count_edge_cases() {
+        // Empty slice
+        test_repeat_count(100, &[] as &[i32]);
+
+        // Zero repeats
+        test_repeat_count(0, &[1i32, 2, 3]);
+    }
+
+    #[test]
+    fn test_small_repeats_counts() {
+        // Exercise every small repeat count, as these may hit a special-cased path
+        let data = &[1u8, 2, 3, 4, 5];
+
+        for repeat_count in 1..=10 {
+            test_repeat_count(repeat_count, data);
+        }
+    }
+
+    #[test]
+    fn test_different_size_of_i32_repeat_slice() {
+        let data: &[i32] = &[1, 2, 3];
+        let data_with_single_item: &[i32] = &[42];
+
+        for data in &[data, data_with_single_item] {
+            for item in 1..=9 {
+                let base_repeat_count = 2_usize.pow(item);
+                test_repeat_count(base_repeat_count - 1, data);
+                test_repeat_count(base_repeat_count, data);
+                test_repeat_count(base_repeat_count + 1, data);
+            }
+        }
+    }
+
+    #[test]
+    fn test_different_size_of_u8_repeat_slice() {
+        let data: &[u8] = &[1, 2, 3];
+        let data_with_single_item: &[u8] = &[10];
+
+        for data in &[data, data_with_single_item] {
+            for item in 1..=9 {
+                let base_repeat_count = 2_usize.pow(item);
+                test_repeat_count(base_repeat_count - 1, data);
+                test_repeat_count(base_repeat_count, data);
+                test_repeat_count(base_repeat_count + 1, data);
+            }
+        }
+    }
+
+    #[test]
+    fn test_different_size_of_u16_repeat_slice() {
+        let data: &[u16] = &[1, 2, 3];
+        let data_with_single_item: &[u16] = &[10];
+
+        for data in &[data, data_with_single_item] {
+            for item in 1..=9 {
+                let base_repeat_count = 2_usize.pow(item);
+                test_repeat_count(base_repeat_count - 1, data);
+                test_repeat_count(base_repeat_count, data);
+                test_repeat_count(base_repeat_count + 1, data);
+            }
+        }
+    }
+
+    #[test]
+    fn test_various_slice_lengths() {
+        // Test different slice lengths with same repeat pattern
+        let repeat_count = 37; // Arbitrary non-power-of-2
+
+        // Single element
+        test_repeat_count(repeat_count, &[42i32]);
+
+        // Small slices
+        test_repeat_count(repeat_count, &[1i32, 2]);
+        test_repeat_count(repeat_count, &[1i32, 2, 3]);
+        test_repeat_count(repeat_count, &[1i32, 2, 3, 4]);
+        test_repeat_count(repeat_count, &[1i32, 2, 3, 4, 5]);
+
+        // Larger slices
+        let data_10: Vec<i32> = (0..10).collect();
+        test_repeat_count(repeat_count, &data_10);
+
+        let data_100: Vec<i32> = (0..100).collect();
+        test_repeat_count(repeat_count, &data_100);
+
+        let data_1000: Vec<i32> = (0..1000).collect();
+        test_repeat_count(repeat_count, &data_1000);
+    }
 }
diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs
index fe3a57a38248..66fa7dd22ec5 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -112,6 +112,9 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
     /// assert_eq!(offsets.as_ref(), &[0, 1, 4, 9]);
     /// ```
     ///
+    /// If you want to create an [`OffsetBuffer`] where all lengths are the same,
+    /// consider using the faster [`OffsetBuffer::from_repeated_length`] instead.
+    ///
     /// # Panics
     ///
     /// Panics on overflow
@@ -133,6 +136,43 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
         Self(out.into())
     }
 
+    /// Build an [`OffsetBuffer`] describing `n` consecutive slices that all
+    /// share the same `length`.
+    ///
+    /// When `n` is zero the result is [`OffsetBuffer::new_empty`]; otherwise,
+    /// when `length` is zero, it is [`OffsetBuffer::new_zeroed`] called with `n`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_buffer::OffsetBuffer;
+    /// let offsets = OffsetBuffer::<i32>::from_repeated_length(4, 3);
+    /// assert_eq!(offsets.as_ref(), &[0, 4, 8, 12]);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `length * n` overflows `usize` or cannot be converted to `O`.
+    pub fn from_repeated_length(length: usize, n: usize) -> Self {
+        match (length, n) {
+            (_, 0) => return Self::new_empty(),
+            (0, _) => return Self::new_zeroed(n),
+            _ => {}
+        }
+
+        // The last offset (`length * n`) is the largest value produced, so
+        // validating it once guarantees that no smaller multiple can overflow
+        // `usize` or fail the conversion to `O`.
+        let total = length.checked_mul(n).expect("usize overflow");
+        O::from_usize(total).expect("offset overflow");
+
+        let offsets: Vec<O> = (0..=n)
+            .map(|run| O::usize_as(run * length))
+            .collect();
+
+        Self(ScalarBuffer::from(offsets))
+    }
+
     /// Get an Iterator over the lengths of this [`OffsetBuffer`]
     ///
     /// ```
@@ -283,6 +323,36 @@ mod tests {
         OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
     }
 
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn from_repeated_lengths_offset_length_overflow() {
+        OffsetBuffer::<i32>::from_repeated_length(i32::MAX as usize / 4, 5);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn from_repeated_lengths_offset_repeat_overflow() {
+        OffsetBuffer::<i32>::from_repeated_length(1, i32::MAX as usize + 1);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn from_repeated_lengths_usize_length_overflow() {
+        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 1);
+    }
+
+    #[test]
+    #[should_panic(expected = "usize overflow")]
+    fn from_repeated_lengths_usize_length_usize_overflow() {
+        OffsetBuffer::<i32>::from_repeated_length(usize::MAX, 2);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset overflow")]
+    fn from_repeated_lengths_usize_repeat_overflow() {
+        OffsetBuffer::<i32>::from_repeated_length(1, usize::MAX);
+    }
+
     #[test]
     fn get_lengths() {
         let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
@@ -323,4 +393,76 @@ mod tests {
         let default = OffsetBuffer::<i32>::default();
         assert_eq!(default.as_ref(), &[0]);
     }
+
+    #[test]
+    fn from_repeated_length_basic() {
+        // Basic case with length 4, repeated 3 times
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(4, 3);
+        assert_eq!(buffer.as_ref(), &[0, 4, 8, 12]);
+
+        // Verify the lengths are correct
+        let lengths: Vec<usize> = buffer.lengths().collect();
+        assert_eq!(lengths, vec![4, 4, 4]);
+    }
+
+    #[test]
+    fn from_repeated_length_single_repeat() {
+        // Length 5, repeated once
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(5, 1);
+        assert_eq!(buffer.as_ref(), &[0, 5]);
+
+        let lengths: Vec<usize> = buffer.lengths().collect();
+        assert_eq!(lengths, vec![5]);
+    }
+
+    #[test]
+    fn from_repeated_length_zero_repeats() {
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(10, 0);
+        assert_eq!(buffer, OffsetBuffer::<i32>::new_empty());
+    }
+
+    #[test]
+    fn from_repeated_length_zero_length() {
+        // Zero length, repeated 5 times (all zeros)
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(0, 5);
+        assert_eq!(buffer.as_ref(), &[0, 0, 0, 0, 0, 0]);
+
+        // All lengths should be 0
+        let lengths: Vec<usize> = buffer.lengths().collect();
+        assert_eq!(lengths, vec![0, 0, 0, 0, 0]);
+    }
+
+    #[test]
+    fn from_repeated_length_large_values() {
+        // Test with larger values that don't overflow
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(1000, 100);
+        assert_eq!(buffer[0], 0);
+
+        // Verify all lengths are 1000
+        let lengths: Vec<usize> = buffer.lengths().collect();
+        assert_eq!(lengths.len(), 100);
+        assert!(lengths.iter().all(|&len| len == 1000));
+    }
+
+    #[test]
+    fn from_repeated_length_unit_length() {
+        // Length 1, repeated multiple times
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(1, 10);
+        assert_eq!(buffer.as_ref(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+
+        let lengths: Vec<usize> = buffer.lengths().collect();
+        assert_eq!(lengths, vec![1; 10]);
+    }
+
+    #[test]
+    fn from_repeated_length_max_safe_values() {
+        // Test with maximum safe values for i32
+        // i32::MAX / 3 ensures we don't overflow when repeated twice
+        let third_max = (i32::MAX / 3) as usize;
+        let buffer = OffsetBuffer::<i32>::from_repeated_length(third_max, 2);
+        assert_eq!(
+            buffer.as_ref(),
+            &[0, third_max as i32, (third_max * 2) as i32]
+        );
+    }
 }
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index c69e5c6deb10..36efe876432d 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -16,10 +16,16 @@
 // under the License.
 
 use super::{Buffer, MutableBuffer};
+use crate::BooleanBuffer;
 use crate::util::bit_util::ceil;
 
 /// Apply a bitwise operation `op` to four inputs and return the result as a Buffer.
-/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+///
+/// The inputs are treated as bitmaps, meaning that offsets and length are
+/// specified in number of bits.
+///
+/// NOTE: The operation `op` is applied to chunks of 64 bits (u64), and any bits
+/// outside the offsets and len are zeroed out before calling `op`.
 pub fn bitwise_quaternary_op_helper<F>(
     buffers: [&Buffer; 4],
     offsets: [usize; 4],
@@ -59,7 +65,12 @@ where
 }
 
 /// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
-/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
+///
+/// The inputs are treated as bitmaps, meaning that offsets and length are
+/// specified in number of bits.
+///
+/// NOTE: The operation `op` is applied to chunks of 64 bits (u64), and any bits
+/// outside the offsets and len are zeroed out before calling `op`.
 pub fn bitwise_bin_op_helper<F>(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -92,7 +103,12 @@ where
 }
 
 /// Apply a bitwise operation `op` to one input and return the result as a Buffer.
-/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
+///
+/// The input is treated as a bitmap, meaning that offset and length are
+/// specified in number of bits.
+///
+/// NOTE: The operation `op` is applied to chunks of 64 bits (u64), and any bits
+/// outside the offset and len are zeroed out before calling `op`.
 pub fn bitwise_unary_op_helper<F>(
     left: &Buffer,
     offset_in_bits: usize,
@@ -134,7 +150,7 @@ pub fn buffer_bin_and(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -142,6 +158,7 @@ pub fn buffer_bin_and(
         len_in_bits,
         |a, b| a & b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise or to two inputs and return the result as a Buffer.
@@ -153,7 +170,7 @@ pub fn buffer_bin_or(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -161,6 +178,7 @@ pub fn buffer_bin_or(
         len_in_bits,
         |a, b| a | b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise xor to two inputs and return the result as a Buffer.
@@ -172,7 +190,7 @@ pub fn buffer_bin_xor(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -180,6 +198,7 @@ pub fn buffer_bin_xor(
         len_in_bits,
         |a, b| a ^ b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise and_not to two inputs and return the result as a Buffer.
@@ -191,7 +210,7 @@ pub fn buffer_bin_and_not(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -199,10 +218,11 @@ pub fn buffer_bin_and_not(
         len_in_bits,
         |a, b| a & !b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise not to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
 pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
-    bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
+    BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a).into_inner()
 }
diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs
index cc6d19044feb..6603dec1bac1 100644
--- a/arrow-buffer/src/buffer/run.rs
+++ b/arrow-buffer/src/buffer/run.rs
@@ -15,78 +15,111 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::buffer::ScalarBuffer;
 use crate::ArrowNativeType;
+use crate::buffer::ScalarBuffer;
 
-/// A slice-able buffer of monotonically increasing, positive integers used to store run-ends
-///
-/// # Logical vs Physical
+/// A buffer of monotonically increasing, positive integers used to store run-ends.
 ///
-/// A [`RunEndBuffer`] is used to encode runs of the same value, the index of each run is
-/// called the physical index. The logical index is then the corresponding index in the logical
-/// run-encoded array, i.e. a single run of length `3`, would have the logical indices `0..3`.
+/// Used to compactly represent runs of the same value. Values being represented
+/// are stored in a separate buffer from this struct. See [`RunArray`] for an example
+/// of how this is used with a companion array to represent the values.
 ///
-/// Each value in [`RunEndBuffer::values`] is the cumulative length of all runs in the
-/// logical array, up to that physical index.
+/// # Logical vs Physical
 ///
-/// Consider a [`RunEndBuffer`] containing `[3, 4, 6]`. The maximum physical index is `2`,
-/// as there are `3` values, and the maximum logical index is `5`, as the maximum run end
-/// is `6`. The physical indices are therefore `[0, 0, 0, 1, 2, 2]`
+/// Physically, each value in the `run_ends` buffer is the cumulative length of
+/// all runs in the logical representation, up to that physical index. Consider
+/// the following example:
 ///
 /// ```text
-///     ┌─────────┐        ┌─────────┐           ┌─────────┐
-///     │    3    │        │    0    │ ─┬──────▶ │    0    │
-///     ├─────────┤        ├─────────┤  │        ├─────────┤
-///     │    4    │        │    1    │ ─┤ ┌────▶ │    1    │
-///     ├─────────┤        ├─────────┤  │ │      ├─────────┤
-///     │    6    │        │    2    │ ─┘ │ ┌──▶ │    2    │
-///     └─────────┘        ├─────────┤    │ │    └─────────┘
-///      run ends          │    3    │ ───┘ │  physical indices
-///                        ├─────────┤      │
-///                        │    4    │ ─────┤
-///                        ├─────────┤      │
-///                        │    5    │ ─────┘
-///                        └─────────┘
-///                      logical indices
+///           physical                        logical
+///     ┌─────────┬─────────┐           ┌─────────┬─────────┐
+///     │    3    │    0    │ ◄──────┬─ │    A    │    0    │
+///     ├─────────┼─────────┤        │  ├─────────┼─────────┤
+///     │    4    │    1    │ ◄────┐ ├─ │    A    │    1    │
+///     ├─────────┼─────────┤      │ │  ├─────────┼─────────┤
+///     │    6    │    2    │ ◄──┐ │ └─ │    A    │    2    │
+///     └─────────┴─────────┘    │ │    ├─────────┼─────────┤
+///      run-ends    index       │ └─── │    B    │    3    │
+///                              │      ├─────────┼─────────┤
+///      logical_offset = 0      ├───── │    C    │    4    │
+///      logical_length = 6      │      ├─────────┼─────────┤
+///                              └───── │    C    │    5    │
+///                                     └─────────┴─────────┘
+///                                       values     index
 /// ```
 ///
+/// A [`RunEndBuffer`] is physically the buffer and offset with length on the left.
+/// In this case, the offset and length represent the whole buffer, so it is essentially
+/// unsliced. See the section below on slicing for more details on how this buffer
+/// handles slicing.
+///
+/// This means that multiple logical values are represented in the same physical index,
+/// and multiple logical indices map to the same physical index. The [`RunEndBuffer`]
+/// containing `[3, 4, 6]` is essentially the physical indices `[0, 0, 0, 1, 2, 2]`,
+/// and having a separately stored buffer of values such as `[A, B, C]` can turn
+/// this into a representation of `[A, A, A, B, C, C]`.
+///
 /// # Slicing
 ///
-/// In order to provide zero-copy slicing, this container stores a separate offset and length
+/// In order to provide zero-copy slicing, this struct stores a separate **logical**
+/// offset and length. Consider the following example:
 ///
-/// For example, a [`RunEndBuffer`] containing values `[3, 6, 8]` with offset and length `4` would
-/// describe the physical indices `1, 1, 2, 2`
+/// ```text
+///           physical                        logical
+///     ┌─────────┬─────────┐           ┌ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ┐
+///     │    3    │    0    │ ◄──────┐       A         0
+///     ├─────────┼─────────┤        │  ├── ─ ─ ─ ┼ ─ ─ ─ ─ ┤
+///     │    4    │    1    │ ◄────┐ │       A         1
+///     ├─────────┼─────────┤      │ │  ├─────────┼─────────┤
+///     │    6    │    2    │ ◄──┐ │ └─ │    A    │    2    │◄─── logical_offset
+///     └─────────┴─────────┘    │ │    ├─────────┼─────────┤
+///      run-ends    index       │ └─── │    B    │    3    │
+///                              │      ├─────────┼─────────┤
+///      logical_offset = 2      └───── │    C    │    4    │
+///      logical_length = 3             ├─────────┼─────────┤
+///                                          C         5     ◄─── logical_offset + logical_length
+///                                     └ ─ ─ ─ ─ ┴ ─ ─ ─ ─ ┘
+///                                       values     index
+/// ```
+///
+/// The physical `run_ends` [`ScalarBuffer`] remains unchanged, in order to facilitate
+/// zero-copy. However, we now offset into the **logical** representation with an
+/// accompanying length. This allows us to represent values `[A, B, C]` using physical
+/// indices `0, 1, 2` with the same underlying physical buffer, at the cost of two
+/// extra `usize`s to represent the logical slice that was taken.
 ///
-/// For example, a [`RunEndBuffer`] containing values `[6, 8, 9]` with offset `2` and length `5`
-/// would describe the physical indices `0, 0, 0, 0, 1`
+/// (A [`RunEndBuffer`] is considered unsliced when `logical_offset` is `0` and
+/// `logical_length` is equal to the last value in `run_ends`)
 ///
+/// [`RunArray`]: https://docs.rs/arrow/latest/arrow/array/struct.RunArray.html
 /// [Run-End encoded layout]: https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
 #[derive(Debug, Clone)]
 pub struct RunEndBuffer<E: ArrowNativeType> {
     run_ends: ScalarBuffer<E>,
-    len: usize,
-    offset: usize,
+    logical_length: usize,
+    logical_offset: usize,
 }
 
 impl<E> RunEndBuffer<E>
 where
     E: ArrowNativeType,
 {
-    /// Create a new [`RunEndBuffer`] from a [`ScalarBuffer`], an `offset` and `len`
+    /// Create a new [`RunEndBuffer`] from a [`ScalarBuffer`], `logical_offset`
+    /// and `logical_length`.
     ///
     /// # Panics
     ///
-    /// - `buffer` does not contain strictly increasing values greater than zero
-    /// - the last value of `buffer` is less than `offset + len`
-    pub fn new(run_ends: ScalarBuffer<E>, offset: usize, len: usize) -> Self {
+    /// - `run_ends` does not contain strictly increasing values greater than zero
+    /// - The last value of `run_ends` is less than `logical_offset + logical_length`
+    pub fn new(run_ends: ScalarBuffer<E>, logical_offset: usize, logical_length: usize) -> Self {
         assert!(
             run_ends.windows(2).all(|w| w[0] < w[1]),
             "run-ends not strictly increasing"
         );
 
-        if len != 0 {
+        if logical_length != 0 {
             assert!(!run_ends.is_empty(), "non-empty slice but empty run-ends");
-            let end = E::from_usize(offset.saturating_add(len)).unwrap();
+            let end = E::from_usize(logical_offset.saturating_add(logical_length)).unwrap();
             assert!(
                 *run_ends.first().unwrap() > E::usize_as(0),
                 "run-ends not greater than 0"
@@ -99,41 +132,46 @@ where
 
         Self {
             run_ends,
-            offset,
-            len,
+            logical_offset,
+            logical_length,
         }
     }
 
-    /// Create a new [`RunEndBuffer`] from an [`ScalarBuffer`], an `offset` and `len`
+    /// Create a new [`RunEndBuffer`] from a [`ScalarBuffer`], `logical_offset`
+    /// and `logical_length`.
     ///
     /// # Safety
     ///
-    /// - `buffer` must contain strictly increasing values greater than zero
-    /// - The last value of `buffer` must be greater than or equal to `offset + len`
-    pub unsafe fn new_unchecked(run_ends: ScalarBuffer<E>, offset: usize, len: usize) -> Self {
+    /// - `run_ends` must contain strictly increasing values greater than zero
+    /// - The last value of `run_ends` must be greater than or equal to `logical_offset + logical_length`
+    pub unsafe fn new_unchecked(
+        run_ends: ScalarBuffer<E>,
+        logical_offset: usize,
+        logical_length: usize,
+    ) -> Self {
         Self {
             run_ends,
-            offset,
-            len,
+            logical_offset,
+            logical_length,
         }
     }
 
-    /// Returns the logical offset into the run-ends stored by this buffer
+    /// Returns the logical offset into the run-ends stored by this buffer.
     #[inline]
     pub fn offset(&self) -> usize {
-        self.offset
+        self.logical_offset
     }
 
-    /// Returns the logical length of the run-ends stored by this buffer
+    /// Returns the logical length of the run-ends stored by this buffer.
     #[inline]
     pub fn len(&self) -> usize {
-        self.len
+        self.logical_length
     }
 
-    /// Returns true if this buffer is empty
+    /// Returns true if this buffer is logically empty.
     #[inline]
     pub fn is_empty(&self) -> bool {
-        self.len == 0
+        self.logical_length == 0
     }
 
     /// Free up unused memory.
@@ -142,23 +180,50 @@ where
         self.run_ends.shrink_to_fit();
     }
 
-    /// Returns the values of this [`RunEndBuffer`] not including any offset
+    /// Returns the physical (**unsliced**) run ends of this buffer.
+    ///
+    /// Take care when operating on these values as it doesn't take into account
+    /// any logical slicing that may have occurred.
     #[inline]
     pub fn values(&self) -> &[E] {
         &self.run_ends
     }
 
-    /// Returns the maximum run-end encoded in the underlying buffer
+    /// Returns an iterator yielding run ends adjusted for the logical slice.
+    ///
+    /// Each yielded value has the [`logical_offset`] subtracted from it and is
+    /// capped at the [`logical_length`].
+    ///
+    /// [`logical_offset`]: Self::offset
+    /// [`logical_length`]: Self::len
+    pub fn sliced_values(&self) -> impl Iterator<Item = E> + '_ {
+        let offset = self.logical_offset;
+        let len = self.logical_length;
+        let start = self.get_start_physical_index();
+        let end = self.get_end_physical_index();
+        self.run_ends[start..=end].iter().map(move |&val| {
+            let val = val.as_usize().saturating_sub(offset).min(len);
+            E::from_usize(val).unwrap()
+        })
+    }
+
+    /// Returns the maximum run-end encoded in the underlying buffer; that is, the
+    /// last physical run of the buffer. This does not take into account any logical
+    /// slicing that may have occurred.
     #[inline]
     pub fn max_value(&self) -> usize {
         self.values().last().copied().unwrap_or_default().as_usize()
     }
 
-    /// Performs a binary search to find the physical index for the given logical index
+    /// Performs a binary search to find the physical index for the given logical
+    /// index.
+    ///
+    /// Useful for extracting the corresponding physical `run_ends` when this buffer
+    /// is logically sliced.
     ///
-    /// The result is arbitrary if `logical_index >= self.len()`
+    /// The result is arbitrary if `logical_index >= self.len()`.
     pub fn get_physical_index(&self, logical_index: usize) -> usize {
-        let logical_index = E::usize_as(self.offset + logical_index);
+        let logical_index = E::usize_as(self.logical_offset + logical_index);
         let cmp = |p: &E| p.partial_cmp(&logical_index).unwrap();
 
         match self.run_ends.binary_search_by(cmp) {
@@ -167,49 +232,137 @@ where
         }
     }
 
-    /// Returns the physical index at which the logical array starts
+    /// Returns the physical index at which the logical array starts.
+    ///
+    /// The same as calling `get_physical_index(0)` but with a fast path if the
+    /// buffer is not logically sliced, in which case it always returns `0`.
     pub fn get_start_physical_index(&self) -> usize {
-        if self.offset == 0 || self.len == 0 {
+        if self.logical_offset == 0 || self.logical_length == 0 {
             return 0;
         }
         // Fallback to binary search
         self.get_physical_index(0)
     }
 
-    /// Returns the physical index at which the logical array ends
+    /// Returns the physical index at which the logical array ends.
+    ///
+    /// The same as calling `get_physical_index(length - 1)` but with a fast path
+    /// if the buffer is not logically sliced, in which case it returns `length - 1`.
     pub fn get_end_physical_index(&self) -> usize {
-        if self.len == 0 {
+        if self.logical_length == 0 {
             return 0;
         }
-        if self.max_value() == self.offset + self.len {
+        if self.max_value() == self.logical_offset + self.logical_length {
             return self.values().len() - 1;
         }
         // Fallback to binary search
-        self.get_physical_index(self.len - 1)
+        self.get_physical_index(self.logical_length - 1)
     }
 
-    /// Slices this [`RunEndBuffer`] by the provided `offset` and `length`
-    pub fn slice(&self, offset: usize, len: usize) -> Self {
+    /// Slices this [`RunEndBuffer`] by the provided `logical_offset` and `logical_length`.
+    ///
+    /// # Panics
+    ///
+    /// - Specified slice (`logical_offset` + `logical_length`) exceeds existing
+    ///   logical length
+    pub fn slice(&self, logical_offset: usize, logical_length: usize) -> Self {
         assert!(
-            offset.saturating_add(len) <= self.len,
+            logical_offset.saturating_add(logical_length) <= self.logical_length,
             "the length + offset of the sliced RunEndBuffer cannot exceed the existing length"
         );
         Self {
             run_ends: self.run_ends.clone(),
-            offset: self.offset + offset,
-            len,
+            logical_offset: self.logical_offset + logical_offset,
+            logical_length,
         }
     }
 
-    /// Returns the inner [`ScalarBuffer`]
+    /// Returns the inner [`ScalarBuffer`].
     pub fn inner(&self) -> &ScalarBuffer<E> {
         &self.run_ends
     }
 
-    /// Returns the inner [`ScalarBuffer`], consuming self
+    /// Returns the inner [`ScalarBuffer`], consuming self.
     pub fn into_inner(self) -> ScalarBuffer<E> {
         self.run_ends
     }
+
+    /// Returns the physical indices corresponding to the provided logical indices.
+    ///
+    /// Given a slice of logical indices, this method returns a `Vec` containing the
+    /// corresponding physical indices into the run-ends buffer.
+    ///
+    /// This method operates by iterating the logical indices in sorted order, instead of
+    /// finding the physical index for each logical index using binary search via
+    /// the function [`RunEndBuffer::get_physical_index`].
+    ///
+    /// Running benchmarks on both approaches showed that the approach used here
+    /// scaled well for larger inputs.
+    ///
+    /// See <https://github.com/apache/arrow-rs/pull/3622#issuecomment-1407753727> for more details.
+    ///
+    /// # Errors
+    ///
+    /// If any logical index is out of bounds (>= self.len()), returns an error containing the invalid index.
+    #[inline]
+    pub fn get_physical_indices<I>(&self, logical_indices: &[I]) -> Result<Vec<usize>, I>
+    where
+        I: ArrowNativeType,
+    {
+        let len = self.len();
+        let offset = self.offset();
+
+        let indices_len = logical_indices.len();
+
+        if indices_len == 0 {
+            return Ok(vec![]);
+        }
+
+        // `ordered_indices` stores indices into `logical_indices` and can be used
+        // to iterate `logical_indices` in sorted order.
+        let mut ordered_indices: Vec<usize> = (0..indices_len).collect();
+
+        // Instead of sorting `logical_indices` directly, sort the `ordered_indices`
+        // whose values are indices into `logical_indices`.
+        ordered_indices.sort_unstable_by(|lhs, rhs| {
+            logical_indices[*lhs]
+                .partial_cmp(&logical_indices[*rhs])
+                .unwrap()
+        });
+
+        // Return early if not every logical index can be converted to a physical index.
+        let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize();
+        if largest_logical_index >= len {
+            return Err(logical_indices[*ordered_indices.last().unwrap()]);
+        }
+
+        // Skip some physical indices based on offset.
+        let skip_value = self.get_start_physical_index();
+
+        let mut physical_indices = vec![0; indices_len];
+
+        let mut ordered_index = 0_usize;
+        for (physical_index, run_end) in self.values().iter().enumerate().skip(skip_value) {
+            // Get the run end index (relative to offset) of current physical index
+            let run_end_value = run_end.as_usize() - offset;
+
+            // All the `logical_indices` that are less than the current run end index
+            // belong to the current physical index.
+            while ordered_index < indices_len
+                && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value
+            {
+                physical_indices[ordered_indices[ordered_index]] = physical_index;
+                ordered_index += 1;
+            }
+        }
+
+        // If there are input values >= run_ends.last_value then we'll not be able to convert
+        // all logical indices to physical indices.
+        if ordered_index < logical_indices.len() {
+            return Err(logical_indices[ordered_indices[ordered_index]]);
+        }
+        Ok(physical_indices)
+    }
 }
 
 #[cfg(test)]
@@ -233,4 +386,26 @@ mod tests {
         assert_eq!(buffer.get_start_physical_index(), 0);
         assert_eq!(buffer.get_end_physical_index(), 0);
     }
+
+    #[test]
+    fn test_sliced_values() {
+        // [0, 0, 1, 2, 2, 2]
+        let buffer = RunEndBuffer::new(vec![2i32, 3, 6].into(), 0, 6);
+
+        // Slice: [0, 1, 2, 2] start: 1, len: 4
+        // Logical indices: 1, 2, 3, 4
+        // Original run ends: [2, 3, 6]
+        // Adjusted: [2-1, 3-1, 6-1] capped at 4 -> [1, 2, 4]
+        let sliced = buffer.slice(1, 4);
+        let sliced_values: Vec<i32> = sliced.sliced_values().collect();
+        assert_eq!(sliced_values, &[1, 2, 4]);
+
+        // Slice: [2, 2] start: 4, len: 2
+        // Original run ends: [2, 3, 6]
+        // Slicing at 4 means we only have the last run (physical index 2, which ends at 6)
+        // Adjusted: [6-4] capped at 2 -> [2]
+        let sliced = buffer.slice(4, 2);
+        let sliced_values: Vec<i32> = sliced.sliced_values().collect();
+        assert_eq!(sliced_values, &[2]);
+    }
 }
diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs
index 6c66060fb95f..3c5334ca5118 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -29,17 +29,38 @@ use std::ops::Deref;
 /// with the following differences:
 ///
 /// - slicing and cloning is O(1).
-/// - it supports external allocated memory
+/// - support for external allocated memory (e.g. via FFI).
 ///
+/// See [`Buffer`] for more low-level memory management details.
+///
+/// # Example: Convert to/from Vec (without copies)
+///
+/// (See [`Buffer::from_vec`] and [`Buffer::into_vec`] for a lower level API)
 /// ```
 /// # use arrow_buffer::ScalarBuffer;
 /// // Zero-copy conversion from Vec
 /// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
 /// assert_eq!(&buffer, &[1, 2, 3]);
+/// // convert the buffer back to Vec without copy assuming:
+/// // 1. the inner buffer is not sliced
+/// // 2. the inner buffer uses standard allocation
+/// // 3. there are no other references to the inner buffer
+/// let vec: Vec<i32> = buffer.into();
+/// assert_eq!(&vec, &[1, 2, 3]);
+/// ```
 ///
+/// # Example: Zero copy slicing
+/// ```
+/// # use arrow_buffer::ScalarBuffer;
+/// let buffer = ScalarBuffer::from(vec![1, 2, 3]);
+/// assert_eq!(&buffer, &[1, 2, 3]);
 /// // Zero-copy slicing
 /// let sliced = buffer.slice(1, 2);
 /// assert_eq!(&sliced, &[2, 3]);
+/// // Original buffer is unchanged
+/// assert_eq!(&buffer, &[1, 2, 3]);
+/// // converting the sliced buffer back to Vec incurs a copy
+/// let vec: Vec<i32> = sliced.into();
 /// ```
 #[derive(Clone, Default)]
 pub struct ScalarBuffer<T: ArrowNativeType> {
@@ -72,6 +93,19 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
         buffer.slice_with_length(byte_offset, byte_len).into()
     }
 
+    /// Unsafe function to create a new [`ScalarBuffer`] from a [`Buffer`].
+    /// Only use for testing purposes.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it does not check if the `buffer` is aligned
+    pub unsafe fn new_unchecked(buffer: Buffer) -> Self {
+        Self {
+            buffer,
+            phantom: Default::default(),
+        }
+    }
+
     /// Free up unused memory.
     pub fn shrink_to_fit(&mut self) {
         self.buffer.shrink_to_fit();
@@ -99,6 +133,16 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
     pub fn ptr_eq(&self, other: &Self) -> bool {
         self.buffer.ptr_eq(&other.buffer)
     }
+
+    /// Returns the number of elements in the buffer
+    pub fn len(&self) -> usize {
+        self.buffer.len() / std::mem::size_of::<T>()
+    }
+
+    /// Returns true if the buffer is empty
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
 }
 
 impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
@@ -139,8 +183,10 @@ impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
                 is_aligned,
                 "Memory pointer is not aligned with the specified scalar type"
             ),
-            Deallocation::Custom(_, _) =>
-                assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
+            Deallocation::Custom(_, _) => assert!(
+                is_aligned,
+                "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."
+            ),
         }
 
         Self {
diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs
index bdcc3a55dbf2..7990be1e7cc9 100644
--- a/arrow-buffer/src/builder/boolean.rs
+++ b/arrow-buffer/src/builder/boolean.rs
@@ -15,7 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{bit_mask, bit_util, BooleanBuffer, Buffer, MutableBuffer};
+use crate::bit_util::apply_bitwise_binary_op;
+use crate::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
 use std::ops::Range;
 
 /// Builder for [`BooleanBuffer`]
@@ -139,7 +140,6 @@ impl BooleanBufferBuilder {
 
     /// Reserve space to at least `additional` new bits.
     /// Capacity will be `>= self.len() + additional`.
-    /// New bytes are uninitialized and reading them is undefined behavior.
     #[inline]
     pub fn reserve(&mut self, additional: usize) {
         let capacity = self.len + additional;
@@ -218,13 +218,16 @@ impl BooleanBufferBuilder {
     pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) {
         let offset_write = self.len;
         let len = range.end - range.start;
+        // allocate new bits as 0
         self.advance(len);
-        bit_mask::set_bits(
+        // copy bits from to_set into self.buffer a word at a time
+        apply_bitwise_binary_op(
             self.buffer.as_slice_mut(),
-            to_set,
             offset_write,
+            to_set,
             range.start,
             len,
+            |_a, b| b, // copy bits from to_set
         );
     }
 
@@ -256,6 +259,20 @@ impl BooleanBufferBuilder {
     pub fn finish_cloned(&self) -> BooleanBuffer {
         BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
     }
+
+    /// Extends the builder from a trusted length iterator of booleans.
+    /// # Safety
+    /// Callers must ensure that `iterator` reports an exact size via `size_hint`.
+    ///
+    #[inline]
+    pub unsafe fn extend_trusted_len<I>(&mut self, iterator: I)
+    where
+        I: Iterator<Item = bool>,
+    {
+        let len = iterator.size_hint().0;
+        unsafe { self.buffer.extend_bool_trusted_len(iterator, self.len) };
+        self.len += len;
+    }
 }
 
 impl From<BooleanBufferBuilder> for Buffer {
@@ -272,6 +289,14 @@ impl From<BooleanBufferBuilder> for BooleanBuffer {
     }
 }
 
+impl From<BooleanBufferBuilder> for NullBuffer {
+    #[inline]
+    fn from(builder: BooleanBufferBuilder) -> Self {
+        let boolean_buffer = BooleanBuffer::from(builder);
+        NullBuffer::new(boolean_buffer)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -523,4 +548,65 @@ mod tests {
         assert_eq!(buf.len(), buf2.inner().len());
         assert_eq!(buf.as_slice(), buf2.values());
     }
+
+    #[test]
+    fn test_extend() {
+        let mut builder = BooleanBufferBuilder::new(0);
+        let bools = vec![true, false, true, true, false, true, true, true, false];
+        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+        assert_eq!(builder.len(), 9);
+        let finished = builder.finish();
+        for (i, v) in bools.into_iter().enumerate() {
+            assert_eq!(finished.value(i), v);
+        }
+
+        // Test > 64 bits
+        let mut builder = BooleanBufferBuilder::new(0);
+        let bools: Vec<_> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();
+        unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+        assert_eq!(builder.len(), 100);
+        let finished = builder.finish();
+        for (i, v) in bools.into_iter().enumerate() {
+            assert_eq!(finished.value(i), v, "at index {}", i);
+        }
+    }
+
+    #[test]
+    fn test_extend_misaligned() {
+        // Test misaligned start
+        for offset in 1..65 {
+            let mut builder = BooleanBufferBuilder::new(0);
+            builder.append_n(offset, false);
+
+            let bools: Vec<_> = (0..100).map(|i| i % 3 == 0 || i % 7 == 0).collect();
+            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+            assert_eq!(builder.len(), offset + 100);
+
+            let finished = builder.finish();
+            for i in 0..offset {
+                assert!(!finished.value(i));
+            }
+            for (i, v) in bools.into_iter().enumerate() {
+                assert_eq!(finished.value(offset + i), v, "at index {}", offset + i);
+            }
+        }
+    }
+
+    #[test]
+    fn test_extend_misaligned_end() {
+        for len in 1..130 {
+            let mut builder = BooleanBufferBuilder::new(0);
+            let mut bools: Vec<_> = (0..len).map(|i| i % 2 == 0).collect();
+            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+            unsafe { builder.extend_trusted_len(bools.clone().into_iter()) };
+            let copy = bools.clone();
+            bools.extend(copy);
+            assert_eq!(builder.len(), 2 * len);
+
+            let finished = builder.finish();
+            for (i, &v) in bools.iter().enumerate() {
+                assert_eq!(finished.value(i), v, "at index {} for len {}", i, len);
+            }
+        }
+    }
 }
diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs
index f7e0e29dace4..abe510bdabc6 100644
--- a/arrow-buffer/src/builder/mod.rs
+++ b/arrow-buffer/src/builder/mod.rs
@@ -26,7 +26,7 @@ pub use null::*;
 pub use offset::*;
 
 use crate::{ArrowNativeType, Buffer, MutableBuffer};
-use std::{iter, marker::PhantomData};
+use std::marker::PhantomData;
 
 /// Builder for creating a [Buffer] object.
 ///
@@ -214,7 +214,7 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
     #[inline]
     pub fn append_n(&mut self, n: usize, v: T) {
         self.reserve(n);
-        self.extend(iter::repeat(v).take(n))
+        self.extend(std::iter::repeat_n(v, n))
     }
 
     /// Appends `n`, zero-initialized values
diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs
index b811bd2c6b40..8f912b807da5 100644
--- a/arrow-buffer/src/bytes.rs
+++ b/arrow-buffer/src/bytes.rs
@@ -26,6 +26,11 @@ use std::{fmt::Debug, fmt::Formatter};
 use crate::alloc::Deallocation;
 use crate::buffer::dangling_ptr;
 
+#[cfg(feature = "pool")]
+use crate::pool::{MemoryPool, MemoryReservation};
+#[cfg(feature = "pool")]
+use std::sync::Mutex;
+
 /// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself.
 ///
 /// Note that this structure is an internal implementation detail of the
@@ -49,6 +54,10 @@ pub struct Bytes {
 
     /// how to deallocate this region
     deallocation: Deallocation,
+
+    /// Memory reservation for tracking memory usage
+    #[cfg(feature = "pool")]
+    pub(super) reservation: Mutex<Option<Box<dyn MemoryReservation>>>,
 }
 
 impl Bytes {
@@ -70,6 +79,8 @@ impl Bytes {
             ptr,
             len,
             deallocation,
+            #[cfg(feature = "pool")]
+            reservation: Mutex::new(None),
         }
     }
 
@@ -101,6 +112,27 @@ impl Bytes {
         }
     }
 
+    /// Register this [`Bytes`] with the provided [`MemoryPool`], replacing any prior reservation.
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn MemoryPool) {
+        *self.reservation.lock().unwrap() = Some(pool.reserve(self.capacity()));
+    }
+
+    /// Resize the memory reservation of this buffer
+    ///
+    /// This is a no-op if this buffer doesn't have a reservation.
+    #[cfg(feature = "pool")]
+    fn resize_reservation(&self, new_size: usize) {
+        let mut guard = self.reservation.lock().unwrap();
+        if let Some(mut reservation) = guard.take() {
+            // Resize the reservation
+            reservation.resize(new_size);
+
+            // Put it back
+            *guard = Some(reservation);
+        }
+    }
+
     /// Try to reallocate the underlying memory region to a new size (smaller or larger).
     ///
     /// Only works for bytes allocated with the standard allocator.
@@ -135,6 +167,13 @@ impl Bytes {
                     self.ptr = ptr;
                     self.len = new_len;
                     self.deallocation = Deallocation::Standard(new_layout);
+
+                    #[cfg(feature = "pool")]
+                    {
+                        // Resize reservation
+                        self.resize_reservation(new_len);
+                    }
+
                     return Ok(());
                 }
             }
@@ -199,6 +238,8 @@ impl From<bytes::Bytes> for Bytes {
             len,
             ptr: NonNull::new(value.as_ptr() as _).unwrap(),
             deallocation: Deallocation::Custom(std::sync::Arc::new(value), len),
+            #[cfg(feature = "pool")]
+            reservation: Mutex::new(None),
         }
     }
 }
@@ -209,14 +250,83 @@ mod tests {
 
     #[test]
     fn test_from_bytes() {
-        let bytes = bytes::Bytes::from(vec![1, 2, 3, 4]);
-        let arrow_bytes: Bytes = bytes.clone().into();
+        let message = b"hello arrow";
 
-        assert_eq!(bytes.as_ptr(), arrow_bytes.as_ptr());
+        // we can create a Bytes from bytes::Bytes (created from slices)
+        let c_bytes: bytes::Bytes = message.as_ref().into();
+        let a_bytes: Bytes = c_bytes.into();
+        assert_eq!(a_bytes.as_slice(), message);
 
-        drop(bytes);
-        drop(arrow_bytes);
+        // we can create a Bytes from bytes::Bytes (created from Vec)
+        let c_bytes: bytes::Bytes = bytes::Bytes::from(message.to_vec());
+        let a_bytes: Bytes = c_bytes.into();
+        assert_eq!(a_bytes.as_slice(), message);
+    }
+
+    #[cfg(feature = "pool")]
+    mod pool_tests {
+        use super::*;
+
+        use crate::pool::TrackingMemoryPool;
+
+        #[test]
+        fn test_bytes_with_pool() {
+            // Create a standard allocation
+            let buffer = unsafe {
+                let layout =
+                    std::alloc::Layout::from_size_align(1024, crate::alloc::ALIGNMENT).unwrap();
+                let ptr = std::alloc::alloc(layout);
+                assert!(!ptr.is_null());
+
+                Bytes::new(
+                    NonNull::new(ptr).unwrap(),
+                    1024,
+                    Deallocation::Standard(layout),
+                )
+            };
+
+            // Create a memory pool
+            let pool = TrackingMemoryPool::default();
+            assert_eq!(pool.used(), 0);
+
+            // Reserve memory and assign to buffer. Claim twice.
+            buffer.claim(&pool);
+            assert_eq!(pool.used(), 1024);
+            buffer.claim(&pool);
+            assert_eq!(pool.used(), 1024);
+
+            // Memory should be released when buffer is dropped
+            drop(buffer);
+            assert_eq!(pool.used(), 0);
+        }
+
+        #[test]
+        fn test_bytes_drop_releases_pool() {
+            let pool = TrackingMemoryPool::default();
+
+            {
+                // Create a buffer with pool
+                let _buffer = unsafe {
+                    let layout =
+                        std::alloc::Layout::from_size_align(1024, crate::alloc::ALIGNMENT).unwrap();
+                    let ptr = std::alloc::alloc(layout);
+                    assert!(!ptr.is_null());
+
+                    let bytes = Bytes::new(
+                        NonNull::new(ptr).unwrap(),
+                        1024,
+                        Deallocation::Standard(layout),
+                    );
+
+                    bytes.claim(&pool);
+                    bytes
+                };
 
-        let _ = Bytes::from(bytes::Bytes::new());
+                assert_eq!(pool.used(), 1024);
+            }
+
+            // Buffer has been dropped, memory should be released
+            assert_eq!(pool.used(), 0);
+        }
     }
 }
diff --git a/arrow-buffer/src/lib.rs b/arrow-buffer/src/lib.rs
index 174cdc4d9c18..230747b8b84a 100644
--- a/arrow-buffer/src/lib.rs
+++ b/arrow-buffer/src/lib.rs
@@ -16,14 +16,27 @@
 // under the License.
 
 //! Low-level buffer abstractions for [Apache Arrow Rust](https://docs.rs/arrow)
+//!
+//! # Byte Storage abstractions
+//! - [`MutableBuffer`]: Raw memory buffer that can be mutated and grown
+//! - [`Buffer`]: Immutable buffer that is shared across threads
+//!
+//! # Typed Abstractions
+//!
+//! There are also several wrappers over [`Buffer`] with methods for
+//! easier manipulation:
+//!
+//! - [`BooleanBuffer`][]: Bitmasks (buffer of packed bits)
+//! - [`NullBuffer`][]: Arrow null (validity) bitmaps ([`BooleanBuffer`] with extra utilities)
+//! - [`ScalarBuffer<T>`][]: Typed buffer for primitive types (e.g., `i32`, `f64`)
+//! - [`OffsetBuffer<O>`][]: Offsets used in variable-length types (e.g., strings, lists)
+//! - [`RunEndBuffer<E>`][]: Run-ends used in run-end encoded data
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
-// used by [`buffer::mutable::dangling_ptr`]
-#![cfg_attr(miri, feature(strict_provenance))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 
 pub mod alloc;
@@ -48,3 +61,8 @@ mod interval;
 pub use interval::*;
 
 mod arith;
+
+#[cfg(feature = "pool")]
+mod pool;
+#[cfg(feature = "pool")]
+pub use pool::*;
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index eb8e067db0be..68058a4eeccd 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{i256, IntervalDayTime, IntervalMonthDayNano};
+use crate::{IntervalDayTime, IntervalMonthDayNano, i256};
 use half::f16;
 
 mod private {
diff --git a/arrow-buffer/src/pool.rs b/arrow-buffer/src/pool.rs
new file mode 100644
index 000000000000..95bd308a35be
--- /dev/null
+++ b/arrow-buffer/src/pool.rs
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module contains the memory pool traits and an implementation
+//! for tracking memory usage.
+//!
+//! The basic traits are [`MemoryPool`] and [`MemoryReservation`]. The default
+//! implementation of [`MemoryPool`] is [`TrackingMemoryPool`]. Their relationship
+//! is as follows:
+//!
+//! ```text
+//!     (pool tracker)                        (resizable)           
+//!  ┌──────────────────┐ fn reserve() ┌─────────────────────────┐
+//!  │ trait MemoryPool │─────────────►│ trait MemoryReservation │
+//!  └──────────────────┘              └─────────────────────────┘
+//! ```
+
+use std::fmt::Debug;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+/// A memory reservation within a [`MemoryPool`] that is freed on drop
+pub trait MemoryReservation: Debug + Send + Sync {
+    /// Returns the size of this reservation in bytes.
+    fn size(&self) -> usize;
+
+    /// Resize this reservation to a new size in bytes.
+    fn resize(&mut self, new_size: usize);
+}
+
+/// A pool of memory that can be reserved and released.
+///
+/// This is used to accurately track memory usage when buffers are shared
+/// between multiple arrays or other data structures.
+///
+/// For example, assume we have two arrays that share an underlying buffer.
+/// It's hard to tell how much memory is used by them because we can't
+/// tell if the buffer is shared or not.
+///
+/// ```text
+///       Array A           Array B    
+///    ┌────────────┐    ┌────────────┐
+///    │ slices...  │    │ slices...  │
+///    │────────────│    │────────────│
+///    │ Arc<Bytes> │    │ Arc<Bytes> │ (shared buffer)
+///    └─────▲──────┘    └───────▲────┘
+///          │                   │     
+///          │       Bytes       │     
+///          │  ┌─────────────┐  │     
+///          │  │   data...   │  │     
+///          │  │─────────────│  │     
+///          └──│   Memory    │──┘   (tracked with a memory pool)  
+///             │ Reservation │        
+///             └─────────────┘        
+/// ```
+///
+/// With a memory pool, we can count the memory usage by the shared buffer
+/// directly.
+pub trait MemoryPool: Debug + Send + Sync {
+    /// Reserves memory from the pool. Infallible.
+    ///
+    /// Returns a reservation of the requested size.
+    fn reserve(&self, size: usize) -> Box<dyn MemoryReservation>;
+
+    /// Returns the current available memory in the pool.
+    ///
+    /// The pool may be overfilled, so this method might return a negative value.
+    fn available(&self) -> isize;
+
+    /// Returns the current used memory from the pool.
+    fn used(&self) -> usize;
+
+    /// Returns the maximum memory that can be reserved from the pool.
+    fn capacity(&self) -> usize;
+}
+
+/// A simple [`MemoryPool`] that reports the total memory usage
+#[derive(Debug, Default)]
+pub struct TrackingMemoryPool(Arc<AtomicUsize>);
+
+impl TrackingMemoryPool {
+    /// Returns the total allocated size
+    pub fn allocated(&self) -> usize {
+        self.0.load(Ordering::Relaxed)
+    }
+}
+
+impl MemoryPool for TrackingMemoryPool {
+    fn reserve(&self, size: usize) -> Box<dyn MemoryReservation> {
+        self.0.fetch_add(size, Ordering::Relaxed); // undone when the Tracker drops
+        Box::new(Tracker {
+            size,
+            shared: Arc::clone(&self.0),
+        })
+    }
+
+    fn available(&self) -> isize {
+        isize::MAX.wrapping_sub(self.used() as isize) // exact even if used > isize::MAX
+    }
+
+    fn used(&self) -> usize {
+        self.0.load(Ordering::Relaxed)
+    }
+
+    fn capacity(&self) -> usize {
+        usize::MAX // reserve() enforces no limit
+    }
+}
+
+#[derive(Debug)]
+struct Tracker {
+    size: usize,
+    shared: Arc<AtomicUsize>,
+}
+
+impl Drop for Tracker {
+    fn drop(&mut self) {
+        self.shared.fetch_sub(self.size, Ordering::Relaxed);
+    }
+}
+
+impl MemoryReservation for Tracker {
+    fn size(&self) -> usize {
+        self.size
+    }
+
+    fn resize(&mut self, new: usize) {
+        match self.size < new {
+            true => self.shared.fetch_add(new - self.size, Ordering::Relaxed),
+            false => self.shared.fetch_sub(self.size - new, Ordering::Relaxed),
+        };
+        self.size = new;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tracking_memory_pool() {
+        let pool = TrackingMemoryPool::default();
+
+        // Reserve 512 bytes
+        let reservation = pool.reserve(512);
+        assert_eq!(reservation.size(), 512);
+        assert_eq!(pool.used(), 512);
+        assert_eq!(pool.available(), isize::MAX - 512);
+
+        // Reserve another 256 bytes
+        let reservation2 = pool.reserve(256);
+        assert_eq!(reservation2.size(), 256);
+        assert_eq!(pool.used(), 768);
+        assert_eq!(pool.available(), isize::MAX - 768);
+
+        // Test resize to increase
+        let mut reservation_mut = reservation;
+        reservation_mut.resize(600);
+        assert_eq!(reservation_mut.size(), 600);
+        assert_eq!(pool.used(), 856); // 600 + 256
+
+        // Test resize to decrease
+        reservation_mut.resize(400);
+        assert_eq!(reservation_mut.size(), 400);
+        assert_eq!(pool.used(), 656); // 400 + 256
+
+        // Drop the first reservation
+        drop(reservation_mut);
+        assert_eq!(pool.used(), 256);
+
+        // Drop the second reservation
+        drop(reservation2);
+        assert_eq!(pool.used(), 0);
+    }
+}
diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs
index ea8e8f472ace..8c7ec5e9a8f6 100644
--- a/arrow-buffer/src/util/bit_chunk_iterator.rs
+++ b/arrow-buffer/src/util/bit_chunk_iterator.rs
@@ -202,11 +202,10 @@ fn compute_suffix_mask(len: usize, lead_padding: usize) -> (u64, usize) {
     (suffix_mask, trailing_padding)
 }
 
-/// Iterates over an arbitrarily aligned byte buffer
+/// Iterates over an arbitrarily aligned byte buffer 64 bits at a time
 ///
-/// Yields an iterator of u64, and a remainder. The first byte in the buffer
+/// [`Self::iter`] yields an iterator of `u64`, and a remainder. The first byte in the buffer
 /// will be the least significant byte in output u64
-///
 #[derive(Debug)]
 pub struct BitChunks<'a> {
     buffer: &'a [u8],
@@ -221,7 +220,10 @@ pub struct BitChunks<'a> {
 impl<'a> BitChunks<'a> {
     /// Create a new [`BitChunks`] from a byte array, and an offset and length in bits
     pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
-        assert!(ceil(offset + len, 8) <= buffer.len() * 8);
+        assert!(
+            ceil(offset + len, 8) <= buffer.len(),
+            "offset + len out of bounds"
+        );
 
         let byte_offset = offset / 8;
         let bit_offset = offset % 8;
@@ -256,7 +258,7 @@ impl<'a> BitChunks<'a> {
         self.remainder_len
     }
 
-    /// Returns the number of chunks
+    /// Returns the number of `u64` chunks
     #[inline]
     pub const fn chunk_len(&self) -> usize {
         self.chunk_len
@@ -290,7 +292,28 @@ impl<'a> BitChunks<'a> {
         }
     }
 
-    /// Returns an iterator over chunks of 64 bits represented as an u64
+    /// Return the number of `u64` that are needed to represent all bits
+    /// (including remainder).
+    ///
+    /// This is equal to `chunk_len + 1` if there is a remainder,
+    /// otherwise it is equal to `chunk_len`.
+    #[inline]
+    pub fn num_u64s(&self) -> usize {
+        if self.remainder_len == 0 {
+            self.chunk_len
+        } else {
+            self.chunk_len + 1
+        }
+    }
+
+    /// Return the number of *bytes* that are needed to represent all bits
+    /// (including remainder).
+    #[inline]
+    pub fn num_bytes(&self) -> usize {
+        ceil(self.chunk_len * 64 + self.remainder_len, 8)
+    }
+
+    /// Returns an iterator over chunks of 64 bits represented as an `u64`
     #[inline]
     pub const fn iter(&self) -> BitChunkIterator<'a> {
         BitChunkIterator::<'a> {
@@ -476,6 +499,57 @@ mod tests {
         assert_eq!(0x7F, bitchunks.remainder_bits());
     }
 
+    #[test]
+    #[should_panic(expected = "offset + len out of bounds")]
+    fn test_out_of_bound_should_panic_length_is_more_than_buffer_length() {
+        const ALLOC_SIZE: usize = 4 * 1024;
+        let input = vec![0xFF_u8; ALLOC_SIZE];
+
+        let buffer: Buffer = Buffer::from_vec(input);
+
+        // We are reading more than exists in the buffer
+        buffer.bit_chunks(0, (ALLOC_SIZE + 1) * 8);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset + len out of bounds")]
+    fn test_out_of_bound_should_panic_length_is_more_than_buffer_length_but_not_when_not_using_ceil()
+     {
+        const ALLOC_SIZE: usize = 4 * 1024;
+        let input = vec![0xFF_u8; ALLOC_SIZE];
+
+        let buffer: Buffer = Buffer::from_vec(input);
+
+        // We are reading more than exists in the buffer
+        buffer.bit_chunks(0, (ALLOC_SIZE * 8) + 1);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset + len out of bounds")]
+    fn test_out_of_bound_should_panic_when_offset_is_not_zero_and_length_is_the_entire_buffer_length()
+     {
+        const ALLOC_SIZE: usize = 4 * 1024;
+        let input = vec![0xFF_u8; ALLOC_SIZE];
+
+        let buffer: Buffer = Buffer::from_vec(input);
+
+        // We are reading more than exists in the buffer
+        buffer.bit_chunks(8, ALLOC_SIZE * 8);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset + len out of bounds")]
+    fn test_out_of_bound_should_panic_when_offset_is_not_zero_and_length_is_the_entire_buffer_length_with_ceil()
+     {
+        const ALLOC_SIZE: usize = 4 * 1024;
+        let input = vec![0xFF_u8; ALLOC_SIZE];
+
+        let buffer: Buffer = Buffer::from_vec(input);
+
+        // We are reading more than exists in the buffer
+        buffer.bit_chunks(1, ALLOC_SIZE * 8);
+    }
+
     #[test]
     #[allow(clippy::assertions_on_constants)]
     fn test_unaligned_bit_chunk_iterator() {
diff --git a/arrow-buffer/src/util/bit_iterator.rs b/arrow-buffer/src/util/bit_iterator.rs
index c3e72044bf87..0aa94a5d4dc1 100644
--- a/arrow-buffer/src/util/bit_iterator.rs
+++ b/arrow-buffer/src/util/bit_iterator.rs
@@ -23,6 +23,7 @@ use crate::bit_util::{ceil, get_bit_raw};
 /// Iterator over the bits within a packed bitmask
 ///
 /// To efficiently iterate over just the set bits see [`BitIndexIterator`] and [`BitSliceIterator`]
+#[derive(Clone)]
 pub struct BitIterator<'a> {
     buffer: &'a [u8],
     current_offset: usize,
@@ -71,6 +72,71 @@ impl Iterator for BitIterator<'_> {
         let remaining_bits = self.end_offset - self.current_offset;
         (remaining_bits, Some(remaining_bits))
     }
+
+    fn count(self) -> usize
+    where
+        Self: Sized,
+    {
+        self.len()
+    }
+
+    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        // Check if we can advance to the desired offset.
+        // When n is 0 it means we want the next() value
+        // and when n is 1 we want the next().next() value
+        // so adding n to the current offset and not n - 1
+        match self.current_offset.checked_add(n) {
+            // Yes, and still within bounds
+            Some(new_offset) if new_offset < self.end_offset => {
+                self.current_offset = new_offset;
+            }
+
+            // Either overflow or would exceed end_offset
+            _ => {
+                self.current_offset = self.end_offset;
+                return None;
+            }
+        }
+
+        self.next()
+    }
+
+    fn last(mut self) -> Option<Self::Item> {
+        // If already at the end, return None
+        if self.current_offset == self.end_offset {
+            return None;
+        }
+
+        // Jump to the last remaining bit, at index `end_offset - 1`
+        self.current_offset = self.end_offset - 1;
+
+        // Return the last bit
+        self.next()
+    }
+
+    fn max(self) -> Option<Self::Item>
+    where
+        Self: Sized,
+        Self::Item: Ord,
+    {
+        if self.current_offset == self.end_offset {
+            return None;
+        }
+
+        // true is greater than false so we only need to check if there's any true bit
+        let mut bit_index_iter = BitIndexIterator::new(
+            self.buffer,
+            self.current_offset,
+            self.end_offset - self.current_offset,
+        );
+
+        if bit_index_iter.next().is_some() {
+            return Some(true);
+        }
+
+        // We know the iterator is not empty and there are no set bits so false is the max
+        Some(false)
+    }
 }
 
 impl ExactSizeIterator for BitIterator<'_> {}
@@ -86,6 +152,27 @@ impl DoubleEndedIterator for BitIterator<'_> {
         let v = unsafe { get_bit_raw(self.buffer.as_ptr(), self.end_offset) };
         Some(v)
     }
+
+    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+        // Check if we can move the end back to the desired offset.
+        // When n is 0 it means we want the next_back() value
+        // and when n is 1 we want the next_back().next_back() value
+        // so subtracting n from the end offset and not n - 1
+        match self.end_offset.checked_sub(n) {
+            // Yes, and still within bounds
+            Some(new_offset) if self.current_offset < new_offset => {
+                self.end_offset = new_offset;
+            }
+
+            // Either underflow or fewer than n + 1 bits remain
+            _ => {
+                self.current_offset = self.end_offset;
+                return None;
+            }
+        }
+
+        self.next_back()
+    }
 }
 
 /// Iterator of contiguous ranges of set bits within a provided packed bitmask
@@ -216,6 +303,7 @@ impl<'a> BitIndexIterator<'a> {
 impl Iterator for BitIndexIterator<'_> {
     type Item = usize;
 
+    #[inline]
     fn next(&mut self) -> Option<Self::Item> {
         loop {
             if self.current_chunk != 0 {
@@ -230,6 +318,63 @@ impl Iterator for BitIndexIterator<'_> {
     }
 }
 
+/// An iterator over the indices, as `u32`, of the set bits in a provided bitmask.
+/// Respects arbitrary offsets and slice lead/trail padding exactly like [`BitIndexIterator`]
+#[derive(Debug)]
+pub struct BitIndexU32Iterator<'a> {
+    curr: u64, // bits of the current 64-bit chunk that have not been yielded yet
+    chunk_offset: i64, // logical index of bit 0 of `curr` (starts at minus the lead padding)
+    iter: UnalignedBitChunkIterator<'a>, // source of the remaining 64-bit chunks
+}
+
+impl<'a> BitIndexU32Iterator<'a> {
+    /// Create a new [BitIndexU32Iterator] from the provided buffer,
+    /// offset and len in bits.
+    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
+        // Build the aligned chunks (including prefix/suffix masked)
+        let chunks = UnalignedBitChunk::new(buffer, offset, len);
+        let mut iter = chunks.iter();
+
+        // First 64-bit word (masked for lead padding), or 0 if empty
+        let curr = iter.next().unwrap_or(0);
+        // Negative lead padding ensures the first bit in curr maps to index 0
+        let chunk_offset = -(chunks.lead_padding() as i64);
+
+        Self {
+            curr,
+            chunk_offset,
+            iter,
+        }
+    }
+}
+
+impl<'a> Iterator for BitIndexU32Iterator<'a> {
+    type Item = u32;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<u32> {
+        loop {
+            if self.curr != 0 {
+                // Position of least-significant set bit
+                let tz = self.curr.trailing_zeros();
+                // Clear that bit
+                self.curr &= self.curr - 1;
+                // Return global index = chunk_offset + tz (the `as u32` cast wraps above u32::MAX)
+                return Some((self.chunk_offset + tz as i64) as u32);
+            }
+            // Advance to next 64-bit chunk
+            match self.iter.next() {
+                Some(next_chunk) => {
+                    // Move offset forward by 64 bits
+                    self.chunk_offset += 64;
+                    self.curr = next_chunk;
+                }
+                None => return None,
+            }
+        }
+    }
+}
+
 /// Calls the provided closure for each index in the provided null mask that is set,
 /// using an adaptive strategy based on the null count
 ///
@@ -269,6 +414,12 @@ pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::BooleanBuffer;
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
+    use std::fmt::Debug;
+    use std::iter::Copied;
+    use std::slice::Iter;
 
     #[test]
     fn test_bit_iterator_size_hint() {
@@ -322,4 +473,533 @@ mod tests {
         let mask = &[223, 23];
         BitIterator::new(mask, 17, 0);
     }
+
+    #[test]
+    fn test_bit_index_u32_iterator_basic() {
+        let mask = &[0b00010010, 0b00100011];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 4, 8).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 4, 8)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 10, 4).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 10, 4)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 0).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 0)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_iterator_all_set() {
+        let mask = &[0xFF, 0xFF];
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_iterator_none_set() {
+        let mask = &[0x00, 0x00];
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_cross_chunk() {
+        let mut buf = vec![0u8; 16];
+        for bit in 60..68 {
+            let byte = (bit / 8) as usize;
+            let bit_in_byte = bit % 8;
+            buf[byte] |= 1 << bit_in_byte;
+        }
+        let offset = 58;
+        let len = 10;
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&buf, offset, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&buf, offset, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_unaligned_offset() {
+        let mask = &[0b0110_1100, 0b1010_0000];
+        let offset = 2;
+        let len = 12;
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, offset, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, offset, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_long_all_set() {
+        let len = 200;
+        let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
+        let bytes = vec![0xFFu8; num_bytes];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_none_set() {
+        let len = 50;
+        let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
+        let bytes = vec![0u8; num_bytes];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    trait SharedBetweenBitIteratorAndSliceIter:
+        ExactSizeIterator<Item = bool> + DoubleEndedIterator<Item = bool>
+    {
+    }
+    impl<T: ?Sized + ExactSizeIterator<Item = bool> + DoubleEndedIterator<Item = bool>>
+        SharedBetweenBitIteratorAndSliceIter for T
+    {
+    }
+
+    fn get_bit_iterator_cases() -> impl Iterator<Item = (BooleanBuffer, Vec<bool>)> {
+        let mut rng = StdRng::seed_from_u64(42);
+
+        [0, 1, 6, 8, 100, 164]
+            .map(|len| {
+                let source = (0..len).map(|_| rng.random_bool(0.5)).collect::<Vec<_>>();
+
+                (BooleanBuffer::from(source.as_slice()), source)
+            })
+            .into_iter()
+    }
+
+    fn setup_and_assert(
+        setup_iters: impl Fn(&mut dyn SharedBetweenBitIteratorAndSliceIter),
+        assert_fn: impl Fn(BitIterator, Copied<Iter<bool>>),
+    ) {
+        for (boolean_buffer, source) in get_bit_iterator_cases() {
+            // Not using `boolean_buffer.iter()` in case the implementation change to not call BitIterator internally
+            // in which case the test would not test what it intends to test
+            let mut actual = BitIterator::new(boolean_buffer.values(), 0, boolean_buffer.len());
+            let mut expected = source.iter().copied();
+
+            setup_iters(&mut actual);
+            setup_iters(&mut expected);
+
+            assert_fn(actual, expected);
+        }
+    }
+
+    /// Trait representing an operation on a BitIterator
+    /// that can be compared against a slice iterator
+    trait BitIteratorOp {
+        /// What the operation returns (e.g. Option<bool> for last/max, usize for count, etc)
+        type Output: PartialEq + Debug;
+
+        /// The name of the operation, used for error messages
+        const NAME: &'static str;
+
+        /// Get the value of the operation for the provided iterator
+        /// This will be either a BitIterator or a slice iterator to make sure they produce the same result
+        fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(iter: T) -> Self::Output;
+    }
+
+    /// Helper function that will assert that the provided operation
+    /// produces the same result for both BitIterator and slice iterator
+    /// under various consumption patterns (e.g. some calls to next/next_back/consume_all/etc)
+    fn assert_bit_iterator_cases<O: BitIteratorOp>() {
+        setup_and_assert(
+            |_iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {},
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                iter.next();
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming 1 element from the start (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                iter.next_back();
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming 1 element from the end (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                iter.next();
+                iter.next_back();
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming 1 element from start and end (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                while iter.len() > 1 {
+                    iter.next();
+                }
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming all from the start but 1 (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                while iter.len() > 1 {
+                    iter.next_back();
+                }
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming all from the end but 1 (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                while iter.next().is_some() {}
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming all from the start (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+
+        setup_and_assert(
+            |iter: &mut dyn SharedBetweenBitIteratorAndSliceIter| {
+                while iter.next_back().is_some() {}
+            },
+            |actual, expected| {
+                let current_iterator_values: Vec<bool> = expected.clone().collect();
+
+                assert_eq!(
+                    O::get_value(actual),
+                    O::get_value(expected),
+                    "Failed on op {} for new iter after consuming all from the end (left actual, right expected) ({current_iterator_values:?})",
+                    O::NAME
+                );
+            },
+        );
+    }
+
+    #[test]
+    fn assert_bit_iterator_count() {
+        struct CountOp;
+
+        impl BitIteratorOp for CountOp {
+            type Output = usize;
+            const NAME: &'static str = "count";
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(iter: T) -> Self::Output {
+                iter.count()
+            }
+        }
+
+        assert_bit_iterator_cases::<CountOp>()
+    }
+
+    #[test]
+    fn assert_bit_iterator_last() {
+        struct LastOp;
+
+        impl BitIteratorOp for LastOp {
+            type Output = Option<bool>;
+            const NAME: &'static str = "last";
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(iter: T) -> Self::Output {
+                iter.last()
+            }
+        }
+
+        assert_bit_iterator_cases::<LastOp>()
+    }
+
+    #[test]
+    fn assert_bit_iterator_max() {
+        struct MaxOp;
+
+        impl BitIteratorOp for MaxOp {
+            type Output = Option<bool>;
+            const NAME: &'static str = "max";
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(iter: T) -> Self::Output {
+                iter.max()
+            }
+        }
+
+        assert_bit_iterator_cases::<MaxOp>()
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_0() {
+        struct NthOp<const BACK: bool>;
+
+        impl<const BACK: bool> BitIteratorOp for NthOp<BACK> {
+            type Output = Option<bool>;
+            const NAME: &'static str = if BACK { "nth_back(0)" } else { "nth(0)" };
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(mut iter: T) -> Self::Output {
+                if BACK { iter.nth_back(0) } else { iter.nth(0) }
+            }
+        }
+
+        assert_bit_iterator_cases::<NthOp<false>>();
+        assert_bit_iterator_cases::<NthOp<true>>();
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_1() {
+        struct NthOp<const BACK: bool>;
+
+        impl<const BACK: bool> BitIteratorOp for NthOp<BACK> {
+            type Output = Option<bool>;
+            const NAME: &'static str = if BACK { "nth_back(1)" } else { "nth(1)" };
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(mut iter: T) -> Self::Output {
+                if BACK { iter.nth_back(1) } else { iter.nth(1) }
+            }
+        }
+
+        assert_bit_iterator_cases::<NthOp<false>>();
+        assert_bit_iterator_cases::<NthOp<true>>();
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_after_end() {
+        struct NthOp<const BACK: bool>;
+
+        impl<const BACK: bool> BitIteratorOp for NthOp<BACK> {
+            type Output = Option<bool>;
+            const NAME: &'static str = if BACK {
+                "nth_back(iter.len() + 1)"
+            } else {
+                "nth(iter.len() + 1)"
+            };
+
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(mut iter: T) -> Self::Output {
+                if BACK {
+                    iter.nth_back(iter.len() + 1)
+                } else {
+                    iter.nth(iter.len() + 1)
+                }
+            }
+        }
+
+        assert_bit_iterator_cases::<NthOp<false>>();
+        assert_bit_iterator_cases::<NthOp<true>>();
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_len() {
+        /// Asks for index `len()`, from the front (`nth`) or the back (`nth_back`).
+        struct AtLen<const BACK: bool>;
+        impl<const BACK: bool> BitIteratorOp for AtLen<BACK> {
+            type Output = Option<bool>;
+            const NAME: &'static str = if BACK {
+                "nth_back(iter.len())"
+            } else {
+                "nth(iter.len())"
+            };
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(mut iter: T) -> Self::Output {
+                let index = iter.len();
+                if BACK {
+                    iter.nth_back(index)
+                } else {
+                    iter.nth(index)
+                }
+            }
+        }
+
+        assert_bit_iterator_cases::<AtLen<false>>();
+        assert_bit_iterator_cases::<AtLen<true>>();
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_last() {
+        /// Asks for index `len().saturating_sub(1)`, from the front or from the back.
+        struct LastIndex<const BACK: bool>;
+        impl<const BACK: bool> BitIteratorOp for LastIndex<BACK> {
+            type Output = Option<bool>;
+            const NAME: &'static str = if BACK {
+                "nth_back(iter.len().saturating_sub(1))"
+            } else {
+                "nth(iter.len().saturating_sub(1))"
+            };
+            fn get_value<T: SharedBetweenBitIteratorAndSliceIter>(mut iter: T) -> Self::Output {
+                let index = iter.len().saturating_sub(1);
+                if BACK {
+                    iter.nth_back(index)
+                } else {
+                    iter.nth(index)
+                }
+            }
+        }
+
+        assert_bit_iterator_cases::<LastIndex<false>>();
+        assert_bit_iterator_cases::<LastIndex<true>>();
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_and_reuse() {
+        setup_and_assert(
+            |_| {},
+            |actual, expected| {
+                // Every section below starts from fresh clones of both iterators
+                {
+                    // `nth(0)` is called once per element of the untouched clone
+                    // (the number of calls is fixed before the loop starts)
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        #[allow(clippy::iter_nth_zero)]
+                        let (got_val, want_val) = (got.nth(0), want.nth(0));
+                        assert_eq!(got_val, want_val, "Failed on nth(0)");
+                    }
+                }
+
+                {
+                    // `nth(1)` skips one element per call, so the later calls also
+                    // compare the two iterators after they have run dry
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        let (got_val, want_val) = (got.nth(1), want.nth(1));
+                        assert_eq!(got_val, want_val, "Failed on nth(1)");
+                    }
+                }
+
+                {
+                    // Same again with `nth(2)`, which skips two elements per call
+                    // before yielding one
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        let (got_val, want_val) = (got.nth(2), want.nth(2));
+                        assert_eq!(got_val, want_val, "Failed on nth(2)");
+                    }
+                }
+            },
+        );
+    }
+
+    #[test]
+    fn assert_bit_iterator_nth_back_and_reuse() {
+        setup_and_assert(
+            |_| {},
+            |actual, expected| {
+                // Every section below starts from fresh clones of both iterators
+                {
+                    // `nth_back(0)` is called once per element of the untouched clone
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        #[allow(clippy::iter_nth_zero)]
+                        let (got_val, want_val) = (got.nth_back(0), want.nth_back(0));
+                        assert_eq!(got_val, want_val, "Failed on nth_back(0)");
+                    }
+                }
+
+                {
+                    // `nth_back(1)` skips one element per call, so the later calls also
+                    // compare the two iterators after they have run dry
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        let (got_val, want_val) = (got.nth_back(1), want.nth_back(1));
+                        assert_eq!(got_val, want_val, "Failed on nth_back(1)");
+                    }
+                }
+
+                {
+                    // Same again with `nth_back(2)`, which skips two elements per call
+                    // before yielding one
+                    let (mut got, mut want) = (actual.clone(), expected.clone());
+                    for _ in 0..want.len() {
+                        let (got_val, want_val) = (got.nth_back(2), want.nth_back(2));
+                        assert_eq!(got_val, want_val, "Failed on nth_back(2)");
+                    }
+                }
+            },
+        );
+    }
 }
diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs
index 0d694d13ec75..a8ae1a765414 100644
--- a/arrow-buffer/src/util/bit_mask.rs
+++ b/arrow-buffer/src/util/bit_mask.rs
@@ -132,10 +132,8 @@ unsafe fn set_upto_64bits(
 unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 {
     debug_assert!(count <= 8);
     let mut tmp: u64 = 0;
-    let src = data.as_ptr().add(offset);
-    unsafe {
-        std::ptr::copy_nonoverlapping(src, &mut tmp as *mut _ as *mut u8, count);
-    }
+    let src = unsafe { data.as_ptr().add(offset) };
+    unsafe { std::ptr::copy_nonoverlapping(src, &mut tmp as *mut _ as *mut u8, count) };
     tmp
 }
 
@@ -143,8 +141,8 @@ unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 {
 /// The caller must ensure `data` has `offset..(offset + 8)` range
 #[inline]
 unsafe fn write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) {
-    let ptr = data.as_mut_ptr().add(offset) as *mut u64;
-    ptr.write_unaligned(chunk);
+    let ptr = unsafe { data.as_mut_ptr().add(offset) } as *mut u64;
+    unsafe { ptr.write_unaligned(chunk) };
 }
 
 /// Similar to `write_u64_bytes`, but this method ORs the offset addressed `data` and `chunk`
@@ -154,9 +152,9 @@ unsafe fn write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) {
 /// The caller must ensure `data` has `offset..(offset + 8)` range
 #[inline]
 unsafe fn or_write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) {
-    let ptr = data.as_mut_ptr().add(offset);
-    let chunk = chunk | (*ptr) as u64;
-    (ptr as *mut u64).write_unaligned(chunk);
+    let ptr = unsafe { data.as_mut_ptr().add(offset) };
+    let chunk = chunk | (unsafe { *ptr }) as u64;
+    unsafe { (ptr as *mut u64).write_unaligned(chunk) };
 }
 
 #[cfg(test)]
@@ -278,7 +276,7 @@ mod tests {
     impl Display for BinaryFormatter<'_> {
         fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
             for byte in self.0 {
-                write!(f, "{:08b} ", byte)?;
+                write!(f, "{byte:08b} ")?;
             }
             write!(f, " ")?;
             Ok(())
@@ -389,8 +387,8 @@ mod tests {
                 self.len,
             );
 
-            assert_eq!(actual, self.expected_data, "self: {}", self);
-            assert_eq!(null_count, self.expected_null_count, "self: {}", self);
+            assert_eq!(actual, self.expected_data, "self: {self}");
+            assert_eq!(null_count, self.expected_null_count, "self: {self}");
         }
     }
 
diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs
index c297321bdcf9..67c72fc08906 100644
--- a/arrow-buffer/src/util/bit_util.rs
+++ b/arrow-buffer/src/util/bit_util.rs
@@ -17,6 +17,8 @@
 
 //! Utils for working with bits
 
+use crate::bit_chunk_iterator::BitChunks;
+
 /// Returns the nearest number that is `>=` than `num` and is a multiple of 64
 #[inline]
 pub fn round_upto_multiple_of_64(num: usize) -> usize {
@@ -47,7 +49,7 @@ pub fn get_bit(data: &[u8], i: usize) -> bool {
 /// responsible to guarantee that `i` is within bounds.
 #[inline]
 pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool {
-    (*data.add(i / 8) & (1 << (i % 8))) != 0
+    unsafe { (*data.add(i / 8) & (1 << (i % 8))) != 0 }
 }
 
 /// Sets bit at position `i` for `data` to 1
@@ -64,7 +66,9 @@ pub fn set_bit(data: &mut [u8], i: usize) {
 /// responsible to guarantee that `i` is within bounds.
 #[inline]
 pub unsafe fn set_bit_raw(data: *mut u8, i: usize) {
-    *data.add(i / 8) |= 1 << (i % 8);
+    unsafe {
+        *data.add(i / 8) |= 1 << (i % 8);
+    }
 }
 
 /// Sets bit at position `i` for `data` to 0
@@ -81,7 +85,9 @@ pub fn unset_bit(data: &mut [u8], i: usize) {
 /// responsible to guarantee that `i` is within bounds.
 #[inline]
 pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) {
-    *data.add(i / 8) &= !(1 << (i % 8));
+    unsafe {
+        *data.add(i / 8) &= !(1 << (i % 8));
+    }
 }
 
 /// Returns the ceil of `value`/`divisor`
@@ -90,11 +96,726 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
     value.div_ceil(divisor)
 }
 
+/// Extracts 1 to 7 bits from `slice`, starting `bit_offset` bits into its first byte.
+///
+/// # Arguments
+///
+/// * `slice` - Bit-packed bytes; the first requested bit lives in `slice[0]`
+/// * `number_of_bits_to_read` - How many bits to extract (must be in `1..8`)
+/// * `bit_offset` - Position of the first requested bit inside `slice[0]` (must be < 8)
+///
+/// # Returns
+///
+/// The extracted bits in the least significant positions of a `u8`, zero above them
+///
+/// # Panics
+/// - If `number_of_bits_to_read` is 0 or >= 8
+/// - If `bit_offset` is >= 8
+/// - If `slice` is empty or too short to hold all the requested bits
+#[inline]
+pub(crate) fn read_up_to_byte_from_offset(
+    slice: &[u8],
+    number_of_bits_to_read: usize,
+    bit_offset: usize,
+) -> u8 {
+    // Validate the arguments; the order decides which message a bad call reports
+    assert!(number_of_bits_to_read < 8, "can read up to 8 bits only");
+    assert!(bit_offset < 8, "bit offset must be less than 8");
+    assert_ne!(
+        number_of_bits_to_read, 0,
+        "number of bits to read must be greater than 0"
+    );
+    assert_ne!(slice.len(), 0, "slice must not be empty");
+
+    // One byte is enough unless the requested bits cross into the next one
+    let bytes_needed = ceil(bit_offset + number_of_bits_to_read, 8);
+    assert!(slice.len() >= bytes_needed, "slice is too small");
+
+    // Upper part of the first byte, then the bits of each following byte placed above it
+    let low = slice[0] >> bit_offset;
+    let gathered = slice[1..bytes_needed]
+        .iter()
+        .enumerate()
+        .fold(low, |acc, (i, &byte)| {
+            acc | (byte << ((i + 1) * 8 - bit_offset))
+        });
+
+    // Drop everything above the requested width
+    let keep = (1 << number_of_bits_to_read) - 1;
+    gathered & keep
+}
+
+/// Applies a bitwise operation between `left` and another bit-packed byte slice
+/// (`right`), storing the result in `left`
+///
+/// Note: `op` is handed up to 64 bits (one `u64`) per call.
+///
+/// # Arguments
+///
+/// * `left` - The mutable buffer to be modified in-place
+/// * `left_offset_in_bits` - Starting bit offset in the left buffer
+/// * `right` - slice of bit-packed bytes in LSB order
+/// * `right_offset_in_bits` - Starting bit offset in the right buffer
+/// * `len_in_bits` - Number of bits to process
+/// * `op` - Binary operation to apply (e.g., `|a, b| a & b`). Applied a word at a time
+///
+/// # Example: Modify entire buffer
+/// ```
+/// # use arrow_buffer::MutableBuffer;
+/// # use arrow_buffer::bit_util::apply_bitwise_binary_op;
+/// let mut left = MutableBuffer::new(2);
+/// left.extend_from_slice(&[0b11110000u8, 0b00110011u8]);
+/// let right = &[0b10101010u8, 0b10101010u8];
+/// // apply bitwise AND between left and right buffers, updating left in place
+/// apply_bitwise_binary_op(left.as_slice_mut(), 0, right, 0, 16, |a, b| a & b);
+/// assert_eq!(left.as_slice(), &[0b10100000u8, 0b00100010u8]);
+/// ```
+///
+/// # Example: Modify buffer with offsets
+/// ```
+/// # use arrow_buffer::MutableBuffer;
+/// # use arrow_buffer::bit_util::apply_bitwise_binary_op;
+/// let mut left = MutableBuffer::new(2);
+/// left.extend_from_slice(&[0b00000000u8, 0b00000000u8]);
+/// let right = &[0b10110011u8, 0b11111110u8];
+/// // apply bitwise OR between left and right buffers,
+/// // Apply only 8 bits starting from bit offset 3 in left and bit offset 2 in right
+/// apply_bitwise_binary_op(left.as_slice_mut(), 3, right, 2, 8, |a, b| a | b);
+/// assert_eq!(left.as_slice(), &[0b01100000, 0b00000101u8]);
+/// ```
+///
+/// # Panics
+///
+/// If the offset or lengths exceed the buffer or slice size.
+pub fn apply_bitwise_binary_op<F>(
+    left: &mut [u8],
+    left_offset_in_bits: usize,
+    right: impl AsRef<[u8]>,
+    right_offset_in_bits: usize,
+    len_in_bits: usize,
+    mut op: F,
+) where
+    F: FnMut(u64, u64) -> u64,
+{
+    if len_in_bits == 0 {
+        return;
+    }
+
+    // offset inside a byte
+    let bit_offset = left_offset_in_bits % 8;
+
+    if bit_offset == 0 {
+        byte_aligned_bitwise_bin_op_helper(
+            left,
+            left_offset_in_bits,
+            right,
+            right_offset_in_bits,
+            len_in_bits,
+            op,
+        );
+    } else {
+        // If we are not byte aligned, run `op` on the first few bits to reach byte alignment
+        let bits_to_next_byte = (8 - bit_offset)
+            // Minimum with the amount of bits we need to process
+            // to avoid reading out of bounds
+            .min(len_in_bits);
+
+        {
+            let right_byte_offset = right_offset_in_bits / 8;
+
+            // Read the same amount of bits from the right buffer
+            let right_first_byte: u8 = crate::util::bit_util::read_up_to_byte_from_offset(
+                &right.as_ref()[right_byte_offset..],
+                bits_to_next_byte,
+                // Right bit offset
+                right_offset_in_bits % 8,
+            );
+
+            // `align_to_byte` rewrites every bit up to the end of the byte, but the range
+            // may stop earlier: keep the current value of the bits that are not in range.
+            let head = (1_u64 << bits_to_next_byte) - 1;
+            align_to_byte(
+                left,
+                &mut |cur: u64| (op(cur & head, right_first_byte as u64) & head) | (cur & !head),
+                left_offset_in_bits,
+            );
+        }
+
+        let offset_in_bits = left_offset_in_bits + bits_to_next_byte;
+        let right_offset_in_bits = right_offset_in_bits + bits_to_next_byte;
+        let len_in_bits = len_in_bits.saturating_sub(bits_to_next_byte);
+
+        if len_in_bits == 0 {
+            return;
+        }
+
+        // We are now byte aligned
+        byte_aligned_bitwise_bin_op_helper(
+            left,
+            offset_in_bits,
+            right,
+            right_offset_in_bits,
+            len_in_bits,
+            op,
+        );
+    }
+}
+
+/// Apply a bitwise operation to a mutable buffer, updating it in place.
+///
+/// Note: `op` is handed up to 64 bits (one `u64`) per call.
+///
+/// # Arguments
+///
+/// * `offset_in_bits` - Starting bit offset in `buffer`
+/// * `len_in_bits` - Number of bits to process
+/// * `op` - Unary operation to apply (e.g., `|a| !a`). Applied a word at a time
+///
+/// # Example: Modify entire buffer
+/// ```
+/// # use arrow_buffer::MutableBuffer;
+/// # use arrow_buffer::bit_util::apply_bitwise_unary_op;
+/// let mut buffer = MutableBuffer::new(2);
+/// buffer.extend_from_slice(&[0b11110000u8, 0b00110011u8]);
+/// // apply bitwise NOT to the buffer in place
+/// apply_bitwise_unary_op(buffer.as_slice_mut(), 0, 16, |a| !a);
+/// assert_eq!(buffer.as_slice(), &[0b00001111u8, 0b11001100u8]);
+/// ```
+///
+/// # Example: Modify buffer with offsets
+/// ```
+/// # use arrow_buffer::MutableBuffer;
+/// # use arrow_buffer::bit_util::apply_bitwise_unary_op;
+/// let mut buffer = MutableBuffer::new(2);
+/// buffer.extend_from_slice(&[0b00000000u8, 0b00000000u8]);
+/// // apply bitwise NOT to 8 bits starting from bit offset 3
+/// apply_bitwise_unary_op(buffer.as_slice_mut(), 3, 8, |a| !a);
+/// assert_eq!(buffer.as_slice(), &[0b11111000u8, 0b00000111u8]);
+/// ```
+///
+/// # Panics
+///
+/// If the offset and length exceed the buffer size.
+pub fn apply_bitwise_unary_op<F>(
+    buffer: &mut [u8],
+    offset_in_bits: usize,
+    len_in_bits: usize,
+    mut op: F,
+) where
+    F: FnMut(u64) -> u64,
+{
+    if len_in_bits == 0 {
+        return;
+    }
+
+    // offset inside a byte
+    let left_bit_offset = offset_in_bits % 8;
+
+    if left_bit_offset == 0 {
+        byte_aligned_bitwise_unary_op_helper(buffer, offset_in_bits, len_in_bits, op);
+    } else {
+        // `align_to_byte` rewrites every bit up to the end of the byte, but the range may
+        // stop earlier: cap the bit count and keep the bits that are not in range as is.
+        let bits_to_next_byte = (8 - left_bit_offset).min(len_in_bits);
+        let head = (1_u64 << bits_to_next_byte) - 1;
+        let mut head_op = |cur: u64| (op(cur & head) & head) | (cur & !head);
+        align_to_byte(buffer, &mut head_op, offset_in_bits);
+
+        let offset_in_bits = offset_in_bits + bits_to_next_byte;
+        let len_in_bits = len_in_bits.saturating_sub(bits_to_next_byte);
+
+        if len_in_bits == 0 {
+            return;
+        }
+
+        // We are now byte aligned
+        byte_aligned_bitwise_unary_op_helper(buffer, offset_in_bits, len_in_bits, op);
+    }
+}
+
+/// Applies a binary operation to a range of `left` that starts on a byte boundary.
+///
+/// The complete 64-bit words of the range are combined one `u64` at a time through
+/// `U64UnalignedSlice::zip_modify`; the bits that are left over (fewer than 64) go
+/// through `handle_mutable_buffer_remainder`.
+///
+/// # Arguments
+///
+/// * `left` - Buffer that is updated in place
+/// * `left_offset_in_bits` - Start of the range in `left` (must be a multiple of 8)
+/// * `right` - Bit-packed bytes providing the second operand
+/// * `right_offset_in_bits` - Start of the range in `right`
+/// * `len_in_bits` - Number of bits to process
+/// * `op` - Binary operation, called as `op(left_bits, right_bits)`
+///
+/// # Panics
+///
+/// If `left_offset_in_bits` is not a multiple of 8, or if the left and right sides do not
+/// split into the same number of words and leftover bytes. `U64UnalignedSlice::split`
+/// additionally checks that the range fits in `left`.
+#[inline]
+fn byte_aligned_bitwise_bin_op_helper<F>(
+    left: &mut [u8],
+    left_offset_in_bits: usize,
+    right: impl AsRef<[u8]>,
+    right_offset_in_bits: usize,
+    len_in_bits: usize,
+    mut op: F,
+) where
+    F: FnMut(u64, u64) -> u64,
+{
+    // The sub-byte prefix, if any, has to be dealt with before calling this helper
+    assert_eq!(
+        left_offset_in_bits % 8,
+        0,
+        "offset_in_bits must be byte aligned"
+    );
+
+    // Left side: the complete 64-bit words, and the bytes holding the leftover bits
+    let (words, tail) = U64UnalignedSlice::split(left, left_offset_in_bits, len_in_bits);
+
+    // Right side: the same number of bits, seen as u64 chunks plus a remainder
+    let right_bits = BitChunks::new(right.as_ref(), right_offset_in_bits, len_in_bits);
+    let tail_len = right_bits.remainder_len();
+    assert_eq!(self::ceil(tail_len, 8), tail.len());
+
+    let right_words = right_bits.iter();
+    assert_eq!(right_words.len(), words.len());
+
+    // Combine the complete words pairwise
+    words.zip_modify(right_words, &mut op);
+
+    // Then the leftover bits, unless the length is a multiple of 64
+    if tail_len > 0 {
+        handle_mutable_buffer_remainder(&mut op, tail, right_bits.remainder_bits(), tail_len);
+    }
+}
+
+/// Applies a unary operation to a range of `buffer` that starts on a byte boundary.
+///
+/// The complete 64-bit words of the range are transformed one `u64` at a time; the bits
+/// that are left over (fewer than 64) go through `handle_mutable_buffer_remainder_unary`.
+///
+/// # Arguments
+///
+/// * `buffer` - Buffer that is updated in place
+/// * `offset_in_bits` - Start of the range (must be a multiple of 8)
+/// * `len_in_bits` - Number of bits to process
+/// * `op` - Unary operation to apply (e.g., `|a| !a`)
+#[inline]
+fn byte_aligned_bitwise_unary_op_helper<F>(
+    buffer: &mut [u8],
+    offset_in_bits: usize,
+    len_in_bits: usize,
+    mut op: F,
+) where
+    F: FnMut(u64) -> u64,
+{
+    // The sub-byte prefix, if any, has to be dealt with before calling this helper
+    assert_eq!(offset_in_bits % 8, 0, "offset_in_bits must be byte aligned");
+
+    // The complete 64-bit words, and the bytes holding the leftover bits
+    let (words, tail) = U64UnalignedSlice::split(buffer, offset_in_bits, len_in_bits);
+
+    // Number of leftover bits; `tail` must be exactly the bytes they occupy
+    let tail_len = len_in_bits % 64;
+    assert_eq!(self::ceil(tail_len, 8), tail.len());
+
+    // Complete words first
+    words.apply_unary_op(&mut op);
+
+    // Then the leftover bits, unless the length is a multiple of 64
+    if tail_len > 0 {
+        handle_mutable_buffer_remainder_unary(&mut op, tail, tail_len);
+    }
+}
+
+/// Applies `op` to the bits of one byte, from `offset_in_bits` up to the end of that byte.
+///
+/// The bits of the byte that come before the offset are written back unchanged.
+///
+/// There is no length parameter: every bit from the offset to the end of the byte is
+/// replaced by the corresponding bit of the value returned by `op`. To modify fewer
+/// bits, `op` has to return the input value for the bits that must not change.
+///
+/// `op` receives the bits shifted down so that the bit at `offset_in_bits` is bit 0.
+/// Its result is truncated to a byte before it is shifted back, so the bits that do
+/// not fit in the byte are dropped.
+///
+/// # Arguments
+///
+/// * `buffer` - The mutable buffer to modify
+/// * `op` - Unary operation to apply
+/// * `offset_in_bits` - Position of the first bit to modify, counted from the start
+///   of `buffer`
+///
+/// # Panics
+///
+/// If the byte containing `offset_in_bits` is outside of `buffer`.
+fn align_to_byte<F>(buffer: &mut [u8], op: &mut F, offset_in_bits: usize)
+where
+    F: FnMut(u64) -> u64,
+{
+    let (byte_index, shift) = (offset_in_bits / 8, offset_in_bits % 8);
+    let original = buffer[byte_index];
+
+    // Ones for the bits below the offset: those are not part of the operation
+    let below_offset: u8 = (1 << shift) - 1;
+
+    // Hand the remaining bits to `op` with the first of them at bit 0, truncate the
+    // result to a byte and move it back to where the bits came from
+    let updated = (op((original >> shift) as u64) as u8) << shift;
+
+    // Bits below the offset from the buffer, everything else from the operation
+    buffer[byte_index] = (original & below_offset) | (updated & !below_offset);
+}
+
+/// View over the complete 64-bit words at the start of a byte range of a `&mut [u8]`.
+///
+/// The words are reached through a raw pointer because nothing guarantees that the
+/// bytes are aligned for `u64`; every access is therefore an unaligned read or write.
+///
+/// Values are created by [`Self::split`], which also hands back the bytes of the range
+/// that do not form a complete word.
+///
+/// Handle the following:
+/// 1. the pointer cannot be used after the borrow of the byte slice ends (lifetime `'a`)
+/// 2. we read/write within the bounds
+/// 3. we read and write unaligned, using little-endian byte order
+///
+/// This does not deallocate the underlying pointer when dropped
+///
+/// This is the only place that uses unsafe code to read and write unaligned
+struct U64UnalignedSlice<'a> {
+    /// Pointer to the start of the u64 data
+    ///
+    /// A raw pointer, as the data came from a u8 slice and may not be aligned for u64
+    ptr: *mut u64,
+
+    /// Number of u64 elements
+    len: usize,
+
+    /// Marker to tie the pointer to the exclusive borrow of the u8 slice
+    _marker: std::marker::PhantomData<&'a mut [u8]>,
+}
+
+impl<'a> U64UnalignedSlice<'a> {
+    /// Create a new [`U64UnalignedSlice`] from a `&mut [u8]` buffer
+    ///
+    /// # Arguments
+    ///
+    /// * `buffer` - Bytes containing the bit range
+    /// * `offset_in_bits` - Start of the bit range; the words start at byte `offset_in_bits / 8`
+    /// * `len_in_bits` - Length of the bit range; `len_in_bits / 64` words are handed out
+    ///
+    /// # Returns
+    ///
+    /// The [`U64UnalignedSlice`] and the bytes of the range that are not part of the u64
+    /// chunks (at most 8 bytes when `offset_in_bits` is a multiple of 8)
+    ///
+    /// # Panics
+    ///
+    /// If the bit range does not fit in `buffer`.
+    fn split(
+        buffer: &'a mut [u8],
+        offset_in_bits: usize,
+        len_in_bits: usize,
+    ) -> (Self, &'a mut [u8]) {
+        // 1. Prepare the buffers
+        let left_buffer_mut: &mut [u8] = {
+            let last_offset = (offset_in_bits + len_in_bits).div_ceil(8);
+            assert!(last_offset <= buffer.len());
+
+            let byte_offset = offset_in_bits / 8;
+
+            &mut buffer[byte_offset..last_offset]
+        };
+
+        let number_of_u64_we_can_fit = len_in_bits / (u64::BITS as usize);
+
+        // 2. Split
+        let u64_len_in_bytes = number_of_u64_we_can_fit * size_of::<u64>();
+
+        assert!(u64_len_in_bytes <= left_buffer_mut.len());
+        let (bytes_for_u64, remainder) = left_buffer_mut.split_at_mut(u64_len_in_bytes);
+
+        // Only stored here. With zero words this is the pointer of an empty slice, which
+        // is never dereferenced because every access is guarded by `len`.
+        let ptr = bytes_for_u64.as_mut_ptr() as *mut u64;
+
+        let this = Self {
+            ptr,
+            len: number_of_u64_we_can_fit,
+            _marker: std::marker::PhantomData,
+        };
+
+        (this, remainder)
+    }
+
+    /// Number of u64 elements
+    fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Modify the underlying u64 data in place using a binary operation
+    /// with another iterator.
+    ///
+    /// Every word `w` becomes `map(w, r)`, where `r` is the item that `zip_iter`
+    /// yields at the same position.
+    ///
+    /// # Panics
+    ///
+    /// If `zip_iter.len()` differs from the number of words.
+    fn zip_modify(
+        self,
+        zip_iter: impl ExactSizeIterator<Item = u64>,
+        mut map: impl FnMut(u64, u64) -> u64,
+    ) {
+        assert_eq!(self.len, zip_iter.len());
+
+        // `ExactSizeIterator` is a safe trait, so the length checked above is a promise and
+        // not a guarantee: an iterator yielding more items than it reported must not be able
+        // to push the writes past the last word. Zipping with `0..self.len` bounds the loop
+        // by the number of words, whatever the iterator does.
+        for (index, right) in (0..self.len).zip(zip_iter) {
+            // SAFETY: `index < self.len`
+            unsafe { self.apply_bin_op(index, right, &mut map) };
+        }
+    }
+
+    /// Centralized function to correctly read the u64 value at `word_index` and write
+    /// back the result of `map(value, right)`
+    ///
+    /// The word is decoded from and encoded back to little-endian byte order, so `map`
+    /// sees the same value on every platform and the buffer keeps its bit-packed layout.
+    ///
+    /// # SAFETY
+    /// the caller must ensure that `word_index < self.len`
+    ///
+    #[inline]
+    unsafe fn apply_bin_op(
+        &self,
+        word_index: usize,
+        right: u64,
+        mut map: impl FnMut(u64, u64) -> u64,
+    ) {
+        // SAFETY: `split` took `ptr` from a slice of exactly `len * 8` bytes and the caller
+        // guarantees `word_index < len`, so the whole word lies inside that slice
+        let word = unsafe { self.ptr.add(word_index) };
+
+        // SAFETY: `word` is valid for reads (see above).
+        // Reading unaligned as we came from u8 slice
+        let stored = unsafe { word.read_unaligned() };
+
+        // bit-packed buffers are stored starting with the least-significant byte first
+        // so when reading as u64 on a big-endian machine, the bytes need to be swapped
+        let current_input = u64::from_le(stored);
+
+        let combined = map(current_input, right);
+
+        // Write the result back, swapping the bytes again on a big-endian machine:
+        // storing the native representation there would scramble the bit positions.
+        //
+        // SAFETY: The pointer came from mutable u8 slice so the pointer is valid for writes,
+        // and we need to write unaligned
+        unsafe { word.write_unaligned(combined.to_le()) }
+    }
+
+    /// Modify the underlying u64 data in place using a unary operation.
+    ///
+    /// Every word `w` becomes `map(w)`.
+    ///
+    /// Built on [`Self::apply_bin_op`] with an unused right operand.
+    fn apply_unary_op(self, mut map: impl FnMut(u64) -> u64) {
+        for index in 0..self.len {
+            // I hope the function get inlined and the compiler remove the dead right parameter
+            //
+            // SAFETY: `index < self.len`
+            unsafe { self.apply_bin_op(index, 0, &mut |left, _| map(left)) };
+        }
+    }
+}
+
+/// Applies a binary operation to the trailing bits (fewer than 64) of a range.
+///
+/// The in-range bits of the left side are read from the slice, combined with the right
+/// side by `op`, and written back by `set_remainder_bits`.
+///
+/// # Arguments
+///
+/// * `op` - Binary operation, called once as `op(left_bits, right_remainder_bits)`
+/// * `start_remainder_mut_slice` - Bytes holding the trailing bits of the left side,
+///   the length must be equal to `ceil(remainder_len, 8)`
+/// * `right_remainder_bits` - Right operand bits
+/// * `remainder_len` - Number of remainder bits
+#[inline]
+fn handle_mutable_buffer_remainder<F>(
+    op: &mut F,
+    start_remainder_mut_slice: &mut [u8],
+    right_remainder_bits: u64,
+    remainder_len: usize,
+) where
+    F: FnMut(u64, u64) -> u64,
+{
+    // Left operand: only the in-range bits of the trailing bytes
+    let left_bits = get_remainder_bits(start_remainder_mut_slice, remainder_len);
+    // Combine with the right operand and store the in-range bits of the result
+    set_remainder_bits(
+        start_remainder_mut_slice,
+        op(left_bits, right_remainder_bits),
+        remainder_len,
+    );
+}
+
+/// Write remainder bits back to buffer while preserving bits outside the range.
+///
+/// This function carefully updates only the specified bits, leaving all other
+/// bits in the affected bytes unchanged.
+///
+/// # Arguments
+///
+/// * `start_remainder_mut_slice` - the slice of bytes to write the remainder bits to,
+///   the length must be equal to `ceil(remainder_len, 8)`
+/// * `rem` - The result bits to write
+/// * `remainder_len` - Number of bits to write
+///
+/// # Panics
+///
+/// If the slice is empty, `remainder_len >= 64` or the slice length is not as described above.
+#[inline]
+fn set_remainder_bits(start_remainder_mut_slice: &mut [u8], rem: u64, remainder_len: usize) {
+    assert_ne!(
+        start_remainder_mut_slice.len(),
+        0,
+        "start_remainder_mut_slice must not be empty"
+    );
+    assert!(remainder_len < 64, "remainder_len must be less than 64");
+
+    // This assertion is to make sure that the last byte in the slice is the boundary byte
+    // (i.e., the byte that contains both remainder bits and bits outside the remainder)
+    assert_eq!(
+        start_remainder_mut_slice.len(),
+        remainder_len.div_ceil(8),
+        "start_remainder_mut_slice length must be equal to ceil(remainder_len, 8)"
+    );
+
+    // Need to update the remainder bytes in the mutable buffer
+    // but not override the bits outside the remainder
+
+    // Update `rem` end with the current bytes in the mutable buffer
+    // to preserve the bits outside the remainder
+    let rem = {
+        // 1. Read the byte that we will override
+        //    we only read the last byte as we verified that start_remainder_mut_slice length is
+        //    equal to ceil(remainder_len, 8), which means the last byte is the boundary byte
+        //    containing both remainder bits and bits outside the remainder
+        let current = start_remainder_mut_slice
+            .last()
+            // Unwrap as we already validated the slice is not empty
+            .unwrap();
+
+        // Move the byte to the position it has in the little-endian word built below:
+        // being the last byte of the slice, it holds the bits from `(len - 1) * 8` on.
+        // Left in bits 0..8 it would be erased by the outside mask whenever the
+        // remainder is longer than 8 bits.
+        let boundary_shift = (start_remainder_mut_slice.len() - 1) * 8;
+        let current = (*current as u64) << boundary_shift;
+
+        // Mask where the bits that are inside the remainder are 1
+        // and the bits outside the remainder are 0
+        let inside_remainder_mask = (1 << remainder_len) - 1;
+        // Mask where the bits that are outside the remainder are 1
+        // and the bits inside the remainder are 0
+        let outside_remainder_mask = !inside_remainder_mask;
+
+        // 2. Only keep the bits that are outside the remainder for the value from the mutable buffer
+        let current = current & outside_remainder_mask;
+
+        // 3. Only keep the bits that are inside the remainder for the value from the operation
+        let rem = rem & inside_remainder_mask;
+
+        // 4. Combine the two values
+        current | rem
+    };
+
+    // Write back the result to the mutable slice
+    {
+        let remainder_bytes = remainder_len.div_ceil(8);
+
+        // we are counting starting from the least significant bit, so to_le_bytes should be correct
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+
+        // Both slices are `remainder_bytes` long (asserted above), so a plain copy is enough
+        start_remainder_mut_slice.copy_from_slice(rem);
+    }
+}
+
+/// Read remainder bits from a slice.
+///
+/// Reads the specified number of bits from slice and returns them as a u64.
+///
+/// # Arguments
+///
+/// * `remainder` - slice to the start of the bits, `ceil(remainder_len, 8)` bytes long
+/// * `remainder_len` - Number of bits to read (must be < 64, otherwise this panics)
+///
+/// # Returns
+///
+/// A u64 containing the bits in the least significant positions
+#[inline]
+fn get_remainder_bits(remainder: &[u8], remainder_len: usize) -> u64 {
+    assert!(remainder_len < 64, "remainder_len must be less than 64");
+    assert_eq!(
+        remainder.len(),
+        remainder_len.div_ceil(8),
+        "remainder and remainder len ceil must be the same"
+    );
+
+    let bits = remainder
+        .iter()
+        .enumerate()
+        .fold(0_u64, |acc, (index, &byte)| {
+            acc | (byte as u64) << (index * 8)
+        });
+
+    bits & ((1 << remainder_len) - 1)
+}
+
+/// Applies a unary operation to the trailing bits (fewer than 64) of a range.
+///
+/// Unary counterpart of `handle_mutable_buffer_remainder`.
+///
+/// The in-range bits are read from the slice, passed through `op`, and written back by
+/// `set_remainder_bits`.
+///
+/// # Arguments
+///
+/// * `op` - Unary operation, called once with the in-range bits
+/// * `start_remainder_mut` - Bytes holding the trailing bits,
+///   the length must be equal to `ceil(remainder_len, 8)`
+/// * `remainder_len` - Number of remainder bits
+#[inline]
+fn handle_mutable_buffer_remainder_unary<F>(
+    op: &mut F,
+    start_remainder_mut: &mut [u8],
+    remainder_len: usize,
+) where
+    F: FnMut(u64) -> u64,
+{
+    // Operand: only the in-range bits of the trailing bytes
+    let current_bits = get_remainder_bits(start_remainder_mut, remainder_len);
+
+    // Transform them and store the in-range bits of the result
+    set_remainder_bits(start_remainder_mut, op(current_bits), remainder_len);
+}
+
 #[cfg(test)]
 mod tests {
     use std::collections::HashSet;
 
     use super::*;
+    use crate::bit_iterator::BitIterator;
+    use crate::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
     use rand::rngs::StdRng;
     use rand::{Rng, SeedableRng};
 
@@ -275,4 +996,500 @@ mod tests {
         assert_eq!(ceil(10, 10000000000), 1);
         assert_eq!(ceil(10000000000, 1000000000), 10);
     }
+
+    #[test]
+    fn test_read_up_to() {
+        let all_ones = &[0b10111001, 0b10001100];
+
+        for (bit_offset, expected) in [
+            (0, 0b00000001),
+            (1, 0b00000000),
+            (2, 0b00000000),
+            (3, 0b00000001),
+            (4, 0b00000001),
+            (5, 0b00000001),
+            (6, 0b00000000),
+            (7, 0b00000001),
+        ] {
+            let result = read_up_to_byte_from_offset(all_ones, 1, bit_offset);
+            assert_eq!(
+                result, expected,
+                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
+            );
+        }
+
+        for (bit_offset, expected) in [
+            (0, 0b00000001),
+            (1, 0b00000000),
+            (2, 0b00000010),
+            (3, 0b00000011),
+            (4, 0b00000011),
+            (5, 0b00000001),
+            (6, 0b00000010),
+            (7, 0b00000001),
+        ] {
+            let result = read_up_to_byte_from_offset(all_ones, 2, bit_offset);
+            assert_eq!(
+                result, expected,
+                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
+            );
+        }
+
+        for (bit_offset, expected) in [
+            (0, 0b00111001),
+            (1, 0b00011100),
+            (2, 0b00101110),
+            (3, 0b00010111),
+            (4, 0b00001011),
+            (5, 0b00100101),
+            (6, 0b00110010),
+            (7, 0b00011001),
+        ] {
+            let result = read_up_to_byte_from_offset(all_ones, 6, bit_offset);
+            assert_eq!(
+                result, expected,
+                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
+            );
+        }
+
+        for (bit_offset, expected) in [
+            (0, 0b00111001),
+            (1, 0b01011100),
+            (2, 0b00101110),
+            (3, 0b00010111),
+            (4, 0b01001011),
+            (5, 0b01100101),
+            (6, 0b00110010),
+            (7, 0b00011001),
+        ] {
+            let result = read_up_to_byte_from_offset(all_ones, 7, bit_offset);
+            assert_eq!(
+                result, expected,
+                "failed at bit_offset {bit_offset}. result, expected:\n{result:08b}\n{expected:08b}"
+            );
+        }
+    }
+
+    /// Verifies that a binary operation applied to a buffer using u64 chunks
+    /// is the same as applying the operation bit by bit.
+    fn test_mutable_buffer_bin_op_helper<F, G>(
+        left_data: &[bool],
+        right_data: &[bool],
+        left_offset_in_bits: usize,
+        right_offset_in_bits: usize,
+        len_in_bits: usize,
+        op: F,
+        mut expected_op: G,
+    ) where
+        F: FnMut(u64, u64) -> u64,
+        G: FnMut(bool, bool) -> bool,
+    {
+        let mut left_buffer = BooleanBufferBuilder::new(len_in_bits);
+        left_buffer.append_slice(left_data);
+        let right_buffer = BooleanBuffer::from(right_data);
+
+        let expected: Vec<bool> = left_data
+            .iter()
+            .skip(left_offset_in_bits)
+            .zip(right_data.iter().skip(right_offset_in_bits))
+            .take(len_in_bits)
+            .map(|(l, r)| expected_op(*l, *r))
+            .collect();
+
+        apply_bitwise_binary_op(
+            left_buffer.as_slice_mut(),
+            left_offset_in_bits,
+            right_buffer.inner(),
+            right_offset_in_bits,
+            len_in_bits,
+            op,
+        );
+
+        let result: Vec<bool> =
+            BitIterator::new(left_buffer.as_slice(), left_offset_in_bits, len_in_bits).collect();
+
+        assert_eq!(
+            result, expected,
+            "Failed with left_offset={}, right_offset={}, len={}",
+            left_offset_in_bits, right_offset_in_bits, len_in_bits
+        );
+    }
+
+    /// Checks `apply_bitwise_unary_op` (which works on u64 chunks) against a
+    /// reference computed by running `expected_op` on each bit individually.
+    fn test_mutable_buffer_unary_op_helper<F, G>(
+        data: &[bool],
+        offset_in_bits: usize,
+        len_in_bits: usize,
+        op: F,
+        mut expected_op: G,
+    ) where
+        F: FnMut(u64) -> u64,
+        G: FnMut(bool) -> bool,
+    {
+        let want: Vec<bool> = data
+            .iter()
+            .skip(offset_in_bits)
+            .take(len_in_bits)
+            .map(|bit| expected_op(*bit))
+            .collect();
+
+        let mut builder = BooleanBufferBuilder::new(len_in_bits);
+        builder.append_slice(data);
+
+        apply_bitwise_unary_op(builder.as_slice_mut(), offset_in_bits, len_in_bits, op);
+
+        let got: Vec<bool> =
+            BitIterator::new(builder.as_slice(), offset_in_bits, len_in_bits).collect();
+
+        assert_eq!(
+            got, want,
+            "Failed with offset={}, len={}",
+            offset_in_bits, len_in_bits
+        );
+    }
+
+    // Helper producing two random bool vectors (left, right) of `len` elements each
+    fn create_test_data(len: usize) -> (Vec<bool>, Vec<bool>) {
+        let mut rng = rand::rng();
+        let mut random_bits = || (0..len).map(|_| rng.random_bool(0.5)).collect::<Vec<bool>>();
+        let left = random_bits();
+        (left, random_bits())
+    }
+
+    /// Test all binary operations (AND, OR, XOR) with the given parameters
+    fn test_all_binary_ops(
+        left_data: &[bool],
+        right_data: &[bool],
+        left_offset_in_bits: usize,
+        right_offset_in_bits: usize,
+        len_in_bits: usize,
+    ) {
+        // Test AND
+        test_mutable_buffer_bin_op_helper(
+            left_data,
+            right_data,
+            left_offset_in_bits,
+            right_offset_in_bits,
+            len_in_bits,
+            |a, b| a & b,
+            |a, b| a & b,
+        );
+
+        // Test OR
+        test_mutable_buffer_bin_op_helper(
+            left_data,
+            right_data,
+            left_offset_in_bits,
+            right_offset_in_bits,
+            len_in_bits,
+            |a, b| a | b,
+            |a, b| a | b,
+        );
+
+        // Test XOR
+        test_mutable_buffer_bin_op_helper(
+            left_data,
+            right_data,
+            left_offset_in_bits,
+            right_offset_in_bits,
+            len_in_bits,
+            |a, b| a ^ b,
+            |a, b| a ^ b,
+        );
+    }
+
+    // ===== Combined Binary Operation Tests =====
+
+    #[test]
+    fn test_binary_ops_less_than_byte() {
+        let (left, right) = create_test_data(4);
+        test_all_binary_ops(&left, &right, 0, 0, 4);
+    }
+
+    #[test]
+    fn test_binary_ops_less_than_byte_across_boundary() {
+        let (left, right) = create_test_data(16);
+        test_all_binary_ops(&left, &right, 6, 6, 4);
+    }
+
+    #[test]
+    fn test_binary_ops_exactly_byte() {
+        let (left, right) = create_test_data(16);
+        test_all_binary_ops(&left, &right, 0, 0, 8);
+    }
+
+    #[test]
+    fn test_binary_ops_more_than_byte_less_than_u64() {
+        let (left, right) = create_test_data(64);
+        test_all_binary_ops(&left, &right, 0, 0, 32);
+    }
+
+    #[test]
+    fn test_binary_ops_exactly_u64() {
+        let (left, right) = create_test_data(180);
+        test_all_binary_ops(&left, &right, 0, 0, 64);
+        test_all_binary_ops(&left, &right, 64, 9, 64);
+        test_all_binary_ops(&left, &right, 8, 100, 64);
+        test_all_binary_ops(&left, &right, 1, 15, 64);
+        test_all_binary_ops(&left, &right, 12, 10, 64);
+        test_all_binary_ops(&left, &right, 180 - 64, 2, 64);
+    }
+
+    #[test]
+    fn test_binary_ops_more_than_u64_not_multiple() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 0, 0, 100);
+    }
+
+    #[test]
+    fn test_binary_ops_exactly_multiple_u64() {
+        let (left, right) = create_test_data(256);
+        test_all_binary_ops(&left, &right, 0, 0, 128);
+    }
+
+    #[test]
+    fn test_binary_ops_more_than_multiple_u64() {
+        let (left, right) = create_test_data(300);
+        test_all_binary_ops(&left, &right, 0, 0, 200);
+    }
+
+    #[test]
+    fn test_binary_ops_byte_aligned_no_remainder() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 0, 0, 128);
+    }
+
+    #[test]
+    fn test_binary_ops_byte_aligned_with_remainder() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 0, 0, 100);
+    }
+
+    #[test]
+    fn test_binary_ops_not_byte_aligned_no_remainder() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 3, 3, 128);
+    }
+
+    #[test]
+    fn test_binary_ops_not_byte_aligned_with_remainder() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 5, 5, 100);
+    }
+
+    #[test]
+    fn test_binary_ops_different_offsets() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 3, 7, 50);
+    }
+
+    #[test]
+    fn test_binary_ops_offsets_greater_than_8_less_than_64() {
+        let (left, right) = create_test_data(200);
+        test_all_binary_ops(&left, &right, 13, 27, 100);
+    }
+
+    // ===== NOT (Unary) Operation Tests =====
+
+    #[test]
+    fn test_not_less_than_byte() {
+        let data = vec![true, false, true, false];
+        test_mutable_buffer_unary_op_helper(&data, 0, 4, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_less_than_byte_across_boundary() {
+        let data: Vec<bool> = (0..16).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 6, 4, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_exactly_byte() {
+        let data: Vec<bool> = (0..16).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 8, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_more_than_byte_less_than_u64() {
+        let data: Vec<bool> = (0..64).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 32, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_exactly_u64() {
+        let data: Vec<bool> = (0..128).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 64, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_more_than_u64_not_multiple() {
+        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 100, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_exactly_multiple_u64() {
+        let data: Vec<bool> = (0..256).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 128, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_more_than_multiple_u64() {
+        let data: Vec<bool> = (0..300).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 200, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_byte_aligned_no_remainder() {
+        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 128, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_byte_aligned_with_remainder() {
+        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 0, 100, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_not_byte_aligned_no_remainder() {
+        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 3, 128, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_not_byte_aligned_with_remainder() {
+        let data: Vec<bool> = (0..200).map(|i| i % 2 == 0).collect();
+        test_mutable_buffer_unary_op_helper(&data, 5, 100, |a| !a, |a| !a);
+    }
+
+    // ===== Edge Cases =====
+
+    #[test]
+    fn test_empty_length() {
+        let (left, right) = create_test_data(16);
+        test_all_binary_ops(&left, &right, 0, 0, 0);
+    }
+
+    #[test]
+    fn test_single_bit() {
+        let (left, right) = create_test_data(16);
+        test_all_binary_ops(&left, &right, 0, 0, 1);
+    }
+
+    #[test]
+    fn test_single_bit_at_offset() {
+        let (left, right) = create_test_data(16);
+        test_all_binary_ops(&left, &right, 7, 7, 1);
+    }
+
+    #[test]
+    fn test_not_single_bit() {
+        let data = vec![true, false, true, false];
+        test_mutable_buffer_unary_op_helper(&data, 0, 1, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_not_empty_length() {
+        let data = vec![true, false, true, false];
+        test_mutable_buffer_unary_op_helper(&data, 0, 0, |a| !a, |a| !a);
+    }
+
+    #[test]
+    fn test_less_than_byte_unaligned_and_not_enough_bits() {
+        let left_offset_in_bits = 2;
+        let right_offset_in_bits = 4;
+        let len_in_bits = 1;
+
+        // Single byte
+        let right = (0..8).map(|i| (i / 2) % 2 == 0).collect::<Vec<_>>();
+        // less than a byte
+        let left = (0..3).map(|i| i % 2 == 0).collect::<Vec<_>>();
+        test_all_binary_ops(
+            &left,
+            &right,
+            left_offset_in_bits,
+            right_offset_in_bits,
+            len_in_bits,
+        );
+    }
+
+    #[test]
+    fn test_bitwise_binary_op_offset_out_of_bounds() {
+        let input = vec![0b10101010u8, 0b01010101u8];
+        let mut buffer = MutableBuffer::new(2); // space for 16 bits
+        buffer.extend_from_slice(&input); // only 2 bytes
+        apply_bitwise_binary_op(
+            buffer.as_slice_mut(),
+            100, // exceeds buffer length, becomes a noop
+            [0b11110000u8, 0b00001111u8],
+            0,
+            0,
+            |a, b| a & b,
+        );
+        assert_eq!(buffer.as_slice(), &input);
+    }
+
+    #[test]
+    #[should_panic(expected = "assertion failed: last_offset <= buffer.len()")]
+    fn test_bitwise_binary_op_length_out_of_bounds() {
+        let mut buffer = MutableBuffer::new(2); // space for 16 bits
+        buffer.extend_from_slice(&[0b10101010u8, 0b01010101u8]); // only 2 bytes
+        apply_bitwise_binary_op(
+            buffer.as_slice_mut(),
+            0, // left offset is within the buffer
+            [0b11110000u8, 0b00001111u8],
+            0,
+            100, // length exceeds the 16 bits that were written
+            |a, b| a & b,
+        );
+        assert_eq!(buffer.as_slice(), &[0b10101010u8, 0b01010101u8]);
+    }
+
+    #[test]
+    #[should_panic(expected = "offset + len out of bounds")]
+    fn test_bitwise_binary_op_right_len_out_of_bounds() {
+        let mut buffer = MutableBuffer::new(2); // space for 16 bits
+        buffer.extend_from_slice(&[0b10101010u8, 0b01010101u8]); // only 2 bytes
+        apply_bitwise_binary_op(
+            buffer.as_slice_mut(),
+            0, // left offset is within the buffer
+            [0b11110000u8, 0b00001111u8],
+            1000, // right offset is far past the 16 bits of the right-hand input
+            16,
+            |a, b| a & b,
+        );
+        assert_eq!(buffer.as_slice(), &[0b10101010u8, 0b01010101u8]);
+    }
+
+    #[test]
+    #[should_panic(expected = "the len is 2 but the index is 12")]
+    fn test_bitwise_unary_op_offset_out_of_bounds() {
+        let input = vec![0b10101010u8, 0b01010101u8];
+        let mut buffer = MutableBuffer::new(2); // space for 16 bits
+        buffer.extend_from_slice(&input); // only 2 bytes
+        apply_bitwise_unary_op(
+            buffer.as_slice_mut(),
+            100, // offset is past the 16 bits in the buffer; expected to panic
+            8,
+            |a| !a,
+        );
+        assert_eq!(buffer.as_slice(), &input);
+    }
+
+    #[test]
+    #[should_panic(expected = "assertion failed: last_offset <= buffer.len()")]
+    fn test_bitwise_unary_op_length_out_of_bounds2() {
+        let input = vec![0b10101010u8, 0b01010101u8];
+        let mut buffer = MutableBuffer::new(2); // space for 16 bits
+        buffer.extend_from_slice(&input); // only 2 bytes
+        apply_bitwise_unary_op(
+            buffer.as_slice_mut(),
+            3,   // start at bit 3, to exercise different path
+            100, // exceeds buffer length
+            |a| !a,
+        );
+        assert_eq!(buffer.as_slice(), &input);
+    }
 }
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 49145cf987f9..536bc101a816 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -43,19 +43,20 @@ force_validate = []
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
+arrow-ord = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-select = { workspace = true }
 chrono = { workspace = true }
 half = { version = "2.1", default-features = false }
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 lexical-core = { version = "1.0", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
 atoi = "2.0.0"
-comfy-table = { version = "7.0", optional = true, default-features = false }
+comfy-table = { version = "7", optional = true, default-features = false }
 base64 = "0.22"
 ryu = "1.0.16"
 
 [dev-dependencies]
-criterion = { version = "0.5", default-features = false }
+criterion = { workspace = true, default-features = false }
 half = { version = "2.1", default-features = false }
 rand = "0.9"
 
@@ -74,3 +75,4 @@ harness = false
 [[bench]]
 name = "parse_decimal"
 harness = false
+
diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs
index e7bb84ebe24c..5637bdc689d9 100644
--- a/arrow-cast/src/base64.rs
+++ b/arrow-cast/src/base64.rs
@@ -79,18 +79,14 @@ pub fn b64_decode<E: Engine, O: OffsetSizeTrait>(
     // Safety: offsets monotonically increasing by construction
     let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
 
-    Ok(GenericBinaryArray::new(
-        offsets,
-        Buffer::from_vec(buffer),
-        array.nulls().cloned(),
-    ))
+    GenericBinaryArray::try_new(offsets, Buffer::from_vec(buffer), array.nulls().cloned())
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
     use arrow_array::BinaryArray;
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
 
     fn test_engine<E: Engine>(e: &E, a: &BinaryArray) {
         let encoded = b64_encode(e, a);
diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs
index b86d93bc81a7..71338a6921e9 100644
--- a/arrow-cast/src/cast/decimal.rs
+++ b/arrow-cast/src/cast/decimal.rs
@@ -19,17 +19,89 @@ use crate::cast::*;
 
 /// A utility trait that provides checked conversions between
 /// decimal types inspired by [`NumCast`]
-pub(crate) trait DecimalCast: Sized {
+pub trait DecimalCast: Sized {
+    /// Convert the decimal to an i32, or `None` if it does not fit
+    fn to_i32(self) -> Option<i32>;
+
+    /// Convert the decimal to an i64, or `None` if it does not fit
+    fn to_i64(self) -> Option<i64>;
+
+    /// Convert the decimal to an i128, or `None` if it does not fit
     fn to_i128(self) -> Option<i128>;
 
+    /// Convert the decimal to an i256
     fn to_i256(self) -> Option<i256>;
 
+    /// Convert from another decimal native value, or `None` if it does not fit
     fn from_decimal<T: DecimalCast>(n: T) -> Option<Self>;
 
+    /// Convert from an f64, or `None` if the conversion is not possible
     fn from_f64(n: f64) -> Option<Self>;
 }
 
+impl DecimalCast for i32 {
+    fn to_i32(self) -> Option<i32> {
+        Some(self)
+    }
+
+    fn to_i64(self) -> Option<i64> {
+        Some(i64::from(self)) // lossless widening
+    }
+
+    fn to_i128(self) -> Option<i128> {
+        Some(i128::from(self)) // lossless widening
+    }
+
+    fn to_i256(self) -> Option<i256> {
+        Some(i256::from_i128(i128::from(self)))
+    }
+
+    fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+        n.to_i32() // checked: `None` when `n` does not fit in an i32
+    }
+
+    fn from_f64(n: f64) -> Option<Self> {
+        num_traits::ToPrimitive::to_i32(&n) // explicit trait path, as in the i64 impl
+    }
+}
+
+impl DecimalCast for i64 {
+    fn to_i32(self) -> Option<i32> {
+        self.try_into().ok()
+    }
+
+    fn to_i64(self) -> Option<i64> {
+        Some(self)
+    }
+
+    fn to_i128(self) -> Option<i128> {
+        Some(i128::from(self))
+    }
+
+    fn to_i256(self) -> Option<i256> {
+        Some(i256::from_i128(i128::from(self)))
+    }
+
+    fn from_decimal<T: DecimalCast>(n: T) -> Option<Self> {
+        n.to_i64()
+    }
+
+    fn from_f64(n: f64) -> Option<Self> {
+        // Name the trait explicitly: a plain `n.to_i64()` would resolve to the
+        // `to_i64` in arrow-buffer, which behaves differently.
+        num_traits::ToPrimitive::to_i64(&n)
+    }
+}
+
 impl DecimalCast for i128 {
+    fn to_i32(self) -> Option<i32> {
+        i32::try_from(self).ok()
+    }
+
+    fn to_i64(self) -> Option<i64> {
+        i64::try_from(self).ok()
+    }
+
     fn to_i128(self) -> Option<i128> {
         Some(self)
     }
@@ -48,6 +120,14 @@ impl DecimalCast for i128 {
 }
 
 impl DecimalCast for i256 {
+    fn to_i32(self) -> Option<i32> {
+        self.to_i128().and_then(|v| i32::try_from(v).ok())
+    }
+
+    fn to_i64(self) -> Option<i64> {
+        self.to_i128().and_then(|v| i64::try_from(v).ok())
+    }
+
     fn to_i128(self) -> Option<i128> {
         self.to_i128()
     }
@@ -65,63 +145,96 @@ impl DecimalCast for i256 {
     }
 }
 
-pub(crate) fn cast_decimal_to_decimal_error<I, O>(
+/// Construct closures to upscale decimals from `(input_precision, input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `(f_fallible, f_infallible)` where:
+/// * `f_fallible` yields `None` when the requested cast would overflow
+/// * `f_infallible` is present only when every input is guaranteed to succeed; otherwise it is `None`
+///   and callers must fall back to `f_fallible`
+///
+/// Returns `None` if the required scale increase `delta_scale = output_scale - input_scale`
+/// exceeds the supported precomputed precision table `O::MAX_FOR_EACH_PRECISION`.
+/// In that case, the caller should treat this as an overflow for the output scale
+/// and handle it accordingly (e.g., return a cast error).
+#[allow(clippy::type_complexity)]
+fn make_upscaler<I: DecimalType, O: DecimalType>(
+    input_precision: u8,
+    input_scale: i8,
     output_precision: u8,
     output_scale: i8,
-) -> impl Fn(<I as ArrowPrimitiveType>::Native) -> ArrowError
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
 where
-    I: DecimalType,
-    O: DecimalType,
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    move |x: I::Native| {
-        ArrowError::CastError(format!(
-            "Cannot cast to {}({}, {}). Overflowing on {:?}",
-            O::PREFIX,
-            output_precision,
-            output_scale,
-            x
-        ))
-    }
+    // Widen to i16: the difference of two i8 scales can overflow i8.
+    let delta_scale = (output_scale as i16) - (input_scale as i16);
+    // O::MAX_FOR_EACH_PRECISION[k] stores 10^k - 1 (e.g., 9, 99, 999, ...).
+    // Adding 1 yields exactly 10^k without computing a power at runtime.
+    // Using the precomputed table avoids pow(10, k) and its checked/overflow
+    // handling, which is faster and simpler for scaling by 10^delta_scale.
+    let max = O::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
+    let mul = max.add_wrapping(O::Native::ONE);
+    let f_fallible = move |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok());
+
+    // if the gain in precision (digits) is at least the multiplication due to scaling
+    // every number will fit into the output type
+    // Example: a number of precision 5 [xxxxx] whose scale grows by 3 becomes
+    // [xxxxx000], so for the cast to be infallible the output type needs to provide
+    // at least 8 digits precision. The sum is taken in i16 because precision plus
+    // delta_scale can exceed i8::MAX for wide decimals such as Decimal256.
+    let is_infallible_cast = (input_precision as i16) + delta_scale <= (output_precision as i16);
+    let f_infallible = is_infallible_cast
+        .then_some(move |x| O::Native::from_decimal(x).unwrap().mul_wrapping(mul));
+    Some((f_fallible, f_infallible))
 }
 
-pub(crate) fn convert_to_smaller_scale_decimal<I, O>(
-    array: &PrimitiveArray<I>,
+/// Construct closures to downscale decimals from `(input_precision, input_scale)` to
+/// `(output_precision, output_scale)`.
+///
+/// Returns `(f_fallible, f_infallible)` where:
+/// * `f_fallible` yields `None` when the requested cast would overflow
+/// * `f_infallible` is present only when every input is guaranteed to succeed; otherwise it is `None`
+///   and callers must fall back to `f_fallible`
+///
+/// Returns `None` if the required scale reduction `delta_scale = input_scale - output_scale`
+/// exceeds the supported precomputed precision table `I::MAX_FOR_EACH_PRECISION`.
+/// In this scenario, any value would round to zero (e.g., dividing by 10^k where k exceeds the
+/// available precision). Callers should therefore produce zero values (preserving nulls) rather
+/// than returning an error.
+#[allow(clippy::type_complexity)]
+fn make_downscaler<I: DecimalType, O: DecimalType>(
     input_precision: u8,
     input_scale: i8,
     output_precision: u8,
     output_scale: i8,
-    cast_options: &CastOptions,
-) -> Result<PrimitiveArray<O>, ArrowError>
+) -> Option<(
+    impl Fn(I::Native) -> Option<O::Native>,
+    Option<impl Fn(I::Native) -> O::Native>,
+)>
 where
-    I: DecimalType,
-    O: DecimalType,
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, output_scale);
     let delta_scale = input_scale - output_scale;
-    // if the reduction of the input number through scaling (dividing) is greater
-    // than a possible precision loss (plus potential increase via rounding)
-    // every input number will fit into the output type
-    // Example: If we are starting with any number of precision 5 [xxxxx],
-    // then and decrease the scale by 3 will have the following effect on the representation:
-    // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
-    // The rounding may add an additional digit, so the cast to be infallible,
-    // the output type needs to have at least 3 digits of precision.
-    // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
-    // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
-    let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8);
 
-    let div = I::Native::from_decimal(10_i128)
-        .unwrap()
-        .pow_checked(delta_scale as u32)?;
+    // delta_scale is guaranteed to be > 0, but may also be larger than I::MAX_PRECISION. If so, the
+    // scale change divides out more digits than the input has precision and the result of the cast
+    // is always zero. For example, if we apply delta_scale=10 to a decimal32 value, the largest
+    // possible result is 999999999/10000000000 = 0.0999999999, which rounds to zero. Smaller values
+    // (e.g. 1/10000000000) or larger delta_scale (e.g. 999999999/10000000000000) produce even
+    // smaller results, which also round to zero. In that case, just return an array of zeros.
+    let max = I::MAX_FOR_EACH_PRECISION.get(delta_scale as usize)?;
 
-    let half = div.div_wrapping(I::Native::from_usize(2).unwrap());
+    let div = max.add_wrapping(I::Native::ONE);
+    let half = div.div_wrapping(I::Native::ONE.add_wrapping(I::Native::ONE));
     let half_neg = half.neg_wrapping();
 
-    let f = |x: I::Native| {
+    let f_fallible = move |x: I::Native| {
         // div is >= 10 and so this cannot overflow
         let d = x.div_wrapping(div);
         let r = x.mod_wrapping(div);
@@ -135,23 +248,136 @@ where
         O::Native::from_decimal(adjusted)
     };
 
-    Ok(if is_infallible_cast {
-        // make sure we don't perform calculations that don't make sense w/o validation
-        validate_decimal_precision_and_scale::<O>(output_precision, output_scale)?;
-        let g = |x: I::Native| f(x).unwrap(); // unwrapping is safe since the result is guaranteed
-                                              // to fit into the target type
-        array.unary(g)
+    // if the reduction of the input number through scaling (dividing) is greater
+    // than a possible precision loss (plus potential increase via rounding)
+    // every input number will fit into the output type
+    // Example: If we are starting with any number of precision 5 [xxxxx],
+    // then a decrease of the scale by 3 will have the following effect on the representation:
+    // [xxxxx] -> [xx] (+ 1 possibly, due to rounding).
+    // The rounding may add a digit, so the cast to be infallible,
+    // the output type needs to have at least 3 digits of precision.
+    // e.g. Decimal(5, 3) 99.999 to Decimal(3, 0) will result in 100:
+    // [99999] -> [99] + 1 = [100], a cast to Decimal(2, 0) would not be possible
+    let is_infallible_cast = (input_precision as i8) - delta_scale < (output_precision as i8);
+    let f_infallible = is_infallible_cast.then_some(move |x| f_fallible(x).unwrap());
+    Some((f_fallible, f_infallible))
+}
+
+/// Rescale a single `value` with the supplied closures.
+/// When `f_infallible` is present it is used directly and always yields `Some`.
+/// Otherwise `f` is used and results that exceed `output_precision` become `None`.
+fn apply_rescaler<I: DecimalType, O: DecimalType>(
+    value: I::Native,
+    output_precision: u8,
+    f: impl Fn(I::Native) -> Option<O::Native>,
+    f_infallible: Option<impl Fn(I::Native) -> O::Native>,
+) -> Option<O::Native>
+where
+    I::Native: DecimalCast,
+    O::Native: DecimalCast,
+{
+    match f_infallible {
+        // The infallible path skips the precision check
+        Some(rescale) => Some(rescale(value)),
+        None => f(value).filter(|v| O::is_valid_decimal_precision(*v, output_precision)),
+    }
+}
+
+/// Converts one decimal value from `(input_precision, input_scale)` to
+/// `(output_precision, output_scale)`, yielding the rescaled number if it fits
+/// within the output precision.
+///
+/// Both precision/scale pairs are validated first; an unsupported pair yields `None`.
+/// If `input_scale <= output_scale` the value is upscaled (multiplied by a power of
+/// ten); otherwise it is downscaled (divided by a power of ten, with rounding).
+/// An upscale factor beyond the output type's precision table counts as an overflow
+/// (`None`). A downscale factor beyond the input type's precision table yields zero,
+/// because every possible result rounds to zero at the requested scale.
+///
+/// This is the single-value (row-level) counterpart of the array-oriented decimal
+/// cast helpers.
+///
+/// Returns `None` if the value cannot be represented with the requested precision.
+pub fn rescale_decimal<I: DecimalType, O: DecimalType>(
+    value: I::Native,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+) -> Option<O::Native>
+where
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    validate_decimal_precision_and_scale::<I>(input_precision, input_scale).ok()?;
+    validate_decimal_precision_and_scale::<O>(output_precision, output_scale).ok()?;
+
+    if input_scale <= output_scale {
+        let (up, up_infallible) =
+            make_upscaler::<I, O>(input_precision, input_scale, output_precision, output_scale)?;
+        return apply_rescaler::<I, O>(value, output_precision, up, up_infallible);
+    }
+
+    match make_downscaler::<I, O>(input_precision, input_scale, output_precision, output_scale) {
+        Some((down, down_infallible)) => {
+            apply_rescaler::<I, O>(value, output_precision, down, down_infallible)
+        }
+        // Scale reduction exceeds supported precision; result mathematically rounds to zero
+        None => Some(O::Native::ZERO),
+    }
+}
+
+fn cast_decimal_to_decimal_error<I, O>(
+    output_precision: u8,
+    output_scale: i8,
+) -> impl Fn(<I as ArrowPrimitiveType>::Native) -> ArrowError
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    move |x: I::Native| {
+        ArrowError::CastError(format!(
+            "Cannot cast to {}({}, {}). Overflowing on {:?}",
+            O::PREFIX,
+            output_precision,
+            output_scale,
+            x
+        ))
+    }
+}
+
+fn apply_decimal_cast<I: DecimalType, O: DecimalType>(
+    array: &PrimitiveArray<I>,
+    output_precision: u8,
+    output_scale: i8,
+    f_fallible: impl Fn(I::Native) -> Option<O::Native>,
+    f_infallible: Option<impl Fn(I::Native) -> O::Native>,
+    cast_options: &CastOptions,
+) -> Result<PrimitiveArray<O>, ArrowError>
+where
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    let array = if let Some(f_infallible) = f_infallible {
+        array.unary(f_infallible)
     } else if cast_options.safe {
-        array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)))
+        array.unary_opt(|x| {
+            f_fallible(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))
+        })
     } else {
+        let error = cast_decimal_to_decimal_error::<I, O>(output_precision, output_scale);
         array.try_unary(|x| {
-            f(x).ok_or_else(|| error(x))
-                .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v))
+            f_fallible(x).ok_or_else(|| error(x)).and_then(|v| {
+                O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v)
+            })
         })?
-    })
+    };
+    Ok(array)
 }
 
-pub(crate) fn convert_to_bigger_or_equal_scale_decimal<I, O>(
+fn convert_to_smaller_scale_decimal<I, O>(
     array: &PrimitiveArray<I>,
     input_precision: u8,
     input_scale: i8,
@@ -165,35 +391,58 @@ where
     I::Native: DecimalCast + ArrowNativeTypeOp,
     O::Native: DecimalCast + ArrowNativeTypeOp,
 {
-    let error = cast_decimal_to_decimal_error::<I, O>(output_precision, output_scale);
-    let delta_scale = output_scale - input_scale;
-    let mul = O::Native::from_decimal(10_i128)
-        .unwrap()
-        .pow_checked(delta_scale as u32)?;
+    if let Some((f_fallible, f_infallible)) =
+        make_downscaler::<I, O>(input_precision, input_scale, output_precision, output_scale)
+    {
+        apply_decimal_cast(
+            array,
+            output_precision,
+            output_scale,
+            f_fallible,
+            f_infallible,
+            cast_options,
+        )
+    } else {
+        // Scale reduction exceeds supported precision; result mathematically rounds to zero
+        let zeros = vec![O::Native::ZERO; array.len()];
+        Ok(PrimitiveArray::new(zeros.into(), array.nulls().cloned()))
+    }
+}
 
-    // if the gain in precision (digits) is greater than the multiplication due to scaling
-    // every number will fit into the output type
-    // Example: If we are starting with any number of precision 5 [xxxxx],
-    // then an increase of scale by 3 will have the following effect on the representation:
-    // [xxxxx] -> [xxxxx000], so for the cast to be infallible, the output type
-    // needs to provide at least 8 digits precision
-    let is_infallible_cast = (input_precision as i8) + delta_scale <= (output_precision as i8);
-    let f = |x| O::Native::from_decimal(x).and_then(|x| x.mul_checked(mul).ok());
-
-    Ok(if is_infallible_cast {
-        // make sure we don't perform calculations that don't make sense w/o validation
-        validate_decimal_precision_and_scale::<O>(output_precision, output_scale)?;
-        // unwrapping is safe since the result is guaranteed to fit into the target type
-        let f = |x| O::Native::from_decimal(x).unwrap().mul_wrapping(mul);
-        array.unary(f)
-    } else if cast_options.safe {
-        array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision)))
+fn convert_to_bigger_or_equal_scale_decimal<I, O>(
+    array: &PrimitiveArray<I>,
+    input_precision: u8,
+    input_scale: i8,
+    output_precision: u8,
+    output_scale: i8,
+    cast_options: &CastOptions,
+) -> Result<PrimitiveArray<O>, ArrowError>
+where
+    I: DecimalType,
+    O: DecimalType,
+    I::Native: DecimalCast + ArrowNativeTypeOp,
+    O::Native: DecimalCast + ArrowNativeTypeOp,
+{
+    if let Some((f, f_infallible)) =
+        make_upscaler::<I, O>(input_precision, input_scale, output_precision, output_scale)
+    {
+        apply_decimal_cast(
+            array,
+            output_precision,
+            output_scale,
+            f, // fallible per-value converter
+            f_infallible, // optional converter that needs no validation
+            cast_options,
+        )
     } else {
-        array.try_unary(|x| {
-            f(x).ok_or_else(|| error(x))
-                .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v))
-        })?
-    })
+        // `make_upscaler` gave `None`: scale increase exceeds supported precision, so error out
+        Err(ArrowError::CastError(format!(
+            "Cannot cast to {}({}, {}). Value overflows for output scale",
+            O::PREFIX,
+            output_precision,
+            output_scale
+        )))
+    }
 }
 
 // Only support one type of decimal cast operations
@@ -412,12 +661,11 @@ where
                     parse_string_to_decimal_native::<T>(v, scale as usize)
                         .map_err(|_| {
                             ArrowError::CastError(format!(
-                                "Cannot cast string '{}' to value of {:?} type",
-                                v,
+                                "Cannot cast string '{v}' to value of {} type",
                                 T::DATA_TYPE,
                             ))
                         })
-                        .and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v))
+                        .and_then(|v| T::validate_decimal_precision(v, precision, scale).map(|_| v))
                 })
                 .transpose()
             })
@@ -505,9 +753,8 @@ where
         )?,
         other => {
             return Err(ArrowError::ComputeError(format!(
-                "Cannot cast {:?} to decimal",
-                other
-            )))
+                "Cannot cast {other:?} to decimal",
+            )));
         }
     };
 
@@ -548,7 +795,7 @@ where
                             v
                         ))
                     })
-                    .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v))
+                    .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v))
             })?
             .with_precision_and_scale(precision, scale)
             .map(|a| Arc::new(a) as ArrayRef)
@@ -615,7 +862,11 @@ where
     Ok(Arc::new(value_builder.finish()))
 }
 
-// Cast the decimal array to floating-point array
+/// Cast a decimal array to a floating point array.
+///
+/// Conversion is lossy and follows standard floating point semantics. Values
+/// that exceed the representable range become `INFINITY` or `-INFINITY` without
+/// returning an error.
 pub(crate) fn cast_decimal_to_float<D: DecimalType, T: ArrowPrimitiveType, F>(
     array: &dyn Array,
     op: F,
@@ -671,4 +922,58 @@ mod tests {
         );
         Ok(())
     }
+
+    #[test]
+    fn test_rescale_decimal_upscale_within_precision() {
+        // 123.45 stored as Decimal128(5, 2) is rescaled to Decimal128(8, 5). The three
+        // additional fractional digits multiply the raw value by 10^3, and the eight
+        // resulting digits still fit the output precision.
+        let value = 12_345_i128;
+        let expected = Some(12_345_000_i128);
+
+        let actual = rescale_decimal::<Decimal128Type, Decimal128Type>(value, 5, 2, 8, 5);
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_rescale_decimal_downscale_rounds_half_away_from_zero() {
+        // Going from scale 3 to scale 1 drops two fractional digits. Both inputs sit
+        // exactly on a half (x.x50), so the rounding direction is observable: the
+        // magnitude must grow for either sign.
+        //
+        // Each case is (raw value at scale 3, expected raw value at scale 1).
+        let cases = [
+            (1_050_i128, 11_i128),   // 1.050 -> 1.1
+            (-1_050_i128, -11_i128), // -1.050 -> -1.1
+        ];
+
+        for (value, expected) in cases {
+            let actual = rescale_decimal::<Decimal128Type, Decimal128Type>(value, 5, 3, 5, 1);
+            assert_eq!(actual, Some(expected));
+        }
+    }
+
+    #[test]
+    fn test_rescale_decimal_downscale_large_delta_returns_zero() {
+        let result = rescale_decimal::<Decimal32Type, Decimal32Type>(12_345_i32, 9, 9, 9, 4);
+        assert_eq!(result, Some(0_i32)); // 0.000012345 rounds to 0.0000 at scale 4
+    }
+
+    #[test]
+    fn test_rescale_decimal_upscale_overflow_returns_none() {
+        let result = rescale_decimal::<Decimal32Type, Decimal32Type>(9_999_i32, 4, 0, 5, 2);
+        assert_eq!(result, None); // 9999.00 needs 6 digits but the output precision is 5
+    }
+
+    #[test]
+    fn test_rescale_decimal_invalid_input_precision_scale_returns_none() {
+        let result = rescale_decimal::<Decimal128Type, Decimal128Type>(123_i128, 39, 39, 38, 38);
+        assert_eq!(result, None); // input precision/scale 39 exceed the Decimal128 maximum of 38
+    }
+
+    #[test]
+    fn test_rescale_decimal_invalid_output_precision_scale_returns_none() {
+        let result = rescale_decimal::<Decimal128Type, Decimal128Type>(123_i128, 38, 38, 39, 39);
+        assert_eq!(result, None); // output precision/scale 39 exceed the Decimal128 maximum of 38
+    }
 }
diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs
index eae2f2167b39..64d77236ccd5 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -28,111 +28,92 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
 ) -> Result<ArrayRef, ArrowError> {
     use DataType::*;
 
-    match to_type {
-        Dictionary(to_index_type, to_value_type) => {
-            let dict_array = array
-                .as_any()
-                .downcast_ref::<DictionaryArray<K>>()
-                .ok_or_else(|| {
-                    ArrowError::ComputeError(
-                        "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
-                    )
-                })?;
-
-            let keys_array: ArrayRef =
-                Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
-            let values_array = dict_array.values();
-            let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
-            let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
+    let array = array.as_dictionary::<K>();
+    let from_child_type = array.values().data_type();
+    match (from_child_type, to_type) {
+        (_, Dictionary(to_index_type, to_value_type)) => {
+            dictionary_to_dictionary_cast(array, to_index_type, to_value_type, cast_options)
+        }
+        // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data
+        // copy of the value buffer. Fast path which avoids copying underlying values buffer.
+        // TODO: handle LargeUtf8/LargeBinary -> View (need to check offsets can fit)
+        // TODO: handle cross types (String -> BinaryView, Binary -> StringView)
+        //       (need to validate utf8?)
+        (Utf8, Utf8View) => view_from_dict_values::<K, Utf8Type, StringViewType>(
+            array.keys(),
+            array.values().as_string::<i32>(),
+        ),
+        (Binary, BinaryView) => view_from_dict_values::<K, BinaryType, BinaryViewType>(
+            array.keys(),
+            array.values().as_binary::<i32>(),
+        ),
+        _ => unpack_dictionary(array, to_type, cast_options),
+    }
+}
 
-            // Failure to cast keys (because they don't fit in the
-            // target type) results in NULL values;
-            if cast_keys.null_count() > keys_array.null_count() {
-                return Err(ArrowError::ComputeError(format!(
-                    "Could not convert {} dictionary indexes from {:?} to {:?}",
-                    cast_keys.null_count() - keys_array.null_count(),
-                    keys_array.data_type(),
-                    to_index_type
-                )));
-            }
+fn dictionary_to_dictionary_cast<K: ArrowDictionaryKeyType>(
+    array: &DictionaryArray<K>,
+    to_index_type: &DataType,
+    to_value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    use DataType::*;
 
-            let data = cast_keys.into_data();
-            let builder = data
-                .into_builder()
-                .data_type(to_type.clone())
-                .child_data(vec![cast_values.into_data()]);
+    let keys_array: ArrayRef = Arc::new(PrimitiveArray::<K>::from(array.keys().to_data()));
+    let values_array = array.values();
+    let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
+    let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
 
-            // Safety
-            // Cast keys are still valid
-            let data = unsafe { builder.build_unchecked() };
+    // Keys that cannot be represented in the target index type come back as NULL, so a
+    // higher null count after the cast means some dictionary indexes were not converted
+    if cast_keys.null_count() > keys_array.null_count() {
+        return Err(ArrowError::ComputeError(format!(
+            "Could not convert {} dictionary indexes from {:?} to {:?}",
+            cast_keys.null_count() - keys_array.null_count(),
+            keys_array.data_type(),
+            to_index_type
+        )));
+    }
 
-            // create the appropriate array type
-            let new_array: ArrayRef = match **to_index_type {
-                Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
-                Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
-                Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
-                Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
-                UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
-                UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
-                UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
-                UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
-                _ => {
-                    return Err(ArrowError::CastError(format!(
-                        "Unsupported type {to_index_type:?} for dictionary index"
-                    )));
-                }
-            };
+    let data = cast_keys.into_data();
+    let builder = data
+        .into_builder()
+        .data_type(Dictionary(
+            Box::new(to_index_type.clone()),
+            Box::new(to_value_type.clone()),
+        ))
+        .child_data(vec![cast_values.into_data()]);
 
-            Ok(new_array)
-        }
-        Utf8View => {
-            // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data copy of the value buffer.
-            // we handle it here to avoid the copy.
-            let dict_array = array
-                .as_dictionary::<K>()
-                .downcast_dict::<StringArray>()
-                .ok_or_else(|| {
-                    ArrowError::ComputeError(
-                        "Internal Error: Cannot cast Utf8View to StringArray of expected type"
-                            .to_string(),
-                    )
-                })?;
+    // Safety:
+    // Cast keys are still valid (keys lost to NULL were rejected by the check above)
+    let data = unsafe { builder.build_unchecked() };
 
-            let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
-                dict_array.values(),
-                dict_array.keys(),
-            )?;
-            Ok(Arc::new(string_view))
+    // Wrap the data in a `DictionaryArray` typed by the requested index type
+    let new_array: ArrayRef = match to_index_type {
+        Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
+        Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
+        Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
+        Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
+        UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
+        UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
+        UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
+        UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
+        _ => {
+            return Err(ArrowError::CastError(format!(
+                "Unsupported type {to_index_type} for dictionary index"
+            )));
         }
-        BinaryView => {
-            // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data copy of the value buffer.
-            // we handle it here to avoid the copy.
-            let dict_array = array
-                .as_dictionary::<K>()
-                .downcast_dict::<BinaryArray>()
-                .ok_or_else(|| {
-                    ArrowError::ComputeError(
-                        "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
-                            .to_string(),
-                    )
-                })?;
+    };
 
-            let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
-                dict_array.values(),
-                dict_array.keys(),
-            )?;
-            Ok(Arc::new(binary_view))
-        }
-        _ => unpack_dictionary::<K>(array, to_type, cast_options),
-    }
+    Ok(new_array)
 }
 
-fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
-    array: &GenericByteArray<V>,
+fn view_from_dict_values<K: ArrowDictionaryKeyType, V: ByteArrayType, T: ByteViewType>(
     keys: &PrimitiveArray<K>,
-) -> Result<GenericByteViewArray<T>, ArrowError> {
-    let value_buffer = array.values();
-    let value_offsets = array.value_offsets();
+    values: &GenericByteArray<V>,
+) -> Result<ArrayRef, ArrowError> {
+    let value_buffer = values.values();
+    let value_offsets = values.value_offsets();
     let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
     builder.append_block(value_buffer.clone());
     for i in keys.iter() {
@@ -157,21 +138,17 @@ fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArra
             }
         }
     }
-    Ok(builder.finish())
+    Ok(Arc::new(builder.finish()))
 }
 
-// Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
-pub(crate) fn unpack_dictionary<K>(
-    array: &dyn Array,
+// Unpack a dictionary into a flat `to_type` array: cast the values, then `take` them by key
+pub(crate) fn unpack_dictionary<K: ArrowDictionaryKeyType>(
+    array: &DictionaryArray<K>,
     to_type: &DataType,
     cast_options: &CastOptions,
-) -> Result<ArrayRef, ArrowError>
-where
-    K: ArrowDictionaryKeyType,
-{
-    let dict_array = array.as_dictionary::<K>();
-    let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
-    take(cast_dict_values.as_ref(), dict_array.keys(), None)
+) -> Result<ArrayRef, ArrowError> {
+    let cast_dict_values = cast_with_options(array.values(), to_type, cast_options)?;
+    take(cast_dict_values.as_ref(), array.keys(), None)
 }
 
 /// Pack a data type into a dictionary array passing the values through a primitive array
@@ -214,6 +191,20 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
         UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
         UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
         UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
+        Decimal32(p, s) => pack_decimal_to_dictionary::<K, Decimal32Type>(
+            array,
+            dict_value_type,
+            p,
+            s,
+            cast_options,
+        ),
+        Decimal64(p, s) => pack_decimal_to_dictionary::<K, Decimal64Type>(
+            array,
+            dict_value_type,
+            p,
+            s,
+            cast_options,
+        ),
         Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
             array,
             dict_value_type,
@@ -299,7 +290,7 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
             pack_byte_to_fixed_size_dictionary::<K>(array, cast_options, byte_size)
         }
         _ => Err(ArrowError::CastError(format!(
-            "Unsupported output type for dictionary packing: {dict_value_type:?}"
+            "Unsupported output type for dictionary packing: {dict_value_type}"
         ))),
     }
 }
diff --git a/arrow-cast/src/cast/list.rs b/arrow-cast/src/cast/list.rs
index ddcbca361bf0..f6c8d2465c86 100644
--- a/arrow-cast/src/cast/list.rs
+++ b/arrow-cast/src/cast/list.rs
@@ -24,8 +24,8 @@ pub(crate) fn cast_values_to_list<O: OffsetSizeTrait>(
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError> {
     let values = cast_with_options(array, to.data_type(), cast_options)?;
-    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
-    let list = GenericListArray::<O>::new(to.clone(), offsets, values, None);
+    let offsets = OffsetBuffer::from_repeated_length(1, values.len());
+    let list = GenericListArray::<O>::try_new(to.clone(), offsets, values, None)?;
     Ok(Arc::new(list))
 }
 
@@ -37,7 +37,7 @@ pub(crate) fn cast_values_to_fixed_size_list(
     cast_options: &CastOptions,
 ) -> Result<ArrayRef, ArrowError> {
     let values = cast_with_options(array, to.data_type(), cast_options)?;
-    let list = FixedSizeListArray::new(to.clone(), size, values, None);
+    let list = FixedSizeListArray::try_new(to.clone(), size, values, None)?;
     Ok(Arc::new(list))
 }
 
@@ -140,7 +140,7 @@ where
 
     // Construct the FixedSizeListArray
     let nulls = nulls.map(|mut x| x.finish().into());
-    let array = FixedSizeListArray::new(field.clone(), size, values, nulls);
+    let array = FixedSizeListArray::try_new(field.clone(), size, values, nulls)?;
     Ok(Arc::new(array))
 }
 
@@ -152,12 +152,12 @@ pub(crate) fn cast_list_values<O: OffsetSizeTrait>(
 ) -> Result<ArrayRef, ArrowError> {
     let list = array.as_list::<O>();
     let values = cast_with_options(list.values(), to.data_type(), cast_options)?;
-    Ok(Arc::new(GenericListArray::<O>::new(
+    Ok(Arc::new(GenericListArray::<O>::try_new(
         to.clone(),
         list.offsets().clone(),
         values,
         list.nulls().cloned(),
-    )))
+    )?))
 }
 
 /// Cast the container type of List/Largelist array along with the inner datatype
@@ -184,10 +184,10 @@ pub(crate) fn cast_list<I: OffsetSizeTrait, O: OffsetSizeTrait>(
     // Safety: valid offsets and checked for overflow
     let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
 
-    Ok(Arc::new(GenericListArray::<O>::new(
+    Ok(Arc::new(GenericListArray::<O>::try_new(
         field.clone(),
         offsets,
         values,
         nulls,
-    )))
+    )?))
 }
diff --git a/arrow-cast/src/cast/list_view.rs b/arrow-cast/src/cast/list_view.rs
new file mode 100644
index 000000000000..0fdab8c6247d
--- /dev/null
+++ b/arrow-cast/src/cast/list_view.rs
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::cast::*;
+
+/// Cast a list view to a list by copying each view's values into contiguous, in-order storage
+pub(crate) fn cast_list_view_to_list<O: OffsetSizeTrait>(
+    array: &dyn Array,
+    to: &FieldRef,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let list_view = array.as_list_view::<O>();
+    let list_view_offsets = list_view.offsets();
+    let sizes = list_view.sizes();
+    let source_values = list_view.values();
+
+    // Construct the indices and offsets for the new list array by iterating over the list view
+    // subarrays. Indices are `u64` so positions beyond `i32::MAX` are never truncated.
+    let mut indices: Vec<u64> = Vec::with_capacity(source_values.len());
+    let mut offsets = Vec::with_capacity(list_view.len() + 1);
+    offsets.push(O::usize_as(0));
+    for i in 0..list_view.len() {
+        // For each subarray, add the indices of the values to take
+        let offset = list_view_offsets[i].as_usize();
+        let end = offset + sizes[i].as_usize();
+        indices.extend((offset..end).map(|j| j as u64));
+        // Add the next offset; overlapping views can overflow `O`, so convert with a check
+        let next_offset = O::from_usize(indices.len()).ok_or_else(|| {
+            ArrowError::CastError("Cannot cast list view to list: offset overflow".to_string())
+        })?;
+        offsets.push(next_offset);
+    }
+
+    // Take the values from the source values using the indices, creating a new array
+    let values = arrow_select::take::take(source_values, &UInt64Array::from(indices), None)?;
+
+    // Cast the values to the target data type
+    let values = cast_with_options(&values, to.data_type(), cast_options)?;
+
+    Ok(Arc::new(GenericListArray::<O>::try_new(
+        to.clone(),
+        OffsetBuffer::new(offsets.into()),
+        values,
+        list_view.nulls().cloned(),
+    )?))
+}
+
+/// Cast a list view to offset type `O` and its values to `to_field`; errors if an offset overflows
+pub(crate) fn cast_list_view<I: OffsetSizeTrait, O: OffsetSizeTrait>(
+    array: &dyn Array,
+    to_field: &FieldRef,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let (_field, offsets, sizes, values, nulls) = array.as_list_view::<I>().clone().into_parts();
+    let values = cast_with_options(&values, to_field.data_type(), cast_options)?; // recursive cast
+    let offsets: Option<Vec<O>> = offsets.iter().map(|x| O::from_usize(x.as_usize())).collect();
+    let sizes: Option<Vec<O>> = sizes.iter().map(|x| O::from_usize(x.as_usize())).collect();
+    let (Some(new_offsets), Some(new_sizes)) = (offsets, sizes) else {
+        return Err(ArrowError::CastError("List view offset or size overflow".to_string()));
+    };
+    Ok(Arc::new(GenericListViewArray::<O>::try_new(
+        to_field.clone(),
+        new_offsets.into(),
+        new_sizes.into(),
+        values,
+        nulls,
+    )?))
+}
+
+/// Converts a list array into a list view array that uses the same offset type
+pub(crate) fn cast_list_to_list_view<OffsetSize>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
+where
+    OffsetSize: OffsetSizeTrait,
+{
+    let source = array.as_list::<OffsetSize>().clone();
+    Ok(Arc::new(GenericListViewArray::<OffsetSize>::from(source)))
+}
diff --git a/arrow-cast/src/cast/map.rs b/arrow-cast/src/cast/map.rs
index d62a9519b7b3..e7a9b7495edb 100644
--- a/arrow-cast/src/cast/map.rs
+++ b/arrow-cast/src/cast/map.rs
@@ -42,17 +42,17 @@ pub(crate) fn cast_map_values(
     let key_array = cast_with_options(from.keys(), key_field.data_type(), cast_options)?;
     let value_array = cast_with_options(from.values(), value_field.data_type(), cast_options)?;
 
-    Ok(Arc::new(MapArray::new(
+    Ok(Arc::new(MapArray::try_new(
         entries_field.clone(),
         from.offsets().clone(),
-        StructArray::new(
+        StructArray::try_new(
             Fields::from(vec![key_field, value_field]),
             vec![key_array, value_array],
             from.entries().nulls().cloned(),
-        ),
+        )?,
         from.nulls().cloned(),
         to_ordered,
-    )))
+    )?))
 }
 
 /// Gets the key field from the entries of a map.  For all other types returns None.
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index b317dabd5dda..fb77993a3028 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -40,12 +40,16 @@
 mod decimal;
 mod dictionary;
 mod list;
+mod list_view;
 mod map;
+mod run_array;
 mod string;
+
 use crate::cast::decimal::*;
 use crate::cast::dictionary::*;
 use crate::cast::list::*;
 use crate::cast::map::*;
+use crate::cast::run_array::*;
 use crate::cast::string::*;
 
 use arrow_buffer::IntervalMonthDayNano;
@@ -56,17 +60,19 @@ use std::sync::Arc;
 
 use crate::display::{ArrayFormatter, FormatOptions};
 use crate::parse::{
-    parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
-    string_to_datetime, Parser,
+    Parser, parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month,
+    string_to_datetime,
 };
 use arrow_array::{builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *};
-use arrow_buffer::{i256, ArrowNativeType, OffsetBuffer};
-use arrow_data::transform::MutableArrayData;
+use arrow_buffer::{ArrowNativeType, OffsetBuffer, i256};
 use arrow_data::ArrayData;
+use arrow_data::transform::MutableArrayData;
 use arrow_schema::*;
 use arrow_select::take::take;
-use num::cast::AsPrimitive;
-use num::{NumCast, ToPrimitive};
+use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};
+
+use crate::cast::list_view::{cast_list_to_list_view, cast_list_view, cast_list_view_to_list};
+pub use decimal::{DecimalCast, rescale_decimal};
 
 /// CastOptions provides a way to override the default cast behaviors
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -98,45 +104,14 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
     }
 
     match (from_type, to_type) {
-        (
-            Null,
-            Boolean
-            | Int8
-            | UInt8
-            | Int16
-            | UInt16
-            | Int32
-            | UInt32
-            | Float32
-            | Date32
-            | Time32(_)
-            | Int64
-            | UInt64
-            | Float64
-            | Date64
-            | Timestamp(_, _)
-            | Time64(_)
-            | Duration(_)
-            | Interval(_)
-            | FixedSizeBinary(_)
-            | Binary
-            | Utf8
-            | LargeBinary
-            | LargeUtf8
-            | BinaryView
-            | Utf8View
-            | List(_)
-            | LargeList(_)
-            | FixedSizeList(_, _)
-            | Struct(_)
-            | Map(_, _)
-            | Dictionary(_, _),
-        ) => true,
+        (Null, _) => true,
         // Dictionary/List conditions should be put in front of others
         (Dictionary(_, from_value_type), Dictionary(_, to_value_type)) => {
             can_cast_types(from_value_type, to_value_type)
         }
         (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type),
+        (RunEndEncoded(_, value_type), _) => can_cast_types(value_type.data_type(), to_type),
+        (_, RunEndEncoded(_, value_type)) => can_cast_types(from_type, value_type.data_type()),
         (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type),
         (List(list_from) | LargeList(list_from), List(list_to) | LargeList(list_to)) => {
             can_cast_types(list_from.data_type(), list_to.data_type())
@@ -147,9 +122,21 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         (List(list_from) | LargeList(list_from), FixedSizeList(list_to, _)) => {
             can_cast_types(list_from.data_type(), list_to.data_type())
         }
+        (List(list_from) | LargeList(list_from), ListView(list_to) | LargeListView(list_to)) => {
+            can_cast_types(list_from.data_type(), list_to.data_type())
+        }
         (List(_), _) => false,
-        (FixedSizeList(list_from,_), List(list_to)) |
-        (FixedSizeList(list_from,_), LargeList(list_to)) => {
+        (ListView(list_from) | LargeListView(list_from), List(list_to) | LargeList(list_to)) => {
+            can_cast_types(list_from.data_type(), list_to.data_type())
+        }
+        (ListView(list_from), LargeListView(list_to)) => {
+            can_cast_types(list_from.data_type(), list_to.data_type())
+        }
+        (LargeListView(list_from), ListView(list_to)) => {
+            can_cast_types(list_from.data_type(), list_to.data_type())
+        }
+        (FixedSizeList(list_from, _), List(list_to))
+        | (FixedSizeList(list_from, _), LargeList(list_to)) => {
             can_cast_types(list_from.data_type(), list_to.data_type())
         }
         (FixedSizeList(inner, size), FixedSizeList(inner_to, size_to)) if size == size_to => {
@@ -157,42 +144,100 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         }
         (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()),
         (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()),
-        (_, FixedSizeList(list_to,size)) if *size == 1 => {
-            can_cast_types(from_type, list_to.data_type())},
-        (FixedSizeList(list_from,size), _) if *size == 1 => {
-            can_cast_types(list_from.data_type(), to_type)},
-        (Map(from_entries,ordered_from), Map(to_entries, ordered_to)) if ordered_from == ordered_to =>
-            match (key_field(from_entries), key_field(to_entries), value_field(from_entries), value_field(to_entries)) {
-                (Some(from_key), Some(to_key), Some(from_value), Some(to_value)) =>
-                    can_cast_types(from_key.data_type(), to_key.data_type()) && can_cast_types(from_value.data_type(), to_value.data_type()),
-                _ => false
-            },
+        (_, FixedSizeList(list_to, size)) if *size == 1 => {
+            can_cast_types(from_type, list_to.data_type())
+        }
+        (FixedSizeList(list_from, size), _) if *size == 1 => {
+            can_cast_types(list_from.data_type(), to_type)
+        }
+        (Map(from_entries, ordered_from), Map(to_entries, ordered_to))
+            if ordered_from == ordered_to =>
+        {
+            match (
+                key_field(from_entries),
+                key_field(to_entries),
+                value_field(from_entries),
+                value_field(to_entries),
+            ) {
+                (Some(from_key), Some(to_key), Some(from_value), Some(to_value)) => {
+                    can_cast_types(from_key.data_type(), to_key.data_type())
+                        && can_cast_types(from_value.data_type(), to_value.data_type())
+                }
+                _ => false,
+            }
+        }
         // cast one decimal type to another decimal type
-        (Decimal128(_, _), Decimal128(_, _)) => true,
-        (Decimal256(_, _), Decimal256(_, _)) => true,
-        (Decimal128(_, _), Decimal256(_, _)) => true,
-        (Decimal256(_, _), Decimal128(_, _)) => true,
+        (
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+        ) => true,
         // unsigned integer to decimal
-        (UInt8 | UInt16 | UInt32 | UInt64, Decimal128(_, _)) |
-        (UInt8 | UInt16 | UInt32 | UInt64, Decimal256(_, _)) |
+        (
+            UInt8 | UInt16 | UInt32 | UInt64,
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+        ) => true,
         // signed numeric to decimal
-        (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) |
-        (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal256(_, _)) |
+        (
+            Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+        ) => true,
         // decimal to unsigned numeric
-        (Decimal128(_, _) | Decimal256(_, _), UInt8 | UInt16 | UInt32 | UInt64) |
+        (
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+            UInt8 | UInt16 | UInt32 | UInt64,
+        ) => true,
         // decimal to signed numeric
-        (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) => true,
+        (
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+            Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64,
+        ) => true,
         // decimal to string
-        (Decimal128(_, _) | Decimal256(_, _), Utf8View | Utf8 | LargeUtf8) => true,
+        (
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+            Utf8View | Utf8 | LargeUtf8,
+        ) => true,
         // string to decimal
-        (Utf8View | Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true,
+        (
+            Utf8View | Utf8 | LargeUtf8,
+            Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
+        ) => true,
         (Struct(from_fields), Struct(to_fields)) => {
-            from_fields.len() == to_fields.len() &&
-                from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| {
+            if from_fields.len() != to_fields.len() {
+                return false;
+            }
+
+            // fast path, all field names are in the same order and same number of fields
+            if from_fields
+                .iter()
+                .zip(to_fields.iter())
+                .all(|(f1, f2)| f1.name() == f2.name())
+            {
+                return from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| {
                     // Assume that nullability between two structs are compatible, if not,
                     // cast kernel will return error.
                     can_cast_types(f1.data_type(), f2.data_type())
-                })
+                });
+            }
+
+            // slow path, we match the fields by name
+            if to_fields.iter().all(|to_field| {
+                from_fields
+                    .iter()
+                    .find(|from_field| from_field.name() == to_field.name())
+                    .is_some_and(|from_field| {
+                        // Assume that nullability between two structs are compatible, if not,
+                        // cast kernel will return error.
+                        can_cast_types(from_field.data_type(), to_field.data_type())
+                    })
+            }) {
+                return true;
+            }
+
+            // if we couldn't match by name, we try to see if they can be matched by position
+            from_fields
+                .iter()
+                .zip(to_fields.iter())
+                .all(|(f1, f2)| can_cast_types(f1.data_type(), f2.data_type()))
         }
         (Struct(_), _) => false,
         (_, Struct(_)) => false,
@@ -211,8 +256,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
                 || to_type == &LargeUtf8
         }
 
-        (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View ) => true,
-        (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View ) => true,
+        (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View) => {
+            true
+        }
+        (LargeBinary, Binary | Utf8 | LargeUtf8 | FixedSizeBinary(_) | BinaryView | Utf8View) => {
+            true
+        }
         (FixedSizeBinary(_), Binary | LargeBinary | BinaryView) => true,
         (
             Utf8 | LargeUtf8 | Utf8View,
@@ -236,22 +285,23 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         (Utf8 | LargeUtf8, Utf8View) => true,
         (BinaryView, Binary | LargeBinary | Utf8 | LargeUtf8 | Utf8View) => true,
         (Utf8View | Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16,
-        (_, Utf8 | LargeUtf8) => from_type.is_primitive(),
-        (_, Utf8View) => from_type.is_numeric(),
+        (_, Utf8 | Utf8View | LargeUtf8) => from_type.is_primitive(),
 
         (_, Binary | LargeBinary) => from_type.is_integer(),
 
         // start numeric casts
         (
-            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64,
-            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64,
+            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32
+            | Float64,
+            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32
+            | Float64,
         ) => true,
         // end numeric casts
 
         // temporal casts
         (Int32, Date32 | Date64 | Time32(_)) => true,
         (Date32, Int32 | Int64) => true,
-        (Time32(_), Int32) => true,
+        (Time32(_), Int32 | Int64) => true,
         (Int64, Date64 | Date32 | Time64(_)) => true,
         (Date64, Int64 | Int32) => true,
         (Time64(_), Int64) => true,
@@ -342,7 +392,7 @@ where
             false => array.try_unary::<_, D, _>(|v| {
                 v.as_()
                     .div_checked(scale_factor)
-                    .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v))
+                    .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v))
             })?,
         }
     } else {
@@ -356,7 +406,7 @@ where
             false => array.try_unary::<_, D, _>(|v| {
                 v.as_()
                     .mul_checked(scale_factor)
-                    .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v))
+                    .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v))
             })?,
         }
     };
@@ -603,12 +653,28 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
 /// * Temporal to/from backing Primitive: zero-copy with data type change
 /// * `Float32/Float64` to `Decimal(precision, scale)` rounds to the `scale` decimals
 ///   (i.e. casting `6.4999` to `Decimal(10, 1)` becomes `6.5`).
+/// * `Decimal` to `Float32/Float64` is lossy and values outside the representable
+///   range become `INFINITY` or `-INFINITY` without error.
 ///
 /// Unsupported Casts (check with `can_cast_types` before calling):
 /// * To or from `StructArray`
 /// * `List` to `Primitive`
 /// * `Interval` and `Duration`
 ///
+/// # Durations and Intervals
+///
+/// Casting integer types directly to interval types such as
+/// [`IntervalMonthDayNano`] is not supported because the meaning of the integer
+/// is ambiguous. For example, the same integer could represent either nanoseconds
+/// or months.
+///
+/// To cast an integer type to an interval type, first convert to a Duration
+/// type, and then cast that to the desired interval type.
+///
+/// For example, to convert an `Int64` representing nanoseconds to an
+/// `IntervalMonthDayNano` you would first convert the `Int64` to a
+/// `Duration(TimeUnit::Nanosecond)`, and then cast that to `IntervalMonthDayNano`.
+///
 /// # Timestamps and Timezones
 ///
 /// Timestamps are stored with an optional timezone in Arrow.
@@ -705,40 +771,38 @@ pub fn cast_with_options(
         return Ok(make_array(array.to_data()));
     }
     match (from_type, to_type) {
-        (
-            Null,
-            Boolean
-            | Int8
-            | UInt8
-            | Int16
-            | UInt16
-            | Int32
-            | UInt32
-            | Float32
-            | Date32
-            | Time32(_)
-            | Int64
-            | UInt64
-            | Float64
-            | Date64
-            | Timestamp(_, _)
-            | Time64(_)
-            | Duration(_)
-            | Interval(_)
-            | FixedSizeBinary(_)
-            | Binary
-            | Utf8
-            | LargeBinary
-            | LargeUtf8
-            | BinaryView
-            | Utf8View
-            | List(_)
-            | LargeList(_)
-            | FixedSizeList(_, _)
-            | Struct(_)
-            | Map(_, _)
-            | Dictionary(_, _),
-        ) => Ok(new_null_array(to_type, array.len())),
+        (Null, _) => Ok(new_null_array(to_type, array.len())),
+        (RunEndEncoded(index_type, _), _) => match index_type.data_type() {
+            Int16 => run_end_encoded_cast::<Int16Type>(array, to_type, cast_options),
+            Int32 => run_end_encoded_cast::<Int32Type>(array, to_type, cast_options),
+            Int64 => run_end_encoded_cast::<Int64Type>(array, to_type, cast_options),
+            _ => Err(ArrowError::CastError(format!(
+                "Casting from run end encoded type {from_type:?} to {to_type:?} not supported",
+            ))),
+        },
+        (_, RunEndEncoded(index_type, value_type)) => {
+            let array_ref = make_array(array.to_data());
+            match index_type.data_type() {
+                Int16 => cast_to_run_end_encoded::<Int16Type>(
+                    &array_ref,
+                    value_type.data_type(),
+                    cast_options,
+                ),
+                Int32 => cast_to_run_end_encoded::<Int32Type>(
+                    &array_ref,
+                    value_type.data_type(),
+                    cast_options,
+                ),
+                Int64 => cast_to_run_end_encoded::<Int64Type>(
+                    &array_ref,
+                    value_type.data_type(),
+                    cast_options,
+                ),
+                _ => Err(ArrowError::CastError(format!(
+                    "Casting from type {from_type:?} to run end encoded type {to_type:?} not supported",
+                ))),
+            }
+        }
         (Dictionary(index_type, _), _) => match **index_type {
             Int8 => dictionary_cast::<Int8Type>(array, to_type, cast_options),
             Int16 => dictionary_cast::<Int16Type>(array, to_type, cast_options),
@@ -749,7 +813,7 @@ pub fn cast_with_options(
             UInt32 => dictionary_cast::<UInt32Type>(array, to_type, cast_options),
             UInt64 => dictionary_cast::<UInt64Type>(array, to_type, cast_options),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from dictionary type {from_type:?} to {to_type:?} not supported",
+                "Casting from dictionary type {from_type} to {to_type} not supported",
             ))),
         },
         (_, Dictionary(index_type, value_type)) => match **index_type {
@@ -762,7 +826,7 @@ pub fn cast_with_options(
             UInt32 => cast_to_dictionary::<UInt32Type>(array, value_type, cast_options),
             UInt64 => cast_to_dictionary::<UInt64Type>(array, value_type, cast_options),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from type {from_type:?} to dictionary type {to_type:?} not supported",
+                "Casting from type {from_type} to dictionary type {to_type} not supported",
             ))),
         },
         (List(_), List(to)) => cast_list_values::<i32>(array, to, cast_options),
@@ -777,6 +841,18 @@ pub fn cast_with_options(
             let array = array.as_list::<i64>();
             cast_list_to_fixed_size_list::<i64>(array, field, *size, cast_options)
         }
+        (ListView(_), List(list_to)) => cast_list_view_to_list::<i32>(array, list_to, cast_options),
+        (LargeListView(_), LargeList(list_to)) => {
+            cast_list_view_to_list::<i64>(array, list_to, cast_options)
+        }
+        (ListView(_), LargeListView(list_to)) => {
+            cast_list_view::<i32, i64>(array, list_to, cast_options)
+        }
+        (LargeListView(_), ListView(list_to)) => {
+            cast_list_view::<i64, i32>(array, list_to, cast_options)
+        }
+        (List(_), ListView(_)) => cast_list_to_list_view::<i32>(array),
+        (LargeList(_), LargeListView(_)) => cast_list_to_list_view::<i64>(array),
         (List(_) | LargeList(_), _) => match to_type {
             Utf8 => value_to_string::<i32>(array, cast_options),
             LargeUtf8 => value_to_string::<i64>(array, cast_options),
@@ -819,9 +895,9 @@ pub fn cast_with_options(
                 array.nulls().cloned(),
             )?))
         }
-        (_, List(ref to)) => cast_values_to_list::<i32>(array, to, cast_options),
-        (_, LargeList(ref to)) => cast_values_to_list::<i64>(array, to, cast_options),
-        (_, FixedSizeList(ref to, size)) if *size == 1 => {
+        (_, List(to)) => cast_values_to_list::<i32>(array, to, cast_options),
+        (_, LargeList(to)) => cast_values_to_list::<i64>(array, to, cast_options),
+        (_, FixedSizeList(to, size)) if *size == 1 => {
             cast_values_to_fixed_size_list(array, to, *size, cast_options)
         }
         (FixedSizeList(_, size), _) if *size == 1 => {
@@ -831,6 +907,26 @@ pub fn cast_with_options(
             cast_map_values(array.as_map(), to_type, cast_options, ordered1.to_owned())
         }
         // Decimal to decimal, same width
+        (Decimal32(p1, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal_same_type::<Decimal32Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal64(p1, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal_same_type::<Decimal64Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
         (Decimal128(p1, s1), Decimal128(p2, s2)) => {
             cast_decimal_to_decimal_same_type::<Decimal128Type>(
                 array.as_primitive(),
@@ -852,6 +948,86 @@ pub fn cast_with_options(
             )
         }
         // Decimal to decimal, different width
+        (Decimal32(p1, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal32Type, Decimal64Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal32(p1, s1), Decimal128(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal32Type, Decimal128Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal32(p1, s1), Decimal256(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal32Type, Decimal256Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal64(p1, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal64Type, Decimal32Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal64(p1, s1), Decimal128(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal64Type, Decimal128Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal64(p1, s1), Decimal256(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal64Type, Decimal256Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal128(p1, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal128Type, Decimal32Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal128(p1, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal128Type, Decimal64Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
         (Decimal128(p1, s1), Decimal256(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal128Type, Decimal256Type>(
                 array.as_primitive(),
@@ -862,6 +1038,26 @@ pub fn cast_with_options(
                 cast_options,
             )
         }
+        (Decimal256(p1, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal256Type, Decimal32Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal256(p1, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal256Type, Decimal64Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
         (Decimal256(p1, s1), Decimal128(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal256Type, Decimal128Type>(
                 array.as_primitive(),
@@ -873,6 +1069,28 @@ pub fn cast_with_options(
             )
         }
         // Decimal to non-decimal
+        (Decimal32(_, scale), _) if !to_type.is_temporal() => {
+            cast_from_decimal::<Decimal32Type, _>(
+                array,
+                10_i32,
+                scale,
+                from_type,
+                to_type,
+                |x: i32| x as f64,
+                cast_options,
+            )
+        }
+        (Decimal64(_, scale), _) if !to_type.is_temporal() => {
+            cast_from_decimal::<Decimal64Type, _>(
+                array,
+                10_i64,
+                scale,
+                from_type,
+                to_type,
+                |x: i64| x as f64,
+                cast_options,
+            )
+        }
         (Decimal128(_, scale), _) if !to_type.is_temporal() => {
             cast_from_decimal::<Decimal128Type, _>(
                 array,
@@ -891,11 +1109,33 @@ pub fn cast_with_options(
                 scale,
                 from_type,
                 to_type,
-                |x: i256| x.to_f64().unwrap(),
+                |x: i256| x.to_f64().expect("All i256 values fit in f64"),
                 cast_options,
             )
         }
         // Non-decimal to decimal
+        (_, Decimal32(precision, scale)) if !from_type.is_temporal() => {
+            cast_to_decimal::<Decimal32Type, _>(
+                array,
+                10_i32,
+                precision,
+                scale,
+                from_type,
+                to_type,
+                cast_options,
+            )
+        }
+        (_, Decimal64(precision, scale)) if !from_type.is_temporal() => {
+            cast_to_decimal::<Decimal64Type, _>(
+                array,
+                10_i64,
+                precision,
+                scale,
+                from_type,
+                to_type,
+                cast_options,
+            )
+        }
         (_, Decimal128(precision, scale)) if !from_type.is_temporal() => {
             cast_to_decimal::<Decimal128Type, _>(
                 array,
@@ -918,22 +1158,17 @@ pub fn cast_with_options(
                 cast_options,
             )
         }
-        (Struct(_), Struct(to_fields)) => {
-            let array = array.as_struct();
-            let fields = array
-                .columns()
-                .iter()
-                .zip(to_fields.iter())
-                .map(|(l, field)| cast_with_options(l, field.data_type(), cast_options))
-                .collect::<Result<Vec<ArrayRef>, ArrowError>>()?;
-            let array = StructArray::try_new(to_fields.clone(), fields, array.nulls().cloned())?;
-            Ok(Arc::new(array) as ArrayRef)
-        }
+        (Struct(from_fields), Struct(to_fields)) => cast_struct_to_struct(
+            array.as_struct(),
+            from_fields.clone(),
+            to_fields.clone(),
+            cast_options,
+        ),
         (Struct(_), _) => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported"
+            "Casting from {from_type} to {to_type} not supported"
         ))),
         (_, Struct(_)) => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported"
+            "Casting from {from_type} to {to_type} not supported"
         ))),
         (_, Boolean) => match from_type {
             UInt8 => cast_numeric_to_bool::<UInt8Type>(array),
@@ -951,7 +1186,7 @@ pub fn cast_with_options(
             Utf8 => cast_utf8_to_boolean::<i32>(array, cast_options),
             LargeUtf8 => cast_utf8_to_boolean::<i64>(array, cast_options),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (Boolean, _) => match to_type {
@@ -970,7 +1205,7 @@ pub fn cast_with_options(
             Utf8 => value_to_string::<i32>(array, cast_options),
             LargeUtf8 => value_to_string::<i64>(array, cast_options),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (Utf8, _) => match to_type {
@@ -1032,7 +1267,7 @@ pub fn cast_with_options(
                 cast_string_to_month_day_nano_interval::<i32>(array, cast_options)
             }
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (Utf8View, _) => match to_type {
@@ -1083,7 +1318,7 @@ pub fn cast_with_options(
                 cast_view_to_month_day_nano_interval(array, cast_options)
             }
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (LargeUtf8, _) => match to_type {
@@ -1149,7 +1384,7 @@ pub fn cast_with_options(
                 cast_string_to_month_day_nano_interval::<i64>(array, cast_options)
             }
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (Binary, _) => match to_type {
@@ -1167,7 +1402,7 @@ pub fn cast_with_options(
                 cast_binary_to_string::<i32>(array, cast_options)?.as_string::<i32>(),
             ))),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (LargeBinary, _) => match to_type {
@@ -1186,7 +1421,7 @@ pub fn cast_with_options(
                 Ok(Arc::new(StringViewArray::from(array.as_string::<i64>())))
             }
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (FixedSizeBinary(size), _) => match to_type {
@@ -1194,7 +1429,7 @@ pub fn cast_with_options(
             LargeBinary => cast_fixed_size_binary_to_binary::<i64>(array, *size),
             BinaryView => cast_fixed_size_binary_to_binary_view(array, *size),
             _ => Err(ArrowError::CastError(format!(
-                "Casting from {from_type:?} to {to_type:?} not supported",
+                "Casting from {from_type} to {to_type} not supported",
             ))),
         },
         (BinaryView, Binary) => cast_view_to_byte::<BinaryViewType, GenericBinaryType<i32>>(array),
@@ -1209,11 +1444,9 @@ pub fn cast_with_options(
             let binary_arr = cast_view_to_byte::<BinaryViewType, GenericBinaryType<i64>>(array)?;
             cast_binary_to_string::<i64>(&binary_arr, cast_options)
         }
-        (BinaryView, Utf8View) => {
-            Ok(Arc::new(array.as_binary_view().clone().to_string_view()?) as ArrayRef)
-        }
+        (BinaryView, Utf8View) => cast_binary_view_to_string_view(array, cast_options),
         (BinaryView, _) => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported",
+            "Casting from {from_type} to {to_type} not supported",
         ))),
         (from_type, Utf8View) if from_type.is_primitive() => {
             value_to_string_view(array, cast_options)
@@ -1395,6 +1628,16 @@ pub fn cast_with_options(
         (Time32(TimeUnit::Millisecond), Int32) => {
             cast_reinterpret_arrays::<Time32MillisecondType, Int32Type>(array)
         }
+        (Time32(TimeUnit::Second), Int64) => cast_with_options(
+            &cast_with_options(array, &Int32, cast_options)?,
+            &Int64,
+            cast_options,
+        ),
+        (Time32(TimeUnit::Millisecond), Int64) => cast_with_options(
+            &cast_with_options(array, &Int32, cast_options)?,
+            &Int64,
+            cast_options,
+        ),
         (Int64, Date64) => cast_reinterpret_arrays::<Int64Type, Date64Type>(array),
         (Int64, Date32) => cast_with_options(
             &cast_with_options(array, &Int32, cast_options)?,
@@ -1947,11 +2190,79 @@ pub fn cast_with_options(
             cast_reinterpret_arrays::<Int32Type, IntervalYearMonthType>(array)
         }
         (_, _) => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported",
+            "Casting from {from_type} to {to_type} not supported",
         ))),
     }
 }
 
+/// Casts `array` to a struct with `to_fields`, pairing source and target fields
+/// by name when every target name exists in `from_fields`, else by position.
+fn cast_struct_to_struct(
+    array: &StructArray,
+    from_fields: Fields,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    // Identical names at identical positions (which also implies equal field
+    // counts): the columns can be cast positionally without any reordering.
+    let names_already_aligned = from_fields
+        .iter()
+        .map(|field| field.name())
+        .eq(to_fields.iter().map(|field| field.name()));
+
+    // Reordering by name is only attempted when the names are not aligned and
+    // every target field has a source field carrying the same name.
+    let reorder_by_name = !names_already_aligned
+        && to_fields.iter().all(|target| {
+            from_fields
+                .iter()
+                .any(|source| source.name() == target.name())
+        });
+
+    let columns = if reorder_by_name {
+        cast_struct_fields_by_name(array, from_fields.clone(), to_fields.clone(), cast_options)?
+    } else {
+        // Covers both the aligned case and the fallback where some target name
+        // is missing from the source: columns are cast in positional order.
+        cast_struct_fields_in_order(array, to_fields.clone(), cast_options)?
+    };
+
+    let casted = StructArray::try_new(to_fields.clone(), columns, array.nulls().cloned())?;
+    Ok(Arc::new(casted) as ArrayRef)
+}
+
+/// Casts `array`'s columns to `to_fields`, locating each source column by field name.
+fn cast_struct_fields_by_name(
+    array: &StructArray,
+    from_fields: Fields,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<Vec<ArrayRef>, ArrowError> {
+    to_fields
+        .iter()
+        .map(|to| {
+            // The first same-named source field wins; a missing name is an error, not a panic.
+            match from_fields.iter().position(|src| src.name() == to.name()) {
+                Some(idx) => cast_with_options(array.column(idx), to.data_type(), cast_options),
+                None => Err(ArrowError::CastError(format!("No source field named {}", to.name()))),
+            }
+        })
+        .collect()
+}
+
+/// Casts the columns of `array` positionally: column `i` is cast to `to_fields[i]`.
+fn cast_struct_fields_in_order(
+    array: &StructArray,
+    to_fields: Fields,
+    cast_options: &CastOptions,
+) -> Result<Vec<ArrayRef>, ArrowError> {
+    let mut casted = Vec::with_capacity(array.columns().len().min(to_fields.len()));
+    for (column, target) in array.columns().iter().zip(to_fields.iter()) {
+        casted.push(cast_with_options(column, target.data_type(), cast_options)?);
+    }
+    Ok(casted)
+}
+
 fn cast_from_decimal<D, F>(
     array: &dyn Array,
     base: D::Native,
@@ -1988,7 +2299,7 @@ where
         LargeUtf8 => value_to_string::<i64>(array, cast_options),
         Null => Ok(new_null_array(to_type, array.len())),
         _ => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported"
+            "Casting from {from_type} to {to_type} not supported"
         ))),
     }
 }
@@ -2005,14 +2316,14 @@ fn cast_to_decimal<D, M>(
 where
     D: DecimalType + ArrowPrimitiveType<Native = M>,
     M: ArrowNativeTypeOp + DecimalCast,
-    u8: num::traits::AsPrimitive<M>,
-    u16: num::traits::AsPrimitive<M>,
-    u32: num::traits::AsPrimitive<M>,
-    u64: num::traits::AsPrimitive<M>,
-    i8: num::traits::AsPrimitive<M>,
-    i16: num::traits::AsPrimitive<M>,
-    i32: num::traits::AsPrimitive<M>,
-    i64: num::traits::AsPrimitive<M>,
+    u8: num_traits::AsPrimitive<M>,
+    u16: num_traits::AsPrimitive<M>,
+    u32: num_traits::AsPrimitive<M>,
+    u64: num_traits::AsPrimitive<M>,
+    i8: num_traits::AsPrimitive<M>,
+    i16: num_traits::AsPrimitive<M>,
+    i32: num_traits::AsPrimitive<M>,
+    i64: num_traits::AsPrimitive<M>,
 {
     use DataType::*;
     // cast data to decimal
@@ -2091,7 +2402,7 @@ where
         LargeUtf8 => cast_string_to_decimal::<D, i64>(array, *precision, *scale, cast_options),
         Null => Ok(new_null_array(to_type, array.len())),
         _ => Err(ArrowError::CastError(format!(
-            "Casting from {from_type:?} to {to_type:?} not supported"
+            "Casting from {from_type} to {to_type} not supported"
         ))),
     }
 }
@@ -2140,7 +2451,7 @@ where
     R::Native: NumCast,
 {
     from.try_unary(|value| {
-        num::cast::cast::<T::Native, R::Native>(value).ok_or_else(|| {
+        num_traits::cast::cast::<T::Native, R::Native>(value).ok_or_else(|| {
             ArrowError::CastError(format!(
                 "Can't cast value {:?} to type {}",
                 value,
@@ -2159,7 +2470,7 @@ where
     T::Native: NumCast,
     R::Native: NumCast,
 {
-    from.unary_opt::<_, R>(num::cast::cast::<T::Native, R::Native>)
+    from.unary_opt::<_, R>(num_traits::cast::cast::<T::Native, R::Native>)
 }
 
 fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: OffsetSizeTrait>(
@@ -2167,12 +2478,12 @@ fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: OffsetSizeTrait>(
 ) -> Result<ArrayRef, ArrowError> {
     let array = array.as_primitive::<FROM>();
     let size = std::mem::size_of::<FROM::Native>();
-    let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(array.len()));
-    Ok(Arc::new(GenericBinaryArray::<O>::new(
+    let offsets = OffsetBuffer::from_repeated_length(size, array.len());
+    Ok(Arc::new(GenericBinaryArray::<O>::try_new(
         offsets,
         array.values().inner().clone(),
         array.nulls().cloned(),
-    )))
+    )?))
 }
 
 fn adjust_timestamp_to_timezone<T: ArrowTimestampType>(
@@ -2235,7 +2546,7 @@ fn cast_bool_to_numeric<TO>(
 ) -> Result<ArrayRef, ArrowError>
 where
     TO: ArrowPrimitiveType,
-    TO::Native: num::cast::NumCast,
+    TO::Native: num_traits::cast::NumCast,
 {
     Ok(Arc::new(bool_to_numeric_cast::<TO>(
         from.as_any().downcast_ref::<BooleanArray>().unwrap(),
@@ -2246,14 +2557,14 @@ where
 fn bool_to_numeric_cast<T>(from: &BooleanArray, _cast_options: &CastOptions) -> PrimitiveArray<T>
 where
     T: ArrowPrimitiveType,
-    T::Native: num::NumCast,
+    T::Native: num_traits::NumCast,
 {
     let iter = (0..from.len()).map(|i| {
         if from.is_null(i) {
             None
         } else if from.value(i) {
             // a workaround to cast a primitive to T::Native, infallible
-            num::cast::cast(1)
+            num_traits::cast::cast(1)
         } else {
             Some(T::default_value())
         }
@@ -2426,9 +2737,14 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
+    use DataType::*;
+    use arrow_array::{Int64Array, RunArray, StringArray};
     use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer};
+    use arrow_buffer::{ScalarBuffer, i256};
+    use arrow_schema::{DataType, Field};
     use chrono::NaiveDate;
     use half::f16;
+    use std::sync::Arc;
 
     #[derive(Clone)]
     struct DecimalCastTestConfig {
@@ -2507,33 +2823,55 @@ mod tests {
         }
     }
 
-    fn create_decimal128_array(
-        array: Vec<Option<i128>>,
+    fn create_decimal32_array(
+        array: Vec<Option<i32>>,
         precision: u8,
         scale: i8,
-    ) -> Result<Decimal128Array, ArrowError> {
+    ) -> Result<Decimal32Array, ArrowError> {
         array
             .into_iter()
-            .collect::<Decimal128Array>()
+            .collect::<Decimal32Array>()
             .with_precision_and_scale(precision, scale)
     }
 
-    fn create_decimal256_array(
-        array: Vec<Option<i256>>,
+    fn create_decimal64_array(
+        array: Vec<Option<i64>>,
         precision: u8,
         scale: i8,
-    ) -> Result<Decimal256Array, ArrowError> {
+    ) -> Result<Decimal64Array, ArrowError> {
         array
             .into_iter()
-            .collect::<Decimal256Array>()
+            .collect::<Decimal64Array>()
             .with_precision_and_scale(precision, scale)
     }
 
-    #[test]
-    #[cfg(not(feature = "force_validate"))]
-    #[should_panic(
-        expected = "Cannot cast to Decimal128(20, 3). Overflowing on 57896044618658097711785492504343953926634992332820282019728792003956564819967"
-    )]
+    fn create_decimal128_array(
+        array: Vec<Option<i128>>,
+        precision: u8,
+        scale: i8,
+    ) -> Result<Decimal128Array, ArrowError> {
+        array
+            .into_iter()
+            .collect::<Decimal128Array>()
+            .with_precision_and_scale(precision, scale)
+    }
+
+    fn create_decimal256_array(
+        array: Vec<Option<i256>>,
+        precision: u8,
+        scale: i8,
+    ) -> Result<Decimal256Array, ArrowError> {
+        array
+            .into_iter()
+            .collect::<Decimal256Array>()
+            .with_precision_and_scale(precision, scale)
+    }
+
+    #[test]
+    #[cfg(not(feature = "force_validate"))]
+    #[should_panic(
+        expected = "Cannot cast to Decimal128(20, 3). Overflowing on 57896044618658097711785492504343953926634992332820282019728792003956564819967"
+    )]
     fn test_cast_decimal_to_decimal_round_with_error() {
         // decimal256 to decimal128 overflow
         let array = vec![
@@ -2655,8 +2993,81 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_decimal32_to_decimal32() {
+        // test changing scale: (9, 3) -> (9, 4) multiplies every raw value by 10
+        let input_type = DataType::Decimal32(9, 3);
+        let output_type = DataType::Decimal32(9, 4);
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let array = create_decimal32_array(array, 9, 3).unwrap();
+        generate_cast_test_case!(
+            &array,
+            Decimal32Array,
+            &output_type,
+            vec![
+                Some(11234560_i32),
+                Some(21234560_i32),
+                Some(31234560_i32),
+                None
+            ]
+        );
+        // negative test: 123456 needs more than 2 digits; safe cast is Ok, strict cast errors
+        let array = vec![Some(123456), None];
+        let array = create_decimal32_array(array, 9, 0).unwrap();
+        let result_safe = cast(&array, &DataType::Decimal32(2, 2));
+        assert!(result_safe.is_ok());
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+
+        let result_unsafe = cast_with_options(&array, &DataType::Decimal32(2, 2), &options);
+        assert_eq!(
+            "Invalid argument error: 123456.00 is too large to store in a Decimal32 of precision 2. Max is 0.99",
+            result_unsafe.unwrap_err().to_string()
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal64_to_decimal64() {
+        // test changing scale: (17, 3) -> (17, 4) multiplies every raw value by 10
+        let input_type = DataType::Decimal64(17, 3);
+        let output_type = DataType::Decimal64(17, 4);
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let array = create_decimal64_array(array, 17, 3).unwrap();
+        generate_cast_test_case!(
+            &array,
+            Decimal64Array,
+            &output_type,
+            vec![
+                Some(11234560_i64),
+                Some(21234560_i64),
+                Some(31234560_i64),
+                None
+            ]
+        );
+        // negative test: 123456 needs more than 2 digits; safe cast is Ok, strict cast errors
+        let array = vec![Some(123456), None];
+        let array = create_decimal64_array(array, 9, 0).unwrap();
+        let result_safe = cast(&array, &DataType::Decimal64(2, 2));
+        assert!(result_safe.is_ok());
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+
+        let result_unsafe = cast_with_options(&array, &DataType::Decimal64(2, 2), &options);
+        assert_eq!(
+            "Invalid argument error: 123456.00 is too large to store in a Decimal64 of precision 2. Max is 0.99",
+            result_unsafe.unwrap_err().to_string()
+        );
+    }
+
     #[test]
     fn test_cast_decimal128_to_decimal128() {
+        // test changing precision
         let input_type = DataType::Decimal128(20, 3);
         let output_type = DataType::Decimal128(20, 4);
         assert!(can_cast_types(&input_type, &output_type));
@@ -2684,8 +3095,42 @@ mod tests {
         };
 
         let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options);
-        assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99",
-                   result_unsafe.unwrap_err().to_string());
+        assert_eq!(
+            "Invalid argument error: 123456.00 is too large to store in a Decimal128 of precision 2. Max is 0.99",
+            result_unsafe.unwrap_err().to_string()
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal32_to_decimal32_dict() {
+        // Casting plain decimals to a dictionary of the same decimal type yields that type.
+        let (precision, scale) = (9, 3);
+        let value_type = DataType::Decimal32(precision, scale);
+        let dict_type =
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(value_type.clone()));
+        assert!(can_cast_types(&value_type, &dict_type));
+
+        let raw = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let decimals = create_decimal32_array(raw, precision, scale).unwrap();
+        let options = CastOptions::default();
+        let dictionary = cast_with_options(&decimals, &dict_type, &options).unwrap();
+        assert_eq!(dictionary.data_type(), &dict_type);
+    }
+
+    #[test]
+    fn test_cast_decimal64_to_decimal64_dict() {
+        // Casting plain decimals to a dictionary of the same decimal type yields that type.
+        let (precision, scale) = (15, 3);
+        let value_type = DataType::Decimal64(precision, scale);
+        let dict_type =
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(value_type.clone()));
+        assert!(can_cast_types(&value_type, &dict_type));
+
+        let raw = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let decimals = create_decimal64_array(raw, precision, scale).unwrap();
+        let options = CastOptions::default();
+        let dictionary = cast_with_options(&decimals, &dict_type, &options).unwrap();
+        assert_eq!(dictionary.data_type(), &dict_type);
     }
 
     #[test]
@@ -2720,6 +3165,136 @@ mod tests {
         assert_eq!(cast_array.data_type(), &output_type);
     }
 
+    #[test]
+    fn test_cast_decimal32_to_decimal32_overflow() {
+        // A strict cast of i32::MAX from scale 3 up to scale 9 must report an overflow.
+        let source_type = DataType::Decimal32(9, 3);
+        let target_type = DataType::Decimal32(9, 9);
+        assert!(can_cast_types(&source_type, &target_type));
+
+        let strict = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+        let decimals = create_decimal32_array(vec![Some(i32::MAX)], 9, 3).unwrap();
+        let message = cast_with_options(&decimals, &target_type, &strict)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal32(9, 9). Overflowing on 2147483647",
+            message
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal32_to_decimal32_large_scale_reduction() {
+        let array = vec![Some(-999999999), Some(0), Some(999999999), None];
+        let array = create_decimal32_array(array, 9, 3).unwrap();
+
+        // Scale 3 -> -6 divides out all nine digits of precision -- rounding still gives +/- 1
+        let output_type = DataType::Decimal32(9, -6);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal32Array,
+            &output_type,
+            vec![Some(-1), Some(0), Some(1), None]
+        );
+
+        // Scale 3 -> -7 divides out more digits than the precision holds -- all-zero result
+        let output_type = DataType::Decimal32(9, -7);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal32Array,
+            &output_type,
+            vec![Some(0), Some(0), Some(0), None]
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal64_to_decimal64_overflow() {
+        // A strict cast of i64::MAX from scale 3 up to scale 18 must report an overflow.
+        let source_type = DataType::Decimal64(18, 3);
+        let target_type = DataType::Decimal64(18, 18);
+        assert!(can_cast_types(&source_type, &target_type));
+
+        let strict = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+        let decimals = create_decimal64_array(vec![Some(i64::MAX)], 18, 3).unwrap();
+        let message = cast_with_options(&decimals, &target_type, &strict)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal64(18, 18). Overflowing on 9223372036854775807",
+            message
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal64_to_decimal64_large_scale_reduction() {
+        let array = vec![
+            Some(-999999999999999999),
+            Some(0),
+            Some(999999999999999999),
+            None,
+        ];
+        let array = create_decimal64_array(array, 18, 3).unwrap();
+
+        // Scale 3 -> -15 divides out all eighteen digits of precision -- rounding still gives +/- 1
+        let output_type = DataType::Decimal64(18, -15);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal64Array,
+            &output_type,
+            vec![Some(-1), Some(0), Some(1), None]
+        );
+
+        // Scale 3 -> -16 divides out more digits than the precision holds -- all-zero result
+        let output_type = DataType::Decimal64(18, -16);
+        assert!(can_cast_types(array.data_type(), &output_type));
+        generate_cast_test_case!(
+            &array,
+            Decimal64Array,
+            &output_type,
+            vec![Some(0), Some(0), Some(0), None]
+        );
+    }
+
+    #[test]
+    fn test_cast_floating_to_decimals() {
+        // A strict cast of 1.1_f64 must succeed for every decimal width.
+        let targets = [
+            DataType::Decimal32(9, 3),
+            DataType::Decimal64(9, 3),
+            DataType::Decimal128(9, 3),
+            DataType::Decimal256(9, 3),
+        ];
+
+        let strict = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+        // One single-element input array is shared by every target type.
+        let floats = PrimitiveArray::<Float64Type>::from_iter(vec![Some(1.1_f64)]);
+
+        for output_type in targets {
+            assert!(can_cast_types(&DataType::Float64, &output_type));
+
+            let result = cast_with_options(&floats, &output_type, &strict);
+            assert!(
+                result.is_ok(),
+                "Failed to cast to {output_type} with: {}",
+                result.unwrap_err()
+            );
+        }
+    }
+
     #[test]
     fn test_cast_decimal128_to_decimal128_overflow() {
         let input_type = DataType::Decimal128(38, 3);
@@ -2736,8 +3311,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Cast error: Cannot cast to Decimal128(38, 38). Overflowing on 170141183460469231731687303715884105727",
-                   result.unwrap_err().to_string());
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal128(38, 38). Overflowing on 170141183460469231731687303715884105727",
+            result.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -2756,10 +3333,50 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Cast error: Cannot cast to Decimal256(76, 76). Overflowing on 170141183460469231731687303715884105727",
-                   result.unwrap_err().to_string());
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal256(76, 76). Overflowing on 170141183460469231731687303715884105727",
+            result.unwrap_err().to_string()
+        );
     }
 
+    #[test]
+    fn test_cast_decimal32_to_decimal256() {
+        let input_type = DataType::Decimal32(8, 3);
+        let output_type = DataType::Decimal256(20, 4); // scale 3 -> 4: raw values grow tenfold
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let array = create_decimal32_array(array, 8, 3).unwrap();
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &output_type,
+            vec![
+                Some(i256::from_i128(11234560_i128)),
+                Some(i256::from_i128(21234560_i128)),
+                Some(i256::from_i128(31234560_i128)),
+                None // the null input entry is expected to stay null
+            ]
+        );
+    }
+    #[test]
+    fn test_cast_decimal64_to_decimal256() {
+        let input_type = DataType::Decimal64(12, 3);
+        let output_type = DataType::Decimal256(20, 4); // scale 3 -> 4: raw values grow tenfold
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let array = create_decimal64_array(array, 12, 3).unwrap();
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &output_type,
+            vec![
+                Some(i256::from_i128(11234560_i128)),
+                Some(i256::from_i128(21234560_i128)),
+                Some(i256::from_i128(31234560_i128)),
+                None // the null input entry is expected to stay null
+            ]
+        );
+    }
     #[test]
     fn test_cast_decimal128_to_decimal256() {
         let input_type = DataType::Decimal128(20, 3);
@@ -2795,8 +3412,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Cast error: Cannot cast to Decimal128(38, 7). Overflowing on 170141183460469231731687303715884105727",
-                   result.unwrap_err().to_string());
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal128(38, 7). Overflowing on 170141183460469231731687303715884105727",
+            result.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -2814,8 +3433,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Cast error: Cannot cast to Decimal256(76, 55). Overflowing on 170141183460469231731687303715884105727",
-                   result.unwrap_err().to_string());
+        assert_eq!(
+            "Cast error: Cannot cast to Decimal256(76, 55). Overflowing on 170141183460469231731687303715884105727",
+            result.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -2956,6 +3577,22 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_decimal32_to_numeric() {
+        // Raw decimal values stored with scale 2, including one null entry.
+        let raw = vec![Some(125_i32), Some(225), Some(325), None, Some(525)];
+        let decimals = create_decimal32_array(raw, 8, 2).unwrap();
+        generate_decimal_to_numeric_cast_test_case(&decimals);
+    }
+
+    #[test]
+    fn test_cast_decimal64_to_numeric() {
+        // Raw decimal values stored with scale 2, including one null entry.
+        let raw = vec![Some(125_i64), Some(225), Some(325), None, Some(525)];
+        let decimals = create_decimal64_array(raw, 8, 2).unwrap();
+        generate_decimal_to_numeric_cast_test_case(&decimals);
+    }
+
     #[test]
     fn test_cast_decimal128_to_numeric() {
         let value_array: Vec<Option<i128>> = vec![Some(125), Some(225), Some(325), None, Some(525)];
@@ -3861,9 +4498,11 @@ mod tests {
         match casted {
             Ok(_) => panic!("expected error"),
             Err(e) => {
-                assert!(e
-                    .to_string()
-                    .contains("Cast error: Cannot cast value 'invalid' to value of Boolean type"))
+                assert!(
+                    e.to_string().contains(
+                        "Cast error: Cannot cast value 'invalid' to value of Boolean type"
+                    )
+                )
             }
         }
     }
@@ -4075,26 +4714,16 @@ mod tests {
 
     #[test]
     fn test_cast_list_i32_to_list_u16() {
-        let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data();
-
-        let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]);
-
-        // Construct a list array from the above two
-        // [[0,0,0], [-1, -2, -1], [2, 100000000]]
-        let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
-        let list_data = ArrayData::builder(list_data_type)
-            .len(3)
-            .add_buffer(value_offsets)
-            .add_child_data(value_data)
-            .build()
-            .unwrap();
-        let list_array = ListArray::from(list_data);
+        let values = vec![
+            Some(vec![Some(0), Some(0), Some(0)]),
+            Some(vec![Some(-1), Some(-2), Some(-1)]),
+            Some(vec![Some(2), Some(100000000)]),
+        ];
+        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(values);
 
-        let cast_array = cast(
-            &list_array,
-            &DataType::List(Arc::new(Field::new_list_field(DataType::UInt16, true))),
-        )
-        .unwrap();
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::UInt16, true)));
+        assert!(can_cast_types(list_array.data_type(), &target_type));
+        let cast_array = cast(&list_array, &target_type).unwrap();
 
         // For the ListArray itself, there are no null values (as there were no nulls when they went in)
         //
@@ -4441,7 +5070,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             };
             let err = cast_with_options(array, &to_type, &options).unwrap_err();
-            assert_eq!(err.to_string(), "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Second) type");
+            assert_eq!(
+                err.to_string(),
+                "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(s) type"
+            );
         }
     }
 
@@ -4483,7 +5115,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             };
             let err = cast_with_options(array, &to_type, &options).unwrap_err();
-            assert_eq!(err.to_string(), "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(Millisecond) type");
+            assert_eq!(
+                err.to_string(),
+                "Cast error: Cannot cast string '08:08:61.091323414' to value of Time32(ms) type"
+            );
         }
     }
 
@@ -4517,7 +5152,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             };
             let err = cast_with_options(array, &to_type, &options).unwrap_err();
-            assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Microsecond) type");
+            assert_eq!(
+                err.to_string(),
+                "Cast error: Cannot cast string 'Not a valid time' to value of Time64(µs) type"
+            );
         }
     }
 
@@ -4551,7 +5189,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             };
             let err = cast_with_options(array, &to_type, &options).unwrap_err();
-            assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid time' to value of Time64(Nanosecond) type");
+            assert_eq!(
+                err.to_string(),
+                "Cast error: Cannot cast string 'Not a valid time' to value of Time64(ns) type"
+            );
         }
     }
 
@@ -5339,28 +5980,9 @@ mod tests {
         assert!(c.is_null(2));
     }
 
-    #[test]
-    fn test_cast_date32_to_string() {
-        let array = Date32Array::from(vec![10000, 17890]);
-        let b = cast(&array, &DataType::Utf8).unwrap();
-        let c = b.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(&DataType::Utf8, c.data_type());
-        assert_eq!("1997-05-19", c.value(0));
-        assert_eq!("2018-12-25", c.value(1));
-    }
-
-    #[test]
-    fn test_cast_date64_to_string() {
-        let array = Date64Array::from(vec![10000 * 86400000, 17890 * 86400000]);
-        let b = cast(&array, &DataType::Utf8).unwrap();
-        let c = b.as_any().downcast_ref::<StringArray>().unwrap();
-        assert_eq!(&DataType::Utf8, c.data_type());
-        assert_eq!("1997-05-19T00:00:00", c.value(0));
-        assert_eq!("2018-12-25T00:00:00", c.value(1));
-    }
-
-    macro_rules! assert_cast_timestamp_to_string {
+    macro_rules! assert_cast {
         ($array:expr, $datatype:expr, $output_array_type: ty, $expected:expr) => {{
+            assert!(can_cast_types($array.data_type(), &$datatype));
             let out = cast(&$array, &$datatype).unwrap();
             let actual = out
                 .as_any()
@@ -5371,6 +5993,7 @@ mod tests {
             assert_eq!(actual, $expected);
         }};
         ($array:expr, $datatype:expr, $output_array_type: ty, $options:expr, $expected:expr) => {{
+            assert!(can_cast_types($array.data_type(), &$datatype));
             let out = cast_with_options(&$array, &$datatype, &$options).unwrap();
             let actual = out
                 .as_any()
@@ -5382,6 +6005,44 @@ mod tests {
         }};
     }
 
+    #[test]
+    fn test_cast_date32_to_string() {
+        let array = Date32Array::from(vec![Some(0), Some(10000), Some(13036), Some(17890), None]);
+        let expected = vec![
+            Some("1970-01-01"), // input 0
+            Some("1997-05-19"), // input 10000
+            Some("2005-09-10"), // input 13036
+            Some("2018-12-25"), // input 17890
+            None,               // the null input stays null
+        ];
+
+        assert_cast!(array, DataType::Utf8View, StringViewArray, expected);
+        assert_cast!(array, DataType::Utf8, StringArray, expected);
+        assert_cast!(array, DataType::LargeUtf8, LargeStringArray, expected);
+    }
+
+    #[test]
+    fn test_cast_date64_to_string() {
+        let array = Date64Array::from(vec![
+            Some(0),
+            Some(10000 * 86400000), // 86400000 = 24 * 60 * 60 * 1000
+            Some(13036 * 86400000),
+            Some(17890 * 86400000),
+            None,
+        ]);
+        let expected = vec![
+            Some("1970-01-01T00:00:00"), // input 0
+            Some("1997-05-19T00:00:00"),
+            Some("2005-09-10T00:00:00"),
+            Some("2018-12-25T00:00:00"),
+            None, // the null input stays null
+        ];
+
+        assert_cast!(array, DataType::Utf8View, StringViewArray, expected);
+        assert_cast!(array, DataType::Utf8, StringArray, expected);
+        assert_cast!(array, DataType::LargeUtf8, LargeStringArray, expected);
+    }
+
     #[test]
     fn test_cast_date32_to_timestamp_and_timestamp_with_timezone() {
         let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu
@@ -5584,9 +6245,9 @@ mod tests {
             None,
         ];
 
-        assert_cast_timestamp_to_string!(array, DataType::Utf8View, StringViewArray, expected);
-        assert_cast_timestamp_to_string!(array, DataType::Utf8, StringArray, expected);
-        assert_cast_timestamp_to_string!(array, DataType::LargeUtf8, LargeStringArray, expected);
+        assert_cast!(array, DataType::Utf8View, StringViewArray, expected);
+        assert_cast!(array, DataType::Utf8, StringArray, expected);
+        assert_cast!(array, DataType::LargeUtf8, LargeStringArray, expected);
     }
 
     #[test]
@@ -5608,21 +6269,21 @@ mod tests {
             Some("2018-12-25 00:00:02.001000"),
             None,
         ];
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_without_tz,
             DataType::Utf8View,
             StringViewArray,
             cast_options,
             expected
         );
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_without_tz,
             DataType::Utf8,
             StringArray,
             cast_options,
             expected
         );
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_without_tz,
             DataType::LargeUtf8,
             LargeStringArray,
@@ -5638,21 +6299,21 @@ mod tests {
             Some("2018-12-25 05:45:02.001000"),
             None,
         ];
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_with_tz,
             DataType::Utf8View,
             StringViewArray,
             cast_options,
             expected
         );
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_with_tz,
             DataType::Utf8,
             StringArray,
             cast_options,
             expected
         );
-        assert_cast_timestamp_to_string!(
+        assert_cast!(
             array_with_tz,
             DataType::LargeUtf8,
             LargeStringArray,
@@ -5892,6 +6553,38 @@ mod tests {
         assert_eq!(string_view_array.as_ref(), &expect_string_view_array);
     }
 
+    #[test]
+    fn test_binary_view_to_string_view_with_invalid_utf8() {
+        // The second element, a lone 0xff byte, is not valid UTF-8.
+        let input = BinaryViewArray::from_iter(vec![
+            Some("valid".as_bytes()),
+            Some(&[0xff]),
+            Some("utf8".as_bytes()),
+            None,
+        ]);
+
+        let target = DataType::Utf8View;
+
+        // Strict mode: the invalid element fails the whole cast.
+        let strict = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        assert!(cast_with_options(&input, &target, &strict).is_err());
+
+        // Safe mode: the invalid element becomes null, the rest is kept.
+        let lenient = CastOptions {
+            safe: true,
+            ..Default::default()
+        };
+        let converted = cast_with_options(&input, &target, &lenient).unwrap();
+        assert_eq!(converted.data_type(), &target);
+
+        let actual: Vec<_> = converted.as_string_view().iter().collect();
+        let expected = vec![Some("valid"), None, Some("utf8"), None];
+        assert_eq!(actual, expected);
+    }
+
     #[test]
     fn test_string_to_view() {
         _test_string_to_view::<i32>();
@@ -7192,8 +7885,6 @@ mod tests {
     #[test]
     fn test_cast_utf8_dict() {
         // FROM a dictionary with of Utf8 values
-        use DataType::*;
-
         let mut builder = StringDictionaryBuilder::<Int8Type>::new();
         builder.append("one").unwrap();
         builder.append_null();
@@ -7248,7 +7939,6 @@ mod tests {
 
     #[test]
     fn test_cast_dict_to_dict_bad_index_value_primitive() {
-        use DataType::*;
         // test converting from an array that has indexes of a type
         // that are out of bounds for a particular other kind of
         // index.
@@ -7276,7 +7966,6 @@ mod tests {
 
     #[test]
     fn test_cast_dict_to_dict_bad_index_value_utf8() {
-        use DataType::*;
         // Same test as test_cast_dict_to_dict_bad_index_value but use
         // string values (and encode the expected behavior here);
 
@@ -7305,8 +7994,6 @@ mod tests {
     #[test]
     fn test_cast_primitive_dict() {
         // FROM a dictionary with of INT32 values
-        use DataType::*;
-
         let mut builder = PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::new();
         builder.append(1).unwrap();
         builder.append_null();
@@ -7327,8 +8014,6 @@ mod tests {
 
     #[test]
     fn test_cast_primitive_array_to_dict() {
-        use DataType::*;
-
         let mut builder = PrimitiveBuilder::<Int32Type>::new();
         builder.append_value(1);
         builder.append_null();
@@ -7438,6 +8123,7 @@ mod tests {
         typed_test!(UInt32Array, UInt32, UInt32Type);
         typed_test!(UInt64Array, UInt64, UInt64Type);
 
+        typed_test!(Float16Array, Float16, Float16Type);
         typed_test!(Float32Array, Float32, Float32Type);
         typed_test!(Float64Array, Float64, Float64Type);
 
@@ -7445,19 +8131,29 @@ mod tests {
         typed_test!(Date64Array, Date64, Date64Type);
     }
 
-    fn cast_from_null_to_other(data_type: &DataType) {
+    fn cast_from_null_to_other_base(data_type: &DataType, is_complex: bool) {
         // Cast from null to data_type
-        {
-            let array = new_null_array(&DataType::Null, 4);
-            assert_eq!(array.data_type(), &DataType::Null);
-            let cast_array = cast(&array, data_type).expect("cast failed");
-            assert_eq!(cast_array.data_type(), data_type);
-            for i in 0..4 {
+        let array = new_null_array(&DataType::Null, 4);
+        assert_eq!(array.data_type(), &DataType::Null);
+        let cast_array = cast(&array, data_type).expect("cast failed");
+        assert_eq!(cast_array.data_type(), data_type);
+        for i in 0..4 {
+            if is_complex {
+                assert!(cast_array.logical_nulls().unwrap().is_null(i));
+            } else {
                 assert!(cast_array.is_null(i));
             }
         }
     }
 
+    fn cast_from_null_to_other(data_type: &DataType) {
+        cast_from_null_to_other_base(data_type, false); // nulls asserted via `is_null`
+    }
+
+    fn cast_from_null_to_other_complex(data_type: &DataType) {
+        cast_from_null_to_other_base(data_type, true); // nulls asserted via `logical_nulls`
+    }
+
     #[test]
     fn test_cast_null_from_and_to_variable_sized() {
         cast_from_null_to_other(&DataType::Utf8);
@@ -7501,6 +8197,23 @@ mod tests {
         // Cast null from and to struct
         let data_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
         cast_from_null_to_other(&data_type);
+
+        let target_type = DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        cast_from_null_to_other(&target_type);
+
+        let target_type =
+            DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        cast_from_null_to_other(&target_type);
+
+        let fields = UnionFields::from_fields(vec![Field::new("a", DataType::Int64, false)]);
+        let target_type = DataType::Union(fields, UnionMode::Sparse);
+        cast_from_null_to_other_complex(&target_type);
+
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            Arc::new(Field::new("item", DataType::Int32, true)),
+        );
+        cast_from_null_to_other_complex(&target_type);
     }
 
     /// Print the `DictionaryArray` `array` as a vector of strings
@@ -7695,13 +8408,11 @@ mod tests {
             );
 
             let list_array = cast(&array, expected.data_type())
-                .unwrap_or_else(|_| panic!("Failed to cast {:?} to {:?}", array, expected));
+                .unwrap_or_else(|_| panic!("Failed to cast {array:?} to {expected:?}"));
             assert_eq!(
                 list_array.as_ref(),
                 &expected,
-                "Incorrect result from casting {:?} to {:?}",
-                array,
-                expected
+                "Incorrect result from casting {array:?} to {expected:?}",
             );
         }
     }
@@ -7935,8 +8646,10 @@ mod tests {
             },
         );
         assert!(res.is_err());
-        assert!(format!("{:?}", res)
-            .contains("Cannot cast to FixedSizeList(3): value at index 1 has length 2"));
+        assert!(
+            format!("{res:?}")
+                .contains("Cannot cast to FixedSizeList(3): value at index 1 has length 2")
+        );
 
         // When safe=true (default), the cast will fill nulls for lists that are
         // too short and truncate lists that are too long.
@@ -8026,7 +8739,7 @@ mod tests {
             },
         );
         assert!(res.is_err());
-        assert!(format!("{:?}", res).contains("Can't cast value 2147483647 to type Int16"));
+        assert!(format!("{res:?}").contains("Can't cast value 2147483647 to type Int16"));
     }
 
     #[test]
@@ -8166,8 +8879,12 @@ mod tests {
 
         let new_array_result = cast(&array, &new_type.clone());
         assert!(!can_cast_types(array.data_type(), &new_type));
-        assert!(
-            matches!(new_array_result, Err(ArrowError::CastError(t)) if t == r#"Casting from Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) to Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, true) not supported"#)
+        let Err(ArrowError::CastError(t)) = new_array_result else {
+            panic!();
+        };
+        assert_eq!(
+            t,
+            r#"Casting from Map("entries": non-null Struct("key": non-null Utf8, "value": Utf8), unsorted) to Map("entries": non-null Struct("key": non-null Utf8, "value": non-null Utf8), sorted) not supported"#
         );
     }
 
@@ -8213,8 +8930,12 @@ mod tests {
 
         let new_array_result = cast(&array, &new_type.clone());
         assert!(!can_cast_types(array.data_type(), &new_type));
-        assert!(
-            matches!(new_array_result, Err(ArrowError::CastError(t)) if t == r#"Casting from Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Interval(DayTime), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) to Map(Field { name: "entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "value", data_type: Duration(Second), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, true) not supported"#)
+        let Err(ArrowError::CastError(t)) = new_array_result else {
+            panic!();
+        };
+        assert_eq!(
+            t,
+            r#"Casting from Map("entries": non-null Struct("key": non-null Utf8, "value": Interval(DayTime)), unsorted) to Map("entries": non-null Struct("key": non-null Utf8, "value": non-null Duration(s)), sorted) not supported"#
         );
     }
 
@@ -8563,7 +9284,7 @@ mod tests {
             },
         );
         let err = casted_array.unwrap_err().to_string();
-        let expected_error = "Invalid argument error: 110 is too large to store in a Decimal128 of precision 2. Max is 99";
+        let expected_error = "Invalid argument error: 1.10 is too large to store in a Decimal128 of precision 2. Max is 0.99";
         assert!(
             err.contains(expected_error),
             "did not find expected error '{expected_error}' in actual error '{err}'"
@@ -8594,11 +9315,8 @@ mod tests {
             },
         );
         let err = casted_array.unwrap_err().to_string();
-        let expected_error = "Invalid argument error: 110 is too large to store in a Decimal256 of precision 2. Max is 99";
-        assert!(
-            err.contains(expected_error),
-            "did not find expected error '{expected_error}' in actual error '{err}'"
-        );
+        let expected_error = "Invalid argument error: 1.10 is too large to store in a Decimal256 of precision 2. Max is 0.99";
+        assert_eq!(err, expected_error);
     }
 
     #[test]
@@ -8662,6 +9380,28 @@ mod tests {
             "did not find expected error '{expected_error}' in actual error '{err}'"
         );
     }
+    #[test]
+    fn test_cast_decimal256_to_f64_no_overflow() {
+        // i256::MAX must cast to a finite, positive f64 rather than a non-finite value
+        let array = vec![Some(i256::MAX)];
+        let array = create_decimal256_array(array, 76, 2).unwrap();
+        let array = Arc::new(array) as ArrayRef;
+
+        let result = cast(&array, &DataType::Float64).unwrap();
+        let result = result.as_primitive::<Float64Type>();
+        assert!(result.value(0).is_finite());
+        assert!(result.value(0) > 0.0); // sign of the input is preserved
+
+        // i256::MIN must cast to a finite, negative f64 rather than a non-finite value
+        let array = vec![Some(i256::MIN)];
+        let array = create_decimal256_array(array, 76, 2).unwrap();
+        let array = Arc::new(array) as ArrayRef;
+
+        let result = cast(&array, &DataType::Float64).unwrap();
+        let result = result.as_primitive::<Float64Type>();
+        assert!(result.value(0).is_finite());
+        assert!(result.value(0) < 0.0); // sign of the input is preserved
+    }
 
     #[test]
     fn test_cast_decimal128_to_decimal128_negative_scale() {
@@ -8691,6 +9431,15 @@ mod tests {
         assert_eq!("3123460", decimal_arr.value_as_string(2));
     }
 
+    #[test]
+    fn decimal128_min_max_to_f64() {
+        // Decimal128 i128::MIN/MAX (scale 0) must cast to the same f64 as a native `as f64`
+        let bounds = vec![Some(i128::MIN), Some(i128::MAX)];
+        let array = create_decimal128_array(bounds, 38, 0).unwrap();
+        let expected = Float64Array::from(vec![i128::MIN as f64, i128::MAX as f64]);
+        assert_eq!(cast(&array, &DataType::Float64).unwrap().as_ref(), &expected);
+    }
+
     #[test]
     fn test_cast_numeric_to_decimal128_negative() {
         let decimal_type = DataType::Decimal128(38, -1);
@@ -9090,7 +9839,7 @@ mod tests {
                 Some(array.value_as_string(i))
             };
             let actual = actual.as_ref().map(|s| s.as_ref());
-            assert_eq!(*expected, actual, "Expected at position {}", i);
+            assert_eq!(*expected, actual, "Expected at position {i}");
         }
     }
 
@@ -9119,16 +9868,20 @@ mod tests {
             format_options: FormatOptions::default(),
         };
         let casted_err = cast_with_options(&array, &output_type, &option).unwrap_err();
-        assert!(casted_err
-            .to_string()
-            .contains("Cannot cast string '4.4.5' to value of Decimal128(38, 10) type"));
+        assert!(
+            casted_err
+                .to_string()
+                .contains("Cannot cast string '4.4.5' to value of Decimal128(38, 10) type")
+        );
 
         let str_array = StringArray::from(vec![". 0.123"]);
         let array = Arc::new(str_array) as ArrayRef;
         let casted_err = cast_with_options(&array, &output_type, &option).unwrap_err();
-        assert!(casted_err
-            .to_string()
-            .contains("Cannot cast string '. 0.123' to value of Decimal128(38, 10) type"));
+        assert!(
+            casted_err
+                .to_string()
+                .contains("Cannot cast string '. 0.123' to value of Decimal128(38, 10) type")
+        );
     }
 
     fn test_cast_string_to_decimal128_overflow(overflow_array: ArrayRef) {
@@ -9172,7 +9925,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", err.unwrap_err().to_string());
+        assert_eq!(
+            "Invalid argument error: 1000.00000000 is too large to store in a Decimal128 of precision 10. Max is 99.99999999",
+            err.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -9255,7 +10011,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. Max is 9999999999", err.unwrap_err().to_string());
+        assert_eq!(
+            "Invalid argument error: 1000.00000000 is too large to store in a Decimal256 of precision 10. Max is 99.99999999",
+            err.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -9513,6 +10272,14 @@ mod tests {
 
     #[test]
     fn test_cast_decimal_to_string() {
+        assert!(can_cast_types(
+            &DataType::Decimal32(9, 4),
+            &DataType::Utf8View
+        ));
+        assert!(can_cast_types(
+            &DataType::Decimal64(16, 4),
+            &DataType::Utf8View
+        ));
         assert!(can_cast_types(
             &DataType::Decimal128(10, 4),
             &DataType::Utf8View
@@ -9557,7 +10324,7 @@ mod tests {
             }
         }
 
-        let array128: Vec<Option<i128>> = vec![
+        let array32: Vec<Option<i32>> = vec![
             Some(1123454),
             Some(2123456),
             Some(-3123453),
@@ -9568,11 +10335,40 @@ mod tests {
             Some(-123456789),
             None,
         ];
+        let array64: Vec<Option<i64>> = array32.iter().map(|num| num.map(|x| x as i64)).collect();
+        let array128: Vec<Option<i128>> =
+            array64.iter().map(|num| num.map(|x| x as i128)).collect();
         let array256: Vec<Option<i256>> = array128
             .iter()
             .map(|num| num.map(i256::from_i128))
             .collect();
 
+        test_decimal_to_string::<Decimal32Type, i32>(
+            DataType::Utf8View,
+            create_decimal32_array(array32.clone(), 7, 3).unwrap(),
+        );
+        test_decimal_to_string::<Decimal32Type, i32>(
+            DataType::Utf8,
+            create_decimal32_array(array32.clone(), 7, 3).unwrap(),
+        );
+        test_decimal_to_string::<Decimal32Type, i64>(
+            DataType::LargeUtf8,
+            create_decimal32_array(array32, 7, 3).unwrap(),
+        );
+
+        test_decimal_to_string::<Decimal64Type, i32>(
+            DataType::Utf8View,
+            create_decimal64_array(array64.clone(), 7, 3).unwrap(),
+        );
+        test_decimal_to_string::<Decimal64Type, i32>(
+            DataType::Utf8,
+            create_decimal64_array(array64.clone(), 7, 3).unwrap(),
+        );
+        test_decimal_to_string::<Decimal64Type, i64>(
+            DataType::LargeUtf8,
+            create_decimal64_array(array64, 7, 3).unwrap(),
+        );
+
         test_decimal_to_string::<Decimal128Type, i32>(
             DataType::Utf8View,
             create_decimal128_array(array128.clone(), 7, 3).unwrap(),
@@ -9623,7 +10419,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal128 of precision 7. Max is 9999999", err.unwrap_err().to_string());
+        assert_eq!(
+            "Invalid argument error: 1234567.000 is too large to store in a Decimal128 of precision 7. Max is 9999.999",
+            err.unwrap_err().to_string()
+        );
     }
 
     #[test]
@@ -9649,7 +10448,10 @@ mod tests {
                 format_options: FormatOptions::default(),
             },
         );
-        assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string());
+        assert_eq!(
+            "Invalid argument error: 1234567.000 is too large to store in a Decimal256 of precision 7. Max is 9999.999",
+            err.unwrap_err().to_string()
+        );
     }
 
     /// helper function to test casting from duration to interval
@@ -10238,7 +11040,7 @@ mod tests {
         let to_type = DataType::Utf8;
         let result = cast(&struct_array, &to_type);
         assert_eq!(
-            r#"Cast error: Casting from Struct([Field { name: "a", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]) to Utf8 not supported"#,
+            r#"Cast error: Casting from Struct("a": non-null Boolean) to Utf8 not supported"#,
             result.unwrap_err().to_string()
         );
     }
@@ -10249,11 +11051,170 @@ mod tests {
         let to_type = DataType::Struct(vec![Field::new("a", DataType::Boolean, false)].into());
         let result = cast(&array, &to_type);
         assert_eq!(
-            r#"Cast error: Casting from Utf8 to Struct([Field { name: "a", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]) not supported"#,
+            r#"Cast error: Casting from Utf8 to Struct("a": non-null Boolean) not supported"#,
             result.unwrap_err().to_string()
         );
     }
 
+    #[test]
+    fn test_cast_struct_with_different_field_order() {
+        // Test slow path: target reorders the fields (c, a, b) and casts a and b to Utf8
+        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
+        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
+        let string = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "qux"]));
+
+        let struct_array = StructArray::from(vec![
+            (
+                Arc::new(Field::new("a", DataType::Boolean, false)),
+                boolean.clone() as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("b", DataType::Int32, false)),
+                int.clone() as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("c", DataType::Utf8, false)),
+                string.clone() as ArrayRef,
+            ),
+        ]);
+
+        // Target has fields in different order: c, a, b instead of a, b, c
+        let to_type = DataType::Struct(
+            vec![
+                Field::new("c", DataType::Utf8, false),
+                Field::new("a", DataType::Utf8, false), // Boolean to Utf8
+                Field::new("b", DataType::Utf8, false), // Int32 to Utf8
+            ]
+            .into(),
+        );
+
+        let result = cast(&struct_array, &to_type).unwrap();
+        let result_struct = result.as_struct();
+
+        assert_eq!(result_struct.data_type(), &to_type);
+        assert_eq!(result_struct.num_columns(), 3);
+
+        // Verify field "c" (originally position 2, now position 0) remains Utf8
+        let c_column = result_struct.column(0).as_string::<i32>();
+        assert_eq!(
+            c_column.into_iter().flatten().collect::<Vec<_>>(),
+            vec!["foo", "bar", "baz", "qux"]
+        );
+
+        // Verify field "a" (originally position 0, now position 1) was cast from Boolean to Utf8
+        let a_column = result_struct.column(1).as_string::<i32>();
+        assert_eq!(
+            a_column.into_iter().flatten().collect::<Vec<_>>(),
+            vec!["false", "false", "true", "true"]
+        );
+
+        // Verify field "b" (originally position 1, now position 2) was cast from Int32 to Utf8
+        let b_column = result_struct.column(2).as_string::<i32>();
+        assert_eq!(
+            b_column.into_iter().flatten().collect::<Vec<_>>(),
+            vec!["42", "28", "19", "31"]
+        );
+    }
+
+    #[test]
+    fn test_cast_struct_with_missing_field() {
+        // Casting must fail when the target has a field ("b") that the source struct lacks
+        let boolean = Arc::new(BooleanArray::from(vec![false, true]));
+        let struct_array = StructArray::from(vec![(
+            Arc::new(Field::new("a", DataType::Boolean, false)),
+            boolean.clone() as ArrayRef,
+        )]);
+
+        let to_type = DataType::Struct(
+            vec![
+                Field::new("a", DataType::Utf8, false),
+                Field::new("b", DataType::Int32, false), // Field "b" doesn't exist in source
+            ]
+            .into(),
+        );
+
+        let result = cast(&struct_array, &to_type);
+        assert!(result.is_err());
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Invalid argument error: Incorrect number of arrays for StructArray fields, expected 2 got 1"
+        );
+    }
+
+    #[test]
+    fn test_cast_struct_with_subset_of_fields() {
+        // Casting to a struct with fewer fields keeps only the named subset, in target order
+        let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true]));
+        let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
+        let string = Arc::new(StringArray::from(vec!["foo", "bar", "baz", "qux"]));
+
+        let struct_array = StructArray::from(vec![
+            (
+                Arc::new(Field::new("a", DataType::Boolean, false)),
+                boolean.clone() as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("b", DataType::Int32, false)),
+                int.clone() as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("c", DataType::Utf8, false)),
+                string.clone() as ArrayRef,
+            ),
+        ]);
+
+        // Target has only fields "c" and "a", omitting "b"
+        let to_type = DataType::Struct(
+            vec![
+                Field::new("c", DataType::Utf8, false),
+                Field::new("a", DataType::Utf8, false),
+            ]
+            .into(),
+        );
+
+        let result = cast(&struct_array, &to_type).unwrap();
+        let result_struct = result.as_struct();
+
+        assert_eq!(result_struct.data_type(), &to_type);
+        assert_eq!(result_struct.num_columns(), 2);
+
+        // Verify field "c" (target position 0) remains Utf8
+        let c_column = result_struct.column(0).as_string::<i32>();
+        assert_eq!(
+            c_column.into_iter().flatten().collect::<Vec<_>>(),
+            vec!["foo", "bar", "baz", "qux"]
+        );
+
+        // Verify field "a" (target position 1) was cast from Boolean to Utf8
+        let a_column = result_struct.column(1).as_string::<i32>();
+        assert_eq!(
+            a_column.into_iter().flatten().collect::<Vec<_>>(),
+            vec!["false", "false", "true", "true"]
+        );
+    }
+
+    #[test]
+    fn test_can_cast_struct_rename_field() {
+        // A renamed field ("b" -> "c") must not make can_cast_types reject the struct cast
+        let from_type = DataType::Struct(
+            vec![
+                Field::new("a", DataType::Int32, false),
+                Field::new("b", DataType::Utf8, false),
+            ]
+            .into(),
+        );
+
+        let to_type = DataType::Struct(
+            vec![
+                Field::new("a", DataType::Int64, false),
+                Field::new("c", DataType::Boolean, false), // Field "c" not in source
+            ]
+            .into(),
+        );
+
+        assert!(can_cast_types(&from_type, &to_type));
+    }
+
     fn run_decimal_cast_test_case_between_multiple_types(t: DecimalCastTestConfig) {
         run_decimal_cast_test_case::<Decimal128Type, Decimal128Type>(t.clone());
         run_decimal_cast_test_case::<Decimal128Type, Decimal256Type>(t.clone());
@@ -10289,7 +11250,7 @@ mod tests {
                 input_repr: 99999, // 9999.9
                 output_prec: 7,
                 output_scale: 6,
-                expected_output_repr: Err("Invalid argument error: 9999900000 is too large to store in a {} of precision 7. Max is 9999999".to_string()) // max is 9.999999
+                expected_output_repr: Err("Invalid argument error: 9999.900000 is too large to store in a {} of precision 7. Max is 9.999999".to_string()) // max is 9.999999
             },
             // increase precision, decrease scale, always infallible
             DecimalCastTestConfig {
@@ -10334,7 +11295,7 @@ mod tests {
                 input_repr: 9999999, // 99.99999
                 output_prec: 8,
                 output_scale: 7,
-                expected_output_repr: Err("Invalid argument error: 999999900 is too large to store in a {} of precision 8. Max is 99999999".to_string()) // max is 9.9999999
+                expected_output_repr: Err("Invalid argument error: 99.9999900 is too large to store in a {} of precision 8. Max is 9.9999999".to_string()) // max is 9.9999999
             },
             // decrease precision, decrease scale, safe, infallible
             DecimalCastTestConfig {
@@ -10361,7 +11322,7 @@ mod tests {
                 input_repr: 9999999, // 99.99999
                 output_prec: 4,
                 output_scale: 3,
-                expected_output_repr: Err("Invalid argument error: 100000 is too large to store in a {} of precision 4. Max is 9999".to_string()) // max is 9.999
+                expected_output_repr: Err("Invalid argument error: 100.000 is too large to store in a {} of precision 4. Max is 9.999".to_string()) // max is 9.999
             },
             // decrease precision, same scale, safe
             DecimalCastTestConfig {
@@ -10379,7 +11340,7 @@ mod tests {
                 input_repr: 9999999, // 99.99999
                 output_prec: 6,
                 output_scale: 5,
-                expected_output_repr: Err("Invalid argument error: 9999999 is too large to store in a {} of precision 6. Max is 999999".to_string()) // max is 9.99999
+                expected_output_repr: Err("Invalid argument error: 99.99999 is too large to store in a {} of precision 6. Max is 9.99999".to_string()) // max is 9.99999
             },
             // same precision, increase scale, safe
             DecimalCastTestConfig {
@@ -10397,7 +11358,7 @@ mod tests {
                 input_repr: 123456, // 12.3456
                 output_prec: 7,
                 output_scale: 6,
-                expected_output_repr: Err("Invalid argument error: 12345600 is too large to store in a {} of precision 7. Max is 9999999".to_string()) // max is 9.99999
+                expected_output_repr: Err("Invalid argument error: 12.345600 is too large to store in a {} of precision 7. Max is 9.999999".to_string()) // max is 9.999999
             },
             // same precision, decrease scale, infallible
             DecimalCastTestConfig {
@@ -10492,7 +11453,7 @@ mod tests {
                 input_repr: -12345,
                 output_prec: 6,
                 output_scale: 5,
-                expected_output_repr: Err("Invalid argument error: -1234500 is too small to store in a {} of precision 6. Min is -999999".to_string())
+                expected_output_repr: Err("Invalid argument error: -12.34500 is too small to store in a {} of precision 6. Min is -9.99999".to_string())
             },
         ];
 
@@ -10543,7 +11504,7 @@ mod tests {
                 output_prec: 6,
                 output_scale: 3,
                 expected_output_repr:
-                    Err("Invalid argument error: 1000000 is too large to store in a {} of precision 6. Max is 999999".to_string()),
+                    Err("Invalid argument error: 1000.000 is too large to store in a {} of precision 6. Max is 999.999".to_string()),
             },
         ];
         for t in test_cases {
@@ -10564,8 +11525,10 @@ mod tests {
             ..Default::default()
         };
         let result = cast_with_options(&array, &output_type, &options);
-        assert_eq!(result.unwrap_err().to_string(),
-                   "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999");
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Invalid argument error: 1234567.89 is too large to store in a Decimal128 of precision 6. Max is 9999.99"
+        );
     }
 
     #[test]
@@ -10610,8 +11573,10 @@ mod tests {
             ..Default::default()
         };
         let result = cast_with_options(&array, &output_type, &options);
-        assert_eq!(result.unwrap_err().to_string(),
-                   "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. Max is 999999");
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Invalid argument error: 12345.68 is too large to store in a Decimal128 of precision 6. Max is 9999.99"
+        );
     }
 
     #[test]
@@ -10627,8 +11592,10 @@ mod tests {
             ..Default::default()
         };
         let result = cast_with_options(&array, &output_type, &options);
-        assert_eq!(result.unwrap_err().to_string(),
-                   "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999");
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Invalid argument error: 1234567.890 is too large to store in a Decimal128 of precision 6. Max is 999.999"
+        );
     }
 
     #[test]
@@ -10643,9 +11610,11 @@ mod tests {
             safe: false,
             ..Default::default()
         };
-        let result = cast_with_options(&array, &output_type, &options);
-        assert_eq!(result.unwrap_err().to_string(),
-                   "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999");
+        let result = cast_with_options(&array, &output_type, &options).unwrap_err();
+        assert_eq!(
+            result.to_string(),
+            "Invalid argument error: 1234567.89 is too large to store in a Decimal256 of precision 6. Max is 9999.99"
+        );
     }
 
     #[test]
@@ -10684,4 +11653,802 @@ mod tests {
         )) as ArrayRef;
         assert_eq!(*fixed_array, *r);
     }
+
+    #[test]
+    fn test_cast_decimal_error_output() {
+        let array = Int64Array::from(vec![1]); // positive input: exercises the "too large" message
+        let error = cast_with_options(
+            &array,
+            &DataType::Decimal32(1, 1),
+            &CastOptions {
+                safe: false, // an Err is expected here; it is taken with `unwrap_err` below
+                format_options: FormatOptions::default(),
+            },
+        )
+        .unwrap_err();
+        assert_eq!(
+            error.to_string(),
+            "Invalid argument error: 1.0 is too large to store in a Decimal32 of precision 1. Max is 0.9"
+        );
+
+        let array = Int64Array::from(vec![-1]); // negative input: exercises the "too small" message
+        let error = cast_with_options(
+            &array,
+            &DataType::Decimal32(1, 1),
+            &CastOptions {
+                safe: false, // same options as the first cast
+                format_options: FormatOptions::default(),
+            },
+        )
+        .unwrap_err();
+        assert_eq!(
+            error.to_string(),
+            "Invalid argument error: -1.0 is too small to store in a Decimal32 of precision 1. Min is -0.9"
+        );
+    }
+
+    #[test]
+    fn test_run_end_encoded_to_primitive() {
+        // Build a RunEndEncoded array whose logical content is [1, 1, 2, 2, 2, 3]
+        let ends = Int32Array::from(vec![2, 5, 6]);
+        let run_values = Int32Array::from(vec![1, 2, 3]);
+        let ree_array = RunArray::<Int32Type>::try_new(&ends, &run_values).unwrap();
+        let input = Arc::new(ree_array) as ArrayRef;
+        // Cast the run-end encoded input to Int64
+        let cast_output = cast(&input, &DataType::Int64).unwrap();
+        // The output is downcast to a flat Int64Array with one value per logical element
+        let flat_int64_array = cast_output.as_any().downcast_ref::<Int64Array>().unwrap();
+        assert_eq!(
+            flat_int64_array.values(),
+            &[1i64, 1i64, 2i64, 2i64, 2i64, 3i64]
+        );
+    }
+
+    #[test]
+    fn test_sliced_run_end_encoded_to_primitive() {
+        let ends = Int32Array::from(vec![2, 5, 6]);
+        let run_values = Int32Array::from(vec![1, 2, 3]);
+        // Unsliced logical content: [1, 1, 2, 2, 2, 3]
+        let full = RunArray::<Int32Type>::try_new(&ends, &run_values).unwrap();
+        let sliced = full.slice(3, 3); // logical elements 3..6: [2, 2, 3]
+        let input = Arc::new(sliced) as ArrayRef;
+
+        let cast_output = cast(&input, &DataType::Int64).unwrap();
+        let int64_array = cast_output.as_primitive::<Int64Type>();
+        assert_eq!(int64_array.values(), &[2, 2, 3]);
+    }
+
+    #[test]
+    fn test_run_end_encoded_to_string() {
+        // RunEndEncoded input with logical content [10, 10, 20, 30, 30]
+        let run_ends = Int32Array::from(vec![2, 3, 5]);
+        let values = Int32Array::from(vec![10, 20, 30]);
+        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
+        let array_ref = Arc::new(run_array) as ArrayRef;
+
+        // Cast to String
+        let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
+
+        // The result is downcast to a flat StringArray (it is not a RunArray)
+        let result_array = cast_result.as_any().downcast_ref::<StringArray>().unwrap();
+        // Compare all five logical elements, including the final run of "30"
+        let expected = StringArray::from(vec!["10", "10", "20", "30", "30"]);
+        assert_eq!(result_array, &expected);
+    }
+
+    #[test]
+    fn test_primitive_to_run_end_encoded() {
+        // Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3]
+        let source_array = Int32Array::from(vec![1, 1, 2, 2, 2, 3]);
+        let array_ref = Arc::new(source_array) as ArrayRef;
+
+        // Cast to RunEndEncoded with Int32 run ends and Int32 values
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+
+        // Verify the result is a RunArray with Int32 run ends (the unwrap panics otherwise)
+        let result_run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .unwrap();
+
+        // Check run structure: one run end per group of equal neighbours -> [2, 5, 6]
+        assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]);
+
+        // Check values: one entry per run -> [1, 2, 3]
+        let values_array = result_run_array.values().as_primitive::<Int32Type>();
+        assert_eq!(values_array.values(), &[1, 2, 3]);
+    }
+
+    #[test]
+    fn test_primitive_to_run_end_encoded_with_nulls() {
+        let source_array = Int32Array::from(vec![
+            Some(1),
+            Some(1),
+            None,
+            None,
+            Some(2),
+            Some(2),
+            Some(3),
+            Some(3),
+            None,
+            None,
+            Some(4),
+            Some(4),
+            Some(5),
+            Some(5),
+            None,
+            None,
+        ]);
+        let array_ref = Arc::new(source_array) as ArrayRef; // 16 elements: 8 pairs, 3 of them null pairs
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+        let result_run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .unwrap();
+        assert_eq!(
+            result_run_array.run_ends().values(),
+            &[2, 4, 6, 8, 10, 12, 14, 16]
+        ); // every pair, null or not, is expected to become its own run
+        assert_eq!(
+            result_run_array
+                .values()
+                .as_primitive::<Int32Type>()
+                .values(),
+            &[1, 0, 2, 3, 0, 4, 5, 0]
+        ); // raw value buffer; the 0 entries line up with the null pairs of the input
+        assert_eq!(result_run_array.values().null_count(), 3); // one null value per null pair
+    }
+
+    #[test]
+    fn test_primitive_to_run_end_encoded_with_nulls_consecutive() {
+        let source_array = Int64Array::from(vec![
+            Some(1),
+            Some(1),
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            Some(4),
+            Some(20),
+            Some(500),
+            Some(500),
+            None,
+            None,
+        ]);
+        let array_ref = Arc::new(source_array) as ArrayRef; // 16 elements with a block of 8 consecutive nulls
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int16, false)),
+            Arc::new(Field::new("values", DataType::Int64, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+        let result_run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int16Type>>()
+            .unwrap();
+        assert_eq!(
+            result_run_array.run_ends().values(),
+            &[2, 10, 11, 12, 14, 16]
+        ); // the 8 consecutive nulls are expected to collapse into the single run ending at 10
+        assert_eq!(
+            result_run_array
+                .values()
+                .as_primitive::<Int64Type>()
+                .values(),
+            &[1, 0, 4, 20, 500, 0]
+        ); // raw value buffer; the 0 entries line up with the two null runs
+        assert_eq!(result_run_array.values().null_count(), 2); // one null value per null run
+    }
+
+    #[test]
+    fn test_string_to_run_end_encoded() {
+        // Create a String array with repeated values: ["a", "a", "b", "c", "c"]
+        let source_array = StringArray::from(vec!["a", "a", "b", "c", "c"]);
+        let array_ref = Arc::new(source_array) as ArrayRef;
+
+        // Cast to RunEndEncoded with Int32 run ends and Utf8 values
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+
+        // Verify the result is a RunArray with Int32 run ends (the unwrap panics otherwise)
+        let result_run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .unwrap();
+
+        // Check run structure: runs should end at positions [2, 3, 5]
+        assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]);
+
+        // Check values: one string per run -> ["a", "b", "c"]
+        let values_array = result_run_array.values().as_string::<i32>();
+        assert_eq!(values_array.value(0), "a");
+        assert_eq!(values_array.value(1), "b");
+        assert_eq!(values_array.value(2), "c");
+    }
+
+    #[test]
+    fn test_empty_array_to_run_end_encoded() {
+        // Create an empty Int32 array (zero elements)
+        let source_array = Int32Array::from(Vec::<i32>::new());
+        let array_ref = Arc::new(source_array) as ArrayRef;
+
+        // Cast to RunEndEncoded with Int32 run ends and Int32 values
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+
+        // Verify the result is a RunArray with Int32 run ends (the unwrap panics otherwise)
+        let result_run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int32Type>>()
+            .unwrap();
+
+        // An empty input must produce no runs: both child arrays have length 0
+        assert_eq!(result_run_array.run_ends().len(), 0);
+        assert_eq!(result_run_array.values().len(), 0);
+    }
+
+    #[test]
+    fn test_run_end_encoded_with_nulls() {
+        // Create a RunEndEncoded array with nulls: [1, 1, null, 2, 2]
+        let run_ends = Int32Array::from(vec![2, 3, 5]);
+        let values = Int32Array::from(vec![Some(1), None, Some(2)]);
+        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
+        let array_ref = Arc::new(run_array) as ArrayRef;
+
+        // Cast to String
+        let cast_result = cast(&array_ref, &DataType::Utf8).unwrap();
+
+        // Verify the flat StringArray result element by element, the null run included
+        let string_array = cast_result.as_any().downcast_ref::<StringArray>().unwrap();
+        let expected = StringArray::from(vec![Some("1"), Some("1"), None, Some("2"), Some("2")]);
+        assert_eq!(string_array.null_count(), 1);
+        assert_eq!(string_array, &expected);
+    }
+
+    #[test]
+    fn test_different_index_types() {
+        // Test with Int16 index type; source logical content is [1, 1, 2, 3, 3]
+        let source_array = Int32Array::from(vec![1, 1, 2, 3, 3]);
+        let array_ref = Arc::new(source_array) as ArrayRef;
+
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int16, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+        assert_eq!(cast_result.data_type(), &target_type);
+
+        // Verify the cast worked correctly: values are [1, 2, 3]
+        // and run-ends are [2, 3, 5], stored as i16
+        let run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int16Type>>()
+            .unwrap();
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(0), 1);
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(1), 2);
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(2), 3);
+        assert_eq!(run_array.run_ends().values(), &[2i16, 3i16, 5i16]);
+
+        // Test again with Int64 index type, reusing the same source array
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int64, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        let cast_result = cast(&array_ref, &target_type).unwrap();
+        assert_eq!(cast_result.data_type(), &target_type);
+
+        // Verify the cast worked correctly: values are [1, 2, 3]
+        // and run-ends are [2, 3, 5], stored as i64
+        let run_array = cast_result
+            .as_any()
+            .downcast_ref::<RunArray<Int64Type>>()
+            .unwrap();
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(0), 1);
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(1), 2);
+        assert_eq!(run_array.values().as_primitive::<Int32Type>().value(2), 3);
+        assert_eq!(run_array.run_ends().values(), &[2i64, 3i64, 5i64]);
+    }
+
+    #[test]
+    fn test_unsupported_cast_to_run_end_encoded() {
+        // Create a Struct array with a single non-nullable Int32 child named "item"
+        let field = Field::new("item", DataType::Int32, false);
+        let struct_array = StructArray::from(vec![(
+            Arc::new(field),
+            Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
+        )]);
+        let array_ref = Arc::new(struct_array) as ArrayRef;
+
+        // NOTE(review): despite the test name, no RunEndEncoded type is involved here.
+        // The cast goes from Struct to FixedSizeBinary(10), and the assertion below
+        // expects that cast to be rejected.
+        let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10));
+
+        // Only the failure itself is checked, not the error message
+        assert!(cast_result.is_err());
+    }
+
+    /// Test casting RunEndEncoded<Int64, String> to RunEndEncoded<Int16, String> should fail
+    #[test]
+    fn test_cast_run_end_encoded_int64_to_int16_should_fail() {
+        // Construct a valid REE array with Int64 run-ends
+        let run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]); // all exceed i16::MAX (32767)
+        let values = StringArray::from(vec!["a", "b", "c"]);
+
+        let ree_array = RunArray::<Int64Type>::try_new(&run_ends, &values).unwrap();
+        let array_ref = Arc::new(ree_array) as ArrayRef;
+
+        // Attempt to cast to RunEndEncoded<Int16, Utf8>
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int16, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let cast_options = CastOptions {
+            safe: false, // This should make it fail instead of returning nulls
+            format_options: FormatOptions::default(),
+        };
+
+        // This should fail due to run-end overflow
+        let result: Result<Arc<dyn Array + 'static>, ArrowError> =
+            cast_with_options(&array_ref, &target_type, &cast_options);
+
+        let e = result.expect_err("Cast should have failed but succeeded");
+        assert!(
+            e.to_string()
+                .contains("Cast error: Can't cast value 100000 to type Int16")
+        ); // the message names the first run end, 100000
+    }
+
+    #[test]
+    fn test_cast_run_end_encoded_int64_to_int16_with_safe_should_fail_with_null_invalid_error() {
+        // Construct a valid REE array with Int64 run-ends
+        let run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]); // all exceed i16::MAX (32767)
+        let values = StringArray::from(vec!["a", "b", "c"]);
+
+        let ree_array = RunArray::<Int64Type>::try_new(&run_ends, &values).unwrap();
+        let array_ref = Arc::new(ree_array) as ArrayRef;
+
+        // Attempt to cast to RunEndEncoded<Int16, Utf8>
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int16, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let cast_options = CastOptions {
+            safe: true, // presumably turns the overflowing run ends into nulls instead of erroring
+            format_options: FormatOptions::default(),
+        };
+
+        // Still an error with safe = true: per the message below, the cast run_ends hold nulls
+        let result: Result<Arc<dyn Array + 'static>, ArrowError> =
+            cast_with_options(&array_ref, &target_type, &cast_options);
+        let e = result.expect_err("Cast should have failed but succeeded");
+        assert!(
+            e.to_string()
+                .contains("Invalid argument error: Found null values in run_ends array. The run_ends array should not have null values.")
+        );
+    }
+
+    /// Test casting RunEndEncoded<Int16, String> to RunEndEncoded<Int64, String> should succeed
+    #[test]
+    fn test_cast_run_end_encoded_int16_to_int64_should_succeed() {
+        // Construct a valid REE array with Int16 run-ends
+        let run_ends = Int16Array::from(vec![2, 5, 8]); // values that fit in Int16
+        let values = StringArray::from(vec!["a", "b", "c"]);
+
+        let ree_array = RunArray::<Int16Type>::try_new(&run_ends, &values).unwrap();
+        let array_ref = Arc::new(ree_array) as ArrayRef;
+
+        // Attempt to cast to RunEndEncoded<Int64, Utf8> (upcast should succeed)
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int64, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let cast_options = CastOptions {
+            safe: false, // any failure would surface as an Err rather than as nulls
+            format_options: FormatOptions::default(),
+        };
+
+        // This should succeed due to valid upcast
+        let result: Result<Arc<dyn Array + 'static>, ArrowError> =
+            cast_with_options(&array_ref, &target_type, &cast_options);
+
+        let array_ref = result.expect("Cast should have succeeded but failed");
+        // Downcast to RunArray<Int64Type>
+        let run_array = array_ref
+            .as_any()
+            .downcast_ref::<RunArray<Int64Type>>()
+            .unwrap();
+
+        // Run ends keep their numeric values and are now stored as i64
+        // The string values are expected to be carried over unchanged
+        assert_eq!(run_array.run_ends().values(), &[2i64, 5i64, 8i64]);
+        assert_eq!(run_array.values().as_string::<i32>().value(0), "a");
+        assert_eq!(run_array.values().as_string::<i32>().value(1), "b");
+        assert_eq!(run_array.values().as_string::<i32>().value(2), "c");
+    }
+
+    #[test]
+    fn test_cast_run_end_encoded_dictionary_to_run_end_encoded() {
+        // Dictionary input; keys over ["a", "b", "c"] give logical content b,b,b,a,a,a,c,c,c
+        let values = StringArray::from_iter([Some("a"), Some("b"), Some("c")]);
+        let keys = UInt64Array::from_iter(vec![1, 1, 1, 0, 0, 0, 2, 2, 2]);
+        let array_ref = Arc::new(DictionaryArray::new(keys, Arc::new(values))) as ArrayRef;
+
+        // Attempt to cast to RunEndEncoded<Int64, Utf8>
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int64, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        let cast_options = CastOptions {
+            safe: false,
+            format_options: FormatOptions::default(),
+        };
+
+        // This should succeed
+        let result = cast_with_options(&array_ref, &target_type, &cast_options)
+            .expect("Cast should have succeeded but failed");
+
+        // One value per run, in the order the runs appear in the
+        // decoded dictionary: "b", "a", "c"
+        let run_array = result
+            .as_any()
+            .downcast_ref::<RunArray<Int64Type>>()
+            .unwrap();
+        assert_eq!(run_array.values().as_string::<i32>().value(0), "b");
+        assert_eq!(run_array.values().as_string::<i32>().value(1), "a");
+        assert_eq!(run_array.values().as_string::<i32>().value(2), "c");
+
+        // Verify the run-ends were cast correctly (run ends at 3, 6, 9)
+        assert_eq!(run_array.run_ends().values(), &[3i64, 6i64, 9i64]);
+    }
+
+    fn int32_list_values() -> Vec<Option<Vec<Option<i32>>>> {
+        vec![
+            Some(vec![Some(1), Some(2), Some(3)]), // fixture reused by several list / list-view cast tests
+            Some(vec![Some(4), Some(5), Some(6)]),
+            None, // a null list entry
+            Some(vec![Some(7), Some(8), Some(9)]),
+            Some(vec![None, Some(10)]), // a shorter list that contains a null element
+        ]
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list() {
+        let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type)); // ListView -> List must be reported castable
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected_list = ListArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got_list, &expected_list); // expected: the same fixture built directly as a ListArray
+    }
+
+    #[test]
+    fn test_cast_list_to_list_view() {
+        let list = ListArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type = DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list.data_type(), &target_type)); // List -> ListView must be reported castable
+        let cast_result = cast(&list, &target_type).unwrap();
+
+        let got_list_view = cast_result
+            .as_any()
+            .downcast_ref::<ListViewArray>()
+            .unwrap();
+        let expected_list_view =
+            ListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got_list_view, &expected_list_view); // expected: the same fixture built directly as a ListViewArray
+    }
+
+    #[test]
+    fn test_cast_large_list_view_to_large_list() {
+        let list_view =
+            LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type)); // LargeListView -> LargeList must be reported castable
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result
+            .as_any()
+            .downcast_ref::<LargeListArray>()
+            .unwrap();
+
+        let expected_list =
+            LargeListArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got_list, &expected_list); // expected: the same fixture built directly as a LargeListArray
+    }
+
+    #[test]
+    fn test_cast_large_list_to_large_list_view() {
+        let list = LargeListArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type =
+            DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list.data_type(), &target_type)); // LargeList -> LargeListView must be reported castable
+        let cast_result = cast(&list, &target_type).unwrap();
+
+        let got_list_view = cast_result
+            .as_any()
+            .downcast_ref::<LargeListViewArray>()
+            .unwrap();
+        let expected_list_view =
+            LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got_list_view, &expected_list_view); // expected: the same fixture built directly as a LargeListViewArray
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list_out_of_order() {
+        let list_view = ListViewArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            ScalarBuffer::from(vec![0, 6, 3]), // offsets: the second view starts after the third one
+            ScalarBuffer::from(vec![3, 3, 3]), // sizes: three elements per view
+            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
+            None,
+        );
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type));
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected_list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+            Some(vec![Some(1), Some(2), Some(3)]),
+            Some(vec![Some(7), Some(8), Some(9)]), // view order is kept, not the order of the child values
+            Some(vec![Some(4), Some(5), Some(6)]),
+        ]);
+        assert_eq!(got_list, &expected_list);
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list_overlapping() {
+        let list_view = ListViewArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            ScalarBuffer::from(vec![0, 0]), // offsets: both views start at child index 0
+            ScalarBuffer::from(vec![1, 2]), // sizes: the second view overlaps the first
+            Arc::new(Int32Array::from(vec![1, 2])),
+            None,
+        );
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type));
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected_list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+            Some(vec![Some(1)]),
+            Some(vec![Some(1), Some(2)]), // the shared element 1 appears in both lists
+        ]);
+        assert_eq!(got_list, &expected_list);
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list_empty() {
+        let values: Vec<Option<Vec<Option<i32>>>> = vec![]; // no list entries at all
+        let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(values.clone());
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type));
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values);
+        assert_eq!(got_list, &expected_list); // expected: an equally empty ListArray
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list_different_inner_type() {
+        let values = int32_list_values();
+        let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(values.clone());
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); // Int64 items
+        assert!(can_cast_types(list_view.data_type(), &target_type));
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+
+        let expected_list =
+            ListArray::from_iter_primitive::<Int64Type, _, _>(values.into_iter().map(|list| {
+                list.map(|list| {
+                    list.into_iter()
+                        .map(|v| v.map(|v| v as i64)) // widen each non-null element to i64
+                        .collect::<Vec<_>>()
+                })
+            }));
+        assert_eq!(got_list, &expected_list); // same lists and nulls, with Int64 elements
+    }
+
+    #[test]
+    fn test_cast_list_view_to_list_out_of_order_with_nulls() {
+        let list_view = ListViewArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            ScalarBuffer::from(vec![0, 6, 3]), // offsets: the second view starts after the third one
+            ScalarBuffer::from(vec![3, 3, 3]), // sizes: three elements per view
+            Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
+            Some(NullBuffer::from(vec![false, true, false])), // only the middle entry is valid
+        );
+        let target_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type));
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got_list = cast_result.as_any().downcast_ref::<ListArray>().unwrap();
+        let expected_list = ListArray::new(
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            OffsetBuffer::from_lengths([3, 3, 3]),
+            Arc::new(Int32Array::from(vec![1, 2, 3, 7, 8, 9, 4, 5, 6])), // child values in view order
+            Some(NullBuffer::from(vec![false, true, false])), // same validity as the input
+        );
+        assert_eq!(got_list, &expected_list);
+    }
+
+    #[test]
+    fn test_cast_list_view_to_large_list_view() {
+        let list_view = ListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type =
+            DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type)); // ListView -> LargeListView must be reported castable
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got = cast_result
+            .as_any()
+            .downcast_ref::<LargeListViewArray>()
+            .unwrap();
+
+        let expected =
+            LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got, &expected); // expected: the same fixture built directly as a LargeListViewArray
+    }
+
+    #[test]
+    fn test_cast_large_list_view_to_list_view() {
+        let list_view =
+            LargeListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        let target_type = DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        assert!(can_cast_types(list_view.data_type(), &target_type)); // LargeListView -> ListView must be reported castable
+        let cast_result = cast(&list_view, &target_type).unwrap();
+        let got = cast_result
+            .as_any()
+            .downcast_ref::<ListViewArray>()
+            .unwrap();
+
+        let expected = ListViewArray::from_iter_primitive::<Int32Type, _, _>(int32_list_values());
+        assert_eq!(got, &expected); // expected: the same fixture built directly as a ListViewArray
+    }
+
+    #[test]
+    fn test_cast_time32_second_to_int64() {
+        let array = Time32SecondArray::from(vec![1000, 2000, 3000]); // Time32(Second) input values
+        let array = Arc::new(array) as Arc<dyn Array>;
+        let to_type = DataType::Int64;
+        let cast_options = CastOptions::default();
+
+        assert!(can_cast_types(array.data_type(), &to_type)); // the cast must be reported as supported
+
+        let result = cast_with_options(&array, &to_type, &cast_options);
+        assert!(
+            result.is_ok(),
+            "Failed to cast Time32(Second) to Int64: {:?}",
+            result.err()
+        );
+
+        let cast_array = result.unwrap();
+        let cast_array = cast_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        assert_eq!(cast_array.value(0), 1000); // the numeric values are expected to pass through unchanged
+        assert_eq!(cast_array.value(1), 2000);
+        assert_eq!(cast_array.value(2), 3000);
+    }
+
+    #[test]
+    fn test_cast_time32_millisecond_to_int64() {
+        let array = Time32MillisecondArray::from(vec![1000, 2000, 3000]); // Time32(Millisecond) input values
+        let array = Arc::new(array) as Arc<dyn Array>;
+        let to_type = DataType::Int64;
+        let cast_options = CastOptions::default();
+
+        assert!(can_cast_types(array.data_type(), &to_type)); // the cast must be reported as supported
+
+        let result = cast_with_options(&array, &to_type, &cast_options);
+        assert!(
+            result.is_ok(),
+            "Failed to cast Time32(Millisecond) to Int64: {:?}",
+            result.err()
+        );
+
+        let cast_array = result.unwrap();
+        let cast_array = cast_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        assert_eq!(cast_array.value(0), 1000); // the numeric values are expected to pass through unchanged
+        assert_eq!(cast_array.value(1), 2000);
+        assert_eq!(cast_array.value(2), 3000);
+    }
+
+    #[test]
+    fn test_cast_string_to_time32_second_to_int64() {
+        // Mimic: select arrow_cast('03:12:44'::time, 'Time32(Second)')::bigint;
+        // raised in https://github.com/apache/datafusion/issues/19036
+        let array = StringArray::from(vec!["03:12:44"]);
+        let array = Arc::new(array) as Arc<dyn Array>;
+        let cast_options = CastOptions::default();
+
+        // 1. Cast String to Time32(Second); the unwrap panics if this step fails
+        let time32_type = DataType::Time32(TimeUnit::Second);
+        let time32_array = cast_with_options(&array, &time32_type, &cast_options).unwrap();
+
+        // 2. Cast Time32(Second) to Int64, which must also be reported as supported
+        let int64_type = DataType::Int64;
+        assert!(can_cast_types(time32_array.data_type(), &int64_type));
+
+        let result = cast_with_options(&time32_array, &int64_type, &cast_options);
+
+        assert!(
+            result.is_ok(),
+            "Failed to cast Time32(Second) to Int64: {:?}",
+            result.err()
+        );
+
+        let cast_array = result.unwrap();
+        let cast_array = cast_array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        // 03:12:44 = 3*3600 + 12*60 + 44 = 10800 + 720 + 44 = 11564 seconds since midnight
+        assert_eq!(cast_array.value(0), 11564);
+    }
+    #[test]
+    fn test_string_dicts_to_binary_view() {
+        let expected = BinaryViewArray::from_iter(vec![
+            VIEW_TEST_DATA[1],
+            VIEW_TEST_DATA[0],
+            None,
+            VIEW_TEST_DATA[3],
+            None,
+            VIEW_TEST_DATA[1],
+            VIEW_TEST_DATA[4],
+        ]);
+
+        let values_arrays: [ArrayRef; _] = [
+            Arc::new(StringArray::from_iter(VIEW_TEST_DATA)),
+            Arc::new(StringViewArray::from_iter(VIEW_TEST_DATA)),
+            Arc::new(LargeStringArray::from_iter(VIEW_TEST_DATA)),
+        ];
+        for values in values_arrays {
+            let keys =
+                Int8Array::from_iter([Some(1), Some(0), None, Some(3), None, Some(1), Some(4)]);
+            let string_dict_array =
+                DictionaryArray::<Int8Type>::try_new(keys, Arc::new(values)).unwrap();
+
+            let casted = cast(&string_dict_array, &DataType::BinaryView).unwrap(); // same target for each string-typed dictionary
+            assert_eq!(casted.as_ref(), &expected); // keys select entries 1, 0, null, 3, null, 1, 4, matching `expected`
+        }
+    }
+
+    #[test]
+    fn test_binary_dicts_to_string_view() {
+        let expected = StringViewArray::from_iter(vec![
+            VIEW_TEST_DATA[1],
+            VIEW_TEST_DATA[0],
+            None,
+            VIEW_TEST_DATA[3],
+            None,
+            VIEW_TEST_DATA[1],
+            VIEW_TEST_DATA[4],
+        ]);
+
+        let binary_values: [ArrayRef; _] = [
+            Arc::new(BinaryArray::from_iter(VIEW_TEST_DATA)),
+            Arc::new(BinaryViewArray::from_iter(VIEW_TEST_DATA)),
+            Arc::new(LargeBinaryArray::from_iter(VIEW_TEST_DATA)),
+        ];
+        for values in binary_values {
+            let keys =
+                Int8Array::from_iter([Some(1), Some(0), None, Some(3), None, Some(1), Some(4)]);
+            let binary_dict_array =
+                DictionaryArray::<Int8Type>::try_new(keys, Arc::new(values)).unwrap();
+
+            let actual = cast(&binary_dict_array, &DataType::Utf8View).unwrap(); // same target for each binary-typed dictionary
+            assert_eq!(actual.as_ref(), &expected); // keys select entries 1, 0, null, 3, null, 1, 4, matching `expected`
+        }
+    }
 }
diff --git a/arrow-cast/src/cast/run_array.rs b/arrow-cast/src/cast/run_array.rs
new file mode 100644
index 000000000000..3e14804dc824
--- /dev/null
+++ b/arrow-cast/src/cast/run_array.rs
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::cast::*;
+use arrow_ord::partition::partition;
+
+/// Casts a `RunArray` with run-end type `K` to `to_type`: run-end encoded targets cast the
+/// run ends and values separately, any other target is expanded to one value per logical row.
+pub(crate) fn run_end_encoded_cast<K: RunEndIndexType>(
+    array: &dyn Array,
+    to_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    match array.data_type() {
+        DataType::RunEndEncoded(_, _) => {
+            let run_array = array
+                .as_any()
+                .downcast_ref::<RunArray<K>>()
+                .ok_or_else(|| ArrowError::CastError("Expected RunArray".to_string()))?;
+
+            let values = run_array.values();
+            match to_type {
+                // Stay as RunEndEncoded, cast only the values
+                DataType::RunEndEncoded(target_index_field, target_value_field) => {
+                    let cast_values =
+                        cast_with_options(values, target_value_field.data_type(), cast_options)?;
+                    // Run ends/values span the unsliced array; the slice is re-applied below
+                    let run_ends_array = PrimitiveArray::<K>::from_iter_values(
+                        run_array.run_ends().values().iter().copied(),
+                    );
+                    let cast_run_ends = cast_with_options(
+                        &run_ends_array,
+                        target_index_field.data_type(),
+                        cast_options,
+                    )?;
+                    let new_run_array: ArrayRef = match target_index_field.data_type() {
+                        DataType::Int16 => {
+                            let re = cast_run_ends.as_primitive::<Int16Type>();
+                            Arc::new(RunArray::<Int16Type>::try_new(re, cast_values.as_ref())?)
+                        }
+                        DataType::Int32 => {
+                            let re = cast_run_ends.as_primitive::<Int32Type>();
+                            Arc::new(RunArray::<Int32Type>::try_new(re, cast_values.as_ref())?)
+                        }
+                        DataType::Int64 => {
+                            let re = cast_run_ends.as_primitive::<Int64Type>();
+                            Arc::new(RunArray::<Int64Type>::try_new(re, cast_values.as_ref())?)
+                        }
+                        _ => {
+                            return Err(ArrowError::CastError(
+                                "Run-end type must be i16, i32, or i64".to_string(),
+                            ));
+                        }
+                    };
+                    // Restore the source's logical slice, which `try_new` does not carry over
+                    Ok(new_run_array.slice(run_array.offset(), run_array.len()))
+                }
+
+                // Expand to logical form
+                _ => {
+                    let len = run_array.len();
+                    let offset = run_array.offset();
+                    let run_ends = run_array.run_ends().values();
+                    // Map each logical row of the slice to the index of its run
+                    let mut indices = Vec::with_capacity(len);
+                    let mut physical_idx = run_array.get_start_physical_index();
+
+                    for logical_idx in offset..offset + len {
+                        if logical_idx == run_ends[physical_idx].as_usize() {
+                            // If the logical index is equal to the (next) run end, increment the physical index,
+                            // since we are at the end of a run.
+                            physical_idx += 1;
+                        }
+                        indices.push(physical_idx as i32);
+                    }
+
+                    let taken = take(&values, &Int32Array::from_iter_values(indices), None)?;
+                    if taken.data_type() != to_type {
+                        cast_with_options(taken.as_ref(), to_type, cast_options)
+                    } else {
+                        Ok(taken)
+                    }
+                }
+            }
+        }
+
+        _ => Err(ArrowError::CastError(format!(
+            "Cannot cast array of type {:?} to RunEndEncodedArray",
+            array.data_type()
+        ))),
+    }
+}
+
+/// Run-end encodes `array` into a `RunArray` with run-end type `K`, casting the values to
+/// `value_type` first when the types differ.
+///
+/// Runs are the partitions of consecutive equal values reported by `partition`; an empty
+/// input produces an empty `RunArray`.
+///
+/// # Errors
+///
+/// Returns a `CastError` if `array` is already run-end encoded or if a run end does not fit
+/// in `K::Native`, and propagates errors from the value cast, `partition` and `take`.
+pub(crate) fn cast_to_run_end_encoded<K: RunEndIndexType>(
+    array: &ArrayRef,
+    value_type: &DataType,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    // REE arrays are handled by run_end_encoded_cast; reject them before doing any work
+    if let DataType::RunEndEncoded(_, _) = array.data_type() {
+        return Err(ArrowError::CastError(
+            "Source array is already a RunEndEncoded array, should have been handled by run_end_encoded_cast".to_string()
+        ));
+    }
+
+    // Cast the input array to the target value type if necessary
+    let cast_array = if array.data_type() == value_type {
+        array
+    } else {
+        &cast_with_options(array, value_type, cast_options)?
+    };
+
+    // Return early if the array to cast is empty
+    if cast_array.is_empty() {
+        let empty_run_ends = PrimitiveBuilder::<K>::new().finish();
+        let empty_values = make_array(ArrayData::new_empty(value_type));
+        return Ok(Arc::new(RunArray::<K>::try_new(
+            &empty_run_ends,
+            empty_values.as_ref(),
+        )?));
+    }
+
+    // Partition the array to identify runs of consecutive equal values
+    let partitions = partition(&[Arc::clone(cast_array)])?;
+    let size = partitions.len();
+    let mut run_ends_builder = PrimitiveBuilder::<K>::with_capacity(size);
+    let mut values_indexes = Vec::with_capacity(size);
+    for run in partitions.ranges() {
+        // Each run contributes its first row as the value and its exclusive end as the run end
+        values_indexes.push(run.start as u64);
+        run_ends_builder.append_value(K::Native::from_usize(run.end).ok_or_else(|| {
+            ArrowError::CastError(format!("Run end index out of range: {}", run.end))
+        })?);
+    }
+    let run_ends_array = run_ends_builder.finish();
+
+    // Build the values array by taking elements at the run start positions; 64-bit indices
+    // keep start positions beyond u32::MAX from being truncated
+    let indices = PrimitiveArray::<UInt64Type>::from_iter_values(values_indexes);
+    let values_array = take(&cast_array, &indices, None)?;
+
+    // Create and return the RunArray
+    let run_array = RunArray::<K>::try_new(&run_ends_array, values_array.as_ref())?;
+    Ok(Arc::new(run_array))
+}
diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs
index 7f22c4fd64de..77696ae0d8cc 100644
--- a/arrow-cast/src/cast/string.rs
+++ b/arrow-cast/src/cast/string.rs
@@ -107,15 +107,14 @@ fn parse_string_iter<
             .map(|x| match x {
                 Some(v) => P::parse(v).ok_or_else(|| {
                     ArrowError::CastError(format!(
-                        "Cannot cast string '{}' to value of {:?} type",
-                        v,
+                        "Cannot cast string '{v}' to value of {} type",
                         P::DATA_TYPE
                     ))
                 }),
                 None => Ok(P::Native::default()),
             })
             .collect::<Result<Vec<_>, ArrowError>>()?;
-        PrimitiveArray::new(v.into(), nulls())
+        PrimitiveArray::try_new(v.into(), nulls())?
     };
 
     Ok(Arc::new(array) as ArrayRef)
@@ -339,6 +338,14 @@ where
 
 /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same
 /// offset size so re-encoding offset is unnecessary.
+fn extend_valid_utf8<'a, B, I>(builder: &mut B, iter: I)
+where
+    B: Extend<Option<&'a str>>,
+    I: Iterator<Item = Option<&'a [u8]>>,
+{
+    builder.extend(iter.map(|value| value.and_then(|bytes| std::str::from_utf8(bytes).ok())));
+}
+
 pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
     array: &dyn Array,
     cast_options: &CastOptions,
@@ -356,11 +363,7 @@ pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
                 let mut builder =
                     GenericStringBuilder::<O>::with_capacity(array.len(), array.value_data().len());
 
-                let iter = array
-                    .iter()
-                    .map(|v| v.and_then(|v| std::str::from_utf8(v).ok()));
-
-                builder.extend(iter);
+                extend_valid_utf8(&mut builder, array.iter());
                 Ok(Arc::new(builder.finish()))
             }
             false => Err(e),
@@ -368,6 +371,25 @@ pub(crate) fn cast_binary_to_string<O: OffsetSizeTrait>(
     }
 }
 
+/// Casts a `BinaryViewArray` to a `StringViewArray`; in safe mode invalid UTF-8 becomes null.
+pub(crate) fn cast_binary_view_to_string_view(
+    array: &dyn Array,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+    let binary_views = array.as_binary_view();
+    // Try converting the whole array at once; on failure the cast options decide what happens.
+    match binary_views.clone().to_string_view() {
+        Ok(string_views) => Ok(Arc::new(string_views)),
+        Err(error) if !cast_options.safe => Err(error),
+        // Safe mode: rebuild element by element, turning invalid UTF-8 values into nulls.
+        Err(_) => {
+            let mut builder = StringViewBuilder::with_capacity(binary_views.len());
+            extend_valid_utf8(&mut builder, binary_views.iter());
+            Ok(Arc::new(builder.finish()))
+        }
+    }
+}
+
 /// Casts string to boolean
 fn cast_string_to_boolean<'a, StrArray>(
     array: &StrArray,
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index 6761ac22fa1d..bfd0f06dbef5 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -23,7 +23,8 @@
 //! record batch pretty printing.
 //!
 //! [`pretty`]: crate::pretty
-use std::fmt::{Display, Formatter, Write};
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::hash::{Hash, Hasher};
 use std::ops::Range;
 
 use arrow_array::cast::*;
@@ -53,7 +54,12 @@ pub enum DurationFormat {
 /// By default nulls are formatted as `""` and temporal types formatted
 /// according to RFC3339
 ///
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+/// # Equality
+///
+/// Most fields in [`FormatOptions`] are compared by value, except `formatter_factory`. As the trait
+/// does not require an [`Eq`] and [`Hash`] implementation, this struct only compares the pointer of
+/// the factories.
+#[derive(Debug, Clone)]
 pub struct FormatOptions<'a> {
     /// If set to `true` any formatting errors will be written to the output
     /// instead of being converted into a [`std::fmt::Error`]
@@ -74,6 +80,9 @@ pub struct FormatOptions<'a> {
     duration_format: DurationFormat,
     /// Show types in visual representation batches
     types_info: bool,
+    /// Formatter factory used to instantiate custom [`ArrayFormatter`]s. This allows users to
+    /// provide custom formatters.
+    formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
 }
 
 impl Default for FormatOptions<'_> {
@@ -82,6 +91,44 @@ impl Default for FormatOptions<'_> {
     }
 }
 
+impl PartialEq for FormatOptions<'_> {
+    fn eq(&self, other: &Self) -> bool {
+        self.safe == other.safe
+            && self.null == other.null
+            && self.date_format == other.date_format
+            && self.datetime_format == other.datetime_format
+            && self.timestamp_format == other.timestamp_format
+            && self.timestamp_tz_format == other.timestamp_tz_format
+            && self.time_format == other.time_format
+            && self.duration_format == other.duration_format
+            && self.types_info == other.types_info
+            && match (self.formatter_factory, other.formatter_factory) {
+                (Some(f1), Some(f2)) => std::ptr::eq(f1, f2),
+                (None, None) => true,
+                _ => false,
+            }
+    }
+}
+
+impl Eq for FormatOptions<'_> {}
+
+impl Hash for FormatOptions<'_> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.safe.hash(state);
+        self.null.hash(state);
+        self.date_format.hash(state);
+        self.datetime_format.hash(state);
+        self.timestamp_format.hash(state);
+        self.timestamp_tz_format.hash(state);
+        self.time_format.hash(state);
+        self.duration_format.hash(state);
+        self.types_info.hash(state);
+        self.formatter_factory
+            .map(|f| f as *const dyn ArrayFormatterFactory)
+            .hash(state);
+    }
+}
+
 impl<'a> FormatOptions<'a> {
     /// Creates a new set of format options
     pub const fn new() -> Self {
@@ -95,6 +142,7 @@ impl<'a> FormatOptions<'a> {
             time_format: None,
             duration_format: DurationFormat::ISO8601,
             types_info: false,
+            formatter_factory: None,
         }
     }
 
@@ -169,10 +217,172 @@ impl<'a> FormatOptions<'a> {
         Self { types_info, ..self }
     }
 
-    /// Returns true if type info should be included in visual representation of batches
+    /// Overrides the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s.
+    ///
+    /// Using [`None`] causes pretty-printers to use the default [`ArrayFormatter`]s.
+    pub const fn with_formatter_factory(
+        self,
+        formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
+    ) -> Self {
+        Self {
+            formatter_factory,
+            ..self
+        }
+    }
+
+    /// Returns whether formatting errors should be written to the output instead of being converted
+    /// into a [`std::fmt::Error`].
+    pub const fn safe(&self) -> bool {
+        self.safe
+    }
+
+    /// Returns the string used for displaying nulls.
+    pub const fn null(&self) -> &'a str {
+        self.null
+    }
+
+    /// Returns the format used for [`DataType::Date32`] columns.
+    pub const fn date_format(&self) -> TimeFormat<'a> {
+        self.date_format
+    }
+
+    /// Returns the format used for [`DataType::Date64`] columns.
+    pub const fn datetime_format(&self) -> TimeFormat<'a> {
+        self.datetime_format
+    }
+
+    /// Returns the format used for [`DataType::Timestamp`] columns without a timezone.
+    pub const fn timestamp_format(&self) -> TimeFormat<'a> {
+        self.timestamp_format
+    }
+
+    /// Returns the format used for [`DataType::Timestamp`] columns with a timezone.
+    pub const fn timestamp_tz_format(&self) -> TimeFormat<'a> {
+        self.timestamp_tz_format
+    }
+
+    /// Returns the format used for [`DataType::Time32`] and [`DataType::Time64`] columns.
+    pub const fn time_format(&self) -> TimeFormat<'a> {
+        self.time_format
+    }
+
+    /// Returns the [`DurationFormat`] used for duration columns.
+    pub const fn duration_format(&self) -> DurationFormat {
+        self.duration_format
+    }
+
+    /// Returns true if type info should be included in a visual representation of batches.
     pub const fn types_info(&self) -> bool {
         self.types_info
     }
+
+    /// Returns the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s.
+    pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> {
+        self.formatter_factory
+    }
+}
+
+/// Allows creating a new [`ArrayFormatter`] for a given [`Array`] and an optional [`Field`].
+///
+/// # Example
+///
+/// The example below shows how to create a custom formatter for a custom type `my_money`. Note that
+/// this example requires the `prettyprint` feature.
+///
+/// ```rust
+/// # #[cfg(feature = "prettyprint")]{
+/// use std::fmt::Write;
+/// use arrow_array::{cast::AsArray, Array, Int32Array};
+/// use arrow_cast::display::{ArrayFormatter, ArrayFormatterFactory, DisplayIndex, FormatOptions, FormatResult};
+/// use arrow_cast::pretty::pretty_format_batches_with_options;
+/// use arrow_schema::{ArrowError, Field};
+///
+/// /// A custom formatter factory that can create a formatter for the special type `my_money`.
+/// ///
+/// /// This struct could have access to some kind of extension type registry that can lookup the
+/// /// correct formatter for an extension type on-demand.
+/// #[derive(Debug)]
+/// struct MyFormatters {}
+///
+/// impl ArrayFormatterFactory for MyFormatters {
+///     fn create_array_formatter<'formatter>(
+///         &self,
+///         array: &'formatter dyn Array,
+///         options: &FormatOptions<'formatter>,
+///         field: Option<&'formatter Field>,
+///     ) -> Result<Option<ArrayFormatter<'formatter>>, ArrowError> {
+///         // check if this is the money type
+///         if field
+///             .map(|f| f.extension_type_name() == Some("my_money"))
+///             .unwrap_or(false)
+///         {
+///             // We assume that my_money always is an Int32.
+///             let array = array.as_primitive();
+///             let display_index = Box::new(MyMoneyFormatter { array, options: options.clone() });
+///             return Ok(Some(ArrayFormatter::new(display_index, options.safe())));
+///         }
+///
+///         Ok(None) // None indicates that the default formatter should be used.
+///     }
+/// }
+///
+/// /// A formatter for the type `my_money` that wraps a specific array and has access to the
+/// /// formatting options.
+/// struct MyMoneyFormatter<'a> {
+///     array: &'a Int32Array,
+///     options: FormatOptions<'a>,
+/// }
+///
+/// impl<'a> DisplayIndex for MyMoneyFormatter<'a> {
+///     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+///         match self.array.is_valid(idx) {
+///             true => write!(f, "{} €", self.array.value(idx))?,
+///             false => write!(f, "{}", self.options.null())?,
+///         }
+///
+///         Ok(())
+///     }
+/// }
+///
+/// // Usually, here you would provide your record batches.
+/// let my_batches = vec![];
+///
+/// // Call the pretty printer with the custom formatter factory.
+/// pretty_format_batches_with_options(
+///        &my_batches,
+///        &FormatOptions::new().with_formatter_factory(Some(&MyFormatters {}))
+/// );
+/// # }
+/// ```
+pub trait ArrayFormatterFactory: Debug + Send + Sync {
+    /// Creates a new [`ArrayFormatter`] for the given [`Array`] and an optional [`Field`]. If the
+    /// default implementation should be used, return [`None`].
+    ///
+    /// The field shall be used to look up metadata about the `array` while `options` provide
+    /// information on formatting, for example, dates and times which should be considered by an
+    /// implementor.
+    fn create_array_formatter<'formatter>(
+        &self,
+        array: &'formatter dyn Array,
+        options: &FormatOptions<'formatter>,
+        field: Option<&'formatter Field>,
+    ) -> Result<Option<ArrayFormatter<'formatter>>, ArrowError>;
+}
+
+/// Builds the [`ArrayFormatter`] for `array`, letting the [`ArrayFormatterFactory`] configured
+/// in `options` override the default formatter.
+pub(crate) fn make_array_formatter<'a>(
+    array: &'a dyn Array,
+    options: &FormatOptions<'a>,
+    field: Option<&'a Field>,
+) -> Result<ArrayFormatter<'a>, ArrowError> {
+    // Ask the custom factory first; it answers `None` when it has no override for this array.
+    let custom = match options.formatter_factory() {
+        Some(factory) => factory.create_array_formatter(array, options, field)?,
+        None => None,
+    };
+    // Without an override, fall back to the default formatter built by `try_new`.
+    custom.map_or_else(|| ArrayFormatter::try_new(array, options), Ok)
 }
 
 /// Implements [`Display`] for a specific array value
@@ -272,14 +482,19 @@ pub struct ArrayFormatter<'a> {
 }
 
 impl<'a> ArrayFormatter<'a> {
+    /// Returns an [`ArrayFormatter`] using the provided formatter.
+    pub fn new(format: Box<dyn DisplayIndex + 'a>, safe: bool) -> Self {
+        Self { format, safe }
+    }
+
     /// Returns an [`ArrayFormatter`] that can be used to format `array`
     ///
     /// This returns an error if an array of the given data type cannot be formatted
     pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result<Self, ArrowError> {
-        Ok(Self {
-            format: make_formatter(array, options)?,
-            safe: options.safe,
-        })
+        Ok(Self::new(
+            make_default_display_index(array, options)?,
+            options.safe,
+        ))
     }
 
     /// Returns a [`ValueFormatter`] that implements [`Display`] for
@@ -292,7 +507,7 @@ impl<'a> ArrayFormatter<'a> {
     }
 }
 
-fn make_formatter<'a>(
+fn make_default_display_index<'a>(
     array: &'a dyn Array,
     options: &FormatOptions<'a>,
 ) -> Result<Box<dyn DisplayIndex + 'a>, ArrowError> {
@@ -332,12 +547,15 @@ fn make_formatter<'a>(
 }
 
 /// Either an [`ArrowError`] or [`std::fmt::Error`]
-enum FormatError {
+pub enum FormatError {
+    /// An error occurred while formatting the array
     Format(std::fmt::Error),
+    /// An Arrow error occurred while formatting the array.
     Arrow(ArrowError),
 }
 
-type FormatResult = Result<(), FormatError>;
+/// The result of formatting an array element via [`DisplayIndex::write`].
+pub type FormatResult = Result<(), FormatError>;
 
 impl From<std::fmt::Error> for FormatError {
     fn from(value: std::fmt::Error) -> Self {
@@ -352,7 +570,8 @@ impl From<ArrowError> for FormatError {
 }
 
 /// [`Display`] but accepting an index
-trait DisplayIndex {
+pub trait DisplayIndex {
+    /// Write the value of the underlying array at `idx` to `f`.
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult;
 }
 
@@ -489,7 +708,7 @@ macro_rules! decimal_display {
     };
 }
 
-decimal_display!(Decimal128Type, Decimal256Type);
+decimal_display!(Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type);
 
 fn write_timestamp(
     f: &mut dyn Write,
@@ -710,6 +929,12 @@ impl DisplayIndex for &PrimitiveArray<IntervalYearMonthType> {
 impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
         let value = self.value(idx);
+
+        if value.is_zero() {
+            write!(f, "0 secs")?;
+            return Ok(());
+        }
+
         let mut prefix = "";
 
         if value.days != 0 {
@@ -733,6 +958,12 @@ impl DisplayIndex for &PrimitiveArray<IntervalDayTimeType> {
 impl DisplayIndex for &PrimitiveArray<IntervalMonthDayNanoType> {
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
         let value = self.value(idx);
+
+        if value.is_zero() {
+            write!(f, "0 secs")?;
+            return Ok(());
+        }
+
         let mut prefix = "";
 
         if value.months != 0 {
@@ -776,12 +1007,12 @@ impl Display for NanosecondsFormatter<'_> {
         let nanoseconds = self.nanoseconds % 1_000_000_000;
 
         if hours != 0 {
-            write!(f, "{prefix}{} hours", hours)?;
+            write!(f, "{prefix}{hours} hours")?;
             prefix = " ";
         }
 
         if mins != 0 {
-            write!(f, "{prefix}{} mins", mins)?;
+            write!(f, "{prefix}{mins} mins")?;
             prefix = " ";
         }
 
@@ -819,12 +1050,12 @@ impl Display for MillisecondsFormatter<'_> {
         let milliseconds = self.milliseconds % 1_000;
 
         if hours != 0 {
-            write!(f, "{prefix}{} hours", hours,)?;
+            write!(f, "{prefix}{hours} hours")?;
             prefix = " ";
         }
 
         if mins != 0 {
-            write!(f, "{prefix}{} mins", mins,)?;
+            write!(f, "{prefix}{mins} mins")?;
             prefix = " ";
         }
 
@@ -896,7 +1127,7 @@ impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArra
     type State = Box<dyn DisplayIndex + 'a>;
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
-        make_formatter(self.values().as_ref(), options)
+        make_default_display_index(self.values().as_ref(), options)
     }
 
     fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
@@ -906,68 +1137,82 @@ impl<'a, K: ArrowDictionaryKeyType> DisplayIndexState<'a> for &'a DictionaryArra
 }
 
 impl<'a, K: RunEndIndexType> DisplayIndexState<'a> for &'a RunArray<K> {
-    type State = Box<dyn DisplayIndex + 'a>;
+    type State = ArrayFormatter<'a>;
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
-        make_formatter(self.values().as_ref(), options)
+        let field = match (*self).data_type() {
+            DataType::RunEndEncoded(_, values_field) => values_field,
+            _ => unreachable!(),
+        };
+        make_array_formatter(self.values().as_ref(), options, Some(field))
     }
 
     fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
         let value_idx = self.get_physical_index(idx);
-        s.as_ref().write(value_idx, f)
+        write!(f, "{}", s.value(value_idx))?;
+        Ok(())
     }
 }
 
 fn write_list(
     f: &mut dyn Write,
     mut range: Range<usize>,
-    values: &dyn DisplayIndex,
+    values: &ArrayFormatter<'_>,
 ) -> FormatResult {
     f.write_char('[')?;
     if let Some(idx) = range.next() {
-        values.write(idx, f)?;
+        write!(f, "{}", values.value(idx))?;
     }
     for idx in range {
-        write!(f, ", ")?;
-        values.write(idx, f)?;
+        write!(f, ", {}", values.value(idx))?;
     }
     f.write_char(']')?;
     Ok(())
 }
 
 impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericListArray<O> {
-    type State = Box<dyn DisplayIndex + 'a>;
+    type State = ArrayFormatter<'a>;
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
-        make_formatter(self.values().as_ref(), options)
+        let field = match (*self).data_type() {
+            DataType::List(f) => f,
+            DataType::LargeList(f) => f,
+            _ => unreachable!(),
+        };
+        make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))
     }
 
     fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
         let offsets = self.value_offsets();
         let end = offsets[idx + 1].as_usize();
         let start = offsets[idx].as_usize();
-        write_list(f, start..end, s.as_ref())
+        write_list(f, start..end, s)
     }
 }
 
 impl<'a> DisplayIndexState<'a> for &'a FixedSizeListArray {
-    type State = (usize, Box<dyn DisplayIndex + 'a>);
+    type State = (usize, ArrayFormatter<'a>);
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
-        let values = make_formatter(self.values().as_ref(), options)?;
+        let field = match (*self).data_type() {
+            DataType::FixedSizeList(f, _) => f,
+            _ => unreachable!(),
+        };
+        let formatter =
+            make_array_formatter(self.values().as_ref(), options, Some(field.as_ref()))?;
         let length = self.value_length();
-        Ok((length as usize, values))
+        Ok((length as usize, formatter))
     }
 
     fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
         let start = idx * s.0;
         let end = start + s.0;
-        write_list(f, start..end, s.1.as_ref())
+        write_list(f, start..end, &s.1)
     }
 }
 
-/// Pairs a boxed [`DisplayIndex`] with its field name
-type FieldDisplay<'a> = (&'a str, Box<dyn DisplayIndex + 'a>);
+/// Pairs an [`ArrayFormatter`] with its field name
+type FieldDisplay<'a> = (&'a str, ArrayFormatter<'a>);
 
 impl<'a> DisplayIndexState<'a> for &'a StructArray {
     type State = Vec<FieldDisplay<'a>>;
@@ -982,7 +1227,7 @@ impl<'a> DisplayIndexState<'a> for &'a StructArray {
             .iter()
             .zip(fields)
             .map(|(a, f)| {
-                let format = make_formatter(a.as_ref(), options)?;
+                let format = make_array_formatter(a.as_ref(), options, Some(f))?;
                 Ok((f.name().as_str(), format))
             })
             .collect()
@@ -992,12 +1237,10 @@ impl<'a> DisplayIndexState<'a> for &'a StructArray {
         let mut iter = s.iter();
         f.write_char('{')?;
         if let Some((name, display)) = iter.next() {
-            write!(f, "{name}: ")?;
-            display.as_ref().write(idx, f)?;
+            write!(f, "{name}: {}", display.value(idx))?;
         }
         for (name, display) in iter {
-            write!(f, ", {name}: ")?;
-            display.as_ref().write(idx, f)?;
+            write!(f, ", {name}: {}", display.value(idx))?;
         }
         f.write_char('}')?;
         Ok(())
@@ -1005,11 +1248,13 @@ impl<'a> DisplayIndexState<'a> for &'a StructArray {
 }
 
 impl<'a> DisplayIndexState<'a> for &'a MapArray {
-    type State = (Box<dyn DisplayIndex + 'a>, Box<dyn DisplayIndex + 'a>);
+    type State = (ArrayFormatter<'a>, ArrayFormatter<'a>);
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
-        let keys = make_formatter(self.keys().as_ref(), options)?;
-        let values = make_formatter(self.values().as_ref(), options)?;
+        let (key_field, value_field) = (*self).entries_fields();
+
+        let keys = make_array_formatter(self.keys().as_ref(), options, Some(key_field))?;
+        let values = make_array_formatter(self.values().as_ref(), options, Some(value_field))?;
         Ok((keys, values))
     }
 
@@ -1021,16 +1266,12 @@ impl<'a> DisplayIndexState<'a> for &'a MapArray {
 
         f.write_char('{')?;
         if let Some(idx) = iter.next() {
-            s.0.write(idx, f)?;
-            write!(f, ": ")?;
-            s.1.write(idx, f)?;
+            write!(f, "{}: {}", s.0.value(idx), s.1.value(idx))?;
         }
 
         for idx in iter {
-            write!(f, ", ")?;
-            s.0.write(idx, f)?;
-            write!(f, ": ")?;
-            s.1.write(idx, f)?;
+            write!(f, ", {}", s.0.value(idx))?;
+            write!(f, ": {}", s.1.value(idx))?;
         }
 
         f.write_char('}')?;
@@ -1039,10 +1280,7 @@ impl<'a> DisplayIndexState<'a> for &'a MapArray {
 }
 
 impl<'a> DisplayIndexState<'a> for &'a UnionArray {
-    type State = (
-        Vec<Option<(&'a str, Box<dyn DisplayIndex + 'a>)>>,
-        UnionMode,
-    );
+    type State = (Vec<Option<FieldDisplay<'a>>>, UnionMode);
 
     fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
         let (fields, mode) = match (*self).data_type() {
@@ -1053,7 +1291,7 @@ impl<'a> DisplayIndexState<'a> for &'a UnionArray {
         let max_id = fields.iter().map(|(id, _)| id).max().unwrap_or_default() as usize;
         let mut out: Vec<Option<FieldDisplay>> = (0..max_id + 1).map(|_| None).collect();
         for (i, field) in fields.iter() {
-            let formatter = make_formatter(self.child(i).as_ref(), options)?;
+            let formatter = make_array_formatter(self.child(i).as_ref(), options, Some(field))?;
             out[i as usize] = Some((field.name().as_str(), formatter))
         }
         Ok((out, *mode))
@@ -1067,9 +1305,7 @@ impl<'a> DisplayIndexState<'a> for &'a UnionArray {
         };
         let (name, field) = s.0[id as usize].as_ref().unwrap();
 
-        write!(f, "{{{name}=")?;
-        field.write(idx, f)?;
-        f.write_char('}')?;
+        write!(f, "{{{name}={}}}", field.value(idx))?;
         Ok(())
     }
 }
@@ -1118,6 +1354,19 @@ mod tests {
         assert_eq!(TEST_CONST_OPTIONS.date_format, Some("foo"));
     }
 
+    /// See https://github.com/apache/arrow-rs/issues/8875
+    #[test]
+    fn test_options_send_sync() {
+        fn assert_send_sync<T>()
+        where
+            T: Send + Sync,
+        {
+            // nothing – the compiler does the work
+        }
+
+        assert_send_sync::<FormatOptions<'static>>();
+    }
+
     #[test]
     fn test_map_array_to_string() {
         let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
diff --git a/arrow-cast/src/lib.rs b/arrow-cast/src/lib.rs
index b042a7338519..3412616c5caf 100644
--- a/arrow-cast/src/lib.rs
+++ b/arrow-cast/src/lib.rs
@@ -21,7 +21,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 pub mod cast;
 pub use cast::*;
diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 28d36db89af0..b266cc4aa360 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -18,9 +18,9 @@
 //! [`Parser`] implementations for converting strings to Arrow types
 //!
 //! Used by the CSV and JSON readers to convert strings to Arrow types
+use arrow_array::ArrowNativeTypeOp;
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
-use arrow_array::ArrowNativeTypeOp;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::ArrowError;
 use chrono::prelude::*;
@@ -794,7 +794,7 @@ fn parse_e_notation<T: DecimalType>(
                 None => {
                     return Err(ArrowError::ParseError(format!(
                         "can't parse the string value {s} to decimal"
-                    )))
+                    )));
                 }
             };
 
@@ -1235,8 +1235,7 @@ impl Interval {
         match (self.months, self.days, self.nanos) {
             (months, days, nanos) if days == 0 && nanos == 0 => Ok(months),
             _ => Err(ArrowError::InvalidArgumentError(format!(
-                "Unable to represent interval with days and nanos as year-months: {:?}",
-                self
+                "Unable to represent interval with days and nanos as year-months: {self:?}"
             ))),
         }
     }
@@ -2690,26 +2689,10 @@ mod tests {
                 0i128,
                 15,
             ),
-            (
-                "1.016744e-320",
-                0i128,
-                15,
-            ),
-            (
-                "-1e3",
-                -1000000000i128,
-                6,
-            ),
-            (
-                "+1e3",
-                1000000000i128,
-                6,
-            ),
-            (
-                "-1e31",
-                -10000000000000000000000000000000000000i128,
-                6,
-            ),
+            ("1.016744e-320", 0i128, 15),
+            ("-1e3", -1000000000i128, 6),
+            ("+1e3", 1000000000i128, 6),
+            ("-1e31", -10000000000000000000000000000000000000i128, 6),
         ];
         for (s, i, scale) in edge_tests_128 {
             let result_128 = parse_decimal::<Decimal128Type>(s, 38, scale);
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index c3fc00e4b911..e7c199dbed97 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -22,14 +22,12 @@
 //! [`RecordBatch`]: arrow_array::RecordBatch
 //! [`Array`]: arrow_array::Array
 
-use std::fmt::Display;
-
-use comfy_table::{Cell, Table};
-
 use arrow_array::{Array, ArrayRef, RecordBatch};
 use arrow_schema::{ArrowError, SchemaRef};
+use comfy_table::{Cell, Table};
+use std::fmt::Display;
 
-use crate::display::{ArrayFormatter, FormatOptions};
+use crate::display::{ArrayFormatter, FormatOptions, make_array_formatter};
 
 /// Create a visual representation of [`RecordBatch`]es
 ///
@@ -60,7 +58,7 @@ use crate::display::{ArrayFormatter, FormatOptions};
 /// | 5 | e |
 /// +---+---+"#);
 /// ```
-pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, ArrowError> {
+pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display + use<>, ArrowError> {
     let options = FormatOptions::default().with_display_error(true);
     pretty_format_batches_with_options(results, &options)
 }
@@ -92,7 +90,7 @@ pub fn pretty_format_batches(results: &[RecordBatch]) -> Result<impl Display, Ar
 pub fn pretty_format_batches_with_schema(
     schema: SchemaRef,
     results: &[RecordBatch],
-) -> Result<impl Display, ArrowError> {
+) -> Result<impl Display + use<>, ArrowError> {
     let options = FormatOptions::default().with_display_error(true);
     create_table(Some(schema), results, &options)
 }
@@ -130,7 +128,7 @@ pub fn pretty_format_batches_with_schema(
 pub fn pretty_format_batches_with_options(
     results: &[RecordBatch],
     options: &FormatOptions,
-) -> Result<impl Display, ArrowError> {
+) -> Result<impl Display + use<>, ArrowError> {
     create_table(None, results, options)
 }
 
@@ -142,7 +140,7 @@ pub fn pretty_format_batches_with_options(
 pub fn pretty_format_columns(
     col_name: &str,
     results: &[ArrayRef],
-) -> Result<impl Display, ArrowError> {
+) -> Result<impl Display + use<>, ArrowError> {
     let options = FormatOptions::default().with_display_error(true);
     pretty_format_columns_with_options(col_name, results, &options)
 }
@@ -154,7 +152,7 @@ pub fn pretty_format_columns_with_options(
     col_name: &str,
     results: &[ArrayRef],
     options: &FormatOptions,
-) -> Result<impl Display, ArrowError> {
+) -> Result<impl Display + use<>, ArrowError> {
     create_column(col_name, results, options)
 }
 
@@ -187,7 +185,7 @@ fn create_table(
         }
     });
 
-    if let Some(schema) = schema_opt {
+    if let Some(schema) = &schema_opt {
         let mut header = Vec::new();
         for field in schema.fields() {
             if options.types_info() {
@@ -208,10 +206,22 @@ fn create_table(
     }
 
     for batch in results {
+        let schema = schema_opt.as_ref().unwrap_or(batch.schema_ref());
+
+        // A caller-provided schema may not match this batch's column count.
+        if batch.columns().len() != schema.fields().len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Expected the same number of columns in a record batch ({}) as the number of fields ({}) in the schema",
+                batch.columns().len(),
+                schema.fields.len()
+            )));
+        }
+
         let formatters = batch
             .columns()
             .iter()
-            .map(|c| ArrayFormatter::try_new(c.as_ref(), options))
+            .zip(schema.fields().iter())
+            .map(|(c, field)| make_array_formatter(c, options, Some(field)))
             .collect::<Result<Vec<_>, ArrowError>>()?;
 
         for row in 0..batch.num_rows() {
@@ -242,7 +252,13 @@ fn create_column(
     table.set_header(header);
 
     for col in columns {
-        let formatter = ArrayFormatter::try_new(col.as_ref(), options)?;
+        let formatter = match options.formatter_factory() {
+            None => ArrayFormatter::try_new(col.as_ref(), options)?,
+            Some(formatters) => formatters
+                .create_array_formatter(col.as_ref(), options, None)
+                .transpose()
+                .unwrap_or_else(|| ArrayFormatter::try_new(col.as_ref(), options))?,
+        };
         for row in 0..col.len() {
             let cells = vec![Cell::new(formatter.value(row))];
             table.add_row(cells);
@@ -254,18 +270,21 @@ fn create_column(
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
     use std::fmt::Write;
     use std::sync::Arc;
 
-    use half::f16;
-
     use arrow_array::builder::*;
+    use arrow_array::cast::AsArray;
     use arrow_array::types::*;
     use arrow_array::*;
     use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
     use arrow_schema::*;
+    use half::f16;
 
-    use crate::display::{array_value_to_string, DurationFormat};
+    use crate::display::{
+        ArrayFormatterFactory, DisplayIndex, DurationFormat, array_value_to_string,
+    };
 
     use super::*;
 
@@ -1089,6 +1108,7 @@ mod tests {
             Some(IntervalDayTime::new(0, 1)),
             Some(IntervalDayTime::new(0, 10)),
             Some(IntervalDayTime::new(0, 100)),
+            Some(IntervalDayTime::new(0, 0)),
         ]));
 
         let schema = Arc::new(Schema::new(vec![Field::new(
@@ -1111,6 +1131,7 @@ mod tests {
             "| 0.001 secs       |",
             "| 0.010 secs       |",
             "| 0.100 secs       |",
+            "| 0 secs           |",
             "+------------------+",
         ];
 
@@ -1135,6 +1156,7 @@ mod tests {
             Some(IntervalMonthDayNano::new(0, 0, 10_000_000)),
             Some(IntervalMonthDayNano::new(0, 0, 100_000_000)),
             Some(IntervalMonthDayNano::new(0, 0, 1_000_000_000)),
+            Some(IntervalMonthDayNano::new(0, 0, 0)),
         ]));
 
         let schema = Arc::new(Schema::new(vec![Field::new(
@@ -1164,6 +1186,7 @@ mod tests {
             "| 0.010000000 secs         |",
             "| 0.100000000 secs         |",
             "| 1.000000000 secs         |",
+            "| 0 secs                   |",
             "+--------------------------+",
         ];
 
@@ -1240,9 +1263,10 @@ mod tests {
         // Pretty formatting
         let opts = FormatOptions::default().with_null("null");
         let opts = opts.with_duration_format(DurationFormat::Pretty);
-        let pretty = pretty_format_columns_with_options("pretty", &[array.clone()], &opts)
-            .unwrap()
-            .to_string();
+        let pretty =
+            pretty_format_columns_with_options("pretty", std::slice::from_ref(&array), &opts)
+                .unwrap()
+                .to_string();
 
         // Expected output
         let expected_pretty = vec![
@@ -1282,4 +1306,474 @@ mod tests {
         let actual: Vec<&str> = iso.lines().collect();
         assert_eq!(expected_iso, actual, "Actual result:\n{iso}");
     }
+
+    //
+    // Custom Formatting
+    //
+
+    /// Test factory that builds custom [`ArrayFormatter`]s for `my_money` fields and `Int32` arrays.
+    #[derive(Debug)]
+    struct TestFormatters {}
+
+    impl ArrayFormatterFactory for TestFormatters {
+        fn create_array_formatter<'formatter>(
+            &self,
+            array: &'formatter dyn Array,
+            options: &FormatOptions<'formatter>,
+            field: Option<&'formatter Field>,
+        ) -> Result<Option<ArrayFormatter<'formatter>>, ArrowError> {
+            if field
+                .map(|f| f.extension_type_name() == Some("my_money"))
+                .unwrap_or(false)
+            {
+                // We assume that `my_money` is always stored as an Int32 array.
+                let array = array.as_primitive();
+                let display_index = Box::new(MyMoneyFormatter {
+                    array,
+                    options: options.clone(),
+                });
+                return Ok(Some(ArrayFormatter::new(display_index, options.safe())));
+            }
+
+            if array.data_type() == &DataType::Int32 {
+                let array = array.as_primitive();
+                let display_index = Box::new(MyInt32Formatter {
+                    array,
+                    options: options.clone(),
+                });
+                return Ok(Some(ArrayFormatter::new(display_index, options.safe())));
+            }
+
+            Ok(None)
+        }
+    }
+
+    /// A formatter that appends a "€" sign to each valid Int32 value; nulls use the configured null text.
+    struct MyMoneyFormatter<'a> {
+        array: &'a Int32Array,
+        options: FormatOptions<'a>,
+    }
+
+    impl<'a> DisplayIndex for MyMoneyFormatter<'a> {
+        fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult {
+            match self.array.is_valid(idx) {
+                true => write!(f, "{} €", self.array.value(idx))?,
+                false => write!(f, "{}", self.options.null())?,
+            }
+
+            Ok(())
+        }
+    }
+
+    /// A formatter that appends "(32-Bit)" to each valid Int32 value; nulls use the configured null text.
+    struct MyInt32Formatter<'a> {
+        array: &'a Int32Array,
+        options: FormatOptions<'a>,
+    }
+
+    impl<'a> DisplayIndex for MyInt32Formatter<'a> {
+        fn write(&self, idx: usize, f: &mut dyn Write) -> crate::display::FormatResult {
+            match self.array.is_valid(idx) {
+                true => write!(f, "{} (32-Bit)", self.array.value(idx))?,
+                false => write!(f, "{}", self.options.null())?,
+            }
+
+            Ok(())
+        }
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters() {
+        // define the format options and the schema.
+        let options = FormatOptions::new()
+            .with_null("<NULL>")
+            .with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone()),
+        ]));
+
+        // define data.
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![Arc::new(array::Int32Array::from(vec![
+                Some(1),
+                None,
+                Some(10),
+                Some(100),
+            ]))],
+        )
+        .unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_batches_with_options(&[batch], &options).unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+--------+",
+            "| income |",
+            "+--------+",
+            "| 1 €    |",
+            "| <NULL> |",
+            "| 10 €   |",
+            "| 100 €  |",
+            "+--------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters_multi_nested_list() {
+        // define the format options and the `my_money` list item field.
+        let options = FormatOptions::new()
+            .with_null("<NULL>")
+            .with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+        let nested_field = Arc::new(
+            Field::new_list_field(DataType::Int32, true).with_metadata(money_metadata.clone()),
+        );
+
+        // Create nested data
+        let inner_list = ListBuilder::new(Int32Builder::new()).with_field(nested_field);
+        let mut outer_list = FixedSizeListBuilder::new(inner_list, 2);
+        outer_list.values().append_value([Some(1)]);
+        outer_list.values().append_null();
+        outer_list.append(true);
+        outer_list.values().append_value([Some(2), Some(8)]);
+        outer_list
+            .values()
+            .append_value([Some(50), Some(25), Some(25)]);
+        outer_list.append(true);
+        let outer_list = outer_list.finish();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "income",
+            outer_list.data_type().clone(),
+            true,
+        )]));
+
+        // define data.
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(outer_list)]).unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_batches_with_options(&[batch], &options).unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+----------------------------------+",
+            "| income                           |",
+            "+----------------------------------+",
+            "| [[1 €], <NULL>]                  |",
+            "| [[2 €, 8 €], [50 €, 25 €, 25 €]] |",
+            "+----------------------------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters_nested_struct() {
+        // define the format options and the schema.
+        let options = FormatOptions::new()
+            .with_null("<NULL>")
+            .with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+        let fields = Fields::from(vec![
+            Field::new("name", DataType::Utf8, true),
+            Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone()),
+        ]);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "income",
+            DataType::Struct(fields.clone()),
+            true,
+        )]));
+
+        // Create nested data
+        let mut nested_data = StructBuilder::new(
+            fields,
+            vec![
+                Box::new(StringBuilder::new()),
+                Box::new(Int32Builder::new()),
+            ],
+        );
+        nested_data
+            .field_builder::<StringBuilder>(0)
+            .unwrap()
+            .extend([Some("Gimli"), Some("Legolas"), Some("Aragorn")]);
+        nested_data
+            .field_builder::<Int32Builder>(1)
+            .unwrap()
+            .extend([Some(10), None, Some(30)]);
+        nested_data.append(true);
+        nested_data.append(true);
+        nested_data.append(true);
+
+        // define data.
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(nested_data.finish())]).unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_batches_with_options(&[batch], &options).unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+---------------------------------+",
+            "| income                          |",
+            "+---------------------------------+",
+            "| {name: Gimli, income: 10 €}     |",
+            "| {name: Legolas, income: <NULL>} |",
+            "| {name: Aragorn, income: 30 €}   |",
+            "+---------------------------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters_nested_map() {
+        // define the format options and the `my_money` extension metadata.
+        let options = FormatOptions::new()
+            .with_null("<NULL>")
+            .with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+
+        let mut array = MapBuilder::<StringBuilder, Int32Builder>::new(
+            None,
+            StringBuilder::new(),
+            Int32Builder::new(),
+        )
+        .with_values_field(
+            Field::new("values", DataType::Int32, true).with_metadata(money_metadata.clone()),
+        );
+        array
+            .keys()
+            .extend([Some("Gimli"), Some("Legolas"), Some("Aragorn")]);
+        array.values().extend([Some(10), None, Some(30)]);
+        array.append(true).unwrap();
+        let array = array.finish();
+
+        // define the schema and the record batch.
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "income",
+            array.data_type().clone(),
+            true,
+        )]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_batches_with_options(&[batch], &options).unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+-----------------------------------------------+",
+            "| income                                        |",
+            "+-----------------------------------------------+",
+            "| {Gimli: 10 €, Legolas: <NULL>, Aragorn: 30 €} |",
+            "+-----------------------------------------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters_nested_union() {
+        // define the format options and the union fields.
+        let options = FormatOptions::new()
+            .with_null("<NULL>")
+            .with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+        let fields = UnionFields::try_new(
+            vec![0],
+            vec![Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone())],
+        )
+        .unwrap();
+
+        // Create nested data and construct it with the correct metadata
+        let mut array_builder = UnionBuilder::new_dense();
+        array_builder.append::<Int32Type>("income", 1).unwrap();
+        let (_, type_ids, offsets, children) = array_builder.build().unwrap().into_parts();
+        let array = UnionArray::try_new(fields, type_ids, offsets, children).unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "income",
+            array.data_type().clone(),
+            true,
+        )]));
+
+        // define data.
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_batches_with_options(&[batch], &options).unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+--------------+",
+            "| income       |",
+            "+--------------+",
+            "| {income=1 €} |",
+            "+--------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_batches_with_custom_formatters_custom_schema_overrules_batch_schema() {
+        // define the format options and the schema.
+        let options = FormatOptions::new().with_formatter_factory(Some(&TestFormatters {}));
+        let money_metadata = HashMap::from([(
+            extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
+            "my_money".to_owned(),
+        )]);
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone()),
+        ]));
+
+        // define data.
+        let batch = RecordBatch::try_new(
+            schema,
+            vec![Arc::new(array::Int32Array::from(vec![
+                Some(1),
+                None,
+                Some(10),
+                Some(100),
+            ]))],
+        )
+        .unwrap();
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            create_table(
+                // Unlike test_format_batches_with_custom_formatters, this schema has no metadata
+                Some(Arc::new(Schema::new(vec![Field::new(
+                    "income",
+                    DataType::Int32,
+                    true
+                ),]))),
+                &[batch],
+                &options,
+            )
+            .unwrap()
+        )
+        .unwrap();
+
+        // Unlike test_format_batches_with_custom_formatters, there is no € formatting
+        let s = [
+            "+--------------+",
+            "| income       |",
+            "+--------------+",
+            "| 1 (32-Bit)   |",
+            "|              |",
+            "| 10 (32-Bit)  |",
+            "| 100 (32-Bit) |",
+            "+--------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_format_column_with_custom_formatters() {
+        // define data.
+        let array = Arc::new(array::Int32Array::from(vec![
+            Some(1),
+            None,
+            Some(10),
+            Some(100),
+        ]));
+
+        let mut buf = String::new();
+        write!(
+            &mut buf,
+            "{}",
+            pretty_format_columns_with_options(
+                "income",
+                &[array],
+                &FormatOptions::default().with_formatter_factory(Some(&TestFormatters {}))
+            )
+            .unwrap()
+        )
+        .unwrap();
+
+        let s = [
+            "+--------------+",
+            "| income       |",
+            "+--------------+",
+            "| 1 (32-Bit)   |",
+            "|              |",
+            "| 10 (32-Bit)  |",
+            "| 100 (32-Bit) |",
+            "+--------------+",
+        ];
+        let expected = s.join("\n");
+        assert_eq!(expected, buf);
+    }
+
+    #[test]
+    fn test_pretty_format_batches_with_schema_with_wrong_number_of_fields() {
+        let schema_a = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Utf8, true),
+        ]));
+        let schema_b = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
+
+        // define data.
+        let batch = RecordBatch::try_new(
+            schema_b,
+            vec![Arc::new(array::Int32Array::from(vec![
+                Some(1),
+                None,
+                Some(10),
+                Some(100),
+            ]))],
+        )
+        .unwrap();
+
+        let error = pretty_format_batches_with_schema(schema_a, &[batch])
+            .err()
+            .unwrap();
+        assert_eq!(
+            &error.to_string(),
+            "Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema"
+        );
+    }
 }
diff --git a/arrow-csv/examples/README.md b/arrow-csv/examples/README.md
deleted file mode 100644
index 340413e76d94..000000000000
--- a/arrow-csv/examples/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Examples
-- [`csv_calculation.rs`](csv_calculation.rs): performs a simple calculation using the CSV reader
\ No newline at end of file
diff --git a/arrow-csv/examples/csv_calculation.rs b/arrow-csv/examples/csv_calculation.rs
deleted file mode 100644
index 6ce963e2b012..000000000000
--- a/arrow-csv/examples/csv_calculation.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_array::cast::AsArray;
-use arrow_array::types::Int16Type;
-use arrow_csv::ReaderBuilder;
-
-use arrow_schema::{DataType, Field, Schema};
-use std::fs::File;
-use std::sync::Arc;
-
-fn main() {
-    // read csv from file
-    let file = File::open("arrow-csv/test/data/example.csv").unwrap();
-    let csv_schema = Schema::new(vec![
-        Field::new("c1", DataType::Int16, true),
-        Field::new("c2", DataType::Float32, true),
-        Field::new("c3", DataType::Utf8, true),
-        Field::new("c4", DataType::Boolean, true),
-    ]);
-    let mut reader = ReaderBuilder::new(Arc::new(csv_schema))
-        .with_header(true)
-        .build(file)
-        .unwrap();
-
-    match reader.next() {
-        Some(r) => match r {
-            Ok(r) => {
-                // get the column(0) max value
-                let col = r.column(0).as_primitive::<Int16Type>();
-                let max = col.iter().max().flatten();
-                println!("max value column(0): {max:?}")
-            }
-            Err(e) => {
-                println!("{e:?}");
-            }
-        },
-        None => {
-            println!("csv is empty");
-        }
-    }
-}
diff --git a/arrow-csv/src/lib.rs b/arrow-csv/src/lib.rs
index 8532cf59a218..4c4b04098175 100644
--- a/arrow-csv/src/lib.rs
+++ b/arrow-csv/src/lib.rs
@@ -15,21 +15,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Transfer data between the Arrow memory format and CSV (comma-separated values).
+//! Transfer data between the [Apache Arrow] memory format and CSV (comma-separated values).
+//!
+//! [Apache Arrow]: https://arrow.apache.org/
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 
 pub mod reader;
 pub mod writer;
 
-pub use self::reader::infer_schema_from_files;
 pub use self::reader::Reader;
 pub use self::reader::ReaderBuilder;
+pub use self::reader::infer_schema_from_files;
+pub use self::writer::QuoteStyle;
 pub use self::writer::Writer;
 pub use self::writer::WriterBuilder;
 use arrow_schema::ArrowError;
@@ -51,8 +54,8 @@ fn map_csv_error(error: csv::Error) -> ArrowError {
         } => ArrowError::CsvError(format!(
             "Encountered unequal lengths between records on CSV file. Expected {} \
                  records, found {} records{}",
-            len,
             expected_len,
+            len,
             pos.as_ref()
                 .map(|pos| format!(" at line {}", pos.line()))
                 .unwrap_or_default(),
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index e9f612557e0a..e26072fea917 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! CSV Reader
+//! CSV Reading: [`Reader`] and [`ReaderBuilder`]
 //!
 //! # Basic Usage
 //!
@@ -42,6 +42,46 @@
 //! let batch = csv.next().unwrap().unwrap();
 //! ```
 //!
+//! # Example: Numeric calculations on CSV
+//! This code finds the maximum value in column 0 of a CSV file containing:
+//! ```csv
+//! c1,c2,c3,c4
+//! 1,1.1,"hong kong",true
+//! 3,323.12,"XiAn",false
+//! 10,131323.12,"cheng du",false
+//! ```
+//!
+//! ```
+//! # use arrow_array::cast::AsArray;
+//! # use arrow_array::types::Int16Type;
+//! # use arrow_csv::ReaderBuilder;
+//! # use arrow_schema::{DataType, Field, Schema};
+//! # use std::fs::File;
+//! # use std::sync::Arc;
+//! // Open the example file
+//! let file = File::open("test/data/example.csv").unwrap();
+//! let csv_schema = Schema::new(vec![
+//!     Field::new("c1", DataType::Int16, true),
+//!     Field::new("c2", DataType::Float32, true),
+//!     Field::new("c3", DataType::Utf8, true),
+//!     Field::new("c4", DataType::Boolean, true),
+//! ]);
+//! let mut reader = ReaderBuilder::new(Arc::new(csv_schema))
+//!     .with_header(true)
+//!     .build(file)
+//!     .unwrap();
+//! // find the maximum value in column 0 across all batches
+//! let mut max_c0 = 0;
+//! while let Some(r) = reader.next() {
+//!   let r = r.unwrap(); // handle error
+//!   // get the max value in column(0) for this batch
+//!   let col = r.column(0).as_primitive::<Int16Type>();
+//!   let batch_max = col.iter().max().flatten().unwrap_or_default();
+//!   max_c0 = max_c0.max(batch_max);
+//! }
+//! assert_eq!(max_c0, 10);
+//! ```
+//!
 //! # Async Usage
 //!
 //! The lower-level [`Decoder`] can be integrated with various forms of async data streams,
@@ -128,7 +168,7 @@ mod records;
 use arrow_array::builder::{NullBuilder, PrimitiveBuilder};
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_cast::parse::{parse_decimal, string_to_datetime, Parser};
+use arrow_cast::parse::{Parser, parse_decimal, string_to_datetime};
 use arrow_schema::*;
 use chrono::{TimeZone, Utc};
 use csv::StringRecord;
@@ -441,13 +481,18 @@ pub fn infer_schema_from_files(
 type Bounds = Option<(usize, usize)>;
 
 /// CSV file reader using [`std::io::BufReader`]
+///
+/// See [`ReaderBuilder`] to construct a CSV reader with options, and the
+/// [module-level documentation](crate::reader) for more details and examples.
 pub type Reader<R> = BufReader<StdBufReader<R>>;
 
-/// CSV file reader
+/// CSV file reader implementation. See [`Reader`] for usage
+///
+/// Despite having the same name as [`std::io::BufReader`], this structure does
+/// not buffer reads itself.
 pub struct BufReader<R> {
     /// File reader
     reader: R,
-
     /// The decoder
     decoder: Decoder,
 }
@@ -654,6 +699,22 @@ fn parse(
             let field = &fields[i];
             match field.data_type() {
                 DataType::Boolean => build_boolean_array(line_number, rows, i, null_regex),
+                DataType::Decimal32(precision, scale) => build_decimal_array::<Decimal32Type>(
+                    line_number,
+                    rows,
+                    i,
+                    *precision,
+                    *scale,
+                    null_regex,
+                ),
+                DataType::Decimal64(precision, scale) => build_decimal_array::<Decimal64Type>(
+                    line_number,
+                    rows,
+                    i,
+                    *precision,
+                    *scale,
+                    null_regex,
+                ),
                 DataType::Decimal128(precision, scale) => build_decimal_array::<Decimal128Type>(
                     line_number,
                     rows,
@@ -844,7 +905,7 @@ fn parse(
                                 .collect::<DictionaryArray<UInt64Type>>(),
                         ) as ArrayRef),
                         _ => Err(ArrowError::ParseError(format!(
-                            "Unsupported dictionary key type {key_type:?}"
+                            "Unsupported dictionary key type {key_type}"
                         ))),
                     }
                 }
@@ -1037,7 +1098,7 @@ fn build_boolean_array(
         .map(|e| Arc::new(e) as ArrayRef)
 }
 
-/// CSV file reader builder
+/// Builder for CSV [`Reader`]s
 #[derive(Debug)]
 pub struct ReaderBuilder {
     /// Schema of the CSV file
@@ -1055,9 +1116,10 @@ pub struct ReaderBuilder {
 }
 
 impl ReaderBuilder {
-    /// Create a new builder for configuring CSV parsing options.
+    /// Create a new builder for configuring [`Reader`] CSV parsing options.
     ///
-    /// To convert a builder into a reader, call `ReaderBuilder::build`
+    /// To convert a builder into a reader, call [`ReaderBuilder::build`]. See
+    /// the [module-level documentation](crate::reader) for more details and examples.
     ///
     /// # Example
     ///
@@ -1315,6 +1377,54 @@ mod tests {
         assert_eq!("0.290472", lng.value_as_string(9));
     }
 
+    #[test]
+    fn test_csv_reader_with_decimal_3264() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("city", DataType::Utf8, false),
+            Field::new("lat", DataType::Decimal32(9, 6), false),
+            Field::new("lng", DataType::Decimal64(16, 6), false),
+        ]));
+
+        let file = File::open("test/data/decimal_test.csv").unwrap();
+
+        let mut csv = ReaderBuilder::new(schema).build(file).unwrap();
+        let batch = csv.next().unwrap().unwrap();
+        // access data from a primitive array
+        let lat = batch
+            .column(1)
+            .as_any()
+            .downcast_ref::<Decimal32Array>()
+            .unwrap();
+
+        assert_eq!("57.653484", lat.value_as_string(0));
+        assert_eq!("53.002666", lat.value_as_string(1));
+        assert_eq!("52.412811", lat.value_as_string(2));
+        assert_eq!("51.481583", lat.value_as_string(3));
+        assert_eq!("12.123456", lat.value_as_string(4));
+        assert_eq!("50.760000", lat.value_as_string(5));
+        assert_eq!("0.123000", lat.value_as_string(6));
+        assert_eq!("123.000000", lat.value_as_string(7));
+        assert_eq!("123.000000", lat.value_as_string(8));
+        assert_eq!("-50.760000", lat.value_as_string(9));
+
+        let lng = batch
+            .column(2)
+            .as_any()
+            .downcast_ref::<Decimal64Array>()
+            .unwrap();
+
+        assert_eq!("-3.335724", lng.value_as_string(0));
+        assert_eq!("-2.179404", lng.value_as_string(1));
+        assert_eq!("-1.778197", lng.value_as_string(2));
+        assert_eq!("-3.179090", lng.value_as_string(3));
+        assert_eq!("-3.179090", lng.value_as_string(4));
+        assert_eq!("0.290472", lng.value_as_string(5));
+        assert_eq!("0.290472", lng.value_as_string(6));
+        assert_eq!("0.290472", lng.value_as_string(7));
+        assert_eq!("0.290472", lng.value_as_string(8));
+        assert_eq!("0.290472", lng.value_as_string(9));
+    }
+
     #[test]
     fn test_csv_from_buf_reader() {
         let schema = Schema::new(vec![
@@ -1789,7 +1899,10 @@ mod tests {
         let file_name = "test/data/various_invalid_types/invalid_float.csv";
 
         let error = invalid_csv_helper(file_name);
-        assert_eq!("Parser error: Error while parsing value '4.x4' as type 'Float32' for column 1 at line 4. Row data: '[4,4.x4,,false]'", error);
+        assert_eq!(
+            "Parser error: Error while parsing value '4.x4' as type 'Float32' for column 1 at line 4. Row data: '[4,4.x4,,false]'",
+            error
+        );
     }
 
     #[test]
@@ -1797,7 +1910,10 @@ mod tests {
         let file_name = "test/data/various_invalid_types/invalid_int.csv";
 
         let error = invalid_csv_helper(file_name);
-        assert_eq!("Parser error: Error while parsing value '2.3' as type 'UInt64' for column 0 at line 2. Row data: '[2.3,2.2,2.22,false]'", error);
+        assert_eq!(
+            "Parser error: Error while parsing value '2.3' as type 'UInt64' for column 0 at line 2. Row data: '[2.3,2.2,2.22,false]'",
+            error
+        );
     }
 
     #[test]
@@ -1805,7 +1921,10 @@ mod tests {
         let file_name = "test/data/various_invalid_types/invalid_bool.csv";
 
         let error = invalid_csv_helper(file_name);
-        assert_eq!("Parser error: Error while parsing value 'none' as type 'Boolean' for column 3 at line 2. Row data: '[2,2.2,2.22,none]'", error);
+        assert_eq!(
+            "Parser error: Error while parsing value 'none' as type 'Boolean' for column 3 at line 2. Row data: '[2,2.2,2.22,none]'",
+            error
+        );
     }
 
     /// Infer the data type of a record
@@ -2633,7 +2752,10 @@ mod tests {
             .infer_schema(&mut read, None);
         assert!(result.is_err());
         // Include line number in the error message to help locate and fix the issue
-        assert_eq!(result.err().unwrap().to_string(), "Csv error: Encountered unequal lengths between records on CSV file. Expected 2 records, found 3 records at line 3");
+        assert_eq!(
+            result.err().unwrap().to_string(),
+            "Csv error: Encountered unequal lengths between records on CSV file. Expected 3 records, found 2 records at line 3"
+        );
     }
 
     #[test]
diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs
index c5a0a0b76d59..c38d1cdec337 100644
--- a/arrow-csv/src/writer.rs
+++ b/arrow-csv/src/writer.rs
@@ -15,13 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! CSV Writer
+//! CSV Writing: [`Writer`] and [`WriterBuilder`]
 //!
 //! This CSV writer allows Arrow data (in record batches) to be written as CSV files.
 //! The writer does not support writing `ListArray` and `StructArray`.
 //!
-//! Example:
-//!
+//! # Example
 //! ```
 //! # use arrow_array::*;
 //! # use arrow_array::types::*;
@@ -62,6 +61,117 @@
 //!     writer.write(batch).unwrap();
 //! }
 //! ```
+//!
+//! # Whitespace Handling
+//!
+//! The writer supports trimming leading and trailing whitespace from string values,
+//! compatible with Apache Spark's CSV options `ignoreLeadingWhiteSpace` and
+//! `ignoreTrailingWhiteSpace`. This is useful when working with data that may have
+//! unwanted padding.
+//!
+//! Whitespace trimming is applied to all string data types:
+//! - `DataType::Utf8`
+//! - `DataType::LargeUtf8`
+//! - `DataType::Utf8View`
+//!
+//! ## Example: Use [`WriterBuilder`] to control whitespace handling
+//!
+//! ```
+//! # use arrow_array::*;
+//! # use arrow_csv::WriterBuilder;
+//! # use arrow_schema::*;
+//! # use std::sync::Arc;
+//! let schema = Schema::new(vec![
+//!     Field::new("name", DataType::Utf8, false),
+//!     Field::new("comment", DataType::Utf8, false),
+//! ]);
+//!
+//! let name = StringArray::from(vec![
+//!     "  Alice  ",   // Leading and trailing spaces
+//!     "Bob",         // No spaces
+//!     "  Charlie",   // Leading spaces only
+//! ]);
+//! let comment = StringArray::from(vec![
+//!     "  Great job!  ",
+//!     "Well done",
+//!     "Excellent  ",
+//! ]);
+//!
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(schema),
+//!     vec![Arc::new(name), Arc::new(comment)],
+//! )
+//! .unwrap();
+//!
+//! // Trim both leading and trailing whitespace
+//! let mut output = Vec::new();
+//! WriterBuilder::new()
+//!     .with_ignore_leading_whitespace(true)
+//!     .with_ignore_trailing_whitespace(true)
+//!     .build(&mut output)
+//!     .write(&batch)
+//!     .unwrap();
+//! assert_eq!(
+//!     String::from_utf8(output).unwrap(),
+//!     "\
+//! name,comment\n\
+//! Alice,Great job!\n\
+//! Bob,Well done\n\
+//! Charlie,Excellent\n"
+//! );
+//! ```
+//!
+//! # Quoting Styles
+//!
+//! The writer supports different quoting styles for fields, compatible with Apache Spark's
+//! CSV options like `quoteAll`. You can control when fields are quoted using the
+//! [`QuoteStyle`] enum.
+//!
+//! ## Example
+//!
+//! ```
+//! # use arrow_array::*;
+//! # use arrow_csv::{WriterBuilder, QuoteStyle};
+//! # use arrow_schema::*;
+//! # use std::sync::Arc;
+//!
+//! let schema = Schema::new(vec![
+//!     Field::new("product", DataType::Utf8, false),
+//!     Field::new("price", DataType::Float64, false),
+//! ]);
+//!
+//! let product = StringArray::from(vec!["apple", "banana,organic", "cherry"]);
+//! let price = Float64Array::from(vec![1.50, 2.25, 3.00]);
+//!
+//! let batch = RecordBatch::try_new(
+//!     Arc::new(schema),
+//!     vec![Arc::new(product), Arc::new(price)],
+//! )
+//! .unwrap();
+//!
+//! // Default behavior (QuoteStyle::Necessary)
+//! let mut output = Vec::new();
+//! WriterBuilder::new()
+//!     .build(&mut output)
+//!     .write(&batch)
+//!     .unwrap();
+//! assert_eq!(
+//!     String::from_utf8(output).unwrap(),
+//!     "product,price\napple,1.5\n\"banana,organic\",2.25\ncherry,3.0\n"
+//! );
+//!
+//! // Quote all fields (Spark's quoteAll=true)
+//! let mut output = Vec::new();
+//! WriterBuilder::new()
+//!     .with_quote_style(QuoteStyle::Always)
+//!     .build(&mut output)
+//!     .write(&batch)
+//!     .unwrap();
+//! assert_eq!(
+//!     String::from_utf8(output).unwrap(),
+//!     "\"product\",\"price\"\n\"apple\",\"1.5\"\n\"banana,organic\",\"2.25\"\n\"cherry\",\"3.0\"\n"
+//! );
+//! ```
 
 use arrow_array::*;
 use arrow_cast::display::*;
@@ -72,7 +182,25 @@ use std::io::Write;
 use crate::map_csv_error;
 const DEFAULT_NULL_VALUE: &str = "";
 
+/// The quoting style to use when writing CSV files.
+///
+/// This type is re-exported from the `csv` crate and supports different
+/// strategies for quoting fields. It is compatible with Apache Spark's
+/// CSV options like `quoteAll`.
+///
+/// # Example
+///
+/// ```
+/// use arrow_csv::{WriterBuilder, QuoteStyle};
+///
+/// let builder = WriterBuilder::new()
+///     .with_quote_style(QuoteStyle::Always); // Equivalent to Spark's quoteAll=true
+/// ```
+pub use csv::QuoteStyle;
+
 /// A CSV writer
+///
+/// See the [module documentation](crate::writer) for examples.
 #[derive(Debug)]
 pub struct Writer<W: Write> {
     /// The object to write to
@@ -93,16 +221,23 @@ pub struct Writer<W: Write> {
     beginning: bool,
     /// The value to represent null entries, defaults to [`DEFAULT_NULL_VALUE`]
     null_value: Option<String>,
+    /// Whether to ignore leading whitespace in string values
+    ignore_leading_whitespace: bool,
+    /// Whether to ignore trailing whitespace in string values
+    ignore_trailing_whitespace: bool,
 }
 
 impl<W: Write> Writer<W> {
     /// Create a new CsvWriter from a writable object, with default options
+    ///
+    /// See [`WriterBuilder`] for configuration options, and the [module
+    /// documentation](crate::writer) for examples.
     pub fn new(writer: W) -> Self {
         let delimiter = b',';
         WriterBuilder::new().with_delimiter(delimiter).build(writer)
     }
 
-    /// Write a vector of record batches to a writable object
+    /// Write a RecordBatch to the underlying writer
     pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> {
         let num_columns = batch.num_columns();
         if self.beginning {
@@ -157,7 +292,10 @@ impl<W: Write> Writer<W> {
                         col_idx + 1
                     ))
                 })?;
-                byte_record.push_field(buffer.as_bytes());
+
+                let field_bytes =
+                    self.get_trimmed_field_bytes(&buffer, batch.column(col_idx).data_type());
+                byte_record.push_field(field_bytes);
             }
 
             self.writer
@@ -169,6 +307,29 @@ impl<W: Write> Writer<W> {
         Ok(())
     }
 
+    /// Returns the bytes for a field, applying whitespace trimming if configured and applicable
+    fn get_trimmed_field_bytes<'a>(&self, buffer: &'a str, data_type: &DataType) -> &'a [u8] {
+        // Only trim string types when trimming is enabled
+        let should_trim = (self.ignore_leading_whitespace || self.ignore_trailing_whitespace)
+            && matches!(
+                data_type,
+                DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
+            );
+
+        if !should_trim {
+            return buffer.as_bytes();
+        }
+
+        let mut trimmed = buffer;
+        if self.ignore_leading_whitespace {
+            trimmed = trimmed.trim_start();
+        }
+        if self.ignore_trailing_whitespace {
+            trimmed = trimmed.trim_end();
+        }
+        trimmed.as_bytes()
+    }
+
     /// Unwraps this `Writer<W>`, returning the underlying writer.
     pub fn into_inner(self) -> W {
         // Safe to call `unwrap` since `write` always flushes the writer.
@@ -211,6 +372,12 @@ pub struct WriterBuilder {
     time_format: Option<String>,
     /// Optional value to represent null
     null_value: Option<String>,
+    /// Whether to ignore leading whitespace in string values. Defaults to `false`
+    ignore_leading_whitespace: bool,
+    /// Whether to ignore trailing whitespace in string values. Defaults to `false`
+    ignore_trailing_whitespace: bool,
+    /// The quoting style to use. Defaults to `QuoteStyle::Necessary`
+    quote_style: QuoteStyle,
 }
 
 impl Default for WriterBuilder {
@@ -227,14 +394,18 @@ impl Default for WriterBuilder {
             timestamp_tz_format: None,
             time_format: None,
             null_value: None,
+            ignore_leading_whitespace: false,
+            ignore_trailing_whitespace: false,
+            quote_style: QuoteStyle::default(),
         }
     }
 }
 
 impl WriterBuilder {
-    /// Create a new builder for configuring CSV writing options.
+    /// Create a new builder for configuring CSV [`Writer`] options.
     ///
-    /// To convert a builder into a writer, call `WriterBuilder::build`
+    /// To convert a builder into a writer, call [`WriterBuilder::build`]. See
+    /// the [module documentation](crate::writer) for more examples.
     ///
     /// # Example
     ///
@@ -389,12 +560,62 @@ impl WriterBuilder {
         self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE)
     }
 
+    /// Set whether to ignore leading whitespace in string values.
+    /// For example, a string value such as "   foo" will be written as "foo".
+    pub fn with_ignore_leading_whitespace(mut self, ignore: bool) -> Self {
+        self.ignore_leading_whitespace = ignore;
+        self
+    }
+
+    /// Get whether to ignore leading whitespace in string values
+    pub fn ignore_leading_whitespace(&self) -> bool {
+        self.ignore_leading_whitespace
+    }
+
+    /// Set whether to ignore trailing whitespace in string values.
+    /// For example, a string value such as "foo    " will be written as "foo".
+    pub fn with_ignore_trailing_whitespace(mut self, ignore: bool) -> Self {
+        self.ignore_trailing_whitespace = ignore;
+        self
+    }
+
+    /// Get whether to ignore trailing whitespace in string values
+    pub fn ignore_trailing_whitespace(&self) -> bool {
+        self.ignore_trailing_whitespace
+    }
+
+    /// Set the quoting style for writing CSV files
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use arrow_csv::{WriterBuilder, QuoteStyle};
+    ///
+    /// // Quote all fields (equivalent to Spark's quoteAll=true)
+    /// let builder = WriterBuilder::new()
+    ///     .with_quote_style(QuoteStyle::Always);
+    ///
+    /// // Only quote when necessary (default)
+    /// let builder = WriterBuilder::new()
+    ///     .with_quote_style(QuoteStyle::Necessary);
+    /// ```
+    pub fn with_quote_style(mut self, quote_style: QuoteStyle) -> Self {
+        self.quote_style = quote_style;
+        self
+    }
+
+    /// Get the configured quoting style
+    pub fn quote_style(&self) -> QuoteStyle {
+        self.quote_style
+    }
+
     /// Create a new `Writer`
     pub fn build<W: Write>(self, writer: W) -> Writer<W> {
         let mut builder = csv::WriterBuilder::new();
         let writer = builder
             .delimiter(self.delimiter)
             .quote(self.quote)
+            .quote_style(self.quote_style)
             .double_quote(self.double_quote)
             .escape(self.escape)
             .from_writer(writer);
@@ -408,6 +629,8 @@ impl WriterBuilder {
             timestamp_format: self.timestamp_format,
             timestamp_tz_format: self.timestamp_tz_format,
             null_value: self.null_value,
+            ignore_leading_whitespace: self.ignore_leading_whitespace,
+            ignore_trailing_whitespace: self.ignore_trailing_whitespace,
         }
     }
 }
@@ -418,8 +641,8 @@ mod tests {
 
     use crate::ReaderBuilder;
     use arrow_array::builder::{
-        BinaryBuilder, Decimal128Builder, Decimal256Builder, FixedSizeBinaryBuilder,
-        LargeBinaryBuilder,
+        BinaryBuilder, Decimal32Builder, Decimal64Builder, Decimal128Builder, Decimal256Builder,
+        FixedSizeBinaryBuilder, LargeBinaryBuilder,
     };
     use arrow_array::types::*;
     use arrow_buffer::i256;
@@ -496,25 +719,38 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
     #[test]
     fn test_write_csv_decimal() {
         let schema = Schema::new(vec![
-            Field::new("c1", DataType::Decimal128(38, 6), true),
-            Field::new("c2", DataType::Decimal256(76, 6), true),
+            Field::new("c1", DataType::Decimal32(9, 6), true),
+            Field::new("c2", DataType::Decimal64(17, 6), true),
+            Field::new("c3", DataType::Decimal128(38, 6), true),
+            Field::new("c4", DataType::Decimal256(76, 6), true),
         ]);
 
-        let mut c1_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
+        let mut c1_builder = Decimal32Builder::new().with_data_type(DataType::Decimal32(9, 6));
         c1_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
         let c1 = c1_builder.finish();
 
-        let mut c2_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
-        c2_builder.extend(vec![
+        let mut c2_builder = Decimal64Builder::new().with_data_type(DataType::Decimal64(17, 6));
+        c2_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
+        let c2 = c2_builder.finish();
+
+        let mut c3_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
+        c3_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
+        let c3 = c3_builder.finish();
+
+        let mut c4_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
+        c4_builder.extend(vec![
             Some(i256::from_i128(-3335724)),
             Some(i256::from_i128(2179404)),
             None,
             Some(i256::from_i128(290472)),
         ]);
-        let c2 = c2_builder.finish();
+        let c4 = c4_builder.finish();
 
-        let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
+        )
+        .unwrap();
 
         let mut file = tempfile::tempfile().unwrap();
 
@@ -530,15 +766,15 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
         let mut buffer: Vec<u8> = vec![];
         file.read_to_end(&mut buffer).unwrap();
 
-        let expected = r#"c1,c2
--3.335724,-3.335724
-2.179404,2.179404
-,
-0.290472,0.290472
--3.335724,-3.335724
-2.179404,2.179404
-,
-0.290472,0.290472
+        let expected = r#"c1,c2,c3,c4
+-3.335724,-3.335724,-3.335724,-3.335724
+2.179404,2.179404,2.179404,2.179404
+,,,
+0.290472,0.290472,0.290472,0.290472
+-3.335724,-3.335724,-3.335724,-3.335724
+2.179404,2.179404,2.179404,2.179404
+,,,
+0.290472,0.290472,0.290472,0.290472
 "#;
         assert_eq!(expected, str::from_utf8(&buffer).unwrap());
     }
@@ -704,7 +940,10 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
 
         for batch in batches {
             let err = writer.write(batch).unwrap_err().to_string();
-            assert_eq!(err, "Csv error: Error processing row 2, col 2: Cast error: Failed to convert 1926632005177685347 to temporal for Date64")
+            assert_eq!(
+                err,
+                "Csv error: Error processing row 2, col 2: Cast error: Failed to convert 1926632005177685347 to temporal for Date64"
+            )
         }
         drop(writer);
     }
@@ -844,4 +1083,279 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
             String::from_utf8(buf).unwrap()
         );
     }
+
+    #[test]
+    fn test_write_csv_whitespace_handling() {
+        let schema = Schema::new(vec![
+            Field::new("c1", DataType::Utf8, false),
+            Field::new("c2", DataType::Float64, true),
+            Field::new("c3", DataType::Utf8, true),
+        ]);
+
+        let c1 = StringArray::from(vec![
+            "  leading space",
+            "trailing space  ",
+            "  both spaces  ",
+            "no spaces",
+        ]);
+        let c2 = PrimitiveArray::<Float64Type>::from(vec![
+            Some(123.45),
+            Some(678.90),
+            None,
+            Some(111.22),
+        ]);
+        let c3 = StringArray::from(vec![
+            Some("  test  "),
+            Some("value  "),
+            None,
+            Some("  another"),
+        ]);
+
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(c1), Arc::new(c2), Arc::new(c3)],
+        )
+        .unwrap();
+
+        // Test with no whitespace handling (default)
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new();
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "c1,c2,c3\n  leading space,123.45,  test  \ntrailing space  ,678.9,value  \n  both spaces  ,,\nno spaces,111.22,  another\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with ignore leading whitespace only
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new().with_ignore_leading_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "c1,c2,c3\nleading space,123.45,test  \ntrailing space  ,678.9,value  \nboth spaces  ,,\nno spaces,111.22,another\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with ignore trailing whitespace only
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new().with_ignore_trailing_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "c1,c2,c3\n  leading space,123.45,  test\ntrailing space,678.9,value\n  both spaces,,\nno spaces,111.22,  another\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with both ignore leading and trailing whitespace
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new()
+            .with_ignore_leading_whitespace(true)
+            .with_ignore_trailing_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "c1,c2,c3\nleading space,123.45,test\ntrailing space,678.9,value\nboth spaces,,\nno spaces,111.22,another\n",
+            String::from_utf8(buf).unwrap()
+        );
+    }
+
+    #[test]
+    fn test_write_csv_whitespace_with_special_chars() {
+        let schema = Schema::new(vec![Field::new("c1", DataType::Utf8, false)]);
+
+        let c1 = StringArray::from(vec![
+            "  quoted \"value\"  ",
+            "  new\nline  ",
+            "  comma,value  ",
+            "\ttab\tvalue\t",
+        ]);
+
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1)]).unwrap();
+
+        // Test with both ignore leading and trailing whitespace
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new()
+            .with_ignore_leading_whitespace(true)
+            .with_ignore_trailing_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+
+        // Note: leading/trailing tabs are trimmed too (they are whitespace); interior tabs are kept
+        assert_eq!(
+            "c1\n\"quoted \"\"value\"\"\"\n\"new\nline\"\n\"comma,value\"\ntab\tvalue\n",
+            String::from_utf8(buf).unwrap()
+        );
+    }
+
+    #[test]
+    fn test_write_csv_whitespace_all_string_types() {
+        use arrow_array::{LargeStringArray, StringViewArray};
+
+        let schema = Schema::new(vec![
+            Field::new("utf8", DataType::Utf8, false),
+            Field::new("large_utf8", DataType::LargeUtf8, false),
+            Field::new("utf8_view", DataType::Utf8View, false),
+        ]);
+
+        let utf8 = StringArray::from(vec!["  leading", "trailing  ", "  both  ", "no_spaces"]);
+
+        let large_utf8 =
+            LargeStringArray::from(vec!["  leading", "trailing  ", "  both  ", "no_spaces"]);
+
+        let utf8_view =
+            StringViewArray::from(vec!["  leading", "trailing  ", "  both  ", "no_spaces"]);
+
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(utf8), Arc::new(large_utf8), Arc::new(utf8_view)],
+        )
+        .unwrap();
+
+        // Test with no whitespace handling (default)
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new();
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "utf8,large_utf8,utf8_view\n  leading,  leading,  leading\ntrailing  ,trailing  ,trailing  \n  both  ,  both  ,  both  \nno_spaces,no_spaces,no_spaces\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with both ignore leading and trailing whitespace
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new()
+            .with_ignore_leading_whitespace(true)
+            .with_ignore_trailing_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "utf8,large_utf8,utf8_view\nleading,leading,leading\ntrailing,trailing,trailing\nboth,both,both\nno_spaces,no_spaces,no_spaces\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with only leading whitespace trimming
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new().with_ignore_leading_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "utf8,large_utf8,utf8_view\nleading,leading,leading\ntrailing  ,trailing  ,trailing  \nboth  ,both  ,both  \nno_spaces,no_spaces,no_spaces\n",
+            String::from_utf8(buf).unwrap()
+        );
+
+        // Test with only trailing whitespace trimming
+        let mut buf = Vec::new();
+        let builder = WriterBuilder::new().with_ignore_trailing_whitespace(true);
+        let mut writer = builder.build(&mut buf);
+        writer.write(&batch).unwrap();
+        drop(writer);
+        assert_eq!(
+            "utf8,large_utf8,utf8_view\n  leading,  leading,  leading\ntrailing,trailing,trailing\n  both,  both,  both\nno_spaces,no_spaces,no_spaces\n",
+            String::from_utf8(buf).unwrap()
+        );
+    }
+
+    fn write_quote_style(batch: &RecordBatch, quote_style: QuoteStyle) -> String {
+        let mut buf = Vec::new();
+        let mut writer = WriterBuilder::new()
+            .with_quote_style(quote_style)
+            .build(&mut buf);
+        writer.write(batch).unwrap();
+        drop(writer);
+        String::from_utf8(buf).unwrap()
+    }
+
+    fn write_quote_style_with_null(
+        batch: &RecordBatch,
+        quote_style: QuoteStyle,
+        null_value: &str,
+    ) -> String {
+        let mut buf = Vec::new();
+        let mut writer = WriterBuilder::new()
+            .with_quote_style(quote_style)
+            .with_null(null_value.to_string())
+            .build(&mut buf);
+        writer.write(batch).unwrap();
+        drop(writer);
+        String::from_utf8(buf).unwrap()
+    }
+
+    #[test]
+    fn test_write_csv_quote_style() {
+        let schema = Schema::new(vec![
+            Field::new("text", DataType::Utf8, false),
+            Field::new("number", DataType::Int32, false),
+            Field::new("float", DataType::Float64, false),
+        ]);
+
+        let text = StringArray::from(vec!["hello", "world", "comma,value", "quote\"test"]);
+        let number = Int32Array::from(vec![1, 2, 3, 4]);
+        let float = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4]);
+
+        let batch = RecordBatch::try_new(
+            Arc::new(schema),
+            vec![Arc::new(text), Arc::new(number), Arc::new(float)],
+        )
+        .unwrap();
+
+        // Test with QuoteStyle::Necessary (default)
+        assert_eq!(
+            "text,number,float\nhello,1,1.1\nworld,2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
+            write_quote_style(&batch, QuoteStyle::Necessary)
+        );
+
+        // Test with QuoteStyle::Always (equivalent to Spark's quoteAll=true)
+        assert_eq!(
+            "\"text\",\"number\",\"float\"\n\"hello\",\"1\",\"1.1\"\n\"world\",\"2\",\"2.2\"\n\"comma,value\",\"3\",\"3.3\"\n\"quote\"\"test\",\"4\",\"4.4\"\n",
+            write_quote_style(&batch, QuoteStyle::Always)
+        );
+
+        // Test with QuoteStyle::NonNumeric
+        assert_eq!(
+            "\"text\",\"number\",\"float\"\n\"hello\",1,1.1\n\"world\",2,2.2\n\"comma,value\",3,3.3\n\"quote\"\"test\",4,4.4\n",
+            write_quote_style(&batch, QuoteStyle::NonNumeric)
+        );
+
+        // Test with QuoteStyle::Never (warning: can produce invalid CSV)
+        // Note: This produces invalid CSV for fields with commas or quotes
+        assert_eq!(
+            "text,number,float\nhello,1,1.1\nworld,2,2.2\ncomma,value,3,3.3\nquote\"test,4,4.4\n",
+            write_quote_style(&batch, QuoteStyle::Never)
+        );
+    }
+
+    #[test]
+    fn test_write_csv_quote_style_with_nulls() {
+        let schema = Schema::new(vec![
+            Field::new("text", DataType::Utf8, true),
+            Field::new("number", DataType::Int32, true),
+        ]);
+
+        let text = StringArray::from(vec![Some("hello"), None, Some("world")]);
+        let number = Int32Array::from(vec![Some(1), Some(2), None]);
+
+        let batch =
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(text), Arc::new(number)]).unwrap();
+
+        // Test with QuoteStyle::Always
+        assert_eq!(
+            "\"text\",\"number\"\n\"hello\",\"1\"\n\"\",\"2\"\n\"world\",\"\"\n",
+            write_quote_style(&batch, QuoteStyle::Always)
+        );
+
+        // Test with QuoteStyle::Always and custom null value
+        assert_eq!(
+            "\"text\",\"number\"\n\"hello\",\"1\"\n\"NULL\",\"2\"\n\"world\",\"NULL\"\n",
+            write_quote_style_with_null(&batch, QuoteStyle::Always, "NULL")
+        );
+    }
 }
diff --git a/arrow-data/Cargo.toml b/arrow-data/Cargo.toml
index fbed24fea1fa..9c7a5206b2f4 100644
--- a/arrow-data/Cargo.toml
+++ b/arrow-data/Cargo.toml
@@ -48,7 +48,8 @@ all-features = true
 arrow-buffer = { workspace = true }
 arrow-schema = { workspace = true }
 
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-integer = { version = "0.1.46", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 half = { version = "2.1", default-features = false }
 
 [dev-dependencies]
diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs
index 3b3ec6246066..270f4f9948ac 100644
--- a/arrow-data/src/byte_view.rs
+++ b/arrow-data/src/byte_view.rs
@@ -18,6 +18,14 @@
 use arrow_buffer::Buffer;
 use arrow_schema::ArrowError;
 
+/// The maximum number of bytes that can be stored inline in a byte view.
+///
+/// See [`ByteView`] and [`GenericByteViewArray`] for more information on the
+/// layout of the views.
+///
+/// [`GenericByteViewArray`]: https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
+pub const MAX_INLINE_VIEW_LEN: u32 = 12;
+
 /// Helper to access views of [`GenericByteViewArray`] (`StringViewArray` and
 /// `BinaryViewArray`) where the length is greater than 12 bytes.
 ///
@@ -76,15 +84,15 @@ impl ByteView {
     /// See example on [`ByteView`] docs
     ///
     /// Notes:
-    /// * the length should always be greater than 12 (Data less than 12
-    ///   bytes is stored as an inline view)
+    /// * the length should always be greater than [`MAX_INLINE_VIEW_LEN`]
+    ///   (data of 12 bytes or fewer is stored as an inline view)
     /// * buffer and offset are set to `0`
     ///
     /// # Panics
     /// If the prefix is not exactly 4 bytes
     #[inline]
     pub fn new(length: u32, prefix: &[u8]) -> Self {
-        debug_assert!(length > 12);
+        debug_assert!(length > MAX_INLINE_VIEW_LEN);
         Self {
             length,
             prefix: u32::from_le_bytes(prefix.try_into().unwrap()),
@@ -159,8 +167,8 @@ where
 {
     for (idx, v) in views.iter().enumerate() {
         let len = *v as u32;
-        if len <= 12 {
-            if len < 12 && (v >> (32 + len * 8)) != 0 {
+        if len <= MAX_INLINE_VIEW_LEN {
+            if len < MAX_INLINE_VIEW_LEN && (v >> (32 + len * 8)) != 0 {
                 return Err(ArrowError::InvalidArgumentError(format!(
                     "View at index {idx} contained non-zero padding for string of length {len}",
                 )));
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 4c117184de79..4917691e23f8 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -21,7 +21,7 @@
 use crate::bit_iterator::BitSliceIterator;
 use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
 use arrow_buffer::{
-    bit_util, i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer,
+    ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer, bit_util, i256,
 };
 use arrow_schema::{ArrowError, DataType, UnionMode};
 use std::mem;
@@ -83,6 +83,8 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
         | DataType::Float16
         | DataType::Float32
         | DataType::Float64
+        | DataType::Decimal32(_, _)
+        | DataType::Decimal64(_, _)
         | DataType::Decimal128(_, _)
         | DataType::Decimal256(_, _)
         | DataType::Date32
@@ -279,7 +281,7 @@ impl ArrayData {
     ) -> Self {
         let mut skip_validation = UnsafeFlag::new();
         // SAFETY: caller responsible for ensuring data is valid
-        skip_validation.set(true);
+        unsafe { skip_validation.set(true) };
 
         ArrayDataBuilder {
             data_type,
@@ -307,6 +309,9 @@ impl ArrayData {
     ///
     /// Note: This is a low level API and most users of the arrow crate should create
     /// arrays using the builders found in [arrow_array](https://docs.rs/arrow-array)
+    /// or [`ArrayDataBuilder`].
+    ///
+    /// See also [`Self::into_parts`] to recover the fields
     pub fn try_new(
         data_type: DataType,
         len: usize,
@@ -349,6 +354,33 @@ impl ArrayData {
         Ok(new_self)
     }
 
+    /// Return the constituent parts of this ArrayData
+    ///
+    /// This is the inverse of [`ArrayData::try_new`].
+    ///
+    /// Returns `(data_type, len, nulls, offset, buffers, child_data)`
+    pub fn into_parts(
+        self,
+    ) -> (
+        DataType,
+        usize,
+        Option<NullBuffer>,
+        usize,
+        Vec<Buffer>,
+        Vec<ArrayData>,
+    ) {
+        let Self {
+            data_type,
+            len,
+            nulls,
+            offset,
+            buffers,
+            child_data,
+        } = self;
+
+        (data_type, len, nulls, offset, buffers, child_data)
+    }
+
     /// Returns a builder to construct a [`ArrayData`] instance of the same [`DataType`]
     #[inline]
     pub const fn builder(data_type: DataType) -> ArrayDataBuilder {
@@ -474,21 +506,20 @@ impl ArrayData {
                     result += buffer_size;
                 }
                 BufferSpec::VariableWidth => {
-                    let buffer_len: usize;
-                    match self.data_type {
+                    let buffer_len = match self.data_type {
                         DataType::Utf8 | DataType::Binary => {
                             let offsets = self.typed_offsets::<i32>()?;
-                            buffer_len = (offsets[self.len] - offsets[0] ) as usize;
+                            (offsets[self.len] - offsets[0]) as usize
                         }
                         DataType::LargeUtf8 | DataType::LargeBinary => {
                             let offsets = self.typed_offsets::<i64>()?;
-                            buffer_len = (offsets[self.len] - offsets[0]) as usize;
+                            (offsets[self.len] - offsets[0]) as usize
                         }
                         _ => {
                             return Err(ArrowError::NotYetImplemented(format!(
-                            "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
-                            self.data_type
-                            )))
+                                "Invalid data type for VariableWidth buffer. Expected Utf8, LargeUtf8, Binary or LargeBinary. Got {}",
+                                self.data_type
+                            )));
                         }
                     };
                     result += buffer_len;
@@ -552,7 +583,7 @@ impl ArrayData {
         if let DataType::Struct(_) = self.data_type() {
             // Slice into children
             let new_offset = self.offset + offset;
-            let new_data = ArrayData {
+            ArrayData {
                 data_type: self.data_type().clone(),
                 len: length,
                 offset: new_offset,
@@ -564,9 +595,7 @@ impl ArrayData {
                     .map(|data| data.slice(offset, length))
                     .collect(),
                 nulls: self.nulls.as_ref().map(|x| x.slice(offset, length)),
-            };
-
-            new_data
+            }
         } else {
             let mut new_data = self.clone();
 
@@ -616,6 +645,16 @@ impl ArrayData {
                     vec![ArrayData::new_empty(f.data_type())],
                     true,
                 ),
+                DataType::ListView(f) => (
+                    vec![zeroed(len * 4), zeroed(len * 4)],
+                    vec![ArrayData::new_empty(f.data_type())],
+                    true,
+                ),
+                DataType::LargeListView(f) => (
+                    vec![zeroed(len * 8), zeroed(len * 8)],
+                    vec![ArrayData::new_empty(f.data_type())],
+                    true,
+                ),
                 DataType::FixedSizeList(f, list_len) => (
                     vec![],
                     vec![ArrayData::new_null(f.data_type(), *list_len as usize * len)],
@@ -636,7 +675,7 @@ impl ArrayData {
                 ),
                 DataType::Union(f, mode) => {
                     let (id, _) = f.iter().next().unwrap();
-                    let ids = Buffer::from_iter(std::iter::repeat(id).take(len));
+                    let ids = Buffer::from_iter(std::iter::repeat_n(id, len));
                     let buffers = match mode {
                         UnionMode::Sparse => vec![ids],
                         UnionMode::Dense => {
@@ -689,7 +728,29 @@ impl ArrayData {
                         false,
                     )
                 }
-                d => unreachable!("{d}"),
+                // Handled by Some(width) branch above
+                DataType::Int8
+                | DataType::Int16
+                | DataType::Int32
+                | DataType::Int64
+                | DataType::UInt8
+                | DataType::UInt16
+                | DataType::UInt32
+                | DataType::UInt64
+                | DataType::Float16
+                | DataType::Float32
+                | DataType::Float64
+                | DataType::Timestamp(_, _)
+                | DataType::Date32
+                | DataType::Date64
+                | DataType::Time32(_)
+                | DataType::Time64(_)
+                | DataType::Duration(_)
+                | DataType::Interval(_)
+                | DataType::Decimal32(_, _)
+                | DataType::Decimal64(_, _)
+                | DataType::Decimal128(_, _)
+                | DataType::Decimal256(_, _) => unreachable!("{data_type}"),
             },
         };
 
@@ -782,7 +843,10 @@ impl ArrayData {
                     if buffer.len() < min_buffer_size {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "Need at least {} bytes in buffers[{}] in array of type {:?}, but got {}",
-                            min_buffer_size, i, self.data_type, buffer.len()
+                            min_buffer_size,
+                            i,
+                            self.data_type,
+                            buffer.len()
                         )));
                     }
 
@@ -790,7 +854,8 @@ impl ArrayData {
                     if align_offset != 0 {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "Misaligned buffers[{i}] in array of type {:?}, offset from expected alignment of {alignment} by {}",
-                            self.data_type, align_offset.min(alignment - align_offset)
+                            self.data_type,
+                            align_offset.min(alignment - align_offset)
                         )));
                     }
                 }
@@ -804,7 +869,10 @@ impl ArrayData {
                     if buffer.len() < min_buffer_size {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "Need at least {} bytes for bitmap in buffers[{}] in array of type {:?}, but got {}",
-                            min_buffer_size, i, self.data_type, buffer.len()
+                            min_buffer_size,
+                            i,
+                            self.data_type,
+                            buffer.len()
                         )));
                     }
                 }
@@ -884,7 +952,7 @@ impl ArrayData {
     /// entries.
     ///
     /// For an empty array, the `buffer` can also be empty.
-    fn typed_offsets<T: ArrowNativeType + num::Num>(&self) -> Result<&[T], ArrowError> {
+    fn typed_offsets<T: ArrowNativeType + num_traits::Num>(&self) -> Result<&[T], ArrowError> {
         // An empty list-like array can have 0 offsets
         if self.len == 0 && self.buffers[0].is_empty() {
             return Ok(&[]);
@@ -894,7 +962,7 @@ impl ArrayData {
     }
 
     /// Returns a reference to the data in `buffers[idx]` as a typed slice after validating
-    fn typed_buffer<T: ArrowNativeType + num::Num>(
+    fn typed_buffer<T: ArrowNativeType + num_traits::Num>(
         &self,
         idx: usize,
         len: usize,
@@ -918,7 +986,7 @@ impl ArrayData {
 
     /// Does a cheap sanity check that the `self.len` values in `buffer` are valid
     /// offsets (of type T) into some other buffer of `values_length` bytes long
-    fn validate_offsets<T: ArrowNativeType + num::Num + std::fmt::Display>(
+    fn validate_offsets<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
         &self,
         values_length: usize,
     ) -> Result<(), ArrowError> {
@@ -968,13 +1036,21 @@ impl ArrayData {
 
     /// Does a cheap sanity check that the `self.len` values in `buffer` are valid
     /// offsets and sizes (of type T) into some other buffer of `values_length` bytes long
-    fn validate_offsets_and_sizes<T: ArrowNativeType + num::Num + std::fmt::Display>(
+    fn validate_offsets_and_sizes<T: ArrowNativeType + num_traits::Num + std::fmt::Display>(
         &self,
         values_length: usize,
     ) -> Result<(), ArrowError> {
         let offsets: &[T] = self.typed_buffer(0, self.len)?;
         let sizes: &[T] = self.typed_buffer(1, self.len)?;
-        for i in 0..values_length {
+        if offsets.len() != sizes.len() {
+            return Err(ArrowError::ComputeError(format!(
+                "ListView offsets len {} does not match sizes len {}",
+                offsets.len(),
+                sizes.len()
+            )));
+        }
+
+        for i in 0..sizes.len() {
             let size = sizes[i].to_usize().ok_or_else(|| {
                 ArrowError::InvalidArgumentError(format!(
                     "Error converting size[{}] ({}) to usize for {}",
@@ -1056,7 +1132,11 @@ impl ArrayData {
                     if field_data.len < self.len {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
-                            self.data_type, i, field.name(), field_data.len, self.len
+                            self.data_type,
+                            i,
+                            field.name(),
+                            field_data.len,
+                            self.len
                         )));
                     }
                 }
@@ -1088,7 +1168,9 @@ impl ArrayData {
                     if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) {
                         return Err(ArrowError::InvalidArgumentError(format!(
                             "Sparse union child array #{} has length smaller than expected for union array ({} < {})",
-                            i, field_data.len, self.len + self.offset
+                            i,
+                            field_data.len,
+                            self.len + self.offset
                         )));
                     }
                 }
@@ -1280,7 +1362,7 @@ impl ArrayData {
                         "non-nullable child of type {} contains nulls not present in parent {}",
                         child.data_type, self.data_type
                     ))),
-                }
+                };
             }
         };
 
@@ -1371,7 +1453,7 @@ impl ArrayData {
     /// function would call `validate([1,2])`, and `validate([2,4])`
     fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
     where
-        T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
+        T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
         V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
     {
         self.typed_offsets::<T>()?
@@ -1418,7 +1500,7 @@ impl ArrayData {
     /// into `buffers[1]` are valid utf8 sequences
     fn validate_utf8<T>(&self) -> Result<(), ArrowError>
     where
-        T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
+        T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
     {
         let values_buffer = &self.buffers[1].as_slice();
         if let Ok(values_str) = std::str::from_utf8(values_buffer) {
@@ -1450,7 +1532,7 @@ impl ArrayData {
     /// between `0` and `offset_limit`
     fn validate_offsets_full<T>(&self, offset_limit: usize) -> Result<(), ArrowError>
     where
-        T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
+        T: ArrowNativeType + TryInto<usize> + num_traits::Num + std::fmt::Display,
     {
         self.validate_each_offset::<T, _>(offset_limit, |_string_index, _range| {
             // No validation applied to each value, but the iteration
@@ -1463,7 +1545,7 @@ impl ArrayData {
     /// is within the range [0, max_value], inclusive
     fn check_bounds<T>(&self, max_value: i64) -> Result<(), ArrowError>
     where
-        T: ArrowNativeType + TryInto<i64> + num::Num + std::fmt::Display,
+        T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
     {
         let required_len = self.len + self.offset;
         let buffer = &self.buffers[0];
@@ -1498,7 +1580,7 @@ impl ArrayData {
     /// Validates that each value in run_ends array is positive and strictly increasing.
     fn check_run_ends<T>(&self) -> Result<(), ArrowError>
     where
-        T: ArrowNativeType + TryInto<i64> + num::Num + std::fmt::Display,
+        T: ArrowNativeType + TryInto<i64> + num_traits::Num + std::fmt::Display,
     {
         let values = self.typed_buffer::<T>(0, self.len)?;
         let mut prev_value: i64 = 0_i64;
@@ -1612,6 +1694,8 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
             DataTypeLayout::new_fixed_width::<IntervalMonthDayNano>()
         }
         DataType::Duration(_) => DataTypeLayout::new_fixed_width::<i64>(),
+        DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
+        DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
         DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::<i128>(),
         DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::<i256>(),
         DataType::FixedSizeBinary(size) => {
@@ -1761,7 +1845,7 @@ impl DataTypeLayout {
                 },
             ],
             can_contain_null_mask: true,
-            variadic: true,
+            variadic: false,
         }
     }
 }
@@ -1984,6 +2068,7 @@ impl ArrayDataBuilder {
     ///
     /// Note: This is shorthand for
     /// ```rust
+    /// # #[expect(unsafe_op_in_unsafe_fn)]
     /// # let mut builder = arrow_data::ArrayDataBuilder::new(arrow_schema::DataType::Null);
     /// # let _ = unsafe {
     /// builder.skip_validation(true).build().unwrap()
@@ -1995,7 +2080,7 @@ impl ArrayDataBuilder {
     /// The same caveats as [`ArrayData::new_unchecked`]
     /// apply.
     pub unsafe fn build_unchecked(self) -> ArrayData {
-        self.skip_validation(true).build().unwrap()
+        unsafe { self.skip_validation(true) }.build().unwrap()
     }
 
     /// Creates an `ArrayData`, consuming `self`
@@ -2094,7 +2179,9 @@ impl ArrayDataBuilder {
     /// If validation is skipped, the buffers must form a valid Arrow array,
     /// otherwise undefined behavior will result
     pub unsafe fn skip_validation(mut self, skip_validation: bool) -> Self {
-        self.skip_validation.set(skip_validation);
+        unsafe {
+            self.skip_validation.set(skip_validation);
+        }
         self
     }
 }
@@ -2447,5 +2534,23 @@ mod tests {
         for i in 0..array.len() {
             assert!(array.is_null(i));
         }
+
+        let array = ArrayData::new_null(
+            &DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
+            array_len,
+        );
+        assert_eq!(array.len(), array_len);
+        for i in 0..array.len() {
+            assert!(array.is_null(i));
+        }
+
+        let array = ArrayData::new_null(
+            &DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
+            array_len,
+        );
+        assert_eq!(array.len(), array_len);
+        for i in 0..array.len() {
+            assert!(array.is_null(i));
+        }
     }
 }
diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs
index e84461f2ec3a..2c997753bd5f 100644
--- a/arrow-data/src/decimal.rs
+++ b/arrow-data/src/decimal.rs
@@ -15,19 +15,22 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Maximum and minimum values for [`Decimal256`] and [`Decimal128`].
+//! Maximum and minimum values for [`Decimal256`], [`Decimal128`], [`Decimal64`] and [`Decimal32`].
 //!
 //! Also provides functions to validate if a given decimal value is within
 //! the valid range of the decimal type.
 //!
+//! [`Decimal32`]: arrow_schema::DataType::Decimal32
+//! [`Decimal64`]: arrow_schema::DataType::Decimal64
 //! [`Decimal128`]: arrow_schema::DataType::Decimal128
 //! [`Decimal256`]: arrow_schema::DataType::Decimal256
 use arrow_buffer::i256;
 use arrow_schema::ArrowError;
 
 pub use arrow_schema::{
+    DECIMAL_DEFAULT_SCALE, DECIMAL32_DEFAULT_SCALE, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE,
+    DECIMAL64_DEFAULT_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE,
     DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
-    DECIMAL_DEFAULT_SCALE,
 };
 
 /// `MAX_DECIMAL256_FOR_EACH_PRECISION[p]` holds the maximum [`i256`] value that can
@@ -899,26 +902,264 @@ pub const MIN_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [
     -99999999999999999999999999999999999999,
 ];
 
+/// `MAX_DECIMAL64_FOR_EACH_PRECISION[p]` holds the maximum `i64` value that can
+/// be stored in a [`Decimal64`] value of precision `p`.
+///
+/// # Notes
+///
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+///
+/// # Example
+/// ```
+/// # use arrow_data::decimal::MAX_DECIMAL64_FOR_EACH_PRECISION;
+/// assert_eq!(MAX_DECIMAL64_FOR_EACH_PRECISION[3], 999);
+/// ```
+///
+/// [`Decimal64`]: arrow_schema::DataType::Decimal64
+pub const MAX_DECIMAL64_FOR_EACH_PRECISION: [i64; 19] = [
+    0, // unused first element
+    9,
+    99,
+    999,
+    9999,
+    99999,
+    999999,
+    9999999,
+    99999999,
+    999999999,
+    9999999999,
+    99999999999,
+    999999999999,
+    9999999999999,
+    99999999999999,
+    999999999999999,
+    9999999999999999,
+    99999999999999999,
+    999999999999999999,
+];
+
+/// `MIN_DECIMAL64_FOR_EACH_PRECISION[p]` holds the minimum `i64` value that can
+/// be stored in a [`Decimal64`] value of precision `p`.
+///
+/// # Notes
+///
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+///
+/// # Example
+/// ```
+/// # use arrow_data::decimal::MIN_DECIMAL64_FOR_EACH_PRECISION;
+/// assert_eq!(MIN_DECIMAL64_FOR_EACH_PRECISION[3], -999);
+/// ```
+///
+/// [`Decimal64`]: arrow_schema::DataType::Decimal64
+pub const MIN_DECIMAL64_FOR_EACH_PRECISION: [i64; 19] = [
+    0, // unused first element
+    -9,
+    -99,
+    -999,
+    -9999,
+    -99999,
+    -999999,
+    -9999999,
+    -99999999,
+    -999999999,
+    -9999999999,
+    -99999999999,
+    -999999999999,
+    -9999999999999,
+    -99999999999999,
+    -999999999999999,
+    -9999999999999999,
+    -99999999999999999,
+    -999999999999999999,
+];
+
+/// `MAX_DECIMAL32_FOR_EACH_PRECISION[p]` holds the maximum `i32` value that can
+/// be stored in a [`Decimal32`] value of precision `p`.
+///
+/// # Notes
+///
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+///
+/// # Example
+/// ```
+/// # use arrow_data::decimal::MAX_DECIMAL32_FOR_EACH_PRECISION;
+/// assert_eq!(MAX_DECIMAL32_FOR_EACH_PRECISION[3], 999);
+/// ```
+///
+/// [`Decimal32`]: arrow_schema::DataType::Decimal32
+pub const MAX_DECIMAL32_FOR_EACH_PRECISION: [i32; 10] = [
+    0, // unused first element
+    9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999,
+];
+
+/// `MIN_DECIMAL32_FOR_EACH_PRECISION[p]` holds the minimum `i32` value that can
+/// be stored in a [`Decimal32`] value of precision `p`.
+///
+/// # Notes
+///
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+///
+/// # Example
+/// ```
+/// # use arrow_data::decimal::MIN_DECIMAL32_FOR_EACH_PRECISION;
+/// assert_eq!(MIN_DECIMAL32_FOR_EACH_PRECISION[3], -999);
+/// ```
+///
+/// [`Decimal32`]: arrow_schema::DataType::Decimal32
+pub const MIN_DECIMAL32_FOR_EACH_PRECISION: [i32; 10] = [
+    0, // unused first element
+    -9, -99, -999, -9999, -99999, -999999, -9999999, -99999999, -999999999,
+];
+
+/// Validates that the specified `i32` value fits a [`Decimal32`] of precision
+/// `precision`; `scale` is only used to render the value in the error message
+///
+/// [`Decimal32`]: arrow_schema::DataType::Decimal32
+#[inline]
+pub fn validate_decimal32_precision(
+    value: i32,
+    precision: u8,
+    scale: i8,
+) -> Result<(), ArrowError> {
+    if precision > DECIMAL32_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal32 is {DECIMAL32_MAX_PRECISION}, but got {precision}",
+        )));
+    }
+    if value > MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_max_value = format_decimal_str(
+            &MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{unscaled_value} is too large to store in a Decimal32 of precision {precision}. Max is {}",
+            unscaled_max_value
+        )))
+    } else if value < MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_min_value = format_decimal_str(
+            &MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{unscaled_value} is too small to store in a Decimal32 of precision {precision}. Min is {}",
+            unscaled_min_value
+        )))
+    } else {
+        Ok(())
+    }
+}
+
+/// Returns true if the specified `i32` value can be properly
+/// interpreted as a [`Decimal32`] number with precision `precision`
+///
+/// [`Decimal32`]: arrow_schema::DataType::Decimal32
+#[inline]
+pub fn is_validate_decimal32_precision(value: i32, precision: u8) -> bool {
+    precision <= DECIMAL32_MAX_PRECISION
+        && value >= MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize]
+        && value <= MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize]
+}
+
+/// Validates that the specified `i64` value fits a [`Decimal64`] of precision
+/// `precision`; `scale` is only used to render the value in the error message
+///
+/// [`Decimal64`]: arrow_schema::DataType::Decimal64
+#[inline]
+pub fn validate_decimal64_precision(
+    value: i64,
+    precision: u8,
+    scale: i8,
+) -> Result<(), ArrowError> {
+    if precision > DECIMAL64_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal64 is {DECIMAL64_MAX_PRECISION}, but got {precision}",
+        )));
+    }
+    if value > MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_max_value = format_decimal_str(
+            &MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{unscaled_value} is too large to store in a Decimal64 of precision {precision}. Max is {}",
+            unscaled_max_value
+        )))
+    } else if value < MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_min_value = format_decimal_str(
+            &MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{unscaled_value} is too small to store in a Decimal64 of precision {precision}. Min is {}",
+            unscaled_min_value
+        )))
+    } else {
+        Ok(())
+    }
+}
+
+/// Returns true if the specified `i64` value can be properly
+/// interpreted as a [`Decimal64`] number with precision `precision`
+///
+/// [`Decimal64`]: arrow_schema::DataType::Decimal64
+#[inline]
+pub fn is_validate_decimal64_precision(value: i64, precision: u8) -> bool {
+    precision <= DECIMAL64_MAX_PRECISION
+        && value >= MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize]
+        && value <= MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize]
+}
+
 /// Validates that the specified `i128` value can be properly
 /// interpreted as a [`Decimal128`] number with precision `precision`
 ///
 /// [`Decimal128`]: arrow_schema::DataType::Decimal128
 #[inline]
-pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), ArrowError> {
+pub fn validate_decimal_precision(value: i128, precision: u8, scale: i8) -> Result<(), ArrowError> {
     if precision > DECIMAL128_MAX_PRECISION {
         return Err(ArrowError::InvalidArgumentError(format!(
             "Max precision of a Decimal128 is {DECIMAL128_MAX_PRECISION}, but got {precision}",
         )));
     }
     if value > MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_max_value = format_decimal_str(
+            &MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
         Err(ArrowError::InvalidArgumentError(format!(
-            "{value} is too large to store in a Decimal128 of precision {precision}. Max is {}",
-            MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
+            "{unscaled_value} is too large to store in a Decimal128 of precision {precision}. Max is {}",
+            unscaled_max_value
         )))
     } else if value < MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_min_value = format_decimal_str(
+            &MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
         Err(ArrowError::InvalidArgumentError(format!(
-            "{value} is too small to store in a Decimal128 of precision {precision}. Min is {}",
-            MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize]
+            "{unscaled_value} is too small to store in a Decimal128 of precision {precision}. Min is {}",
+            unscaled_min_value
         )))
     } else {
         Ok(())
@@ -941,21 +1182,40 @@ pub fn is_validate_decimal_precision(value: i128, precision: u8) -> bool {
 ///
 /// [`Decimal256`]: arrow_schema::DataType::Decimal256
 #[inline]
-pub fn validate_decimal256_precision(value: i256, precision: u8) -> Result<(), ArrowError> {
+pub fn validate_decimal256_precision(
+    value: i256,
+    precision: u8,
+    scale: i8,
+) -> Result<(), ArrowError> {
     if precision > DECIMAL256_MAX_PRECISION {
         return Err(ArrowError::InvalidArgumentError(format!(
             "Max precision of a Decimal256 is {DECIMAL256_MAX_PRECISION}, but got {precision}",
         )));
     }
+
     if value > MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_max_value = format_decimal_str(
+            &MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
         Err(ArrowError::InvalidArgumentError(format!(
-            "{value:?} is too large to store in a Decimal256 of precision {precision}. Max is {:?}",
-            MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
+            "{unscaled_value} is too large to store in a Decimal256 of precision {precision}. Max is {}",
+            unscaled_max_value
         )))
     } else if value < MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize] {
+        let unscaled_value =
+            format_decimal_str_internal(&value.to_string(), precision.into(), scale, false);
+        let unscaled_min_value = format_decimal_str(
+            &MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize].to_string(),
+            precision.into(),
+            scale,
+        );
         Err(ArrowError::InvalidArgumentError(format!(
-            "{value:?} is too small to store in a Decimal256 of precision {precision}. Min is {:?}",
-            MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
+            "{unscaled_value} is too small to store in a Decimal256 of precision {precision}. Min is {}",
+            unscaled_min_value
         )))
     } else {
         Ok(())
@@ -972,3 +1232,44 @@ pub fn is_validate_decimal256_precision(value: i256, precision: u8) -> bool {
         && value >= MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
         && value <= MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize]
 }
+
+#[inline]
+/// Formats the digit string `value_str` as a decimal with `scale`, capped at `precision` digits.
+pub fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String {
+    format_decimal_str_internal(value_str, precision, scale, true)
+}
+
+// Formats the integer string `value_str` (optional leading `-`) as a decimal number.
+// A positive `scale` places that many digits after the point, a negative one appends zeros.
+// If `safe_decimal` is true, only the sign and the first `precision` digits are formatted.
+fn format_decimal_str_internal(
+    value_str: &str,
+    precision: usize,
+    scale: i8,
+    safe_decimal: bool,
+) -> String {
+    let (sign, rest) = match value_str.strip_prefix('-') {
+        Some(stripped) => ("-", stripped),
+        None => ("", value_str),
+    };
+    let value_str = if safe_decimal {
+        &value_str[..sign.len() + precision.min(rest.len())]
+    } else {
+        value_str
+    };
+    // Use the digits that were kept, so a truncated value can never underflow the split below.
+    let rest = &value_str[sign.len()..];
+    if scale == 0 {
+        value_str.to_string()
+    } else if scale < 0 {
+        let padding = value_str.len() + scale.unsigned_abs() as usize;
+        format!("{value_str:0<padding$}")
+    } else if rest.len() > scale as usize {
+        // Decimal separator is in the middle of the string
+        let (whole, decimal) = value_str.split_at(value_str.len() - scale as usize);
+        format!("{whole}.{decimal}")
+    } else {
+        // String has to be padded
+        format!("{}0.{:0>width$}", sign, rest, width = scale as usize)
+    }
+}
diff --git a/arrow-data/src/equal/boolean.rs b/arrow-data/src/equal/boolean.rs
index addae936f118..64b7125e3688 100644
--- a/arrow-data/src/equal/boolean.rs
+++ b/arrow-data/src/equal/boolean.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::bit_iterator::BitIndexIterator;
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 use arrow_buffer::bit_util::get_bit;
 
 use super::utils::{equal_bits, equal_len};
diff --git a/arrow-data/src/equal/dictionary.rs b/arrow-data/src/equal/dictionary.rs
index 1d9c4b8d964f..a906ec030580 100644
--- a/arrow-data/src/equal/dictionary.rs
+++ b/arrow-data/src/equal/dictionary.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 use arrow_buffer::ArrowNativeType;
 
 use super::equal_range;
diff --git a/arrow-data/src/equal/fixed_list.rs b/arrow-data/src/equal/fixed_list.rs
index 4b79e5c33fab..9a5d64d217ad 100644
--- a/arrow-data/src/equal/fixed_list.rs
+++ b/arrow-data/src/equal/fixed_list.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 use arrow_schema::DataType;
 
 use super::equal_range;
diff --git a/arrow-data/src/equal/list.rs b/arrow-data/src/equal/list.rs
index cc4ba3cacf9f..ba5e5a8c93c1 100644
--- a/arrow-data/src/equal/list.rs
+++ b/arrow-data/src/equal/list.rs
@@ -15,9 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{count_nulls, ArrayData};
+use crate::data::{ArrayData, count_nulls};
 use arrow_buffer::ArrowNativeType;
-use num::Integer;
+use num_integer::Integer;
 
 use super::equal_range;
 
diff --git a/arrow-data/src/equal/list_view.rs b/arrow-data/src/equal/list_view.rs
new file mode 100644
index 000000000000..c7cb31db9099
--- /dev/null
+++ b/arrow-data/src/equal/list_view.rs
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use crate::data::count_nulls;
+use crate::equal::equal_values;
+use arrow_buffer::ArrowNativeType;
+use num_integer::Integer;
+
+/// Returns whether `len` list-view slots of `lhs`, starting at `lhs_start`, equal the `len`
+/// slots of `rhs` starting at `rhs_start`. `T` is the native type of the offsets and sizes.
+///
+/// Buffer 0 holds the offsets, buffer 1 the sizes and child 0 the values. Two valid slots
+/// are equal when their sizes match and the child ranges they describe hold equal values;
+/// the offsets themselves may differ. A null slot only equals another null slot, and its
+/// offset and size are not inspected.
+///
+/// Panics if a range exceeds the offsets or sizes buffers, or if an inspected offset or
+/// size cannot be converted to `usize`.
+pub(super) fn list_view_equal<T: ArrowNativeType + Integer>(
+    lhs: &ArrayData,
+    rhs: &ArrayData,
+    lhs_start: usize,
+    rhs_start: usize,
+    len: usize,
+) -> bool {
+    let lhs_offsets = lhs.buffer::<T>(0);
+    let lhs_sizes = lhs.buffer::<T>(1);
+
+    let rhs_offsets = rhs.buffer::<T>(0);
+    let rhs_sizes = rhs.buffer::<T>(1);
+
+    let lhs_data = &lhs.child_data()[0];
+    let rhs_data = &rhs.child_data()[0];
+
+    let lhs_null_count = count_nulls(lhs.nulls(), lhs_start, len);
+    let rhs_null_count = count_nulls(rhs.nulls(), rhs_start, len);
+
+    if lhs_null_count != rhs_null_count {
+        return false;
+    }
+
+    // Restrict every buffer to the compared window. All four slices have exactly `len`
+    // elements, so their lengths never need to be compared with each other.
+    let lhs_range_offsets = &lhs_offsets[lhs_start..lhs_start + len];
+    let rhs_range_offsets = &rhs_offsets[rhs_start..rhs_start + len];
+    let lhs_range_sizes = &lhs_sizes[lhs_start..lhs_start + len];
+    let rhs_range_sizes = &rhs_sizes[rhs_start..rhs_start + len];
+
+    if lhs_null_count == 0 {
+        // Non-null pathway: all sizes must be equal, and all values must be equal
+        if lhs_range_sizes != rhs_range_sizes {
+            return false;
+        }
+
+        // Check values for equality
+        for ((&lhs_offset, &rhs_offset), &size) in lhs_range_offsets
+            .iter()
+            .zip(rhs_range_offsets)
+            .zip(lhs_range_sizes)
+        {
+            let lhs_offset = lhs_offset.to_usize().unwrap();
+            let rhs_offset = rhs_offset.to_usize().unwrap();
+            let size = size.to_usize().unwrap();
+
+            // Compare the child values that this pair of views points at
+            if !equal_values(lhs_data, rhs_data, lhs_offset, rhs_offset, size) {
+                return false;
+            }
+        }
+    } else {
+        // Nullable pathway: validity is compared slot by slot, and sizes and values are
+        // only compared for slots that are valid on both sides.
+        // `nulls()` is unwrapped on both sides because each side counted a null above.
+        let lhs_nulls = lhs.nulls().unwrap().slice(lhs_start, len);
+        let rhs_nulls = rhs.nulls().unwrap().slice(rhs_start, len);
+
+        let offsets = lhs_range_offsets.iter().zip(rhs_range_offsets);
+        let sizes = lhs_range_sizes.iter().zip(rhs_range_sizes);
+
+        for (index, ((&lhs_offset, &rhs_offset), (&lhs_size, &rhs_size))) in
+            offsets.zip(sizes).enumerate()
+        {
+            let lhs_is_null = lhs_nulls.is_null(index);
+            let rhs_is_null = rhs_nulls.is_null(index);
+
+            // A null slot never equals a valid one
+            if lhs_is_null != rhs_is_null {
+                return false;
+            }
+
+            // The offset and size of a null slot carry no meaning, so skip the slot
+            if lhs_is_null {
+                continue;
+            }
+
+            // Valid slots must have the same size: comparing only the first `lhs_size`
+            // values would make a list equal to any longer list it is a prefix of
+            if lhs_size != rhs_size {
+                return false;
+            }
+
+            let lhs_offset = lhs_offset.to_usize().unwrap();
+            let rhs_offset = rhs_offset.to_usize().unwrap();
+            let size = lhs_size.to_usize().unwrap();
+
+            // Check if values match in the range
+            if !equal_values(lhs_data, rhs_data, lhs_offset, rhs_offset, size) {
+                return false;
+            }
+        }
+    }
+
+    true
+}
diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs
index f24179b61700..7a310b1240df 100644
--- a/arrow-data/src/equal/mod.rs
+++ b/arrow-data/src/equal/mod.rs
@@ -30,6 +30,7 @@ mod dictionary;
 mod fixed_binary;
 mod fixed_list;
 mod list;
+mod list_view;
 mod null;
 mod primitive;
 mod run;
@@ -41,6 +42,8 @@ mod variable_size;
 // these methods assume the same type, len and null count.
 // For this reason, they are not exposed and are instead used
 // to build the generic functions below (`equal_range` and `equal`).
+use self::run::run_equal;
+use crate::equal::list_view::list_view_equal;
 use boolean::boolean_equal;
 use byte_view::byte_view_equal;
 use dictionary::dictionary_equal;
@@ -53,8 +56,6 @@ use structure::struct_equal;
 use union::union_equal;
 use variable_size::variable_sized_equal;
 
-use self::run::run_equal;
-
 /// Compares the values of two [ArrayData] starting at `lhs_start` and `rhs_start` respectively
 /// for `len` slots.
 #[inline]
@@ -78,6 +79,8 @@ fn equal_values(
         DataType::Int64 => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Float32 => primitive_equal::<f32>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Float64 => primitive_equal::<f64>(lhs, rhs, lhs_start, rhs_start, len),
+        DataType::Decimal32(_, _) => primitive_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
+        DataType::Decimal64(_, _) => primitive_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Decimal128(_, _) => primitive_equal::<i128>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Decimal256(_, _) => primitive_equal::<i256>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => {
@@ -102,10 +105,9 @@ fn equal_values(
             byte_view_equal(lhs, rhs, lhs_start, rhs_start, len)
         }
         DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not yet implemented")
-        }
         DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
+        DataType::ListView(_) => list_view_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
+        DataType::LargeListView(_) => list_view_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
         DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),
         DataType::Union(_, _) => union_equal(lhs, rhs, lhs_start, rhs_start, len),
diff --git a/arrow-data/src/equal/structure.rs b/arrow-data/src/equal/structure.rs
index e4751c26f489..d6efaff9e4a8 100644
--- a/arrow-data/src/equal/structure.rs
+++ b/arrow-data/src/equal/structure.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 
 use super::equal_range;
 
diff --git a/arrow-data/src/equal/utils.rs b/arrow-data/src/equal/utils.rs
index f1f4be44730e..464907c78b21 100644
--- a/arrow-data/src/equal/utils.rs
+++ b/arrow-data/src/equal/utils.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 use arrow_buffer::bit_chunk_iterator::BitChunks;
 use arrow_schema::DataType;
 
diff --git a/arrow-data/src/equal/variable_size.rs b/arrow-data/src/equal/variable_size.rs
index d6e8e6a95481..c83a39ebd808 100644
--- a/arrow-data/src/equal/variable_size.rs
+++ b/arrow-data/src/equal/variable_size.rs
@@ -15,9 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::data::{contains_nulls, ArrayData};
+use crate::data::{ArrayData, contains_nulls};
 use arrow_buffer::ArrowNativeType;
-use num::Integer;
+use num_integer::Integer;
 
 use super::utils::equal_len;
 
diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs
index 3b446ef255fe..408dfbaac909 100644
--- a/arrow-data/src/ffi.rs
+++ b/arrow-data/src/ffi.rs
@@ -18,7 +18,7 @@
 //! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
 
 use crate::bit_mask::set_bits;
-use crate::{layout, ArrayData};
+use crate::{ArrayData, layout};
 use arrow_buffer::buffer::NullBuffer;
 use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
 use arrow_schema::DataType;
@@ -71,15 +71,15 @@ unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
     if array.is_null() {
         return;
     }
-    let array = &mut *array;
+    let array = unsafe { &mut *array };
 
     // take ownership of `private_data`, therefore dropping it`
-    let private = Box::from_raw(array.private_data as *mut ArrayPrivateData);
+    let private = unsafe { Box::from_raw(array.private_data as *mut ArrayPrivateData) };
     for child in private.children.iter() {
-        let _ = Box::from_raw(*child);
+        let _ = unsafe { Box::from_raw(*child) };
     }
     if !private.dictionary.is_null() {
-        let _ = Box::from_raw(private.dictionary);
+        let _ = unsafe { Box::from_raw(private.dictionary) };
     }
 
     array.release = None;
@@ -222,7 +222,7 @@ impl FFI_ArrowArray {
     /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
     /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
     pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
-        std::ptr::replace(array, Self::empty())
+        unsafe { std::ptr::replace(array, Self::empty()) }
     }
 
     /// create an empty `FFI_ArrowArray`, which can be used to import data into
diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs
index a023b1d98cb6..07e7553b2b43 100644
--- a/arrow-data/src/lib.rs
+++ b/arrow-data/src/lib.rs
@@ -23,7 +23,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 mod data;
 pub use data::*;
diff --git a/arrow-data/src/transform/boolean.rs b/arrow-data/src/transform/boolean.rs
index d93fa15a4e0f..1f3bd8f885c0 100644
--- a/arrow-data/src/transform/boolean.rs
+++ b/arrow-data/src/transform/boolean.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{Extend, _MutableArrayData, utils::resize_for_bits};
-use crate::bit_mask::set_bits;
+use super::{_MutableArrayData, Extend, utils::resize_for_bits};
 use crate::ArrayData;
+use crate::bit_mask::set_bits;
 
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
+pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
     let values = array.buffers()[0].as_slice();
     Box::new(
         move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
diff --git a/arrow-data/src/transform/fixed_binary.rs b/arrow-data/src/transform/fixed_binary.rs
index 44c6f46ebf7e..626ecbee0261 100644
--- a/arrow-data/src/transform/fixed_binary.rs
+++ b/arrow-data/src/transform/fixed_binary.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
 use arrow_schema::DataType;
 
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
+pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
     let size = match array.data_type() {
         DataType::FixedSizeBinary(i) => *i as usize,
         _ => unreachable!(),
diff --git a/arrow-data/src/transform/fixed_size_list.rs b/arrow-data/src/transform/fixed_size_list.rs
index 8eef7bce9bb3..ada1a2f763c4 100644
--- a/arrow-data/src/transform/fixed_size_list.rs
+++ b/arrow-data/src/transform/fixed_size_list.rs
@@ -18,9 +18,9 @@
 use crate::ArrayData;
 use arrow_schema::DataType;
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 
-pub(super) fn build_extend(array: &ArrayData) -> Extend {
+pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
     let size = match array.data_type() {
         DataType::FixedSizeList(_, i) => *i as usize,
         _ => unreachable!(),
diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs
index d9a1c62a8e8e..b7a9ab6da0ed 100644
--- a/arrow-data/src/transform/list.rs
+++ b/arrow-data/src/transform/list.rs
@@ -16,14 +16,17 @@
 // under the License.
 
 use super::{
-    Extend, _MutableArrayData,
+    _MutableArrayData, Extend,
     utils::{extend_offsets, get_last_offset},
 };
 use crate::ArrayData;
 use arrow_buffer::ArrowNativeType;
-use num::{CheckedAdd, Integer};
+use num_integer::Integer;
+use num_traits::CheckedAdd;
 
-pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(array: &ArrayData) -> Extend {
+pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
+    array: &ArrayData,
+) -> Extend<'_> {
     let offsets = array.buffer::<T>(0);
     Box::new(
         move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
diff --git a/arrow-data/src/transform/list_view.rs b/arrow-data/src/transform/list_view.rs
new file mode 100644
index 000000000000..9b66a6a6abb1
--- /dev/null
+++ b/arrow-data/src/transform/list_view.rs
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::ArrayData;
+use crate::transform::_MutableArrayData;
+use arrow_buffer::ArrowNativeType;
+use num_integer::Integer;
+use num_traits::CheckedAdd;
+
+/// Builds the `Extend` closure that copies list-view slots out of `array`, whose offsets
+/// (buffer 0) and sizes (buffer 1) are read as `T`.
+///
+/// Each call appends the `len` offsets and the `len` sizes starting at `start` to
+/// `buffer1` and `buffer2` of the mutable array, without modifying their values.
+pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd>(
+    array: &ArrayData,
+) -> crate::transform::Extend<'_> {
+    let offsets = array.buffer::<T>(0);
+    let sizes = array.buffer::<T>(1);
+    Box::new(
+        move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| {
+            // Offsets first: they are written to `buffer1` exactly as they are stored
+            mutable.buffer1.extend_from_slice(&offsets[start..start + len]);
+
+            // Sizes next: they are written to `buffer2` exactly as they are stored
+            mutable.buffer2.extend_from_slice(&sizes[start..start + len]);
+            // The beauty of views is that we don't need to copy child_data, we just splat
+            // the offsets and sizes.
+            // NOTE(review): nothing in this closure touches the child data; confirm that
+            // the copied offsets stay valid for the child array of the result.
+        },
+    )
+}
+
+/// Appends `len` placeholder slots for nulls: a `T::default()` offset and size for each.
+pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) {
+    for buffer in [&mut mutable.buffer1, &mut mutable.buffer2] {
+        for _ in 0..len {
+            buffer.push(T::default());
+        }
+    }
+}
diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs
index af0e1c104f6a..c6052817bfb6 100644
--- a/arrow-data/src/transform/mod.rs
+++ b/arrow-data/src/transform/mod.rs
@@ -20,19 +20,20 @@
 //! Provides utilities for creating, manipulating, and converting Arrow arrays
 //! made of primitive types, strings, and nested types.
 
-use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView};
+use super::{ArrayData, ArrayDataBuilder, ByteView, data::new_buffers};
 use crate::bit_mask::set_bits;
 use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
-use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util, i256};
 use arrow_schema::{ArrowError, DataType, IntervalUnit, UnionMode};
 use half::f16;
-use num::Integer;
+use num_integer::Integer;
 use std::mem;
 
 mod boolean;
 mod fixed_binary;
 mod fixed_size_list;
 mod list;
+mod list_view;
 mod null;
 mod primitive;
 mod run;
@@ -73,7 +74,7 @@ impl _MutableArrayData<'_> {
     }
 }
 
-fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits {
+fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits<'_> {
     if let Some(nulls) = array.nulls() {
         let bytes = nulls.validity();
         Box::new(move |mutable, start, len| {
@@ -190,7 +191,7 @@ impl std::fmt::Debug for MutableArrayData<'_> {
 /// Builds an extend that adds `offset` to the source primitive
 /// Additionally validates that `max` fits into the
 /// the underlying primitive returning None if not
-fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Option<Extend> {
+fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Option<Extend<'_>> {
     macro_rules! validate_and_build {
         ($dt: ty) => {{
             let _: $dt = max.try_into().ok()?;
@@ -215,7 +216,7 @@ fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Opti
 }
 
 /// Builds an extend that adds `buffer_offset` to any buffer indices encountered
-fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend {
+fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend<'_> {
     let views = array.buffer::<u128>(0);
     Box::new(
         move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
@@ -234,7 +235,7 @@ fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend {
     )
 }
 
-fn build_extend(array: &ArrayData) -> Extend {
+fn build_extend(array: &ArrayData) -> Extend<'_> {
     match array.data_type() {
         DataType::Null => null::build_extend(array),
         DataType::Boolean => boolean::build_extend(array),
@@ -257,16 +258,17 @@ fn build_extend(array: &ArrayData) -> Extend {
         | DataType::Duration(_)
         | DataType::Interval(IntervalUnit::DayTime) => primitive::build_extend::<i64>(array),
         DataType::Interval(IntervalUnit::MonthDayNano) => primitive::build_extend::<i128>(array),
+        DataType::Decimal32(_, _) => primitive::build_extend::<i32>(array),
+        DataType::Decimal64(_, _) => primitive::build_extend::<i64>(array),
         DataType::Decimal128(_, _) => primitive::build_extend::<i128>(array),
         DataType::Decimal256(_, _) => primitive::build_extend::<i256>(array),
         DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array),
         DataType::LargeUtf8 | DataType::LargeBinary => variable_size::build_extend::<i64>(array),
         DataType::BinaryView | DataType::Utf8View => unreachable!("should use build_extend_view"),
         DataType::Map(_, _) | DataType::List(_) => list::build_extend::<i32>(array),
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not implemented")
-        }
         DataType::LargeList(_) => list::build_extend::<i64>(array),
+        DataType::ListView(_) => list_view::build_extend::<i32>(array),
+        DataType::LargeListView(_) => list_view::build_extend::<i64>(array),
         DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"),
         DataType::Struct(_) => structure::build_extend(array),
         DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array),
@@ -303,16 +305,17 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls {
         | DataType::Duration(_)
         | DataType::Interval(IntervalUnit::DayTime) => primitive::extend_nulls::<i64>,
         DataType::Interval(IntervalUnit::MonthDayNano) => primitive::extend_nulls::<i128>,
+        DataType::Decimal32(_, _) => primitive::extend_nulls::<i32>,
+        DataType::Decimal64(_, _) => primitive::extend_nulls::<i64>,
         DataType::Decimal128(_, _) => primitive::extend_nulls::<i128>,
         DataType::Decimal256(_, _) => primitive::extend_nulls::<i256>,
         DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::<i32>,
         DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::<i64>,
         DataType::BinaryView | DataType::Utf8View => primitive::extend_nulls::<u128>,
         DataType::Map(_, _) | DataType::List(_) => list::extend_nulls::<i32>,
-        DataType::ListView(_) | DataType::LargeListView(_) => {
-            unimplemented!("ListView/LargeListView not implemented")
-        }
         DataType::LargeList(_) => list::extend_nulls::<i64>,
+        DataType::ListView(_) => list_view::extend_nulls::<i32>,
+        DataType::LargeListView(_) => list_view::extend_nulls::<i64>,
         DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() {
             DataType::UInt8 => primitive::extend_nulls::<u8>,
             DataType::UInt16 => primitive::extend_nulls::<u16>,
@@ -446,7 +449,11 @@ impl<'a> MutableArrayData<'a> {
                 new_buffers(data_type, *capacity)
             }
             (
-                DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _),
+                DataType::List(_)
+                | DataType::LargeList(_)
+                | DataType::ListView(_)
+                | DataType::LargeListView(_)
+                | DataType::FixedSizeList(_, _),
                 Capacities::List(capacity, _),
             ) => {
                 array_capacity = *capacity;
@@ -456,7 +463,9 @@ impl<'a> MutableArrayData<'a> {
         };
 
         let child_data = match &data_type {
-            DataType::Decimal128(_, _)
+            DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
+            | DataType::Decimal128(_, _)
             | DataType::Decimal256(_, _)
             | DataType::Null
             | DataType::Boolean
@@ -485,10 +494,11 @@ impl<'a> MutableArrayData<'a> {
             | DataType::Utf8View
             | DataType::Interval(_)
             | DataType::FixedSizeBinary(_) => vec![],
-            DataType::ListView(_) | DataType::LargeListView(_) => {
-                unimplemented!("ListView/LargeListView not implemented")
-            }
-            DataType::Map(_, _) | DataType::List(_) | DataType::LargeList(_) => {
+            DataType::Map(_, _)
+            | DataType::List(_)
+            | DataType::LargeList(_)
+            | DataType::ListView(_)
+            | DataType::LargeListView(_) => {
                 let children = arrays
                     .iter()
                     .map(|array| &array.child_data()[0])
@@ -779,7 +789,12 @@ impl<'a> MutableArrayData<'a> {
                 b.insert(0, data.buffer1.into());
                 b
             }
-            DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => {
+            DataType::Utf8
+            | DataType::Binary
+            | DataType::LargeUtf8
+            | DataType::LargeBinary
+            | DataType::ListView(_)
+            | DataType::LargeListView(_) => {
                 vec![data.buffer1.into(), data.buffer2.into()]
             }
             DataType::Union(_, mode) => {
diff --git a/arrow-data/src/transform/null.rs b/arrow-data/src/transform/null.rs
index 5d1535564d9e..7355a5420b8e 100644
--- a/arrow-data/src/transform/null.rs
+++ b/arrow-data/src/transform/null.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
 
-pub(super) fn build_extend(_: &ArrayData) -> Extend {
+pub(super) fn build_extend(_: &ArrayData) -> Extend<'_> {
     Box::new(move |_, _, _, _| {})
 }
 
diff --git a/arrow-data/src/transform/primitive.rs b/arrow-data/src/transform/primitive.rs
index 627dc00de1df..8f9929c4305d 100644
--- a/arrow-data/src/transform/primitive.rs
+++ b/arrow-data/src/transform/primitive.rs
@@ -20,9 +20,9 @@ use arrow_buffer::ArrowNativeType;
 use std::mem::size_of;
 use std::ops::Add;
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 
-pub(super) fn build_extend<T: ArrowNativeType>(array: &ArrayData) -> Extend {
+pub(super) fn build_extend<T: ArrowNativeType>(array: &ArrayData) -> Extend<'_> {
     let values = array.buffer::<T>(0);
     Box::new(
         move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
@@ -33,7 +33,7 @@ pub(super) fn build_extend<T: ArrowNativeType>(array: &ArrayData) -> Extend {
     )
 }
 
-pub(super) fn build_extend_with_offset<T>(array: &ArrayData, offset: T) -> Extend
+pub(super) fn build_extend_with_offset<T>(array: &ArrayData, offset: T) -> Extend<'_>
 where
     T: ArrowNativeType + Add<Output = T>,
 {
diff --git a/arrow-data/src/transform/run.rs b/arrow-data/src/transform/run.rs
index 0d37a8374c6d..6ae3a034f340 100644
--- a/arrow-data/src/transform/run.rs
+++ b/arrow-data/src/transform/run.rs
@@ -15,19 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{ArrayData, Extend, _MutableArrayData};
+use super::{_MutableArrayData, ArrayData, Extend};
 use arrow_buffer::{ArrowNativeType, Buffer, ToByteSlice};
 use arrow_schema::DataType;
-use num::CheckedAdd;
+use num_traits::CheckedAdd;
 
 /// Generic helper to get the last run end value from a run ends array
 fn get_last_run_end<T: ArrowNativeType>(run_ends_data: &super::MutableArrayData) -> T {
     if run_ends_data.data.len == 0 {
         T::default()
     } else {
-        // Convert buffer to typed slice and get the last element
-        let buffer = Buffer::from(run_ends_data.data.buffer1.as_slice());
-        let typed_slice: &[T] = buffer.typed_data();
+        let typed_slice: &[T] = run_ends_data.data.buffer1.typed_data();
         if typed_slice.len() >= run_ends_data.data.len {
             typed_slice[run_ends_data.data.len - 1]
         } else {
@@ -75,10 +73,7 @@ pub fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
         DataType::Int16 => extend_nulls_impl!(i16),
         DataType::Int32 => extend_nulls_impl!(i32),
         DataType::Int64 => extend_nulls_impl!(i64),
-        _ => panic!(
-            "Invalid run end type for RunEndEncoded array: {:?}",
-            run_end_type
-        ),
+        _ => panic!("Invalid run end type for RunEndEncoded array: {run_end_type}"),
     };
 
     mutable.child_data[0].data.len += 1;
@@ -184,7 +179,7 @@ fn process_extends_batch<T: ArrowNativeType>(
 /// Returns a function that extends the run encoded array.
 ///
 /// It finds the physical indices in the source array that correspond to the logical range to copy, and adjusts the runs to the logical indices of the array to extend. The values are copied from the source array to the destination array verbatim.
-pub fn build_extend(array: &ArrayData) -> Extend {
+pub fn build_extend(array: &ArrayData) -> Extend<'_> {
     Box::new(
         move |mutable: &mut _MutableArrayData, array_idx: usize, start: usize, len: usize| {
             if len == 0 {
@@ -211,7 +206,7 @@ pub fn build_extend(array: &ArrayData) -> Extend {
                     let (run_ends_bytes, values_range) = build_extend_arrays::<$run_end_type>(
                         source_buffer,
                         source_run_ends.len(),
-                        start,
+                        start + array.offset(),
                         len,
                         dest_last_run_end,
                     );
@@ -228,10 +223,7 @@ pub fn build_extend(array: &ArrayData) -> Extend {
                 DataType::Int16 => build_and_process_impl!(i16),
                 DataType::Int32 => build_and_process_impl!(i32),
                 DataType::Int64 => build_and_process_impl!(i64),
-                _ => panic!(
-                    "Invalid run end type for RunEndEncoded array: {:?}",
-                    dest_run_end_type
-                ),
+                _ => panic!("Invalid run end type for RunEndEncoded array: {dest_run_end_type}",),
             }
         },
     )
diff --git a/arrow-data/src/transform/structure.rs b/arrow-data/src/transform/structure.rs
index 7330dcaa3705..588cc00f446b 100644
--- a/arrow-data/src/transform/structure.rs
+++ b/arrow-data/src/transform/structure.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
 
-pub(super) fn build_extend(_: &ArrayData) -> Extend {
+pub(super) fn build_extend(_: &ArrayData) -> Extend<'_> {
     Box::new(
         move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
             mutable
diff --git a/arrow-data/src/transform/union.rs b/arrow-data/src/transform/union.rs
index d7083588d782..f6f291e3f05d 100644
--- a/arrow-data/src/transform/union.rs
+++ b/arrow-data/src/transform/union.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use super::{Extend, _MutableArrayData};
+use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
 
-pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend {
+pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend<'_> {
     let type_ids = array.buffer::<i8>(0);
 
     Box::new(
@@ -36,7 +36,7 @@ pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend {
     )
 }
 
-pub(super) fn build_extend_dense(array: &ArrayData) -> Extend {
+pub(super) fn build_extend_dense(array: &ArrayData) -> Extend<'_> {
     let type_ids = array.buffer::<i8>(0);
     let offsets = array.buffer::<i32>(1);
     let arrow_schema::DataType::Union(src_fields, _) = array.data_type() else {
diff --git a/arrow-data/src/transform/utils.rs b/arrow-data/src/transform/utils.rs
index 5407f68e0d0c..979738d057fd 100644
--- a/arrow-data/src/transform/utils.rs
+++ b/arrow-data/src/transform/utils.rs
@@ -15,8 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::{bit_util, ArrowNativeType, MutableBuffer};
-use num::{CheckedAdd, Integer};
+use arrow_buffer::{ArrowNativeType, MutableBuffer, bit_util};
+use num_integer::Integer;
+use num_traits::CheckedAdd;
 
 /// extends the `buffer` to be able to hold `len` bits, setting all bits of the new size to zero.
 #[inline]
@@ -52,9 +53,9 @@ pub(super) unsafe fn get_last_offset<T: ArrowNativeType>(offset_buffer: &Mutable
     //  Soundness
     //      * offset buffer is always extended in slices of T and aligned accordingly.
     //      * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid.
-    let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::<T>();
+    let (prefix, offsets, suffix) = unsafe { offset_buffer.as_slice().align_to::<T>() };
     debug_assert!(prefix.is_empty() && suffix.is_empty());
-    *offsets.get_unchecked(offsets.len() - 1)
+    *unsafe { offsets.get_unchecked(offsets.len() - 1) }
 }
 
 #[cfg(test)]
diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs
index ec0174bf8cb2..ec9dcf1fd1c2 100644
--- a/arrow-data/src/transform/variable_size.rs
+++ b/arrow-data/src/transform/variable_size.rs
@@ -17,11 +17,11 @@
 
 use crate::ArrayData;
 use arrow_buffer::{ArrowNativeType, MutableBuffer};
-use num::traits::AsPrimitive;
-use num::{CheckedAdd, Integer};
+use num_integer::Integer;
+use num_traits::{AsPrimitive, CheckedAdd};
 
 use super::{
-    Extend, _MutableArrayData,
+    _MutableArrayData, Extend,
     utils::{extend_offsets, get_last_offset},
 };
 
@@ -41,7 +41,7 @@ fn extend_offset_values<T: ArrowNativeType + AsPrimitive<usize>>(
 
 pub(super) fn build_extend<T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive<usize>>(
     array: &ArrayData,
-) -> Extend {
+) -> Extend<'_> {
     let offsets = array.buffer::<T>(0);
     let values = array.buffers()[1].as_slice();
     Box::new(
diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml
index 041901e4915a..8f95e1995a67 100644
--- a/arrow-flight/Cargo.toml
+++ b/arrow-flight/Cargo.toml
@@ -44,11 +44,12 @@ bytes = { version = "1", default-features = false }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 once_cell = { version = "1", optional = true }
 paste = { version = "1.0" , optional = true }
-prost = { version = "0.13.1", default-features = false, features = ["prost-derive"] }
+prost = { version = "0.14.1", default-features = false, features = ["derive"] }
 # For Timestamp type
-prost-types = { version = "0.13.1", default-features = false }
+prost-types = { version = "0.14.1", default-features = false }
 tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"], optional = true }
-tonic = { version = "0.12.3", default-features = false, features = ["transport", "codegen", "prost"] }
+tonic = { version = "0.14.1", default-features = false, features = ["transport", "codegen", "router"] }
+tonic-prost = { version = "0.14.1", default-features = false }
 
 # CLI-related dependencies
 anyhow = { version = "1.0", optional = true }
@@ -64,9 +65,13 @@ default = []
 flight-sql = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"]
 # TODO: Remove in the next release
 flight-sql-experimental = ["flight-sql"]
-tls = ["tonic/tls"]
+tls-aws-lc= ["tonic/tls-aws-lc"]
+tls-native-roots = ["tonic/tls-native-roots"]
+tls-ring = ["tonic/tls-ring"]
+tls-webpki-roots = ["tonic/tls-webpki-roots"]
+
 # Enable CLI tools
-cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber"]
+cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "tonic/gzip", "tonic/deflate", "tonic/zstd", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber", "dep:tokio"]
 
 [dev-dependencies]
 arrow-cast = { workspace = true, features = ["prettyprint"] }
@@ -85,18 +90,18 @@ uuid = { version = "1.10.0", features = ["v4"] }
 
 [[example]]
 name = "flight_sql_server"
-required-features = ["flight-sql", "tls"]
+required-features = ["flight-sql", "tls-ring"]
 
 [[bin]]
 name = "flight_sql_client"
-required-features = ["cli", "flight-sql", "tls"]
+required-features = ["cli", "flight-sql", "tls-ring"]
 
 [[test]]
 name = "flight_sql_client"
 path = "tests/flight_sql_client.rs"
-required-features = ["flight-sql", "tls"]
+required-features = ["flight-sql", "tls-ring"]
 
 [[test]]
 name = "flight_sql_client_cli"
 path = "tests/flight_sql_client_cli.rs"
-required-features = ["cli", "flight-sql", "tls"]
+required-features = ["cli", "flight-sql", "tls-ring"]
diff --git a/arrow-flight/README.md b/arrow-flight/README.md
index 381a63048b69..1cd8f5cfe21b 100644
--- a/arrow-flight/README.md
+++ b/arrow-flight/README.md
@@ -43,12 +43,16 @@ that demonstrate how to build a Flight server implemented with [tonic](https://d
 
 ## Feature Flags
 
-- `flight-sql`: Enables experimental support for
-  [Apache Arrow FlightSQL], a protocol for interacting with SQL databases.
+- `flight-sql`: Support for [Apache Arrow FlightSQL], a protocol for interacting with SQL databases.
 
-- `flight-sql-experimental` : Deprecated feature and will be removed in next release
+You can enable TLS using one of the following features (none are enabled by default):
 
-- `tls`: Enables `tls` on `tonic`
+- `tls-aws-lc`: enables [tonic feature] `tls-aws-lc`
+- `tls-native-roots`: enables [tonic feature] `tls-native-roots`
+- `tls-ring`: enables [tonic feature] `tls-ring`
+- `tls-webpki-roots`: enables [tonic feature] `tls-webpki-roots`
+
+[tonic feature]: https://docs.rs/tonic/latest/tonic/#feature-flags
 
 ## CLI
 
diff --git a/arrow-flight/examples/flight_sql_server.rs b/arrow-flight/examples/flight_sql_server.rs
index 396b72f4cb22..ae03cac28515 100644
--- a/arrow-flight/examples/flight_sql_server.rs
+++ b/arrow-flight/examples/flight_sql_server.rs
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_flight::sql::server::PeekableFlightDataStream;
 use arrow_flight::sql::DoPutPreparedStatementResult;
-use base64::prelude::BASE64_STANDARD;
+use arrow_flight::sql::server::PeekableFlightDataStream;
 use base64::Engine;
+use base64::prelude::BASE64_STANDARD;
 use core::str;
-use futures::{stream, Stream, TryStreamExt};
+use futures::{Stream, TryStreamExt, stream};
 use once_cell::sync::Lazy;
 use prost::Message;
 use std::collections::HashSet;
@@ -39,23 +39,23 @@ use arrow_flight::sql::metadata::{
     SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData, XdbcTypeInfoDataBuilder,
 };
 use arrow_flight::sql::{
-    server::FlightSqlService, ActionBeginSavepointRequest, ActionBeginSavepointResult,
-    ActionBeginTransactionRequest, ActionBeginTransactionResult, ActionCancelQueryRequest,
-    ActionCancelQueryResult, ActionClosePreparedStatementRequest,
-    ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult,
-    ActionCreatePreparedSubstraitPlanRequest, ActionEndSavepointRequest,
-    ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference,
-    CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys,
-    CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo,
-    CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementIngest,
-    CommandStatementQuery, CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable,
-    ProstMessageExt, Searchable, SqlInfo, TicketStatementQuery, XdbcDataType,
+    ActionBeginSavepointRequest, ActionBeginSavepointResult, ActionBeginTransactionRequest,
+    ActionBeginTransactionResult, ActionCancelQueryRequest, ActionCancelQueryResult,
+    ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest,
+    ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest,
+    ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs,
+    CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys,
+    CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables,
+    CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate,
+    CommandStatementIngest, CommandStatementQuery, CommandStatementSubstraitPlan,
+    CommandStatementUpdate, Nullable, ProstMessageExt, Searchable, SqlInfo, TicketStatementQuery,
+    XdbcDataType, server::FlightSqlService,
 };
 use arrow_flight::utils::batches_to_flight_data;
 use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
-    FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse,
-    IpcMessage, SchemaAsIpc, Ticket,
+    Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
+    HandshakeResponse, IpcMessage, SchemaAsIpc, Ticket, flight_service_server::FlightService,
+    flight_service_server::FlightServiceServer,
 };
 use arrow_ipc::writer::IpcWriteOptions;
 use arrow_schema::{ArrowError, DataType, Field, Schema};
@@ -189,7 +189,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
         let result = Ok(result);
         let output = futures::stream::iter(vec![result]);
 
-        let token = format!("Bearer {}", FAKE_TOKEN);
+        let token = format!("Bearer {FAKE_TOKEN}");
         let mut response: Response<Pin<Box<dyn Stream<Item = _> + Send>>> =
             Response::new(Box::pin(output));
         response.metadata_mut().append(
@@ -745,7 +745,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let addr_str = "0.0.0.0:50051";
     let addr = addr_str.parse()?;
 
-    println!("Listening on {:?}", addr);
+    println!("Listening on {addr:?}");
 
     if std::env::var("USE_TLS").ok().is_some() {
         let cert = std::fs::read_to_string("arrow-flight/examples/data/server.pem")?;
@@ -814,7 +814,7 @@ mod tests {
     async fn bind_tcp() -> (TcpIncoming, SocketAddr) {
         let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
         let addr = listener.local_addr().unwrap();
-        let incoming = TcpIncoming::from_listener(listener, true, None).unwrap();
+        let incoming = TcpIncoming::from(listener).with_nodelay(Some(true));
         (incoming, addr)
     }
 
diff --git a/arrow-flight/examples/server.rs b/arrow-flight/examples/server.rs
index 8c766b075957..ca856dce28cb 100644
--- a/arrow-flight/examples/server.rs
+++ b/arrow-flight/examples/server.rs
@@ -20,9 +20,9 @@ use tonic::transport::Server;
 use tonic::{Request, Response, Status, Streaming};
 
 use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
-    ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest,
-    HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket,
+    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
+    HandshakeRequest, HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket,
+    flight_service_server::FlightService, flight_service_server::FlightServiceServer,
 };
 
 #[derive(Clone)]
diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml
index 79d46cd377fa..2ce3f814d89b 100644
--- a/arrow-flight/gen/Cargo.toml
+++ b/arrow-flight/gen/Cargo.toml
@@ -32,5 +32,5 @@ publish = false
 [dependencies]
 # Pin specific version of the tonic-build dependencies to avoid auto-generated
 # (and checked in) arrow.flight.protocol.rs from changing
-prost-build = { version = "=0.13.5", default-features = false }
-tonic-build = { version = "=0.12.3", default-features = false, features = ["transport", "prost"] }
+prost-build = { version = "0.14.1", default-features = false }
+tonic-prost-build = { version = "0.14.1", default-features = false }
diff --git a/arrow-flight/gen/src/main.rs b/arrow-flight/gen/src/main.rs
index a69134e7acbe..6db70dc10938 100644
--- a/arrow-flight/gen/src/main.rs
+++ b/arrow-flight/gen/src/main.rs
@@ -25,11 +25,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let proto_dir = Path::new("../format");
     let proto_path = Path::new("../format/Flight.proto");
 
-    tonic_build::configure()
+    tonic_prost_build::configure()
         // protoc in Ubuntu builder needs this option
         .protoc_arg("--experimental_allow_proto3_optional")
         .out_dir("src")
-        .compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;
+        .compile_with_config(prost_config(), &[proto_path], &[proto_dir])?;
 
     // read file contents to string
     let mut file = OpenOptions::new()
@@ -48,11 +48,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let proto_dir = Path::new("../format");
     let proto_path = Path::new("../format/FlightSql.proto");
 
-    tonic_build::configure()
+    tonic_prost_build::configure()
         // protoc in Ubuntu builder needs this option
         .protoc_arg("--experimental_allow_proto3_optional")
         .out_dir("src/sql")
-        .compile_protos_with_config(prost_config(), &[proto_path], &[proto_dir])?;
+        .compile_with_config(prost_config(), &[proto_path], &[proto_dir])?;
 
     // read file contents to string
     let mut file = OpenOptions::new()
diff --git a/arrow-flight/src/arrow.flight.protocol.rs b/arrow-flight/src/arrow.flight.protocol.rs
index 0cd4f6948b77..bb6370d1acec 100644
--- a/arrow-flight/src/arrow.flight.protocol.rs
+++ b/arrow-flight/src/arrow.flight.protocol.rs
@@ -3,7 +3,7 @@
 // This file is @generated by prost-build.
 ///
 /// The request that a client provides to a server on handshake.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct HandshakeRequest {
     ///
     /// A defined protocol version
@@ -14,7 +14,7 @@ pub struct HandshakeRequest {
     #[prost(bytes = "bytes", tag = "2")]
     pub payload: ::prost::bytes::Bytes,
 }
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct HandshakeResponse {
     ///
     /// A defined protocol version
@@ -27,19 +27,19 @@ pub struct HandshakeResponse {
 }
 ///
 /// A message for doing simple auth.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct BasicAuth {
     #[prost(string, tag = "2")]
     pub username: ::prost::alloc::string::String,
     #[prost(string, tag = "3")]
     pub password: ::prost::alloc::string::String,
 }
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Empty {}
 ///
 /// Describes an available action, including both the name used for execution
 /// along with a short description of the purpose of the action.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionType {
     #[prost(string, tag = "1")]
     pub r#type: ::prost::alloc::string::String,
@@ -49,14 +49,14 @@ pub struct ActionType {
 ///
 /// A service specific expression that can be used to return a limited set
 /// of available Arrow Flight streams.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Criteria {
     #[prost(bytes = "bytes", tag = "1")]
     pub expression: ::prost::bytes::Bytes,
 }
 ///
 /// An opaque action specific for the service.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Action {
     #[prost(string, tag = "1")]
     pub r#type: ::prost::alloc::string::String,
@@ -83,7 +83,7 @@ pub struct RenewFlightEndpointRequest {
 }
 ///
 /// An opaque result returned after executing an action.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Result {
     #[prost(bytes = "bytes", tag = "1")]
     pub body: ::prost::bytes::Bytes,
@@ -92,14 +92,14 @@ pub struct Result {
 /// The result of the CancelFlightInfo action.
 ///
 /// The result should be stored in Result.body.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CancelFlightInfoResult {
     #[prost(enumeration = "CancelStatus", tag = "1")]
     pub status: i32,
 }
 ///
 /// Wrap the result of a getSchema call
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct SchemaResult {
     /// The schema of the dataset in its IPC form:
     ///    4 bytes - an optional IPC_CONTINUATION_TOKEN prefix
@@ -111,7 +111,7 @@ pub struct SchemaResult {
 ///
 /// The name or tag for a Flight. May be used as a way to retrieve or generate
 /// a flight or be used to expose a set of previously defined flights.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct FlightDescriptor {
     #[prost(enumeration = "flight_descriptor::DescriptorType", tag = "1")]
     pub r#type: i32,
@@ -322,7 +322,7 @@ pub struct FlightEndpoint {
 ///
 /// A location where a Flight service will accept retrieval of a particular
 /// stream given a ticket.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Location {
     #[prost(string, tag = "1")]
     pub uri: ::prost::alloc::string::String,
@@ -333,14 +333,14 @@ pub struct Location {
 ///
 /// Tickets are meant to be single use. It is an error/application-defined
 /// behavior to reuse a ticket.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct Ticket {
     #[prost(bytes = "bytes", tag = "1")]
     pub ticket: ::prost::bytes::Bytes,
 }
 ///
 /// A batch of Arrow data as part of a stream of batches.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct FlightData {
     ///
     /// The descriptor of the data. This is only relevant when a client is
@@ -365,7 +365,7 @@ pub struct FlightData {
 }
 /// *
 /// The response message associated with the submission of a DoPut.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct PutResult {
     #[prost(bytes = "bytes", tag = "1")]
     pub app_metadata: ::prost::bytes::Bytes,
@@ -435,20 +435,9 @@ pub mod flight_service_client {
     pub struct FlightServiceClient<T> {
         inner: tonic::client::Grpc<T>,
     }
-    impl FlightServiceClient<tonic::transport::Channel> {
-        /// Attempt to create a new client by connecting to a given endpoint.
-        pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
-        where
-            D: TryInto<tonic::transport::Endpoint>,
-            D::Error: Into<StdError>,
-        {
-            let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
-            Ok(Self::new(conn))
-        }
-    }
     impl<T> FlightServiceClient<T>
     where
-        T: tonic::client::GrpcService<tonic::body::BoxBody>,
+        T: tonic::client::GrpcService<tonic::body::Body>,
         T::Error: Into<StdError>,
         T::ResponseBody: Body<Data = Bytes> + std::marker::Send + 'static,
         <T::ResponseBody as Body>::Error: Into<StdError> + std::marker::Send,
@@ -469,13 +458,13 @@ pub mod flight_service_client {
             F: tonic::service::Interceptor,
             T::ResponseBody: Default,
             T: tonic::codegen::Service<
-                http::Request<tonic::body::BoxBody>,
+                http::Request<tonic::body::Body>,
                 Response = http::Response<
-                    <T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
+                    <T as tonic::client::GrpcService<tonic::body::Body>>::ResponseBody,
                 >,
             >,
             <T as tonic::codegen::Service<
-                http::Request<tonic::body::BoxBody>,
+                http::Request<tonic::body::Body>,
             >>::Error: Into<StdError> + std::marker::Send + std::marker::Sync,
         {
             FlightServiceClient::new(InterceptedService::new(inner, interceptor))
@@ -531,7 +520,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/Handshake",
             );
@@ -564,7 +553,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/ListFlights",
             );
@@ -598,7 +587,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/GetFlightInfo",
             );
@@ -647,7 +636,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/PollFlightInfo",
             );
@@ -678,7 +667,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/GetSchema",
             );
@@ -709,7 +698,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/DoGet",
             );
@@ -740,7 +729,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/DoPut",
             );
@@ -770,7 +759,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/DoExchange",
             );
@@ -803,7 +792,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/DoAction",
             );
@@ -833,7 +822,7 @@ pub mod flight_service_client {
                         format!("Service was not ready: {}", e.into()),
                     )
                 })?;
-            let codec = tonic::codec::ProstCodec::default();
+            let codec = tonic_prost::ProstCodec::default();
             let path = http::uri::PathAndQuery::from_static(
                 "/arrow.flight.protocol.FlightService/ListActions",
             );
@@ -1098,7 +1087,7 @@ pub mod flight_service_server {
         B: Body + std::marker::Send + 'static,
         B::Error: Into<StdError> + std::marker::Send + 'static,
     {
-        type Response = http::Response<tonic::body::BoxBody>;
+        type Response = http::Response<tonic::body::Body>;
         type Error = std::convert::Infallible;
         type Future = BoxFuture<Self::Response, Self::Error>;
         fn poll_ready(
@@ -1142,7 +1131,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = HandshakeSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1188,7 +1177,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = ListFlightsSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1233,7 +1222,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = GetFlightInfoSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1279,7 +1268,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = PollFlightInfoSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1324,7 +1313,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = GetSchemaSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1370,7 +1359,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = DoGetSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1416,7 +1405,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = DoPutSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1462,7 +1451,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = DoExchangeSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1508,7 +1497,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = DoActionSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1554,7 +1543,7 @@ pub mod flight_service_server {
                     let inner = self.inner.clone();
                     let fut = async move {
                         let method = ListActionsSvc(inner);
-                        let codec = tonic::codec::ProstCodec::default();
+                        let codec = tonic_prost::ProstCodec::default();
                         let mut grpc = tonic::server::Grpc::new(codec)
                             .apply_compression_config(
                                 accept_compression_encodings,
@@ -1571,7 +1560,9 @@ pub mod flight_service_server {
                 }
                 _ => {
                     Box::pin(async move {
-                        let mut response = http::Response::new(empty_body());
+                        let mut response = http::Response::new(
+                            tonic::body::Body::default(),
+                        );
                         let headers = response.headers_mut();
                         headers
                             .insert(
diff --git a/arrow-flight/src/bin/flight_sql_client.rs b/arrow-flight/src/bin/flight_sql_client.rs
index 7b9e34898ac8..554c6339aac2 100644
--- a/arrow-flight/src/bin/flight_sql_client.rs
+++ b/arrow-flight/src/bin/flight_sql_client.rs
@@ -17,15 +17,16 @@
 
 use std::{sync::Arc, time::Duration};
 
-use anyhow::{bail, Context, Result};
+use anyhow::{Context, Result, bail};
 use arrow_array::{ArrayRef, Datum, RecordBatch, StringArray};
-use arrow_cast::{cast_with_options, pretty::pretty_format_batches, CastOptions};
+use arrow_cast::{CastOptions, cast_with_options, pretty::pretty_format_batches};
 use arrow_flight::{
-    sql::{client::FlightSqlServiceClient, CommandGetDbSchemas, CommandGetTables},
     FlightInfo,
+    flight_service_client::FlightServiceClient,
+    sql::{CommandGetDbSchemas, CommandGetTables, client::FlightSqlServiceClient},
 };
 use arrow_schema::Schema;
-use clap::{Parser, Subcommand};
+use clap::{Parser, Subcommand, ValueEnum};
 use core::str;
 use futures::TryStreamExt;
 use tonic::{
@@ -53,6 +54,24 @@ pub struct LoggingArgs {
     log_verbose_count: u8,
 }
 
+/// gRPC/HTTP compression algorithms selectable on the command line; each maps to the same-named tonic variant.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, ValueEnum)]
+pub enum CompressionEncoding {
+    Gzip,
+    Deflate,
+    Zstd,
+}
+
+impl From<CompressionEncoding> for tonic::codec::CompressionEncoding {
+    fn from(encoding: CompressionEncoding) -> Self {
+        match encoding {
+            CompressionEncoding::Gzip => Self::Gzip,
+            CompressionEncoding::Deflate => Self::Deflate,
+            CompressionEncoding::Zstd => Self::Zstd,
+        }
+    }
+}
+
 #[derive(Debug, Parser)]
 struct ClientArgs {
     /// Additional headers.
@@ -85,6 +104,14 @@ struct ClientArgs {
     #[clap(long)]
     tls: bool,
 
+    /// Dump TLS key log.
+    ///
+    /// The target file is specified by the `SSLKEYLOGFILE` environment variable.
+    ///
+    /// Requires `--tls`.
+    #[clap(long, requires = "tls")]
+    key_log: bool,
+
     /// Server host.
     ///
     /// Required.
@@ -96,6 +123,34 @@ struct ClientArgs {
     /// Defaults to `443` if `tls` is set, otherwise defaults to `80`.
     #[clap(long)]
     port: Option<u16>,
+
+    /// Compression accepted by the client for responses sent by the server.
+    ///
+    /// The client will send this information to the server as part of the request. The server is free to pick an
+    /// algorithm from that list or use no compression (called "identity" encoding).
+    ///
+    /// You may define multiple algorithms by using a comma-separated list.
+    #[clap(long, value_delimiter = ',')]
+    accept_compression: Vec<CompressionEncoding>,
+
+    /// Compression of requests sent by the client to the server.
+    ///
+    /// Since the client needs to decide on the compression before sending the request, there is no client<->server
+    /// negotiation. If the server does NOT support the chosen compression, it will respond with an error a la:
+    ///
+    /// ```
+    /// Ipc error: Status {
+    ///     code: Unimplemented,
+    ///     message: "Content is compressed with `zstd` which isn't supported",
+    ///     metadata: MetadataMap { headers: {"grpc-accept-encoding": "identity", ...} },
+    ///     ...
+    /// }
+    /// ```
+    ///
+    /// Based on the algorithms listed in the `grpc-accept-encoding` header, you may make a more educated guess for
+    /// your next request. Note that `identity` is a synonym for "no compression".
+    #[clap(long)]
+    send_compression: Option<CompressionEncoding>,
 }
 
 #[derive(Debug, Parser)]
@@ -323,7 +378,7 @@ fn construct_record_batch_from_params(
 }
 
 fn setup_logging(args: LoggingArgs) -> Result<()> {
-    use tracing_subscriber::{util::SubscriberInitExt, EnvFilter, FmtSubscriber};
+    use tracing_subscriber::{EnvFilter, FmtSubscriber, util::SubscriberInitExt};
 
     tracing_log::LogTracer::init().context("tracing log init")?;
 
@@ -357,7 +412,11 @@ async fn setup_client(args: ClientArgs) -> Result<FlightSqlServiceClient<Channel
         .keep_alive_while_idle(true);
 
     if args.tls {
-        let tls_config = ClientTlsConfig::new().with_enabled_roots();
+        let mut tls_config = ClientTlsConfig::new().with_enabled_roots();
+        if args.key_log {
+            tls_config = tls_config.use_key_log();
+        }
+
         endpoint = endpoint
             .tls_config(tls_config)
             .context("create TLS endpoint")?;
@@ -365,7 +424,14 @@ async fn setup_client(args: ClientArgs) -> Result<FlightSqlServiceClient<Channel
 
     let channel = endpoint.connect().await.context("connect to endpoint")?;
 
-    let mut client = FlightSqlServiceClient::new(channel);
+    let mut client = FlightServiceClient::new(channel);
+    for encoding in args.accept_compression {
+        client = client.accept_compressed(encoding.into());
+    }
+    if let Some(encoding) = args.send_compression {
+        client = client.send_compressed(encoding.into());
+    }
+    let mut client = FlightSqlServiceClient::new_from_inner(client);
     info!("connected");
 
     for (k, v) in args.headers {
diff --git a/arrow-flight/src/client.rs b/arrow-flight/src/client.rs
index 9b4c10e9a093..dac086271cb7 100644
--- a/arrow-flight/src/client.rs
+++ b/arrow-flight/src/client.rs
@@ -16,19 +16,19 @@
 // under the License.
 
 use crate::{
+    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
+    HandshakeRequest, PollInfo, PutResult, Ticket,
     decode::FlightRecordBatchStream,
     flight_service_client::FlightServiceClient,
-    gen::{CancelFlightInfoRequest, CancelFlightInfoResult, RenewFlightEndpointRequest},
+    r#gen::{CancelFlightInfoRequest, CancelFlightInfoResult, RenewFlightEndpointRequest},
     trailers::extract_lazy_trailers,
-    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
-    HandshakeRequest, PollInfo, PutResult, Ticket,
 };
 use arrow_schema::Schema;
 use bytes::Bytes;
 use futures::{
+    Stream, StreamExt, TryStreamExt,
     future::ready,
     stream::{self, BoxStream},
-    Stream, StreamExt, TryStreamExt,
 };
 use prost::Message;
 use tonic::{metadata::MetadataMap, transport::Channel};
diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs
index 760fc926fca6..8c518ac9d454 100644
--- a/arrow-flight/src/decode.rs
+++ b/arrow-flight/src/decode.rs
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{trailers::LazyTrailers, utils::flight_data_to_arrow_batch, FlightData};
+use crate::{FlightData, trailers::LazyTrailers, utils::flight_data_to_arrow_batch};
 use arrow_array::{ArrayRef, RecordBatch};
 use arrow_buffer::Buffer;
 use arrow_schema::{Schema, SchemaRef};
 use bytes::Bytes;
-use futures::{ready, stream::BoxStream, Stream, StreamExt};
+use futures::{Stream, StreamExt, ready, stream::BoxStream};
 use std::{collections::HashMap, fmt::Debug, pin::Pin, sync::Arc, task::Poll};
 use tonic::metadata::MetadataMap;
 
@@ -138,12 +138,6 @@ impl FlightRecordBatchStream {
         self.trailers.as_ref().and_then(|trailers| trailers.get())
     }
 
-    /// Has a message defining the schema been received yet?
-    #[deprecated = "use schema().is_some() instead"]
-    pub fn got_schema(&self) -> bool {
-        self.schema().is_some()
-    }
-
     /// Return schema for the stream, if it has been received
     pub fn schema(&self) -> Option<&SchemaRef> {
         self.inner.schema()
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 57ac9f3173fe..187de400f6c0 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -17,14 +17,14 @@
 
 use std::{collections::VecDeque, fmt::Debug, pin::Pin, sync::Arc, task::Poll};
 
-use crate::{error::Result, FlightData, FlightDescriptor, SchemaAsIpc};
+use crate::{FlightData, FlightDescriptor, SchemaAsIpc, error::Result};
 
 use arrow_array::{Array, ArrayRef, RecordBatch, RecordBatchOptions, UnionArray};
-use arrow_ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
+use arrow_ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
 
 use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, SchemaRef, UnionMode};
 use bytes::Bytes;
-use futures::{ready, stream::BoxStream, Stream, StreamExt};
+use futures::{Stream, StreamExt, ready, stream::BoxStream};
 
 /// Creates a [`Stream`] of [`FlightData`]s from a
 /// `Stream` of [`Result`]<[`RecordBatch`], [`FlightError`]>.
@@ -535,15 +535,13 @@ fn prepare_field_for_flight(
                 )
                 .with_metadata(field.metadata().clone())
             } else {
-                #[allow(deprecated)]
-                let dict_id = dictionary_tracker.set_dict_id(field.as_ref());
-
+                dictionary_tracker.next_dict_id();
                 #[allow(deprecated)]
                 Field::new_dict(
                     field.name(),
                     field.data_type().clone(),
                     field.is_nullable(),
-                    dict_id,
+                    0,
                     field.dict_is_ordered().unwrap_or_default(),
                 )
                 .with_metadata(field.metadata().clone())
@@ -585,14 +583,13 @@ fn prepare_schema_for_flight(
                     )
                     .with_metadata(field.metadata().clone())
                 } else {
-                    #[allow(deprecated)]
-                    let dict_id = dictionary_tracker.set_dict_id(field.as_ref());
+                    dictionary_tracker.next_dict_id();
                     #[allow(deprecated)]
                     Field::new_dict(
                         field.name(),
                         field.data_type().clone(),
                         field.is_nullable(),
-                        dict_id,
+                        0,
                         field.dict_is_ordered().unwrap_or_default(),
                     )
                     .with_metadata(field.metadata().clone())
@@ -650,20 +647,16 @@ struct FlightIpcEncoder {
     options: IpcWriteOptions,
     data_gen: IpcDataGenerator,
     dictionary_tracker: DictionaryTracker,
+    compression_context: CompressionContext,
 }
 
 impl FlightIpcEncoder {
     fn new(options: IpcWriteOptions, error_on_replacement: bool) -> Self {
-        #[allow(deprecated)]
-        let preserve_dict_id = options.preserve_dict_id();
         Self {
             options,
             data_gen: IpcDataGenerator::default(),
-            #[allow(deprecated)]
-            dictionary_tracker: DictionaryTracker::new_with_preserve_dict_id(
-                error_on_replacement,
-                preserve_dict_id,
-            ),
+            dictionary_tracker: DictionaryTracker::new(error_on_replacement),
+            compression_context: CompressionContext::default(),
         }
     }
 
@@ -675,9 +668,12 @@ impl FlightIpcEncoder {
     /// Convert a `RecordBatch` to a Vec of `FlightData` representing
     /// dictionaries and a `FlightData` representing the batch
     fn encode_batch(&mut self, batch: &RecordBatch) -> Result<(Vec<FlightData>, FlightData)> {
-        let (encoded_dictionaries, encoded_batch) =
-            self.data_gen
-                .encoded_batch(batch, &mut self.dictionary_tracker, &self.options)?;
+        let (encoded_dictionaries, encoded_batch) = self.data_gen.encode(
+            batch,
+            &mut self.dictionary_tracker,
+            &self.options,
+            &mut self.compression_context,
+        )?;
 
         let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
         let flight_batch = encoded_batch.into();
@@ -1547,9 +1543,8 @@ mod tests {
     async fn verify_flight_round_trip(mut batches: Vec<RecordBatch>) {
         let expected_schema = batches.first().unwrap().schema();
 
-        #[allow(deprecated)]
         let encoder = FlightDataEncoderBuilder::default()
-            .with_options(IpcWriteOptions::default().with_preserve_dict_id(false))
+            .with_options(IpcWriteOptions::default())
             .with_dictionary_handling(DictionaryHandling::Resend)
             .build(futures::stream::iter(batches.clone().into_iter().map(Ok)));
 
@@ -1575,8 +1570,7 @@ mod tests {
             HashMap::from([("some_key".to_owned(), "some_value".to_owned())]),
         );
 
-        #[allow(deprecated)]
-        let mut dictionary_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true);
+        let mut dictionary_tracker = DictionaryTracker::new(false);
 
         let got = prepare_schema_for_flight(&schema, &mut dictionary_tracker, false);
         assert!(got.metadata().contains_key("some_key"));
@@ -1606,12 +1600,16 @@ mod tests {
         options: &IpcWriteOptions,
     ) -> (Vec<FlightData>, FlightData) {
         let data_gen = IpcDataGenerator::default();
-        #[allow(deprecated)]
-        let mut dictionary_tracker =
-            DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
+        let mut dictionary_tracker = DictionaryTracker::new(false);
+        let mut compression_context = CompressionContext::default();
 
         let (encoded_dictionaries, encoded_batch) = data_gen
-            .encoded_batch(batch, &mut dictionary_tracker, options)
+            .encode(
+                batch,
+                &mut dictionary_tracker,
+                options,
+                &mut compression_context,
+            )
             .expect("DictionaryTracker configured above to not error on replacement");
 
         let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect();
@@ -1695,9 +1693,9 @@ mod tests {
 
     #[tokio::test]
     async fn flight_data_size_even() {
-        let s1 = StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024));
+        let s1 = StringArray::from_iter_values(std::iter::repeat_n(".10 bytes.", 1024));
         let i1 = Int16Array::from_iter_values(0..1024);
-        let s2 = StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
+        let s2 = StringArray::from_iter_values(std::iter::repeat_n("6bytes", 1024));
         let i2 = Int64Array::from_iter_values(0..1024);
 
         let batch = RecordBatch::try_from_iter(vec![
diff --git a/arrow-flight/src/error.rs b/arrow-flight/src/error.rs
index ac8030583299..d22c24eea6d4 100644
--- a/arrow-flight/src/error.rs
+++ b/arrow-flight/src/error.rs
@@ -51,12 +51,12 @@ impl FlightError {
 impl std::fmt::Display for FlightError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            FlightError::Arrow(source) => write!(f, "Arrow error: {}", source),
-            FlightError::NotYetImplemented(desc) => write!(f, "Not yet implemented: {}", desc),
-            FlightError::Tonic(source) => write!(f, "Tonic error: {}", source),
-            FlightError::ProtocolError(desc) => write!(f, "Protocol error: {}", desc),
-            FlightError::DecodeError(desc) => write!(f, "Decode error: {}", desc),
-            FlightError::ExternalError(source) => write!(f, "External error: {}", source),
+            FlightError::Arrow(source) => write!(f, "Arrow error: {source}"),
+            FlightError::NotYetImplemented(desc) => write!(f, "Not yet implemented: {desc}"),
+            FlightError::Tonic(source) => write!(f, "Tonic error: {source}"),
+            FlightError::ProtocolError(desc) => write!(f, "Protocol error: {desc}"),
+            FlightError::DecodeError(desc) => write!(f, "Decode error: {desc}"),
+            FlightError::ExternalError(source) => write!(f, "External error: {source}"),
         }
     }
 }
@@ -78,6 +78,12 @@ impl From<tonic::Status> for FlightError {
     }
 }
 
+impl From<prost::DecodeError> for FlightError {
+    fn from(error: prost::DecodeError) -> Self {
+        Self::DecodeError(error.to_string()) // keeps only the rendered message; the error value is dropped
+    }
+}
+
 impl From<ArrowError> for FlightError {
     fn from(value: ArrowError) -> Self {
         Self::Arrow(value)
diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs
index 72dd07040920..db900341560c 100644
--- a/arrow-flight/src/lib.rs
+++ b/arrow-flight/src/lib.rs
@@ -35,15 +35,13 @@
 //! 3. Support for [Flight SQL] in [`sql`]. Requires the
 //!    `flight-sql` feature of this crate to be activated.
 //!
-//! 4. The feature [`flight-sql-experimental`] is deprecated and will be removed in a future release.
-//!
 //! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![allow(rustdoc::invalid_html_tags)]
 #![warn(missing_docs)]
 // The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets
@@ -53,8 +51,8 @@ use arrow_ipc::{convert, writer, writer::EncodedData, writer::IpcWriteOptions};
 use arrow_schema::{ArrowError, Schema};
 
 use arrow_ipc::convert::try_schema_from_ipc_buffer;
-use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
+use base64::prelude::BASE64_STANDARD;
 use bytes::Bytes;
 use prost_types::Timestamp;
 use std::{fmt, ops::Deref};
@@ -62,7 +60,7 @@ use std::{fmt, ops::Deref};
 type ArrowResult<T> = std::result::Result<T, ArrowError>;
 
 #[allow(clippy::all)]
-mod gen {
+mod r#gen {
     // Since this file is auto-generated, we suppress all warnings
     #![allow(missing_docs)]
     include!("arrow.flight.protocol.rs");
@@ -70,22 +68,22 @@ mod gen {
 
 /// Defines a `Flight` for generation or retrieval.
 pub mod flight_descriptor {
-    use super::gen;
-    pub use gen::flight_descriptor::DescriptorType;
+    use super::r#gen;
+    pub use r#gen::flight_descriptor::DescriptorType;
 }
 
 /// Low Level [tonic] [`FlightServiceClient`](gen::flight_service_client::FlightServiceClient).
 pub mod flight_service_client {
-    use super::gen;
-    pub use gen::flight_service_client::FlightServiceClient;
+    use super::r#gen;
+    pub use r#gen::flight_service_client::FlightServiceClient;
 }
 
 /// Low Level [tonic] [`FlightServiceServer`](gen::flight_service_server::FlightServiceServer)
 /// and [`FlightService`](gen::flight_service_server::FlightService).
 pub mod flight_service_server {
-    use super::gen;
-    pub use gen::flight_service_server::FlightService;
-    pub use gen::flight_service_server::FlightServiceServer;
+    use super::r#gen;
+    pub use r#gen::flight_service_server::FlightService;
+    pub use r#gen::flight_service_server::FlightServiceServer;
 }
 
 /// Mid Level [`FlightClient`]
@@ -103,27 +101,27 @@ pub mod encode;
 /// Common error types
 pub mod error;
 
-pub use gen::Action;
-pub use gen::ActionType;
-pub use gen::BasicAuth;
-pub use gen::CancelFlightInfoRequest;
-pub use gen::CancelFlightInfoResult;
-pub use gen::CancelStatus;
-pub use gen::Criteria;
-pub use gen::Empty;
-pub use gen::FlightData;
-pub use gen::FlightDescriptor;
-pub use gen::FlightEndpoint;
-pub use gen::FlightInfo;
-pub use gen::HandshakeRequest;
-pub use gen::HandshakeResponse;
-pub use gen::Location;
-pub use gen::PollInfo;
-pub use gen::PutResult;
-pub use gen::RenewFlightEndpointRequest;
-pub use gen::Result;
-pub use gen::SchemaResult;
-pub use gen::Ticket;
+pub use r#gen::Action;
+pub use r#gen::ActionType;
+pub use r#gen::BasicAuth;
+pub use r#gen::CancelFlightInfoRequest;
+pub use r#gen::CancelFlightInfoResult;
+pub use r#gen::CancelStatus;
+pub use r#gen::Criteria;
+pub use r#gen::Empty;
+pub use r#gen::FlightData;
+pub use r#gen::FlightDescriptor;
+pub use r#gen::FlightEndpoint;
+pub use r#gen::FlightInfo;
+pub use r#gen::HandshakeRequest;
+pub use r#gen::HandshakeResponse;
+pub use r#gen::Location;
+pub use r#gen::PollInfo;
+pub use r#gen::PutResult;
+pub use r#gen::RenewFlightEndpointRequest;
+pub use r#gen::Result;
+pub use r#gen::SchemaResult;
+pub use r#gen::Ticket;
 
 /// Helper to extract HTTP/gRPC trailers from a tonic stream.
 mod trailers;
@@ -151,9 +149,7 @@ pub struct IpcMessage(pub Bytes);
 
 fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData {
     let data_gen = writer::IpcDataGenerator::default();
-    #[allow(deprecated)]
-    let mut dict_tracker =
-        writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
+    let mut dict_tracker = writer::DictionaryTracker::new(false);
     data_gen.schema_to_bytes_with_dictionary_tracker(arrow_schema, &mut dict_tracker, options)
 }
 
@@ -607,6 +603,12 @@ impl FlightInfo {
         self
     }
 
+    /// Set the endpoints for fetching all data, replacing any endpoints already present
+    pub fn with_endpoints(mut self, endpoints: Vec<FlightEndpoint>) -> Self {
+        self.endpoint = endpoints;
+        self
+    }
+
     /// Add a [`FlightDescriptor`] describing what this data is
     pub fn with_descriptor(mut self, flight_descriptor: FlightDescriptor) -> Self {
         self.flight_descriptor = Some(flight_descriptor);
diff --git a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
index 7a37a0b28856..e7083c583edd 100644
--- a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
+++ b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs
@@ -19,7 +19,7 @@
 ///               int32_to_int32_list_map: map<key: int32, value: list<$data$: int32>>
 /// >
 /// where there is one row per requested piece of metadata information.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetSqlInfo {
     ///
     /// Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide
@@ -99,7 +99,7 @@ pub struct CommandGetSqlInfo {
 ///                               is only relevant to be used by ODBC).
 /// >
 /// The returned data should be ordered by data_type and then by type_name.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetXdbcTypeInfo {
     ///
     /// Specifies the data type to search for the info.
@@ -118,7 +118,7 @@ pub struct CommandGetXdbcTypeInfo {
 ///   catalog_name: utf8 not null
 /// >
 /// The returned data should be ordered by catalog_name.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetCatalogs {}
 ///
 /// Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend.
@@ -133,7 +133,7 @@ pub struct CommandGetCatalogs {}
 ///   db_schema_name: utf8 not null
 /// >
 /// The returned data should be ordered by catalog_name, then db_schema_name.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetDbSchemas {
     ///
     /// Specifies the Catalog to search for the tables.
@@ -177,7 +177,7 @@ pub struct CommandGetDbSchemas {
 ///   - ARROW:FLIGHT:SQL:IS_READ_ONLY      - "1" indicates if the column is read only, "0" otherwise.
 ///   - ARROW:FLIGHT:SQL:IS_SEARCHABLE     - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 /// The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetTables {
     ///
     /// Specifies the Catalog to search for the tables.
@@ -226,7 +226,7 @@ pub struct CommandGetTables {
 ///   table_type: utf8 not null
 /// >
 /// The returned data should be ordered by table_type.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetTableTypes {}
 ///
 /// Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend.
@@ -244,7 +244,7 @@ pub struct CommandGetTableTypes {}
 ///   key_sequence: int32 not null
 /// >
 /// The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetPrimaryKeys {
     ///
     /// Specifies the catalog to search for the table.
@@ -287,7 +287,7 @@ pub struct CommandGetPrimaryKeys {
 /// >
 /// The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence.
 /// update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetExportedKeys {
     ///
     /// Specifies the catalog to search for the foreign key table.
@@ -334,7 +334,7 @@ pub struct CommandGetExportedKeys {
 ///     - 2 = SET NULL
 ///     - 3 = NO ACTION
 ///     - 4 = SET DEFAULT
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetImportedKeys {
     ///
     /// Specifies the catalog to search for the primary key table.
@@ -383,7 +383,7 @@ pub struct CommandGetImportedKeys {
 ///     - 2 = SET NULL
 ///     - 3 = NO ACTION
 ///     - 4 = SET DEFAULT
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandGetCrossReference {
     /// *
     /// The catalog name where the parent table is.
@@ -420,7 +420,7 @@ pub struct CommandGetCrossReference {
 }
 ///
 /// Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionCreatePreparedStatementRequest {
     /// The valid SQL string to create a prepared statement for.
     #[prost(string, tag = "1")]
@@ -432,7 +432,7 @@ pub struct ActionCreatePreparedStatementRequest {
 }
 ///
 /// An embedded message describing a Substrait plan to execute.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct SubstraitPlan {
     /// The serialized substrait.Plan to create a prepared statement for.
     /// XXX(ARROW-16902): this is bytes instead of an embedded message
@@ -448,7 +448,7 @@ pub struct SubstraitPlan {
 }
 ///
 /// Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionCreatePreparedSubstraitPlanRequest {
     /// The serialized substrait.Plan to create a prepared statement for.
     #[prost(message, optional, tag = "1")]
@@ -466,7 +466,7 @@ pub struct ActionCreatePreparedSubstraitPlanRequest {
 /// - Automatically, by a server timeout.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionCreatePreparedStatementResult {
     /// Opaque handle for the prepared statement on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -486,7 +486,7 @@ pub struct ActionCreatePreparedStatementResult {
 ///
 /// Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend.
 /// Closes server resources associated with the prepared statement handle.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionClosePreparedStatementRequest {
     /// Opaque handle for the prepared statement on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -495,7 +495,7 @@ pub struct ActionClosePreparedStatementRequest {
 ///
 /// Request message for the "BeginTransaction" action.
 /// Begins a transaction.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionBeginTransactionRequest {}
 ///
 /// Request message for the "BeginSavepoint" action.
@@ -503,7 +503,7 @@ pub struct ActionBeginTransactionRequest {}
 ///
 /// Only supported if FLIGHT_SQL_TRANSACTION is
 /// FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionBeginSavepointRequest {
     /// The transaction to which a savepoint belongs.
     #[prost(bytes = "bytes", tag = "1")]
@@ -520,7 +520,7 @@ pub struct ActionBeginSavepointRequest {
 /// automatically rolled back.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionBeginTransactionResult {
     /// Opaque handle for the transaction on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -534,7 +534,7 @@ pub struct ActionBeginTransactionResult {
 /// out, then the savepoint is also invalidated.
 ///
 /// The result should be wrapped in a google.protobuf.Any message.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionBeginSavepointResult {
     /// Opaque handle for the savepoint on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -547,7 +547,7 @@ pub struct ActionBeginSavepointResult {
 ///
 /// If the action completes successfully, the transaction handle is
 /// invalidated, as are all associated savepoints.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionEndTransactionRequest {
     /// Opaque handle for the transaction on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -609,7 +609,7 @@ pub mod action_end_transaction_request {
 /// Releasing a savepoint invalidates that savepoint.  Rolling back to
 /// a savepoint does not invalidate the savepoint, but invalidates all
 /// savepoints created after the current savepoint.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionEndSavepointRequest {
     /// Opaque handle for the savepoint on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -678,7 +678,7 @@ pub mod action_end_savepoint_request {
 ///     - ARROW:FLIGHT:SQL:IS_READ_ONLY      - "1" indicates if the column is read only, "0" otherwise.
 ///     - ARROW:FLIGHT:SQL:IS_SEARCHABLE     - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 ///   - GetFlightInfo: execute the query.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandStatementQuery {
     /// The SQL syntax.
     #[prost(string, tag = "1")]
@@ -704,7 +704,7 @@ pub struct CommandStatementQuery {
 ///     - ARROW:FLIGHT:SQL:IS_SEARCHABLE     - "1" indicates if the column is searchable via WHERE clause, "0" otherwise.
 ///   - GetFlightInfo: execute the query.
 ///   - DoPut: execute the query.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandStatementSubstraitPlan {
     /// A serialized substrait.Plan
     #[prost(message, optional, tag = "1")]
@@ -716,7 +716,7 @@ pub struct CommandStatementSubstraitPlan {
 /// *
 /// Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery.
 /// This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct TicketStatementQuery {
     /// Unique identifier for the instance of the statement to execute.
     #[prost(bytes = "bytes", tag = "1")]
@@ -742,7 +742,7 @@ pub struct TicketStatementQuery {
 ///     for the parameters when determining the schema.
 ///   - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution.
 ///   - GetFlightInfo: execute the prepared statement instance.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandPreparedStatementQuery {
     /// Opaque handle for the prepared statement on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -751,7 +751,7 @@ pub struct CommandPreparedStatementQuery {
 ///
 /// Represents a SQL update query. Used in the command member of FlightDescriptor
 /// for the RPC call DoPut to cause the server to execute the included SQL update.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandStatementUpdate {
     /// The SQL syntax.
     #[prost(string, tag = "1")]
@@ -764,7 +764,7 @@ pub struct CommandStatementUpdate {
 /// Represents a SQL update query. Used in the command member of FlightDescriptor
 /// for the RPC call DoPut to cause the server to execute the included
 /// prepared statement handle as an update.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct CommandPreparedStatementUpdate {
     /// Opaque handle for the prepared statement on the server.
     #[prost(bytes = "bytes", tag = "1")]
@@ -810,7 +810,7 @@ pub struct CommandStatementIngest {
 /// Nested message and enum types in `CommandStatementIngest`.
 pub mod command_statement_ingest {
     /// Options for table definition behavior
-    #[derive(Clone, Copy, PartialEq, ::prost::Message)]
+    #[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
     pub struct TableDefinitionOptions {
         #[prost(
             enumeration = "table_definition_options::TableNotExistOption",
@@ -918,7 +918,7 @@ pub mod command_statement_ingest {
 /// Returned from the RPC call DoPut when a CommandStatementUpdate,
 /// CommandPreparedStatementUpdate, or CommandStatementIngest was
 /// in the request, containing results from the update.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct DoPutUpdateResult {
     /// The number of records updated. A return value of -1 represents
     /// an unknown updated record count.
@@ -930,7 +930,7 @@ pub struct DoPutUpdateResult {
 /// *Note on legacy behavior*: previous versions of the protocol did not return any result for
 /// this command, and that behavior should still be supported by clients. In that case, the client
 /// can continue as though the fields in this message were not provided or set to sensible default values.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct DoPutPreparedStatementResult {
     /// Represents a (potentially updated) opaque handle for the prepared statement on the server.
     /// Because the handle could potentially be updated, any previous handles for this prepared
@@ -959,7 +959,7 @@ pub struct DoPutPreparedStatementResult {
 ///
 /// This command is deprecated since 13.0.0. Use the "CancelFlightInfo"
 /// action with DoAction instead.
-#[derive(Clone, PartialEq, ::prost::Message)]
+#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionCancelQueryRequest {
     /// The result of the GetFlightInfo RPC that initiated the query.
     /// XXX(ARROW-16902): this must be a serialized FlightInfo, but is
@@ -975,7 +975,7 @@ pub struct ActionCancelQueryRequest {
 ///
 /// This command is deprecated since 13.0.0. Use the "CancelFlightInfo"
 /// action with DoAction instead.
-#[derive(Clone, Copy, PartialEq, ::prost::Message)]
+#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)]
 pub struct ActionCancelQueryResult {
     #[prost(enumeration = "action_cancel_query_result::CancelResult", tag = "1")]
     pub result: i32,
diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs
index 6791b68b757d..5476d4ede9a4 100644
--- a/arrow-flight/src/sql/client.rs
+++ b/arrow-flight/src/sql/client.rs
@@ -17,8 +17,14 @@
 
 //! A FlightSQL Client [`FlightSqlServiceClient`]
 
-use base64::prelude::BASE64_STANDARD;
+use arrow_buffer::Buffer;
+use arrow_ipc::MessageHeader;
+use arrow_ipc::convert::fb_to_schema;
+use arrow_ipc::reader::read_record_batch;
+use arrow_ipc::root_as_message;
+use arrow_schema::SchemaRef;
 use base64::Engine;
+use base64::prelude::BASE64_STANDARD;
 use bytes::Bytes;
 use std::collections::HashMap;
 use std::str::FromStr;
@@ -27,8 +33,9 @@ use tonic::metadata::AsciiMetadataKey;
 use crate::decode::FlightRecordBatchStream;
 use crate::encode::FlightDataEncoderBuilder;
 use crate::error::FlightError;
+use crate::error::Result;
 use crate::flight_service_client::FlightServiceClient;
-use crate::sql::gen::action_end_transaction_request::EndTransaction;
+use crate::sql::r#gen::action_end_transaction_request::EndTransaction;
 use crate::sql::server::{
     BEGIN_TRANSACTION, CLOSE_PREPARED_STATEMENT, CREATE_PREPARED_STATEMENT, END_TRANSACTION,
 };
@@ -49,19 +56,15 @@ use crate::{
     IpcMessage, PutResult, Ticket,
 };
 use arrow_array::RecordBatch;
-use arrow_buffer::Buffer;
-use arrow_ipc::convert::fb_to_schema;
-use arrow_ipc::reader::read_record_batch;
-use arrow_ipc::{root_as_message, MessageHeader};
-use arrow_schema::{ArrowError, Schema, SchemaRef};
-use futures::{stream, Stream, TryStreamExt};
+use arrow_schema::{ArrowError, Schema};
+use futures::{Stream, TryStreamExt, stream};
 use prost::Message;
-use tonic::transport::Channel;
+use tonic::codegen::{Body, StdError};
 use tonic::{IntoRequest, IntoStreamingRequest, Streaming};
 
 /// A FlightSQLServiceClient is an endpoint for retrieving or storing Arrow data
 /// by FlightSQL protocol.
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub struct FlightSqlServiceClient<T> {
     token: Option<String>,
     headers: HashMap<String, String>,
@@ -71,14 +74,20 @@ pub struct FlightSqlServiceClient<T> {
 /// A FlightSql protocol client that can run queries against FlightSql servers
 /// This client is in the "experimental" stage. It is not guaranteed to follow the spec in all instances.
 /// Github issues are welcomed.
-impl FlightSqlServiceClient<Channel> {
+impl<T> FlightSqlServiceClient<T>
+where
+    T: tonic::client::GrpcService<tonic::body::Body>,
+    T::Error: Into<StdError>,
+    T::ResponseBody: Body<Data = Bytes> + Send + 'static,
+    <T::ResponseBody as Body>::Error: Into<StdError> + Send,
+{
     /// Creates a new FlightSql client that connects to a server over an arbitrary tonic `Channel`
-    pub fn new(channel: Channel) -> Self {
+    pub fn new(channel: T) -> Self {
         Self::new_from_inner(FlightServiceClient::new(channel))
     }
 
     /// Creates a new higher level client with the provided lower level client
-    pub fn new_from_inner(inner: FlightServiceClient<Channel>) -> Self {
+    pub fn new_from_inner(inner: FlightServiceClient<T>) -> Self {
         Self {
             token: None,
             flight_client: inner,
@@ -87,17 +96,17 @@ impl FlightSqlServiceClient<Channel> {
     }
 
     /// Return a reference to the underlying [`FlightServiceClient`]
-    pub fn inner(&self) -> &FlightServiceClient<Channel> {
+    pub fn inner(&self) -> &FlightServiceClient<T> {
         &self.flight_client
     }
 
     /// Return a mutable reference to the underlying [`FlightServiceClient`]
-    pub fn inner_mut(&mut self) -> &mut FlightServiceClient<Channel> {
+    pub fn inner_mut(&mut self) -> &mut FlightServiceClient<T> {
         &mut self.flight_client
     }
 
     /// Consume this client and return the underlying [`FlightServiceClient`]
-    pub fn into_inner(self) -> FlightServiceClient<Channel> {
+    pub fn into_inner(self) -> FlightServiceClient<T> {
         self.flight_client
     }
 
@@ -126,15 +135,10 @@ impl FlightSqlServiceClient<Channel> {
     async fn get_flight_info_for_command<M: ProstMessageExt>(
         &mut self,
         cmd: M,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         let descriptor = FlightDescriptor::new_cmd(cmd.as_any().encode_to_vec());
         let req = self.set_request_headers(descriptor.into_request())?;
-        let fi = self
-            .flight_client
-            .get_flight_info(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
+        let fi = self.flight_client.get_flight_info(req).await?.into_inner();
         Ok(fi)
     }
 
@@ -143,7 +147,7 @@ impl FlightSqlServiceClient<Channel> {
         &mut self,
         query: String,
         transaction_id: Option<Bytes>,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         let cmd = CommandStatementQuery {
             query,
             transaction_id,
@@ -156,7 +160,7 @@ impl FlightSqlServiceClient<Channel> {
     /// If the server returns an "authorization" header, it is automatically parsed and set as
     /// a token for future requests. Any other data returned by the server in the handshake
     /// response is returned as a binary blob.
-    pub async fn handshake(&mut self, username: &str, password: &str) -> Result<Bytes, ArrowError> {
+    pub async fn handshake(&mut self, username: &str, password: &str) -> Result<Bytes> {
         let cmd = HandshakeRequest {
             protocol_version: 0,
             payload: Default::default(),
@@ -179,7 +183,7 @@ impl FlightSqlServiceClient<Channel> {
                 .map_err(|_| ArrowError::ParseError("Can't read auth header".to_string()))?;
             let bearer = "Bearer ";
             if !auth.starts_with(bearer) {
-                Err(ArrowError::ParseError("Invalid auth header!".to_string()))?;
+                return Err(ArrowError::ParseError("Invalid auth header!".to_string()))?;
             }
             let auth = auth[bearer.len()..].to_string();
             self.token = Some(auth);
@@ -204,7 +208,7 @@ impl FlightSqlServiceClient<Channel> {
         &mut self,
         query: String,
         transaction_id: Option<Bytes>,
-    ) -> Result<i64, ArrowError> {
+    ) -> Result<i64> {
         let cmd = CommandStatementUpdate {
             query,
             transaction_id,
@@ -217,19 +221,9 @@ impl FlightSqlServiceClient<Channel> {
             }])
             .into_request(),
         )?;
-        let mut result = self
-            .flight_client
-            .do_put(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
-        let result = result
-            .message()
-            .await
-            .map_err(status_to_arrow_error)?
-            .unwrap();
-        let result: DoPutUpdateResult =
-            Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
+        let mut result = self.flight_client.do_put(req).await?.into_inner();
+        let result = result.message().await?.unwrap();
+        let result: DoPutUpdateResult = Message::decode(&*result.app_metadata)?;
         Ok(result.record_count)
     }
 
@@ -238,7 +232,7 @@ impl FlightSqlServiceClient<Channel> {
         &mut self,
         command: CommandStatementIngest,
         stream: S,
-    ) -> Result<i64, ArrowError>
+    ) -> Result<i64>
     where
         S: Stream<Item = crate::error::Result<RecordBatch>> + Send + 'static,
     {
@@ -255,41 +249,28 @@ impl FlightSqlServiceClient<Channel> {
             FallibleRequestStream::new(sender, flight_data);
 
         let req = self.set_request_headers(flight_data.into_streaming_request())?;
-        let mut result = self
-            .flight_client
-            .do_put(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
+        let mut result = self.flight_client.do_put(req).await?.into_inner();
 
         // check if the there were any errors in the input stream provided note
         // if receiver.await fails, it means the sender was dropped and there is
         // no message to return.
         if let Ok(msg) = receiver.await {
-            return Err(ArrowError::ExternalError(Box::new(msg)));
+            return Err(FlightError::ExternalError(Box::new(msg)));
         }
 
-        let result = result
-            .message()
-            .await
-            .map_err(status_to_arrow_error)?
-            .unwrap();
-        let result: DoPutUpdateResult =
-            Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
+        let result = result.message().await?.unwrap();
+        let result: DoPutUpdateResult = Message::decode(&*result.app_metadata)?;
         Ok(result.record_count)
     }
 
     /// Request a list of catalogs as tabular FlightInfo results
-    pub async fn get_catalogs(&mut self) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_catalogs(&mut self) -> Result<FlightInfo> {
         self.get_flight_info_for_command(CommandGetCatalogs {})
             .await
     }
 
     /// Request a list of database schemas as tabular FlightInfo results
-    pub async fn get_db_schemas(
-        &mut self,
-        request: CommandGetDbSchemas,
-    ) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_db_schemas(&mut self, request: CommandGetDbSchemas) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
@@ -297,15 +278,10 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn do_get(
         &mut self,
         ticket: impl IntoRequest<Ticket>,
-    ) -> Result<FlightRecordBatchStream, ArrowError> {
+    ) -> Result<FlightRecordBatchStream> {
         let req = self.set_request_headers(ticket.into_request())?;
 
-        let (md, response_stream, _ext) = self
-            .flight_client
-            .do_get(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_parts();
+        let (md, response_stream, _ext) = self.flight_client.do_get(req).await?.into_parts();
         let (response_stream, trailers) = extract_lazy_trailers(response_stream);
 
         Ok(FlightRecordBatchStream::new_from_flight_data(
@@ -319,43 +295,27 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn do_put(
         &mut self,
         request: impl tonic::IntoStreamingRequest<Message = FlightData>,
-    ) -> Result<Streaming<PutResult>, ArrowError> {
+    ) -> Result<Streaming<PutResult>> {
         let req = self.set_request_headers(request.into_streaming_request())?;
-        Ok(self
-            .flight_client
-            .do_put(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner())
+        Ok(self.flight_client.do_put(req).await?.into_inner())
     }
 
     /// DoAction allows a flight client to do a specific action against a flight service
     pub async fn do_action(
         &mut self,
         request: impl IntoRequest<Action>,
-    ) -> Result<Streaming<crate::Result>, ArrowError> {
+    ) -> Result<Streaming<crate::Result>> {
         let req = self.set_request_headers(request.into_request())?;
-        Ok(self
-            .flight_client
-            .do_action(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner())
+        Ok(self.flight_client.do_action(req).await?.into_inner())
     }
 
     /// Request a list of tables.
-    pub async fn get_tables(
-        &mut self,
-        request: CommandGetTables,
-    ) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_tables(&mut self, request: CommandGetTables) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
     /// Request the primary keys for a table.
-    pub async fn get_primary_keys(
-        &mut self,
-        request: CommandGetPrimaryKeys,
-    ) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_primary_keys(&mut self, request: CommandGetPrimaryKeys) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
@@ -364,7 +324,7 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn get_exported_keys(
         &mut self,
         request: CommandGetExportedKeys,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
@@ -372,7 +332,7 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn get_imported_keys(
         &mut self,
         request: CommandGetImportedKeys,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
@@ -382,21 +342,18 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn get_cross_reference(
         &mut self,
         request: CommandGetCrossReference,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
     /// Request a list of table types.
-    pub async fn get_table_types(&mut self) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_table_types(&mut self) -> Result<FlightInfo> {
         self.get_flight_info_for_command(CommandGetTableTypes {})
             .await
     }
 
     /// Request a list of SQL information.
-    pub async fn get_sql_info(
-        &mut self,
-        sql_infos: Vec<SqlInfo>,
-    ) -> Result<FlightInfo, ArrowError> {
+    pub async fn get_sql_info(&mut self, sql_infos: Vec<SqlInfo>) -> Result<FlightInfo> {
         let request = CommandGetSqlInfo {
             info: sql_infos.iter().map(|sql_info| *sql_info as u32).collect(),
         };
@@ -407,7 +364,7 @@ impl FlightSqlServiceClient<Channel> {
     pub async fn get_xdbc_type_info(
         &mut self,
         request: CommandGetXdbcTypeInfo,
-    ) -> Result<FlightInfo, ArrowError> {
+    ) -> Result<FlightInfo> {
         self.get_flight_info_for_command(request).await
     }
 
@@ -416,7 +373,10 @@ impl FlightSqlServiceClient<Channel> {
         &mut self,
         query: String,
         transaction_id: Option<Bytes>,
-    ) -> Result<PreparedStatement<Channel>, ArrowError> {
+    ) -> Result<PreparedStatement<T>>
+    where
+        T: Clone,
+    {
         let cmd = ActionCreatePreparedStatementRequest {
             query,
             transaction_id,
@@ -426,18 +386,9 @@ impl FlightSqlServiceClient<Channel> {
             body: cmd.as_any().encode_to_vec().into(),
         };
         let req = self.set_request_headers(action.into_request())?;
-        let mut result = self
-            .flight_client
-            .do_action(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
-        let result = result
-            .message()
-            .await
-            .map_err(status_to_arrow_error)?
-            .unwrap();
-        let any = Any::decode(&*result.body).map_err(decode_error_to_arrow_error)?;
+        let mut result = self.flight_client.do_action(req).await?.into_inner();
+        let result = result.message().await?.unwrap();
+        let any = Any::decode(&*result.body)?;
         let prepared_result: ActionCreatePreparedStatementResult = any.unpack()?.unwrap();
         let dataset_schema = match prepared_result.dataset_schema.len() {
             0 => Schema::empty(),
@@ -456,25 +407,16 @@ impl FlightSqlServiceClient<Channel> {
     }
 
     /// Request to begin a transaction.
-    pub async fn begin_transaction(&mut self) -> Result<Bytes, ArrowError> {
+    pub async fn begin_transaction(&mut self) -> Result<Bytes> {
         let cmd = ActionBeginTransactionRequest {};
         let action = Action {
             r#type: BEGIN_TRANSACTION.to_string(),
             body: cmd.as_any().encode_to_vec().into(),
         };
         let req = self.set_request_headers(action.into_request())?;
-        let mut result = self
-            .flight_client
-            .do_action(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
-        let result = result
-            .message()
-            .await
-            .map_err(status_to_arrow_error)?
-            .unwrap();
-        let any = Any::decode(&*result.body).map_err(decode_error_to_arrow_error)?;
+        let mut result = self.flight_client.do_action(req).await?.into_inner();
+        let result = result.message().await?.unwrap();
+        let any = Any::decode(&*result.body)?;
         let begin_result: ActionBeginTransactionResult = any.unpack()?.unwrap();
         Ok(begin_result.transaction_id)
     }
@@ -484,7 +426,7 @@ impl FlightSqlServiceClient<Channel> {
         &mut self,
         transaction_id: Bytes,
         action: EndTransaction,
-    ) -> Result<(), ArrowError> {
+    ) -> Result<()> {
         let cmd = ActionEndTransactionRequest {
             transaction_id,
             action: action as i32,
@@ -494,25 +436,17 @@ impl FlightSqlServiceClient<Channel> {
             body: cmd.as_any().encode_to_vec().into(),
         };
         let req = self.set_request_headers(action.into_request())?;
-        let _ = self
-            .flight_client
-            .do_action(req)
-            .await
-            .map_err(status_to_arrow_error)?
-            .into_inner();
+        let _ = self.flight_client.do_action(req).await?.into_inner();
         Ok(())
     }
 
     /// Explicitly shut down and clean up the client.
-    pub async fn close(&mut self) -> Result<(), ArrowError> {
+    pub async fn close(&mut self) -> Result<()> {
         // TODO: consume self instead of &mut self to explicitly prevent reuse?
         Ok(())
     }
 
-    fn set_request_headers<T>(
-        &self,
-        mut req: tonic::Request<T>,
-    ) -> Result<tonic::Request<T>, ArrowError> {
+    fn set_request_headers<M>(&self, mut req: tonic::Request<M>) -> Result<tonic::Request<M>> {
         for (k, v) in &self.headers {
             let k = AsciiMetadataKey::from_str(k.as_str()).map_err(|e| {
                 ArrowError::ParseError(format!("Cannot convert header key \"{k}\": {e}"))
@@ -532,6 +466,16 @@ impl FlightSqlServiceClient<Channel> {
     }
 }
 
+impl<T: Clone> Clone for FlightSqlServiceClient<T> {
+    fn clone(&self) -> Self {
+        Self {
+            token: self.token.clone(), // bearer token stored by `handshake`, if any
+            headers: self.headers.clone(), // headers applied by `set_request_headers`
+            flight_client: self.flight_client.clone(), // wrapped low-level client
+        }
+    }
+}
+
 /// A PreparedStatement
 #[derive(Debug, Clone)]
 pub struct PreparedStatement<T> {
@@ -542,9 +486,15 @@ pub struct PreparedStatement<T> {
     parameter_schema: Schema,
 }
 
-impl PreparedStatement<Channel> {
+impl<T> PreparedStatement<T>
+where
+    T: tonic::client::GrpcService<tonic::body::Body>,
+    T::Error: Into<StdError>,
+    T::ResponseBody: Body<Data = Bytes> + Send + 'static,
+    <T::ResponseBody as Body>::Error: Into<StdError> + Send,
+{
     pub(crate) fn new(
-        flight_client: FlightSqlServiceClient<Channel>,
+        flight_client: FlightSqlServiceClient<T>,
         handle: impl Into<Bytes>,
         dataset_schema: Schema,
         parameter_schema: Schema,
@@ -559,7 +509,7 @@ impl PreparedStatement<Channel> {
     }
 
     /// Executes the prepared statement query on the server.
-    pub async fn execute(&mut self) -> Result<FlightInfo, ArrowError> {
+    pub async fn execute(&mut self) -> Result<FlightInfo> {
         self.write_bind_params().await?;
 
         let cmd = CommandPreparedStatementQuery {
@@ -574,7 +524,7 @@ impl PreparedStatement<Channel> {
     }
 
     /// Executes the prepared statement update query on the server.
-    pub async fn execute_update(&mut self) -> Result<i64, ArrowError> {
+    pub async fn execute_update(&mut self) -> Result<i64> {
         self.write_bind_params().await?;
 
         let cmd = CommandPreparedStatementUpdate {
@@ -588,35 +538,30 @@ impl PreparedStatement<Channel> {
                 ..Default::default()
             }]))
             .await?;
-        let result = result
-            .message()
-            .await
-            .map_err(status_to_arrow_error)?
-            .unwrap();
-        let result: DoPutUpdateResult =
-            Message::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?;
+        let result = result.message().await?.unwrap();
+        let result: DoPutUpdateResult = Message::decode(&*result.app_metadata)?;
         Ok(result.record_count)
     }
 
     /// Retrieve the parameter schema from the query.
-    pub fn parameter_schema(&self) -> Result<&Schema, ArrowError> {
+    pub fn parameter_schema(&self) -> Result<&Schema> {
         Ok(&self.parameter_schema)
     }
 
     /// Retrieve the ResultSet schema from the query.
-    pub fn dataset_schema(&self) -> Result<&Schema, ArrowError> {
+    pub fn dataset_schema(&self) -> Result<&Schema> {
         Ok(&self.dataset_schema)
     }
 
     /// Set a RecordBatch that contains the parameters that will be bind.
-    pub fn set_parameters(&mut self, parameter_binding: RecordBatch) -> Result<(), ArrowError> {
+    pub fn set_parameters(&mut self, parameter_binding: RecordBatch) -> Result<()> {
         self.parameter_binding = Some(parameter_binding);
         Ok(())
     }
 
     /// Submit parameters to the server, if any have been set on this prepared statement instance
     /// Updates our stored prepared statement handle with the handle given by the server response.
-    async fn write_bind_params(&mut self) -> Result<(), ArrowError> {
+    async fn write_bind_params(&mut self) -> Result<()> {
         if let Some(ref params_batch) = self.parameter_binding {
             let cmd = CommandPreparedStatementQuery {
                 prepared_statement_handle: self.handle.clone(),
@@ -631,8 +576,7 @@ impl PreparedStatement<Channel> {
                     self.parameter_binding.clone().map(Ok),
                 ))
                 .try_collect::<Vec<_>>()
-                .await
-                .map_err(flight_error_to_arrow_error)?;
+                .await?;
 
             // Attempt to update the stored handle with any updated handle in the DoPut result.
             // Older servers do not respond with a result for DoPut, so skip this step when
@@ -642,8 +586,7 @@ impl PreparedStatement<Channel> {
                 .do_put(stream::iter(flight_data))
                 .await?
                 .message()
-                .await
-                .map_err(status_to_arrow_error)?
+                .await?
             {
                 if let Some(handle) = self.unpack_prepared_statement_handle(&result)? {
                     self.handle = handle;
@@ -656,18 +599,14 @@ impl PreparedStatement<Channel> {
     /// Decodes the app_metadata stored in a [`PutResult`] as a
     /// [`DoPutPreparedStatementResult`] and then returns
     /// the inner prepared statement handle as [`Bytes`]
-    fn unpack_prepared_statement_handle(
-        &self,
-        put_result: &PutResult,
-    ) -> Result<Option<Bytes>, ArrowError> {
-        let result: DoPutPreparedStatementResult =
-            Message::decode(&*put_result.app_metadata).map_err(decode_error_to_arrow_error)?;
+    fn unpack_prepared_statement_handle(&self, put_result: &PutResult) -> Result<Option<Bytes>> {
+        let result: DoPutPreparedStatementResult = Message::decode(&*put_result.app_metadata)?;
         Ok(result.prepared_statement_handle)
     }
 
     /// Close the prepared statement, so that this PreparedStatement can not used
     /// anymore and server can free up any resources.
-    pub async fn close(mut self) -> Result<(), ArrowError> {
+    pub async fn close(mut self) -> Result<()> {
         let cmd = ActionClosePreparedStatementRequest {
             prepared_statement_handle: self.handle.clone(),
         };
@@ -680,21 +619,6 @@ impl PreparedStatement<Channel> {
     }
 }
 
-fn decode_error_to_arrow_error(err: prost::DecodeError) -> ArrowError {
-    ArrowError::IpcError(err.to_string())
-}
-
-fn status_to_arrow_error(status: tonic::Status) -> ArrowError {
-    ArrowError::IpcError(format!("{status:?}"))
-}
-
-fn flight_error_to_arrow_error(err: FlightError) -> ArrowError {
-    match err {
-        FlightError::Arrow(e) => e,
-        e => ArrowError::ExternalError(Box::new(e)),
-    }
-}
-
 /// A polymorphic structure to natively represent different types of data contained in `FlightData`
 pub enum ArrowFlightData {
     /// A record batch
@@ -707,7 +631,7 @@ pub enum ArrowFlightData {
 pub fn arrow_data_from_flight_data(
     flight_data: FlightData,
     arrow_schema_ref: &SchemaRef,
-) -> Result<ArrowFlightData, ArrowError> {
+) -> std::result::Result<ArrowFlightData, ArrowError> {
     let ipc_message = root_as_message(&flight_data.data_header[..])
         .map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?;
 
diff --git a/arrow-flight/src/sql/metadata/db_schemas.rs b/arrow-flight/src/sql/metadata/db_schemas.rs
index 68e8b497336e..c182140e58f3 100644
--- a/arrow-flight/src/sql/metadata/db_schemas.rs
+++ b/arrow-flight/src/sql/metadata/db_schemas.rs
@@ -22,7 +22,7 @@
 use std::sync::Arc;
 
 use arrow_arith::boolean::and;
-use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch, StringArray};
+use arrow_array::{ArrayRef, RecordBatch, StringArray, builder::StringBuilder};
 use arrow_ord::cmp::eq;
 use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use arrow_select::{filter::filter_record_batch, take::take};
diff --git a/arrow-flight/src/sql/metadata/mod.rs b/arrow-flight/src/sql/metadata/mod.rs
index fd71149a3180..66c12fce9af4 100644
--- a/arrow-flight/src/sql/metadata/mod.rs
+++ b/arrow-flight/src/sql/metadata/mod.rs
@@ -70,8 +70,7 @@ mod tests {
         let actual_lines: Vec<_> = formatted.trim().lines().collect();
         assert_eq!(
             &actual_lines, expected_lines,
-            "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-            expected_lines, actual_lines
+            "\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n",
         );
     }
 }
diff --git a/arrow-flight/src/sql/metadata/sql_info.rs b/arrow-flight/src/sql/metadata/sql_info.rs
index 58b228530942..155946ea6ce6 100644
--- a/arrow-flight/src/sql/metadata/sql_info.rs
+++ b/arrow-flight/src/sql/metadata/sql_info.rs
@@ -30,7 +30,7 @@ use std::sync::Arc;
 use arrow_arith::boolean::or;
 use arrow_array::array::{Array, UInt32Array, UnionArray};
 use arrow_array::builder::{
-    ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, MapBuilder,
+    ArrayBuilder, BooleanBuilder, Int8Builder, Int32Builder, Int64Builder, ListBuilder, MapBuilder,
     StringBuilder, UInt32Builder,
 };
 use arrow_array::{RecordBatch, Scalar};
@@ -196,10 +196,7 @@ static UNION_TYPE: Lazy<DataType> = Lazy::new(|| {
         ),
     ];
 
-    // create "type ids", one for each type, assume they go from 0 .. num_fields
-    let type_ids: Vec<i8> = (0..fields.len()).map(|v| v as i8).collect();
-
-    DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
+    DataType::Union(UnionFields::from_fields(fields), UnionMode::Dense)
 });
 
 impl SqlInfoUnionBuilder {
@@ -444,7 +441,7 @@ pub struct GetSqlInfoBuilder<'a> {
 
 impl CommandGetSqlInfo {
     /// Create a builder suitable for constructing a response
-    pub fn into_builder(self, infos: &SqlInfoData) -> GetSqlInfoBuilder {
+    pub fn into_builder(self, infos: &SqlInfoData) -> GetSqlInfoBuilder<'_> {
         GetSqlInfoBuilder {
             info: self.info,
             infos,
diff --git a/arrow-flight/src/sql/metadata/table_types.rs b/arrow-flight/src/sql/metadata/table_types.rs
index 54cfe6fe27a7..7f525da05f90 100644
--- a/arrow-flight/src/sql/metadata/table_types.rs
+++ b/arrow-flight/src/sql/metadata/table_types.rs
@@ -21,7 +21,7 @@
 
 use std::sync::Arc;
 
-use arrow_array::{builder::StringBuilder, ArrayRef, RecordBatch};
+use arrow_array::{ArrayRef, RecordBatch, builder::StringBuilder};
 use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use arrow_select::take::take;
 use once_cell::sync::Lazy;
diff --git a/arrow-flight/src/sql/metadata/xdbc_info.rs b/arrow-flight/src/sql/metadata/xdbc_info.rs
index a3a18ca10888..62e2de9e5d97 100644
--- a/arrow-flight/src/sql/metadata/xdbc_info.rs
+++ b/arrow-flight/src/sql/metadata/xdbc_info.rs
@@ -299,7 +299,7 @@ pub struct GetXdbcTypeInfoBuilder<'a> {
 
 impl CommandGetXdbcTypeInfo {
     /// Create a builder suitable for constructing a response
-    pub fn into_builder(self, infos: &XdbcTypeInfoData) -> GetXdbcTypeInfoBuilder {
+    pub fn into_builder(self, infos: &XdbcTypeInfoData) -> GetXdbcTypeInfoBuilder<'_> {
         GetXdbcTypeInfoBuilder {
             data_type: self.data_type,
             infos,
diff --git a/arrow-flight/src/sql/mod.rs b/arrow-flight/src/sql/mod.rs
index 955f1904a6d6..e076f7aa0747 100644
--- a/arrow-flight/src/sql/mod.rs
+++ b/arrow-flight/src/sql/mod.rs
@@ -44,70 +44,70 @@ use paste::paste;
 use prost::Message;
 
 #[allow(clippy::all)]
-mod gen {
+mod r#gen {
     // Since this file is auto-generated, we suppress all warnings
     #![allow(missing_docs)]
     include!("arrow.flight.protocol.sql.rs");
 }
 
-pub use gen::action_end_transaction_request::EndTransaction;
-pub use gen::command_statement_ingest::table_definition_options::{
+pub use r#gen::ActionBeginSavepointRequest;
+pub use r#gen::ActionBeginSavepointResult;
+pub use r#gen::ActionBeginTransactionRequest;
+pub use r#gen::ActionBeginTransactionResult;
+pub use r#gen::ActionCancelQueryRequest;
+pub use r#gen::ActionCancelQueryResult;
+pub use r#gen::ActionClosePreparedStatementRequest;
+pub use r#gen::ActionCreatePreparedStatementRequest;
+pub use r#gen::ActionCreatePreparedStatementResult;
+pub use r#gen::ActionCreatePreparedSubstraitPlanRequest;
+pub use r#gen::ActionEndSavepointRequest;
+pub use r#gen::ActionEndTransactionRequest;
+pub use r#gen::CommandGetCatalogs;
+pub use r#gen::CommandGetCrossReference;
+pub use r#gen::CommandGetDbSchemas;
+pub use r#gen::CommandGetExportedKeys;
+pub use r#gen::CommandGetImportedKeys;
+pub use r#gen::CommandGetPrimaryKeys;
+pub use r#gen::CommandGetSqlInfo;
+pub use r#gen::CommandGetTableTypes;
+pub use r#gen::CommandGetTables;
+pub use r#gen::CommandGetXdbcTypeInfo;
+pub use r#gen::CommandPreparedStatementQuery;
+pub use r#gen::CommandPreparedStatementUpdate;
+pub use r#gen::CommandStatementIngest;
+pub use r#gen::CommandStatementQuery;
+pub use r#gen::CommandStatementSubstraitPlan;
+pub use r#gen::CommandStatementUpdate;
+pub use r#gen::DoPutPreparedStatementResult;
+pub use r#gen::DoPutUpdateResult;
+pub use r#gen::Nullable;
+pub use r#gen::Searchable;
+pub use r#gen::SqlInfo;
+pub use r#gen::SqlNullOrdering;
+pub use r#gen::SqlOuterJoinsSupportLevel;
+pub use r#gen::SqlSupportedCaseSensitivity;
+pub use r#gen::SqlSupportedElementActions;
+pub use r#gen::SqlSupportedGroupBy;
+pub use r#gen::SqlSupportedPositionedCommands;
+pub use r#gen::SqlSupportedResultSetConcurrency;
+pub use r#gen::SqlSupportedResultSetType;
+pub use r#gen::SqlSupportedSubqueries;
+pub use r#gen::SqlSupportedTransaction;
+pub use r#gen::SqlSupportedTransactions;
+pub use r#gen::SqlSupportedUnions;
+pub use r#gen::SqlSupportsConvert;
+pub use r#gen::SqlTransactionIsolationLevel;
+pub use r#gen::SubstraitPlan;
+pub use r#gen::SupportedSqlGrammar;
+pub use r#gen::TicketStatementQuery;
+pub use r#gen::UpdateDeleteRules;
+pub use r#gen::XdbcDataType;
+pub use r#gen::XdbcDatetimeSubcode;
+pub use r#gen::action_end_transaction_request::EndTransaction;
+pub use r#gen::command_statement_ingest::TableDefinitionOptions;
+pub use r#gen::command_statement_ingest::table_definition_options::{
     TableExistsOption, TableNotExistOption,
 };
-pub use gen::command_statement_ingest::TableDefinitionOptions;
-pub use gen::ActionBeginSavepointRequest;
-pub use gen::ActionBeginSavepointResult;
-pub use gen::ActionBeginTransactionRequest;
-pub use gen::ActionBeginTransactionResult;
-pub use gen::ActionCancelQueryRequest;
-pub use gen::ActionCancelQueryResult;
-pub use gen::ActionClosePreparedStatementRequest;
-pub use gen::ActionCreatePreparedStatementRequest;
-pub use gen::ActionCreatePreparedStatementResult;
-pub use gen::ActionCreatePreparedSubstraitPlanRequest;
-pub use gen::ActionEndSavepointRequest;
-pub use gen::ActionEndTransactionRequest;
-pub use gen::CommandGetCatalogs;
-pub use gen::CommandGetCrossReference;
-pub use gen::CommandGetDbSchemas;
-pub use gen::CommandGetExportedKeys;
-pub use gen::CommandGetImportedKeys;
-pub use gen::CommandGetPrimaryKeys;
-pub use gen::CommandGetSqlInfo;
-pub use gen::CommandGetTableTypes;
-pub use gen::CommandGetTables;
-pub use gen::CommandGetXdbcTypeInfo;
-pub use gen::CommandPreparedStatementQuery;
-pub use gen::CommandPreparedStatementUpdate;
-pub use gen::CommandStatementIngest;
-pub use gen::CommandStatementQuery;
-pub use gen::CommandStatementSubstraitPlan;
-pub use gen::CommandStatementUpdate;
-pub use gen::DoPutPreparedStatementResult;
-pub use gen::DoPutUpdateResult;
-pub use gen::Nullable;
-pub use gen::Searchable;
-pub use gen::SqlInfo;
-pub use gen::SqlNullOrdering;
-pub use gen::SqlOuterJoinsSupportLevel;
-pub use gen::SqlSupportedCaseSensitivity;
-pub use gen::SqlSupportedElementActions;
-pub use gen::SqlSupportedGroupBy;
-pub use gen::SqlSupportedPositionedCommands;
-pub use gen::SqlSupportedResultSetConcurrency;
-pub use gen::SqlSupportedResultSetType;
-pub use gen::SqlSupportedSubqueries;
-pub use gen::SqlSupportedTransaction;
-pub use gen::SqlSupportedTransactions;
-pub use gen::SqlSupportedUnions;
-pub use gen::SqlSupportsConvert;
-pub use gen::SqlTransactionIsolationLevel;
-pub use gen::SubstraitPlan;
-pub use gen::SupportedSqlGrammar;
-pub use gen::TicketStatementQuery;
-pub use gen::UpdateDeleteRules;
-pub use gen::XdbcDataType;
-pub use gen::XdbcDatetimeSubcode;
 
 pub mod client;
 pub mod metadata;
diff --git a/arrow-flight/src/sql/server.rs b/arrow-flight/src/sql/server.rs
index add7c8db40c2..871a67b72cd6 100644
--- a/arrow-flight/src/sql/server.rs
+++ b/arrow-flight/src/sql/server.rs
@@ -34,11 +34,11 @@ use super::{
     SqlInfo, TicketStatementQuery,
 };
 use crate::{
-    flight_service_server::FlightService, gen::PollInfo, Action, ActionType, Criteria, Empty,
-    FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult,
-    SchemaResult, Ticket,
+    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
+    HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket,
+    flight_service_server::FlightService, r#gen::PollInfo,
 };
-use futures::{stream::Peekable, Stream, StreamExt};
+use futures::{Stream, StreamExt, stream::Peekable};
 use prost::Message;
 use tonic::{Request, Response, Status, Streaming};
 
@@ -392,7 +392,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static {
         _request: Request<PeekableFlightDataStream>,
         error: DoPutError,
     ) -> Result<Response<<Self as FlightService>::DoPutStream>, Status> {
-        Err(Status::unimplemented(format!("Unhandled Error: {}", error)))
+        Err(Status::unimplemented(format!("Unhandled Error: {error}")))
     }
 
     /// Execute an update SQL statement.
@@ -628,7 +628,7 @@ where
                 self.get_flight_info_catalogs(token, request).await
             }
             Command::CommandGetDbSchemas(token) => {
-                return self.get_flight_info_schemas(token, request).await
+                return self.get_flight_info_schemas(token, request).await;
             }
             Command::CommandGetTables(token) => self.get_flight_info_tables(token, request).await,
             Command::CommandGetTableTypes(token) => {
@@ -879,7 +879,7 @@ where
             let stmt = self
                 .do_action_create_prepared_statement(cmd, request)
                 .await?;
-            let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
+            let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result {
                 body: stmt.as_any().encode_to_vec().into(),
             })]);
             return Ok(Response::new(Box::pin(output)));
@@ -921,7 +921,7 @@ where
                 Status::invalid_argument("Unable to unpack ActionBeginTransactionRequest.")
             })?;
             let stmt = self.do_action_begin_transaction(cmd, request).await?;
-            let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
+            let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result {
                 body: stmt.as_any().encode_to_vec().into(),
             })]);
             return Ok(Response::new(Box::pin(output)));
@@ -946,7 +946,7 @@ where
                     Status::invalid_argument("Unable to unpack ActionBeginSavepointRequest.")
                 })?;
             let stmt = self.do_action_begin_savepoint(cmd, request).await?;
-            let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
+            let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result {
                 body: stmt.as_any().encode_to_vec().into(),
             })]);
             return Ok(Response::new(Box::pin(output)));
@@ -971,7 +971,7 @@ where
                     Status::invalid_argument("Unable to unpack ActionCancelQueryRequest.")
                 })?;
             let stmt = self.do_action_cancel_query(cmd, request).await?;
-            let output = futures::stream::iter(vec![Ok(super::super::gen::Result {
+            let output = futures::stream::iter(vec![Ok(super::super::r#gen::Result {
                 body: stmt.as_any().encode_to_vec().into(),
             })]);
             return Ok(Response::new(Box::pin(output)));
diff --git a/arrow-flight/src/streams.rs b/arrow-flight/src/streams.rs
index 0cd3aa41a547..8a9d5ab30667 100644
--- a/arrow-flight/src/streams.rs
+++ b/arrow-flight/src/streams.rs
@@ -19,11 +19,11 @@
 
 use crate::error::FlightError;
 use futures::{
-    channel::oneshot::{Receiver, Sender},
     FutureExt, Stream, StreamExt,
+    channel::oneshot::{Receiver, Sender},
 };
 use std::pin::Pin;
-use std::task::{ready, Poll};
+use std::task::{Poll, ready};
 
 /// Wrapper around a fallible stream (one that returns errors) that makes it infallible.
 ///
diff --git a/arrow-flight/src/trailers.rs b/arrow-flight/src/trailers.rs
index 73136379d69f..7929b53a41a0 100644
--- a/arrow-flight/src/trailers.rs
+++ b/arrow-flight/src/trailers.rs
@@ -21,8 +21,8 @@ use std::{
     task::{Context, Poll},
 };
 
-use futures::{ready, FutureExt, Stream, StreamExt};
-use tonic::{metadata::MetadataMap, Status, Streaming};
+use futures::{FutureExt, Stream, StreamExt, ready};
+use tonic::{Status, Streaming, metadata::MetadataMap};
 
 /// Extract [`LazyTrailers`] from [`Streaming`] [tonic] response.
 ///
diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
index 428dde73ca6c..6effb5f86aaf 100644
--- a/arrow-flight/src/utils.rs
+++ b/arrow-flight/src/utils.rs
@@ -24,6 +24,7 @@ use std::sync::Arc;
 use arrow_array::{ArrayRef, RecordBatch};
 use arrow_buffer::Buffer;
 use arrow_ipc::convert::fb_to_schema;
+use arrow_ipc::writer::CompressionContext;
 use arrow_ipc::{reader, root_as_message, writer, writer::IpcWriteOptions};
 use arrow_schema::{ArrowError, Schema, SchemaRef};
 
@@ -90,13 +91,16 @@ pub fn batches_to_flight_data(
     let mut flight_data = vec![];
 
     let data_gen = writer::IpcDataGenerator::default();
-    #[allow(deprecated)]
-    let mut dictionary_tracker =
-        writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
+    let mut dictionary_tracker = writer::DictionaryTracker::new(false);
+    let mut compression_context = CompressionContext::default();
 
     for batch in batches.iter() {
-        let (encoded_dictionaries, encoded_batch) =
-            data_gen.encoded_batch(batch, &mut dictionary_tracker, &options)?;
+        let (encoded_dictionaries, encoded_batch) = data_gen.encode(
+            batch,
+            &mut dictionary_tracker,
+            &options,
+            &mut compression_context,
+        )?;
 
         dictionaries.extend(encoded_dictionaries.into_iter().map(Into::into));
         flight_data.push(encoded_batch.into());
diff --git a/arrow-flight/tests/client.rs b/arrow-flight/tests/client.rs
index 25dad0e77a3e..ab566f578cbb 100644
--- a/arrow-flight/tests/client.rs
+++ b/arrow-flight/tests/client.rs
@@ -22,10 +22,10 @@ mod common;
 use crate::common::fixture::TestFixture;
 use arrow_array::{RecordBatch, UInt64Array};
 use arrow_flight::{
-    decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, error::FlightError, Action,
-    ActionType, CancelFlightInfoRequest, CancelFlightInfoResult, CancelStatus, Criteria, Empty,
-    FlightClient, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
-    HandshakeResponse, PollInfo, PutResult, RenewFlightEndpointRequest, Ticket,
+    Action, ActionType, CancelFlightInfoRequest, CancelFlightInfoResult, CancelStatus, Criteria,
+    Empty, FlightClient, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
+    HandshakeRequest, HandshakeResponse, PollInfo, PutResult, RenewFlightEndpointRequest, Ticket,
+    decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, error::FlightError,
 };
 use arrow_schema::{DataType, Field, Schema};
 use bytes::Bytes;
diff --git a/arrow-flight/tests/common/server.rs b/arrow-flight/tests/common/server.rs
index a004ccb0737e..5aa22a869627 100644
--- a/arrow-flight/tests/common/server.rs
+++ b/arrow-flight/tests/common/server.rs
@@ -19,14 +19,14 @@ use std::sync::{Arc, Mutex};
 
 use arrow_array::RecordBatch;
 use arrow_schema::Schema;
-use futures::{stream::BoxStream, StreamExt, TryStreamExt};
-use tonic::{metadata::MetadataMap, Request, Response, Status, Streaming};
+use futures::{StreamExt, TryStreamExt, stream::BoxStream};
+use tonic::{Request, Response, Status, Streaming, metadata::MetadataMap};
 
 use arrow_flight::{
-    encode::FlightDataEncoderBuilder,
-    flight_service_server::{FlightService, FlightServiceServer},
     Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
     HandshakeRequest, HandshakeResponse, PollInfo, PutResult, SchemaAsIpc, SchemaResult, Ticket,
+    encode::FlightDataEncoderBuilder,
+    flight_service_server::{FlightService, FlightServiceServer},
 };
 
 #[derive(Debug, Clone)]
diff --git a/arrow-flight/tests/common/utils.rs b/arrow-flight/tests/common/utils.rs
index 0f70e4b31021..f36b41cba344 100644
--- a/arrow-flight/tests/common/utils.rs
+++ b/arrow-flight/tests/common/utils.rs
@@ -20,8 +20,8 @@
 use std::sync::Arc;
 
 use arrow_array::{
-    types::Int32Type, ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, RecordBatch,
-    StringViewArray, UInt8Array,
+    ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, RecordBatch, StringViewArray,
+    UInt8Array, types::Int32Type,
 };
 use arrow_schema::{DataType, Field, Schema};
 
diff --git a/arrow-flight/tests/encode_decode.rs b/arrow-flight/tests/encode_decode.rs
index cbfae1825845..fcd6b39ab0a1 100644
--- a/arrow-flight/tests/encode_decode.rs
+++ b/arrow-flight/tests/encode_decode.rs
@@ -21,8 +21,8 @@ use std::{collections::HashMap, sync::Arc};
 
 use arrow_array::{ArrayRef, RecordBatch};
 use arrow_cast::pretty::pretty_format_batches;
-use arrow_flight::flight_descriptor::DescriptorType;
 use arrow_flight::FlightDescriptor;
+use arrow_flight::flight_descriptor::DescriptorType;
 use arrow_flight::{
     decode::{DecodedPayload, FlightDataDecoder, FlightRecordBatchStream},
     encode::FlightDataEncoderBuilder,
diff --git a/arrow-flight/tests/flight_sql_client.rs b/arrow-flight/tests/flight_sql_client.rs
index f3b7114dbafa..97687c3dea37 100644
--- a/arrow-flight/tests/flight_sql_client.rs
+++ b/arrow-flight/tests/flight_sql_client.rs
@@ -64,10 +64,12 @@ pub async fn test_begin_end_transaction() {
 
     // unknown transaction id
     let transaction_id = "UnknownTransactionId".to_string().into();
-    assert!(flight_sql_client
-        .end_transaction(transaction_id, EndTransaction::Commit)
-        .await
-        .is_err());
+    assert!(
+        flight_sql_client
+            .end_transaction(transaction_id, EndTransaction::Commit)
+            .await
+            .is_err()
+    );
 }
 
 #[tokio::test]
@@ -139,9 +141,10 @@ pub async fn test_do_put_empty_stream() {
 
     // Execute a `do_put` and verify that the server error contains the expected message
     let err = flight_sql_client.do_put(request_stream).await.unwrap_err();
-    assert!(err
-        .to_string()
-        .contains("Unhandled Error: Command is missing."),);
+    assert!(
+        err.to_string()
+            .contains("Unhandled Error: Command is missing."),
+    );
 }
 
 #[tokio::test]
@@ -172,9 +175,10 @@ pub async fn test_do_put_first_element_err() {
     // Execute a `do_put` and verify that the server error contains the expected message
     let err = flight_sql_client.do_put(request_stream).await.unwrap_err();
 
-    assert!(err
-        .to_string()
-        .contains("Unhandled Error: Command is missing."),);
+    assert!(
+        err.to_string()
+            .contains("Unhandled Error: Command is missing."),
+    );
 }
 
 #[tokio::test]
@@ -196,9 +200,10 @@ pub async fn test_do_put_missing_flight_descriptor() {
 
     // Execute a `do_put` and verify that the server error contains the expected message
     let err = flight_sql_client.do_put(request_stream).await.unwrap_err();
-    assert!(err
-        .to_string()
-        .contains("Unhandled Error: Flight descriptor is missing."),);
+    assert!(
+        err.to_string()
+            .contains("Unhandled Error: Flight descriptor is missing."),
+    );
 }
 
 fn make_ingest_command() -> CommandStatementIngest {
diff --git a/arrow-flight/tests/flight_sql_client_cli.rs b/arrow-flight/tests/flight_sql_client_cli.rs
index c8e9190e246f..c161caae8ca4 100644
--- a/arrow-flight/tests/flight_sql_client_cli.rs
+++ b/arrow-flight/tests/flight_sql_client_cli.rs
@@ -22,19 +22,19 @@ use std::{pin::Pin, sync::Arc};
 use crate::common::fixture::TestFixture;
 use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray};
 use arrow_flight::{
+    Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
+    HandshakeResponse, IpcMessage, SchemaAsIpc, Ticket,
     decode::FlightRecordBatchStream,
     encode::FlightDataEncoderBuilder,
     flight_service_server::{FlightService, FlightServiceServer},
     sql::{
-        server::{FlightSqlService, PeekableFlightDataStream},
         ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, Any,
         CommandGetCatalogs, CommandGetDbSchemas, CommandGetTableTypes, CommandGetTables,
         CommandPreparedStatementQuery, CommandStatementQuery, DoPutPreparedStatementResult,
         ProstMessageExt, SqlInfo,
+        server::{FlightSqlService, PeekableFlightDataStream},
     },
     utils::batches_to_flight_data,
-    Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest,
-    HandshakeResponse, IpcMessage, SchemaAsIpc, Ticket,
 };
 use arrow_ipc::writer::IpcWriteOptions;
 use arrow_schema::{ArrowError, DataType, Field, Schema, TimeUnit};
@@ -46,6 +46,11 @@ use tonic::{Request, Response, Status, Streaming};
 
 const QUERY: &str = "SELECT * FROM table;";
 
+/// Return a Command instance for running the `flight_sql_client` CLI
+fn flight_sql_client_cmd() -> Command {
+    Command::new(assert_cmd::cargo::cargo_bin!("flight_sql_client"))
+}
+
 #[tokio::test]
 async fn test_simple() {
     let test_server = FlightSqlServiceImpl::default();
@@ -53,8 +58,7 @@ async fn test_simple() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -94,8 +98,7 @@ async fn test_get_catalogs() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -133,8 +136,7 @@ async fn test_get_db_schemas() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -173,8 +175,7 @@ async fn test_get_tables() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -212,8 +213,7 @@ async fn test_get_tables_db_filter() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -253,8 +253,7 @@ async fn test_get_tables_types() {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
@@ -295,8 +294,7 @@ async fn test_do_put_prepared_statement(test_server: FlightSqlServiceImpl) {
     let addr = fixture.addr;
 
     let stdout = tokio::task::spawn_blocking(move || {
-        Command::cargo_bin("flight_sql_client")
-            .unwrap()
+        flight_sql_client_cmd()
             .env_clear()
             .env("RUST_BACKTRACE", "1")
             .env("RUST_LOG", "warn")
diff --git a/arrow-integration-test/Cargo.toml b/arrow-integration-test/Cargo.toml
index d560d4fd8363..39ea3b60b1ab 100644
--- a/arrow-integration-test/Cargo.toml
+++ b/arrow-integration-test/Cargo.toml
@@ -39,6 +39,7 @@ all-features = true
 arrow = { workspace = true }
 arrow-buffer = { workspace = true }
 hex = { version = "0.4", default-features = false, features = ["std"] }
+num-bigint = { version = "0.4", default-features = false }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
-num = { version = "0.4", default-features = false, features = ["std"] }
diff --git a/arrow-integration-test/src/datatype.rs b/arrow-integration-test/src/datatype.rs
index 24e02c8430c7..4c17fbe76be7 100644
--- a/arrow-integration-test/src/datatype.rs
+++ b/arrow-integration-test/src/datatype.rs
@@ -61,6 +61,8 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result<DataType> {
                 };
 
                 match bit_width {
+                    32 => Ok(DataType::Decimal32(precision, scale)),
+                    64 => Ok(DataType::Decimal64(precision, scale)),
                     128 => Ok(DataType::Decimal128(precision, scale)),
                     256 => Ok(DataType::Decimal256(precision, scale)),
                     _ => Err(ArrowError::ParseError(
@@ -335,6 +337,12 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value {
             TimeUnit::Nanosecond => "NANOSECOND",
         }}),
         DataType::Dictionary(_, _) => json!({ "name": "dictionary"}),
+        DataType::Decimal32(precision, scale) => {
+            json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 32})
+        }
+        DataType::Decimal64(precision, scale) => {
+            json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 64})
+        }
         DataType::Decimal128(precision, scale) => {
             json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128})
         }
diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs
index 4b896ed391be..8b0ca264e02e 100644
--- a/arrow-integration-test/src/field.rs
+++ b/arrow-integration-test/src/field.rs
@@ -142,7 +142,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
                         Some(_) => {
                             return Err(ArrowError::ParseError(
                                 "Field 'children' must be an array".to_string(),
-                            ))
+                            ));
                         }
                         None => {
                             return Err(ArrowError::ParseError(
@@ -158,7 +158,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
                     Some(_) => {
                         return Err(ArrowError::ParseError(
                             "Field 'children' must be an array".to_string(),
-                        ))
+                        ));
                     }
                     None => {
                         return Err(ArrowError::ParseError(
@@ -177,15 +177,15 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
                                 }
                                 t => {
                                     return Err(ArrowError::ParseError(format!(
-                                    "Map children should be a struct with 2 fields, found {t:?}"
-                                )))
+                                        "Map children should be a struct with 2 fields, found {t:?}"
+                                    )));
                                 }
                             }
                         }
                         Some(_) => {
                             return Err(ArrowError::ParseError(
                                 "Field 'children' must be an array with 1 element".to_string(),
-                            ))
+                            ));
                         }
                         None => {
                             return Err(ArrowError::ParseError(
@@ -207,7 +207,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
                     Some(_) => {
                         return Err(ArrowError::ParseError(
                             "Field 'children' must be an array".to_string(),
-                        ))
+                        ));
                     }
                     None => {
                         return Err(ArrowError::ParseError(
@@ -275,7 +275,7 @@ pub fn field_to_json(field: &Field) -> serde_json::Value {
     };
 
     match field.data_type() {
-        DataType::Dictionary(ref index_type, ref value_type) => {
+        DataType::Dictionary(index_type, value_type) => {
             #[allow(deprecated)]
             let dict_id = field.dict_id().unwrap();
             serde_json::json!({
diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs
index baa76059f9c6..0f0b4fe2ffee 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -15,22 +15,30 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Support for the [Apache Arrow JSON test data format](https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format)
+//! Partial support for the [Apache Arrow JSON test data format](https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format)
 //!
 //! These utilities define structs that read the integration JSON format for integration testing purposes.
 //!
 //! This is not a canonical format, but provides a human-readable way of verifying language implementations
+//!
+//! <div class="warning">
+//!
+//! This crate is **only intended for integration testing the
+//! [Arrow project](https://github.com/apache/arrow-rs)**. It is not [intended for usage outside of
+//! this context](https://github.com/apache/arrow-rs/issues/8684#issuecomment-3433193158).
+//!
+//! </div>
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer};
 use hex::decode;
-use num::BigInt;
-use num::Signed;
+use num_bigint::BigInt;
+use num_traits::Signed;
 use serde::{Deserialize, Serialize};
 use serde_json::{Map as SJMap, Value};
 use std::collections::HashMap;
@@ -794,13 +802,13 @@ pub fn array_from_json(
         DataType::Dictionary(key_type, value_type) => {
             #[allow(deprecated)]
             let dict_id = field.dict_id().ok_or_else(|| {
-                ArrowError::JsonError(format!("Unable to find dict_id for field {field:?}"))
+                ArrowError::JsonError(format!("Unable to find dict_id for field {field}"))
             })?;
             // find dictionary
             let dictionary = dictionaries
                 .ok_or_else(|| {
                     ArrowError::JsonError(format!(
-                        "Unable to find any dictionaries for field {field:?}"
+                        "Unable to find any dictionaries for field {field}"
                     ))
                 })?
                 .get(&dict_id);
@@ -814,10 +822,46 @@ pub fn array_from_json(
                     dictionaries,
                 ),
                 None => Err(ArrowError::JsonError(format!(
-                    "Unable to find dictionary for field {field:?}"
+                    "Unable to find dictionary for field {field}"
                 ))),
             }
         }
+        DataType::Decimal32(precision, scale) => {
+            let mut b = Decimal32Builder::with_capacity(json_col.count);
+            for (is_valid, value) in json_col
+                .validity
+                .as_ref()
+                .unwrap()
+                .iter()
+                .zip(json_col.data.unwrap())
+            {
+                match is_valid {
+                    1 => b.append_value(value.as_str().unwrap().parse::<i32>().unwrap()),
+                    _ => b.append_null(),
+                };
+            }
+            Ok(Arc::new(
+                b.finish().with_precision_and_scale(*precision, *scale)?,
+            ))
+        }
+        DataType::Decimal64(precision, scale) => {
+            let mut b = Decimal64Builder::with_capacity(json_col.count);
+            for (is_valid, value) in json_col
+                .validity
+                .as_ref()
+                .unwrap()
+                .iter()
+                .zip(json_col.data.unwrap())
+            {
+                match is_valid {
+                    1 => b.append_value(value.as_str().unwrap().parse::<i64>().unwrap()),
+                    _ => b.append_null(),
+                };
+            }
+            Ok(Arc::new(
+                b.finish().with_precision_and_scale(*precision, *scale)?,
+            ))
+        }
         DataType::Decimal128(precision, scale) => {
             let mut b = Decimal128Builder::with_capacity(json_col.count);
             for (is_valid, value) in json_col
@@ -910,7 +954,7 @@ pub fn array_from_json(
             Ok(Arc::new(array))
         }
         t => Err(ArrowError::JsonError(format!(
-            "data type {t:?} not supported"
+            "data type {t} not supported"
         ))),
     }
 }
@@ -1007,6 +1051,16 @@ fn create_null_buf(json_col: &ArrowJsonColumn) -> Buffer {
 
 impl ArrowJsonBatch {
     /// Convert a [`RecordBatch`] to an [`ArrowJsonBatch`]
+    ///
+    /// <div class="warning">
+    ///
+    /// This function is **deliberately incomplete**! As noted in the crate-level documentation,
+    /// this crate is only intended for use within the Arrow project itself.
+    ///
+    /// Right now, this function only supports `DataType::Int8` columns. Other data types will lead
+    /// to an empty `ArrowJsonColumn`.
+    ///
+    /// </div>
     pub fn from_batch(batch: &RecordBatch) -> ArrowJsonBatch {
         let mut json_batch = ArrowJsonBatch {
             count: batch.num_rows(),
diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs
index 512f0aed8e54..7777c48c1f4b 100644
--- a/arrow-integration-test/src/schema.rs
+++ b/arrow-integration-test/src/schema.rs
@@ -40,7 +40,7 @@ pub fn schema_from_json(json: &serde_json::Value) -> Result<Schema> {
                 _ => {
                     return Err(ArrowError::ParseError(
                         "Schema fields should be an array".to_string(),
-                    ))
+                    ));
                 }
             };
 
diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml
index 8654b4b92734..ae13d32b57a9 100644
--- a/arrow-integration-testing/Cargo.toml
+++ b/arrow-integration-testing/Cargo.toml
@@ -39,11 +39,10 @@ arrow-flight = { path = "../arrow-flight", default-features = false }
 arrow-integration-test = { path = "../arrow-integration-test", default-features = false }
 clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] }
 futures = { version = "0.3", default-features = false }
-prost = { version = "0.13", default-features = false }
-serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
+prost = { version = "0.14.1", default-features = false }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
 tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] }
-tonic = { version = "0.12", default-features = false }
+tonic = { version = "0.14.1", default-features = false }
 tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true }
 flate2 = { version = "1", default-features = false, features = ["rust_backend"] }
 
diff --git a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
index 34c3c7706df5..4c12be6d6c42 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs
@@ -19,10 +19,10 @@
 
 use crate::{AUTH_PASSWORD, AUTH_USERNAME};
 
-use arrow_flight::{flight_service_client::FlightServiceClient, BasicAuth, HandshakeRequest};
-use futures::{stream, StreamExt};
+use arrow_flight::{BasicAuth, HandshakeRequest, flight_service_client::FlightServiceClient};
+use futures::{StreamExt, stream};
 use prost::Message;
-use tonic::{metadata::MetadataValue, Request, Status};
+use tonic::{Request, Status, metadata::MetadataValue, transport::Endpoint};
 
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
 type Result<T = (), E = Error> = std::result::Result<T, E>;
@@ -32,7 +32,9 @@ type Client = FlightServiceClient<tonic::transport::Channel>;
 /// Run a scenario that tests basic auth.
 pub async fn run_scenario(host: &str, port: u16) -> Result {
     let url = format!("http://{host}:{port}");
-    let mut client = FlightServiceClient::connect(url).await?;
+    let endpoint = Endpoint::new(url)?;
+    let channel = endpoint.connect().await?;
+    let mut client = FlightServiceClient::new(channel);
 
     let action = arrow_flight::Action::default();
 
diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
index 406419028d00..05ca5627ecd8 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs
@@ -24,15 +24,18 @@ use arrow::{
     array::ArrayRef,
     buffer::Buffer,
     datatypes::SchemaRef,
-    ipc::{self, reader, writer},
+    ipc::{
+        self, reader,
+        writer::{self, CompressionContext},
+    },
     record_batch::RecordBatch,
 };
 use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient,
-    utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, IpcMessage, Location, Ticket,
+    FlightData, FlightDescriptor, IpcMessage, Location, Ticket, flight_descriptor::DescriptorType,
+    flight_service_client::FlightServiceClient, utils::flight_data_to_arrow_batch,
 };
-use futures::{channel::mpsc, sink::SinkExt, stream, StreamExt};
-use tonic::{Request, Streaming};
+use futures::{StreamExt, channel::mpsc, sink::SinkExt, stream};
+use tonic::{Request, Streaming, transport::Endpoint};
 
 use arrow::datatypes::Schema;
 use std::sync::Arc;
@@ -46,7 +49,9 @@ type Client = FlightServiceClient<tonic::transport::Channel>;
 pub async fn run_scenario(host: &str, port: u16, path: &str) -> Result {
     let url = format!("http://{host}:{port}");
 
-    let client = FlightServiceClient::connect(url).await?;
+    let endpoint = Endpoint::new(url)?;
+    let channel = endpoint.connect().await?;
+    let client = FlightServiceClient::new(channel);
 
     let json_file = open_json_file(path)?;
 
@@ -72,9 +77,7 @@ async fn upload_data(
     let (mut upload_tx, upload_rx) = mpsc::channel(10);
 
     let options = arrow::ipc::writer::IpcWriteOptions::default();
-    #[allow(deprecated)]
-    let mut dict_tracker =
-        writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
+    let mut dict_tracker = writer::DictionaryTracker::new(false);
     let data_gen = writer::IpcDataGenerator::default();
     let data = IpcMessage(
         data_gen
@@ -92,6 +95,8 @@ async fn upload_data(
 
     let mut original_data_iter = original_data.iter().enumerate();
 
+    let mut compression_context = CompressionContext::default();
+
     if let Some((counter, first_batch)) = original_data_iter.next() {
         let metadata = counter.to_string().into_bytes();
         // Preload the first batch into the channel before starting the request
@@ -101,6 +106,7 @@ async fn upload_data(
             first_batch,
             &options,
             &mut dict_tracker,
+            &mut compression_context,
         )
         .await?;
 
@@ -123,6 +129,7 @@ async fn upload_data(
                 batch,
                 &options,
                 &mut dict_tracker,
+                &mut compression_context,
             )
             .await?;
 
@@ -152,11 +159,12 @@ async fn send_batch(
     batch: &RecordBatch,
     options: &writer::IpcWriteOptions,
     dictionary_tracker: &mut writer::DictionaryTracker,
+    compression_context: &mut CompressionContext,
 ) -> Result {
     let data_gen = writer::IpcDataGenerator::default();
 
     let (encoded_dictionaries, encoded_batch) = data_gen
-        .encoded_batch(batch, dictionary_tracker, options)
+        .encode(batch, dictionary_tracker, options, compression_context)
         .expect("DictionaryTracker configured above to not error on replacement");
 
     let dictionary_flight_data: Vec<FlightData> =
@@ -213,7 +221,9 @@ async fn consume_flight_location(
     // more details: https://github.com/apache/arrow-rs/issues/1398
     location.uri = location.uri.replace("grpc+tcp://", "http://");
 
-    let mut client = FlightServiceClient::connect(location.uri).await?;
+    let endpoint = Endpoint::new(location.uri)?;
+    let channel = endpoint.connect().await?;
+    let mut client = FlightServiceClient::new(channel);
     let resp = client.do_get(ticket).await?;
     let mut resp = resp.into_inner();
 
diff --git a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
index 495825738aec..e8836c34c47d 100644
--- a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs
@@ -18,7 +18,7 @@
 //! Scenario for testing middleware.
 
 use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient, FlightDescriptor,
+    FlightDescriptor, flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient,
 };
 use prost::bytes::Bytes;
 use tonic::{Request, Status};
diff --git a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
index 5462e5bd674b..38582e6fef68 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs
@@ -21,13 +21,13 @@ use std::pin::Pin;
 use std::sync::Arc;
 
 use arrow_flight::{
-    flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action,
-    ActionType, BasicAuth, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
+    Action, ActionType, BasicAuth, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
     HandshakeRequest, HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket,
+    flight_service_server::FlightService, flight_service_server::FlightServiceServer,
 };
-use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt};
+use futures::{Stream, StreamExt, channel::mpsc, sink::SinkExt};
 use tokio::sync::Mutex;
-use tonic::{metadata::MetadataMap, transport::Server, Request, Response, Status, Streaming};
+use tonic::{Request, Response, Status, Streaming, metadata::MetadataMap, transport::Server};
 type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
 
 type Error = Box<dyn std::error::Error + Send + Sync + 'static>;
diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
index 92989a20393e..ae316886381a 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs
@@ -31,14 +31,14 @@ use arrow::{
     record_batch::RecordBatch,
 };
 use arrow_flight::{
-    flight_descriptor::DescriptorType, flight_service_server::FlightService,
-    flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, FlightData,
-    FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse, IpcMessage,
-    PollInfo, PutResult, SchemaAsIpc, SchemaResult, Ticket,
+    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo,
+    HandshakeRequest, HandshakeResponse, IpcMessage, PollInfo, PutResult, SchemaAsIpc,
+    SchemaResult, Ticket, flight_descriptor::DescriptorType, flight_service_server::FlightService,
+    flight_service_server::FlightServiceServer,
 };
-use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt};
+use futures::{Stream, StreamExt, channel::mpsc, sink::SinkExt};
 use tokio::sync::Mutex;
-use tonic::{transport::Server, Request, Response, Status, Streaming};
+use tonic::{Request, Response, Status, Streaming, transport::Server};
 
 type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
 
@@ -119,9 +119,7 @@ impl FlightService for FlightServiceImpl {
             .ok_or_else(|| Status::not_found(format!("Could not find flight. {key}")))?;
 
         let options = arrow::ipc::writer::IpcWriteOptions::default();
-        #[allow(deprecated)]
-        let mut dictionary_tracker =
-            writer::DictionaryTracker::new_with_preserve_dict_id(false, options.preserve_dict_id());
+        let mut dictionary_tracker = writer::DictionaryTracker::new(false);
         let data_gen = writer::IpcDataGenerator::default();
         let data = IpcMessage(
             data_gen
@@ -146,7 +144,12 @@ impl FlightService for FlightServiceImpl {
             .enumerate()
             .flat_map(|(counter, batch)| {
                 let (encoded_dictionaries, encoded_batch) = data_gen
-                    .encoded_batch(batch, &mut dictionary_tracker, &options)
+                    .encode(
+                        batch,
+                        &mut dictionary_tracker,
+                        &options,
+                        &mut Default::default(),
+                    )
                     .expect("DictionaryTracker configured above to not error on replacement");
 
                 let dictionary_flight_data = encoded_dictionaries.into_iter().map(Into::into);
@@ -380,7 +383,7 @@ async fn save_uploaded_chunks(
             ipc::MessageHeader::Schema => {
                 return Err(Status::internal(
                     "Not expecting a schema when messages are read",
-                ))
+                ));
             }
             ipc::MessageHeader::RecordBatch => {
                 send_app_metadata(&mut response_tx, &data.app_metadata).await?;
diff --git a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
index 6685d45dffac..6bafb4843316 100644
--- a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
+++ b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs
@@ -20,13 +20,13 @@
 use std::pin::Pin;
 
 use arrow_flight::{
+    Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo,
+    HandshakeRequest, HandshakeResponse, PollInfo, PutResult, SchemaResult, Ticket,
     flight_descriptor::DescriptorType, flight_service_server::FlightService,
-    flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, FlightData,
-    FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PollInfo, PutResult,
-    SchemaResult, Ticket,
+    flight_service_server::FlightServiceServer,
 };
 use futures::Stream;
-use tonic::{transport::Server, Request, Response, Status, Streaming};
+use tonic::{Request, Response, Status, Streaming, transport::Server};
 
 type TonicStream<T> = Pin<Box<dyn Stream<Item = T> + Send + Sync + 'static>>;
 
diff --git a/arrow-integration-testing/src/lib.rs b/arrow-integration-testing/src/lib.rs
index e669690ef4f5..cf572d769df5 100644
--- a/arrow-integration-testing/src/lib.rs
+++ b/arrow-integration-testing/src/lib.rs
@@ -25,12 +25,12 @@ use serde_json::Value;
 use arrow::array::{Array, StructArray};
 use arrow::datatypes::{DataType, Field, Fields, Schema};
 use arrow::error::{ArrowError, Result};
-use arrow::ffi::{from_ffi_and_data_type, FFI_ArrowArray, FFI_ArrowSchema};
+use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi_and_data_type};
 use arrow::record_batch::RecordBatch;
 use arrow::util::test_util::arrow_test_data;
 use arrow_integration_test::*;
 use std::collections::HashMap;
-use std::ffi::{c_char, c_int, CStr, CString};
+use std::ffi::{CStr, CString, c_char, c_int};
 use std::fs::File;
 use std::io::BufReader;
 use std::iter::zip;
@@ -207,8 +207,7 @@ fn cdata_integration_import_schema_and_compare_to_json(
     // compare schemas
     if canonicalize_schema(&json_schema) != canonicalize_schema(&imported_schema) {
         return Err(ArrowError::ComputeError(format!(
-            "Schemas do not match.\n- JSON: {:?}\n- Imported: {:?}",
-            json_schema, imported_schema
+            "Schemas do not match.\n- JSON: {json_schema:?}\n- Imported: {imported_schema:?}",
         )));
     }
     Ok(())
@@ -253,7 +252,7 @@ fn cdata_integration_import_batch_and_compare_to_json(
 fn result_to_c_error<T, E: std::fmt::Display>(result: &std::result::Result<T, E>) -> *mut c_char {
     match result {
         Ok(_) => ptr::null_mut(),
-        Err(e) => CString::new(format!("{}", e)).unwrap().into_raw(),
+        Err(e) => CString::new(format!("{e}")).unwrap().into_raw(),
     }
 }
 
@@ -262,7 +261,7 @@ fn result_to_c_error<T, E: std::fmt::Display>(result: &std::result::Result<T, E>
 /// # Safety
 ///
 /// The pointer is assumed to have been obtained using CString::into_raw.
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut c_char) {
     if !c_error.is_null() {
         drop(unsafe { CString::from_raw(c_error) });
@@ -270,7 +269,7 @@ pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut c_char) {
 }
 
 /// A C-ABI for exporting an Arrow schema from a JSON file
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json(
     c_json_name: *const c_char,
     out: *mut FFI_ArrowSchema,
@@ -280,7 +279,7 @@ pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json(
 }
 
 /// A C-ABI to compare an Arrow schema against a JSON file
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json(
     c_json_name: *const c_char,
     c_schema: *mut FFI_ArrowSchema,
@@ -290,7 +289,7 @@ pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json(
 }
 
 /// A C-ABI for exporting a RecordBatch from a JSON file
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json(
     c_json_name: *const c_char,
     batch_num: c_int,
@@ -301,7 +300,7 @@ pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json(
 }
 
 /// A C-ABI to compare a RecordBatch against a JSON file
-#[no_mangle]
+#[unsafe(no_mangle)]
 pub extern "C" fn arrow_rs_cdata_integration_import_batch_and_compare_to_json(
     c_json_name: *const c_char,
     batch_num: c_int,
diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml
index a1f826ef7d10..943852ffdec9 100644
--- a/arrow-ipc/Cargo.toml
+++ b/arrow-ipc/Cargo.toml
@@ -40,8 +40,9 @@ arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
+arrow-select = { workspace = true}
 flatbuffers = { version = "25.2.10", default-features = false }
-lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true }
+lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true }
 zstd = { version = "0.13.0", default-features = false, optional = true }
 
 [features]
@@ -49,7 +50,7 @@ default = []
 lz4 = ["lz4_flex"]
 
 [dev-dependencies]
-criterion = "0.5.1"
+criterion = { workspace = true }
 tempfile = "3.3"
 tokio = "1.43.0"
 # used in benches
diff --git a/arrow-ipc/benches/ipc_reader.rs b/arrow-ipc/benches/ipc_reader.rs
index ab77449eeb7d..ef1de88d328d 100644
--- a/arrow-ipc/benches/ipc_reader.rs
+++ b/arrow-ipc/benches/ipc_reader.rs
@@ -16,14 +16,14 @@
 // under the License.
 
 use arrow_array::builder::{Date32Builder, Decimal128Builder, Int32Builder};
-use arrow_array::{builder::StringBuilder, RecordBatch};
+use arrow_array::{RecordBatch, builder::StringBuilder};
 use arrow_buffer::Buffer;
 use arrow_ipc::convert::fb_to_schema;
-use arrow_ipc::reader::{read_footer_length, FileDecoder, FileReader, StreamReader};
+use arrow_ipc::reader::{FileDecoder, FileReader, StreamReader, read_footer_length};
 use arrow_ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter};
-use arrow_ipc::{root_as_footer, Block, CompressionType};
+use arrow_ipc::{Block, CompressionType, root_as_footer};
 use arrow_schema::{DataType, Field, Schema};
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use std::io::{Cursor, Write};
 use std::sync::Arc;
 use tempfile::tempdir;
@@ -240,7 +240,7 @@ impl IPCBufferDecoder {
     }
 
     unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self {
-        self.decoder = self.decoder.with_skip_validation(skip_validation);
+        self.decoder = unsafe { self.decoder.with_skip_validation(skip_validation) };
         self
     }
 
diff --git a/arrow-ipc/benches/ipc_writer.rs b/arrow-ipc/benches/ipc_writer.rs
index 6b4d184b4556..eda7e3c58fe0 100644
--- a/arrow-ipc/benches/ipc_writer.rs
+++ b/arrow-ipc/benches/ipc_writer.rs
@@ -16,11 +16,11 @@
 // under the License.
 
 use arrow_array::builder::{Date32Builder, Decimal128Builder, Int32Builder};
-use arrow_array::{builder::StringBuilder, RecordBatch};
-use arrow_ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter};
+use arrow_array::{RecordBatch, builder::StringBuilder};
 use arrow_ipc::CompressionType;
+use arrow_ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter};
 use arrow_schema::{DataType, Field, Schema};
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh
index b368bd1bc7cc..676ec9933c55 100755
--- a/arrow-ipc/regen.sh
+++ b/arrow-ipc/regen.sh
@@ -88,9 +88,9 @@ use flatbuffers::EndianScalar;
 HEREDOC
 )
 
-SCHEMA_IMPORT="\nuse crate::gen::Schema::*;"
-SPARSE_TENSOR_IMPORT="\nuse crate::gen::SparseTensor::*;"
-TENSOR_IMPORT="\nuse crate::gen::Tensor::*;"
+SCHEMA_IMPORT="\nuse crate::r#gen::Schema::*;"
+SPARSE_TENSOR_IMPORT="\nuse crate::r#gen::SparseTensor::*;"
+TENSOR_IMPORT="\nuse crate::r#gen::Tensor::*;"
 
 # For flatbuffer(1.12.0+), remove: use crate::${name}::\*;
 names=("File" "Message" "Schema" "SparseTensor" "Tensor")
@@ -129,7 +129,7 @@ for f in `ls *.rs`; do
     sed --in-place='' 's/TYPE__/TYPE_/g' $f
 
     # Some files need prefixes
-    if [[ $f == "File.rs" ]]; then 
+    if [[ $f == "File.rs" ]]; then
         # Now prefix the file with the static contents
         echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f
     elif [[ $f == "Message.rs" ]]; then
diff --git a/arrow-ipc/src/compression.rs b/arrow-ipc/src/compression.rs
index 47ea7785cbec..9bbc6e752c12 100644
--- a/arrow-ipc/src/compression.rs
+++ b/arrow-ipc/src/compression.rs
@@ -22,6 +22,41 @@ use arrow_schema::ArrowError;
 const LENGTH_NO_COMPRESSED_DATA: i64 = -1;
 const LENGTH_OF_PREFIX_DATA: i64 = 8;
 
+/// Additional context that may be needed for compression.
+///
+/// When the `zstd` feature is enabled, this holds a `zstd::bulk::Compressor` that can be reused
+/// across compression calls, avoiding the overhead of initialising a new zstd context for every
+/// call. Without the `zstd` feature the struct has no fields.
+pub struct CompressionContext {
+    #[cfg(feature = "zstd")]
+    compressor: zstd::bulk::Compressor<'static>,
+}
+
+// `clippy::derivable_impls` is allowed because this impl is derivable when the `zstd` feature is
+// disabled; with `zstd` enabled we want to be explicit about the compression level used.
+#[allow(clippy::derivable_impls)]
+impl Default for CompressionContext {
+    fn default() -> Self {
+        CompressionContext {
+            // NOTE: `new` is expected to return an error only for an invalid compression level
+            #[cfg(feature = "zstd")]
+            compressor: zstd::bulk::Compressor::new(zstd::DEFAULT_COMPRESSION_LEVEL)
+                .expect("can use default compression level"),
+        }
+    }
+}
+
+impl std::fmt::Debug for CompressionContext {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut ds = f.debug_struct("CompressionContext");
+        // the compressor is shown as a fixed placeholder string, not its internal state
+        #[cfg(feature = "zstd")]
+        ds.field("compressor", &"zstd::bulk::Compressor");
+
+        ds.finish()
+    }
+}
+
 /// Represents compressing a ipc stream using a particular compression algorithm
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum CompressionCodec {
@@ -58,6 +93,7 @@ impl CompressionCodec {
         &self,
         input: &[u8],
         output: &mut Vec<u8>,
+        context: &mut CompressionContext,
     ) -> Result<usize, ArrowError> {
         let uncompressed_data_len = input.len();
         let original_output_len = output.len();
@@ -67,7 +103,7 @@ impl CompressionCodec {
         } else {
             // write compressed data directly into the output buffer
             output.extend_from_slice(&uncompressed_data_len.to_le_bytes());
-            self.compress(input, output)?;
+            self.compress(input, output, context)?;
 
             let compression_len = output.len() - original_output_len;
             if compression_len > uncompressed_data_len {
@@ -115,10 +151,15 @@ impl CompressionCodec {
 
     /// Compress the data in input buffer and write to output buffer
     /// using the specified compression
-    fn compress(&self, input: &[u8], output: &mut Vec<u8>) -> Result<(), ArrowError> {
+    fn compress(
+        &self,
+        input: &[u8],
+        output: &mut Vec<u8>,
+        context: &mut CompressionContext,
+    ) -> Result<(), ArrowError> {
         match self {
             CompressionCodec::Lz4Frame => compress_lz4(input, output),
-            CompressionCodec::Zstd => compress_zstd(input, output),
+            CompressionCodec::Zstd => compress_zstd(input, output, context),
         }
     }
 
@@ -175,17 +216,23 @@ fn decompress_lz4(_input: &[u8], _decompressed_size: usize) -> Result<Vec<u8>, A
 }
 
 #[cfg(feature = "zstd")]
-fn compress_zstd(input: &[u8], output: &mut Vec<u8>) -> Result<(), ArrowError> {
-    use std::io::Write;
-    let mut encoder = zstd::Encoder::new(output, 0)?;
-    encoder.write_all(input)?;
-    encoder.finish()?;
+fn compress_zstd(
+    input: &[u8],
+    output: &mut Vec<u8>,
+    context: &mut CompressionContext,
+) -> Result<(), ArrowError> {
+    let result = context.compressor.compress(input)?;
+    output.extend_from_slice(&result);
     Ok(())
 }
 
 #[cfg(not(feature = "zstd"))]
 #[allow(clippy::ptr_arg)]
-fn compress_zstd(_input: &[u8], _output: &mut Vec<u8>) -> Result<(), ArrowError> {
+fn compress_zstd(
+    _input: &[u8],
+    _output: &mut Vec<u8>,
+    _context: &mut CompressionContext,
+) -> Result<(), ArrowError> {
     Err(ArrowError::InvalidArgumentError(
         "zstd IPC compression requires the zstd feature".to_string(),
     ))
@@ -227,7 +274,9 @@ mod tests {
         let input_bytes = b"hello lz4";
         let codec = super::CompressionCodec::Lz4Frame;
         let mut output_bytes: Vec<u8> = Vec::new();
-        codec.compress(input_bytes, &mut output_bytes).unwrap();
+        codec
+            .compress(input_bytes, &mut output_bytes, &mut Default::default())
+            .unwrap();
         let result = codec
             .decompress(output_bytes.as_slice(), input_bytes.len())
             .unwrap();
@@ -240,7 +289,9 @@ mod tests {
         let input_bytes = b"hello zstd";
         let codec = super::CompressionCodec::Zstd;
         let mut output_bytes: Vec<u8> = Vec::new();
-        codec.compress(input_bytes, &mut output_bytes).unwrap();
+        codec
+            .compress(input_bytes, &mut output_bytes, &mut Default::default())
+            .unwrap();
         let result = codec
             .decompress(output_bytes.as_slice(), input_bytes.len())
             .unwrap();
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index 9c6c3831067c..16e61deadb0f 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -19,6 +19,7 @@
 
 use arrow_buffer::Buffer;
 use arrow_schema::*;
+use core::panic;
 use flatbuffers::{
     FlatBufferBuilder, ForwardsUOffset, UnionWIPOffset, Vector, Verifiable, Verifier,
     VerifierOptions, WIPOffset,
@@ -28,7 +29,7 @@ use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
 
 use crate::writer::DictionaryTracker;
-use crate::{KeyValue, Message, CONTINUATION_MARKER};
+use crate::{CONTINUATION_MARKER, KeyValue, Message};
 use DataType::*;
 
 /// Low level Arrow [Schema] to IPC bytes converter
@@ -127,12 +128,6 @@ impl<'a> IpcSchemaEncoder<'a> {
     }
 }
 
-/// Serialize a schema in IPC format
-#[deprecated(since = "54.0.0", note = "Use `IpcSchemaConverter`.")]
-pub fn schema_to_fb(schema: &Schema) -> FlatBufferBuilder<'_> {
-    IpcSchemaEncoder::new().schema_to_fb(schema)
-}
-
 /// Push a key-value metadata into a FlatBufferBuilder and return [WIPOffset]
 pub fn metadata_to_fb<'a>(
     fbb: &mut FlatBufferBuilder<'a>,
@@ -170,7 +165,7 @@ impl From<crate::Field<'_>> for Field {
         let arrow_field = if let Some(dictionary) = field.dictionary() {
             #[allow(deprecated)]
             Field::new_dict(
-                field.name().unwrap(),
+                field.name().unwrap_or_default(),
                 get_data_type(field, true),
                 field.nullable(),
                 dictionary.id(),
@@ -178,7 +173,7 @@ impl From<crate::Field<'_>> for Field {
             )
         } else {
             Field::new(
-                field.name().unwrap(),
+                field.name().unwrap_or_default(),
                 get_data_type(field, true),
                 field.nullable(),
             )
@@ -284,9 +279,9 @@ pub fn try_schema_from_ipc_buffer(buffer: &[u8]) -> Result<Schema, ArrowError> {
 
     if buffer.len() < len as usize {
         let actual_len = buffer.len();
-        return Err(ArrowError::ParseError(
-            format!("The buffer length ({actual_len}) is less than the encapsulated message's reported length ({len})")
-        ));
+        return Err(ArrowError::ParseError(format!(
+            "The buffer length ({actual_len}) is less than the encapsulated message's reported length ({len})"
+        )));
     }
 
     let msg = crate::root_as_message(buffer)
@@ -430,6 +425,20 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat
             }
             DataType::LargeList(Arc::new(children.get(0).into()))
         }
+        crate::Type::ListView => {
+            let children = field.children().unwrap();
+            if children.len() != 1 {
+                panic!("expect a listview to have one child")
+            }
+            DataType::ListView(Arc::new(children.get(0).into()))
+        }
+        crate::Type::LargeListView => {
+            let children = field.children().unwrap();
+            if children.len() != 1 {
+                panic!("expect a large listview to have one child")
+            }
+            DataType::LargeListView(Arc::new(children.get(0).into()))
+        }
         crate::Type::FixedSizeList => {
             let children = field.children().unwrap();
             if children.len() != 1 {
@@ -471,6 +480,8 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat
             let precision: u8 = fsb.precision().try_into().unwrap();
             let scale: i8 = fsb.scale().try_into().unwrap();
             match bit_width {
+                32 => DataType::Decimal32(precision, scale),
+                64 => DataType::Decimal64(precision, scale),
                 128 => DataType::Decimal128(precision, scale),
                 256 => DataType::Decimal256(precision, scale),
                 _ => panic!("Unexpected decimal bit width {bit_width}"),
@@ -493,8 +504,9 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat
             };
 
             let fields = match union.typeIds() {
-                None => UnionFields::new(0_i8..fields.len() as i8, fields),
-                Some(ids) => UnionFields::new(ids.iter().map(|i| i as i8), fields),
+                None => UnionFields::from_fields(fields),
+                Some(ids) => UnionFields::try_new(ids.iter().map(|i| i as i8), fields)
+                    .expect("invalid union field"),
             };
 
             DataType::Union(fields, union_mode)
@@ -528,24 +540,13 @@ pub(crate) fn build_field<'a>(
         match dictionary_tracker {
             Some(tracker) => Some(get_fb_dictionary(
                 index_type,
-                #[allow(deprecated)]
-                tracker.set_dict_id(field),
-                field
-                    .dict_is_ordered()
-                    .expect("All Dictionary types have `dict_is_ordered`"),
-                fbb,
-            )),
-            None => Some(get_fb_dictionary(
-                index_type,
-                #[allow(deprecated)]
-                field
-                    .dict_id()
-                    .expect("Dictionary type must have a dictionary id"),
+                tracker.next_dict_id(),
                 field
                     .dict_is_ordered()
                     .expect("All Dictionary types have `dict_is_ordered`"),
                 fbb,
             )),
+            None => panic!("IPC must no longer be used without dictionary tracker"),
         }
     } else {
         None
@@ -774,7 +775,7 @@ pub(crate) fn get_fb_field_type<'a>(
                 children: Some(fbb.create_vector(&empty_fields[..])),
             }
         }
-        List(ref list_type) => {
+        List(list_type) => {
             let child = build_field(fbb, dictionary_tracker, list_type);
             FBFieldType {
                 type_type: crate::Type::List,
@@ -782,8 +783,25 @@ pub(crate) fn get_fb_field_type<'a>(
                 children: Some(fbb.create_vector(&[child])),
             }
         }
-        ListView(_) | LargeListView(_) => unimplemented!("ListView/LargeListView not implemented"),
-        LargeList(ref list_type) => {
+        ListView(list_type) => {
+            let child = build_field(fbb, dictionary_tracker, list_type);
+            FBFieldType {
+                type_type: crate::Type::ListView,
+                type_: crate::ListViewBuilder::new(fbb).finish().as_union_value(),
+                children: Some(fbb.create_vector(&[child])),
+            }
+        }
+        LargeListView(list_type) => {
+            let child = build_field(fbb, dictionary_tracker, list_type);
+            FBFieldType {
+                type_type: crate::Type::LargeListView,
+                type_: crate::LargeListViewBuilder::new(fbb)
+                    .finish()
+                    .as_union_value(),
+                children: Some(fbb.create_vector(&[child])),
+            }
+        }
+        LargeList(list_type) => {
             let child = build_field(fbb, dictionary_tracker, list_type);
             FBFieldType {
                 type_type: crate::Type::LargeList,
@@ -791,7 +809,7 @@ pub(crate) fn get_fb_field_type<'a>(
                 children: Some(fbb.create_vector(&[child])),
             }
         }
-        FixedSizeList(ref list_type, len) => {
+        FixedSizeList(list_type, len) => {
             let child = build_field(fbb, dictionary_tracker, list_type);
             let mut builder = crate::FixedSizeListBuilder::new(fbb);
             builder.add_listSize(*len);
@@ -841,6 +859,28 @@ pub(crate) fn get_fb_field_type<'a>(
             // type in the DictionaryEncoding metadata in the parent field
             get_fb_field_type(value_type, dictionary_tracker, fbb)
         }
+        Decimal32(precision, scale) => {
+            let mut builder = crate::DecimalBuilder::new(fbb);
+            builder.add_precision(*precision as i32);
+            builder.add_scale(*scale as i32);
+            builder.add_bitWidth(32);
+            FBFieldType {
+                type_type: crate::Type::Decimal,
+                type_: builder.finish().as_union_value(),
+                children: Some(fbb.create_vector(&empty_fields[..])),
+            }
+        }
+        Decimal64(precision, scale) => {
+            let mut builder = crate::DecimalBuilder::new(fbb);
+            builder.add_precision(*precision as i32);
+            builder.add_scale(*scale as i32);
+            builder.add_bitWidth(64);
+            FBFieldType {
+                type_type: crate::Type::Decimal,
+                type_: builder.finish().as_union_value(),
+                children: Some(fbb.create_vector(&empty_fields[..])),
+            }
+        }
         Decimal128(precision, scale) => {
             let mut builder = crate::DecimalBuilder::new(fbb);
             builder.add_precision(*precision as i32);
@@ -1143,13 +1183,14 @@ mod tests {
                 Field::new(
                     "union<int32, utf8>",
                     DataType::Union(
-                        UnionFields::new(
+                        UnionFields::try_new(
                             vec![2, 3], // non-default type ids
                             vec![
                                 Field::new("int32", DataType::Int32, true),
                                 Field::new("utf8", DataType::Utf8, true),
                             ],
-                        ),
+                        )
+                        .unwrap(),
                         UnionMode::Dense,
                     ),
                     true,
diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs
index 427cf75de096..ab2273614759 100644
--- a/arrow-ipc/src/gen/File.rs
+++ b/arrow-ipc/src/gen/File.rs
@@ -18,7 +18,7 @@
 #![allow(dead_code)]
 #![allow(unused_imports)]
 
-use crate::gen::Schema::*;
+use crate::r#gen::Schema::*;
 use flatbuffers::EndianScalar;
 use std::{cmp::Ordering, mem};
 // automatically generated by the FlatBuffers compiler, do not modify
@@ -49,21 +49,26 @@ impl<'a> flatbuffers::Follow<'a> for Block {
     type Inner = &'a Block;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a Block>::follow(buf, loc)
+        unsafe { <&'a Block>::follow(buf, loc) }
     }
 }
 impl<'a> flatbuffers::Follow<'a> for &'a Block {
     type Inner = &'a Block;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<Block>(buf, loc)
+        unsafe { flatbuffers::follow_cast_ref::<Block>(buf, loc) }
     }
 }
 impl<'b> flatbuffers::Push for Block {
     type Output = Block;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size());
+        let src = unsafe {
+            ::core::slice::from_raw_parts(
+                self as *const Block as *const u8,
+                <Self as flatbuffers::Push>::size(),
+            )
+        };
         dst.copy_from_slice(src);
     }
     #[inline]
@@ -200,7 +205,7 @@ impl<'a> flatbuffers::Follow<'a> for Footer<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -470,14 +475,14 @@ pub fn size_prefixed_root_as_footer_with_opts<'b, 'o>(
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid `Footer`.
 pub unsafe fn root_as_footer_unchecked(buf: &[u8]) -> Footer {
-    flatbuffers::root_unchecked::<Footer>(buf)
+    unsafe { flatbuffers::root_unchecked::<Footer>(buf) }
 }
 #[inline]
 /// Assumes, without verification, that a buffer of bytes contains a size prefixed Footer and returns it.
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid size prefixed `Footer`.
 pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer {
-    flatbuffers::size_prefixed_root_unchecked::<Footer>(buf)
+    unsafe { flatbuffers::size_prefixed_root_unchecked::<Footer>(buf) }
 }
 #[inline]
 pub fn finish_footer_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
diff --git a/arrow-ipc/src/gen/Message.rs b/arrow-ipc/src/gen/Message.rs
index 928b41cc0699..082461c900e1 100644
--- a/arrow-ipc/src/gen/Message.rs
+++ b/arrow-ipc/src/gen/Message.rs
@@ -18,9 +18,9 @@
 #![allow(dead_code)]
 #![allow(unused_imports)]
 
-use crate::gen::Schema::*;
-use crate::gen::SparseTensor::*;
-use crate::gen::Tensor::*;
+use crate::r#gen::Schema::*;
+use crate::r#gen::SparseTensor::*;
+use crate::r#gen::Tensor::*;
 use flatbuffers::EndianScalar;
 use std::{cmp::Ordering, mem};
 // automatically generated by the FlatBuffers compiler, do not modify
@@ -78,7 +78,7 @@ impl<'a> flatbuffers::Follow<'a> for CompressionType {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i8>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i8>(buf, loc) };
         Self(b)
     }
 }
@@ -87,7 +87,9 @@ impl flatbuffers::Push for CompressionType {
     type Output = CompressionType;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i8>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i8>(dst, self.0);
+        }
     }
 }
 
@@ -176,7 +178,7 @@ impl<'a> flatbuffers::Follow<'a> for BodyCompressionMethod {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i8>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i8>(buf, loc) };
         Self(b)
     }
 }
@@ -185,7 +187,9 @@ impl flatbuffers::Push for BodyCompressionMethod {
     type Output = BodyCompressionMethod;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i8>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i8>(dst, self.0);
+        }
     }
 }
 
@@ -295,7 +299,7 @@ impl<'a> flatbuffers::Follow<'a> for MessageHeader {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<u8>(buf, loc) };
         Self(b)
     }
 }
@@ -304,7 +308,9 @@ impl flatbuffers::Push for MessageHeader {
     type Output = MessageHeader;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        }
     }
 }
 
@@ -368,22 +374,26 @@ impl<'a> flatbuffers::Follow<'a> for FieldNode {
     type Inner = &'a FieldNode;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a FieldNode>::follow(buf, loc)
+        unsafe { <&'a FieldNode>::follow(buf, loc) }
     }
 }
 impl<'a> flatbuffers::Follow<'a> for &'a FieldNode {
     type Inner = &'a FieldNode;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<FieldNode>(buf, loc)
+        unsafe { flatbuffers::follow_cast_ref::<FieldNode>(buf, loc) }
     }
 }
 impl<'b> flatbuffers::Push for FieldNode {
     type Output = FieldNode;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        let src =
-            ::core::slice::from_raw_parts(self as *const FieldNode as *const u8, Self::size());
+        let src = unsafe {
+            ::core::slice::from_raw_parts(
+                self as *const FieldNode as *const u8,
+                <Self as flatbuffers::Push>::size(),
+            )
+        };
         dst.copy_from_slice(src);
     }
     #[inline]
@@ -491,7 +501,7 @@ impl<'a> flatbuffers::Follow<'a> for BodyCompression<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -634,7 +644,7 @@ impl<'a> flatbuffers::Follow<'a> for RecordBatch<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -901,7 +911,7 @@ impl<'a> flatbuffers::Follow<'a> for DictionaryBatch<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1057,7 +1067,7 @@ impl<'a> flatbuffers::Follow<'a> for Message<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1474,14 +1484,14 @@ pub fn size_prefixed_root_as_message_with_opts<'b, 'o>(
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid `Message`.
 pub unsafe fn root_as_message_unchecked(buf: &[u8]) -> Message {
-    flatbuffers::root_unchecked::<Message>(buf)
+    unsafe { flatbuffers::root_unchecked::<Message>(buf) }
 }
 #[inline]
 /// Assumes, without verification, that a buffer of bytes contains a size prefixed Message and returns it.
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid size prefixed `Message`.
 pub unsafe fn size_prefixed_root_as_message_unchecked(buf: &[u8]) -> Message {
-    flatbuffers::size_prefixed_root_unchecked::<Message>(buf)
+    unsafe { flatbuffers::size_prefixed_root_unchecked::<Message>(buf) }
 }
 #[inline]
 pub fn finish_message_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
diff --git a/arrow-ipc/src/gen/Schema.rs b/arrow-ipc/src/gen/Schema.rs
index 223e5a2f6c6d..0472455ce0b7 100644
--- a/arrow-ipc/src/gen/Schema.rs
+++ b/arrow-ipc/src/gen/Schema.rs
@@ -97,7 +97,7 @@ impl<'a> flatbuffers::Follow<'a> for MetadataVersion {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -106,7 +106,9 @@ impl flatbuffers::Push for MetadataVersion {
     type Output = MetadataVersion;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -219,7 +221,7 @@ impl<'a> flatbuffers::Follow<'a> for Feature {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i64>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i64>(buf, loc) };
         Self(b)
     }
 }
@@ -228,7 +230,9 @@ impl flatbuffers::Push for Feature {
     type Output = Feature;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i64>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i64>(dst, self.0);
+        }
     }
 }
 
@@ -308,7 +312,7 @@ impl<'a> flatbuffers::Follow<'a> for UnionMode {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -317,7 +321,9 @@ impl flatbuffers::Push for UnionMode {
     type Output = UnionMode;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -400,7 +406,7 @@ impl<'a> flatbuffers::Follow<'a> for Precision {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -409,7 +415,9 @@ impl flatbuffers::Push for Precision {
     type Output = Precision;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -489,7 +497,7 @@ impl<'a> flatbuffers::Follow<'a> for DateUnit {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -498,7 +506,9 @@ impl flatbuffers::Push for DateUnit {
     type Output = DateUnit;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -592,7 +602,7 @@ impl<'a> flatbuffers::Follow<'a> for TimeUnit {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -601,7 +611,9 @@ impl flatbuffers::Push for TimeUnit {
     type Output = TimeUnit;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -688,7 +700,7 @@ impl<'a> flatbuffers::Follow<'a> for IntervalUnit {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -697,7 +709,9 @@ impl flatbuffers::Push for IntervalUnit {
     type Output = IntervalUnit;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -886,7 +900,7 @@ impl<'a> flatbuffers::Follow<'a> for Type {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<u8>(buf, loc) };
         Self(b)
     }
 }
@@ -895,7 +909,9 @@ impl flatbuffers::Push for Type {
     type Output = Type;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        }
     }
 }
 
@@ -980,7 +996,7 @@ impl<'a> flatbuffers::Follow<'a> for DictionaryKind {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -989,7 +1005,9 @@ impl flatbuffers::Push for DictionaryKind {
     type Output = DictionaryKind;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -1071,7 +1089,7 @@ impl<'a> flatbuffers::Follow<'a> for Endianness {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -1080,7 +1098,9 @@ impl flatbuffers::Push for Endianness {
     type Output = Endianness;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -1135,21 +1155,26 @@ impl<'a> flatbuffers::Follow<'a> for Buffer {
     type Inner = &'a Buffer;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        <&'a Buffer>::follow(buf, loc)
+        unsafe { <&'a Buffer>::follow(buf, loc) }
     }
 }
 impl<'a> flatbuffers::Follow<'a> for &'a Buffer {
     type Inner = &'a Buffer;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        flatbuffers::follow_cast_ref::<Buffer>(buf, loc)
+        unsafe { flatbuffers::follow_cast_ref::<Buffer>(buf, loc) }
     }
 }
 impl<'b> flatbuffers::Push for Buffer {
     type Output = Buffer;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        let src = ::core::slice::from_raw_parts(self as *const Buffer as *const u8, Self::size());
+        let src = unsafe {
+            ::core::slice::from_raw_parts(
+                self as *const Buffer as *const u8,
+                <Self as flatbuffers::Push>::size(),
+            )
+        };
         dst.copy_from_slice(src);
     }
     #[inline]
@@ -1257,7 +1282,7 @@ impl<'a> flatbuffers::Follow<'a> for Null<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1337,7 +1362,7 @@ impl<'a> flatbuffers::Follow<'a> for Struct_<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1414,7 +1439,7 @@ impl<'a> flatbuffers::Follow<'a> for List<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1493,7 +1518,7 @@ impl<'a> flatbuffers::Follow<'a> for LargeList<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1573,7 +1598,7 @@ impl<'a> flatbuffers::Follow<'a> for ListView<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1652,7 +1677,7 @@ impl<'a> flatbuffers::Follow<'a> for LargeListView<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1731,7 +1756,7 @@ impl<'a> flatbuffers::Follow<'a> for FixedSizeList<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1861,7 +1886,7 @@ impl<'a> flatbuffers::Follow<'a> for Map<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1968,7 +1993,7 @@ impl<'a> flatbuffers::Follow<'a> for Union<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2102,7 +2127,7 @@ impl<'a> flatbuffers::Follow<'a> for Int<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2223,7 +2248,7 @@ impl<'a> flatbuffers::Follow<'a> for FloatingPoint<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2330,7 +2355,7 @@ impl<'a> flatbuffers::Follow<'a> for Utf8<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2408,7 +2433,7 @@ impl<'a> flatbuffers::Follow<'a> for Binary<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2487,7 +2512,7 @@ impl<'a> flatbuffers::Follow<'a> for LargeUtf8<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2566,7 +2591,7 @@ impl<'a> flatbuffers::Follow<'a> for LargeBinary<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2652,7 +2677,7 @@ impl<'a> flatbuffers::Follow<'a> for Utf8View<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2736,7 +2761,7 @@ impl<'a> flatbuffers::Follow<'a> for BinaryView<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2815,7 +2840,7 @@ impl<'a> flatbuffers::Follow<'a> for FixedSizeBinary<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2920,7 +2945,7 @@ impl<'a> flatbuffers::Follow<'a> for Bool<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3002,7 +3027,7 @@ impl<'a> flatbuffers::Follow<'a> for RunEndEncoded<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3085,7 +3110,7 @@ impl<'a> flatbuffers::Follow<'a> for Decimal<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3238,7 +3263,7 @@ impl<'a> flatbuffers::Follow<'a> for Date<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3356,7 +3381,7 @@ impl<'a> flatbuffers::Follow<'a> for Time<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3582,7 +3607,7 @@ impl<'a> flatbuffers::Follow<'a> for Timestamp<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3723,7 +3748,7 @@ impl<'a> flatbuffers::Follow<'a> for Interval<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3827,7 +3852,7 @@ impl<'a> flatbuffers::Follow<'a> for Duration<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -3934,7 +3959,7 @@ impl<'a> flatbuffers::Follow<'a> for KeyValue<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -4062,7 +4087,7 @@ impl<'a> flatbuffers::Follow<'a> for DictionaryEncoding<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -4261,7 +4286,7 @@ impl<'a> flatbuffers::Follow<'a> for Field<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -5326,7 +5351,7 @@ impl<'a> flatbuffers::Follow<'a> for Schema<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -5578,14 +5603,14 @@ pub fn size_prefixed_root_as_schema_with_opts<'b, 'o>(
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid `Schema`.
 pub unsafe fn root_as_schema_unchecked(buf: &[u8]) -> Schema {
-    flatbuffers::root_unchecked::<Schema>(buf)
+    unsafe { flatbuffers::root_unchecked::<Schema>(buf) }
 }
 #[inline]
 /// Assumes, without verification, that a buffer of bytes contains a size prefixed Schema and returns it.
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid size prefixed `Schema`.
 pub unsafe fn size_prefixed_root_as_schema_unchecked(buf: &[u8]) -> Schema {
-    flatbuffers::size_prefixed_root_unchecked::<Schema>(buf)
+    unsafe { flatbuffers::size_prefixed_root_unchecked::<Schema>(buf) }
 }
 #[inline]
 pub fn finish_schema_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
diff --git a/arrow-ipc/src/gen/SparseTensor.rs b/arrow-ipc/src/gen/SparseTensor.rs
index 21cb7e116c24..1cadd1d3e0cc 100644
--- a/arrow-ipc/src/gen/SparseTensor.rs
+++ b/arrow-ipc/src/gen/SparseTensor.rs
@@ -18,8 +18,8 @@
 #![allow(dead_code)]
 #![allow(unused_imports)]
 
-use crate::gen::Schema::*;
-use crate::gen::Tensor::*;
+use crate::r#gen::Schema::*;
+use crate::r#gen::Tensor::*;
 use flatbuffers::EndianScalar;
 use std::{cmp::Ordering, mem};
 // automatically generated by the FlatBuffers compiler, do not modify
@@ -79,7 +79,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseMatrixCompressedAxis {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<i16>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<i16>(buf, loc) };
         Self(b)
     }
 }
@@ -88,7 +88,9 @@ impl flatbuffers::Push for SparseMatrixCompressedAxis {
     type Output = SparseMatrixCompressedAxis;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<i16>(dst, self.0);
+        }
     }
 }
 
@@ -182,7 +184,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseTensorIndex {
     type Inner = Self;
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
-        let b = flatbuffers::read_scalar_at::<u8>(buf, loc);
+        let b = unsafe { flatbuffers::read_scalar_at::<u8>(buf, loc) };
         Self(b)
     }
 }
@@ -191,7 +193,9 @@ impl flatbuffers::Push for SparseTensorIndex {
     type Output = SparseTensorIndex;
     #[inline]
     unsafe fn push(&self, dst: &mut [u8], _written_len: usize) {
-        flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        unsafe {
+            flatbuffers::emplace_scalar::<u8>(dst, self.0);
+        }
     }
 }
 
@@ -267,7 +271,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseTensorIndexCOO<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -479,7 +483,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseMatrixIndexCSX<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -750,7 +754,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseTensorIndexCSF<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1078,7 +1082,7 @@ impl<'a> flatbuffers::Follow<'a> for SparseTensor<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -2263,14 +2267,14 @@ pub fn size_prefixed_root_as_sparse_tensor_with_opts<'b, 'o>(
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid `SparseTensor`.
 pub unsafe fn root_as_sparse_tensor_unchecked(buf: &[u8]) -> SparseTensor {
-    flatbuffers::root_unchecked::<SparseTensor>(buf)
+    unsafe { flatbuffers::root_unchecked::<SparseTensor>(buf) }
 }
 #[inline]
 /// Assumes, without verification, that a buffer of bytes contains a size prefixed SparseTensor and returns it.
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid size prefixed `SparseTensor`.
 pub unsafe fn size_prefixed_root_as_sparse_tensor_unchecked(buf: &[u8]) -> SparseTensor {
-    flatbuffers::size_prefixed_root_unchecked::<SparseTensor>(buf)
+    unsafe { flatbuffers::size_prefixed_root_unchecked::<SparseTensor>(buf) }
 }
 #[inline]
 pub fn finish_sparse_tensor_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
diff --git a/arrow-ipc/src/gen/Tensor.rs b/arrow-ipc/src/gen/Tensor.rs
index b332a5d77e96..80be30b68615 100644
--- a/arrow-ipc/src/gen/Tensor.rs
+++ b/arrow-ipc/src/gen/Tensor.rs
@@ -18,7 +18,7 @@
 #![allow(dead_code)]
 #![allow(unused_imports)]
 
-use crate::gen::Schema::*;
+use crate::r#gen::Schema::*;
 use flatbuffers::EndianScalar;
 use std::{cmp::Ordering, mem};
 // automatically generated by the FlatBuffers compiler, do not modify
@@ -40,13 +40,13 @@ impl<'a> flatbuffers::Follow<'a> for TensorDim<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
 
 impl<'a> TensorDim<'a> {
-    pub const VT_SIZE_: flatbuffers::VOffsetT = 4;
+    pub const VT_SIZE: flatbuffers::VOffsetT = 4;
     pub const VT_NAME: flatbuffers::VOffsetT = 6;
 
     #[inline]
@@ -59,7 +59,7 @@ impl<'a> TensorDim<'a> {
         args: &'args TensorDimArgs<'args>,
     ) -> flatbuffers::WIPOffset<TensorDim<'bldr>> {
         let mut builder = TensorDimBuilder::new(_fbb);
-        builder.add_size_(args.size_);
+        builder.add_size(args.size);
         if let Some(x) = args.name {
             builder.add_name(x);
         }
@@ -68,11 +68,11 @@ impl<'a> TensorDim<'a> {
 
     /// Length of dimension
     #[inline]
-    pub fn size_(&self) -> i64 {
+    pub fn size(&self) -> i64 {
         // Safety:
         // Created from valid Table for this object
         // which contains a valid value in this slot
-        unsafe { self._tab.get::<i64>(TensorDim::VT_SIZE_, Some(0)).unwrap() }
+        unsafe { self._tab.get::<i64>(TensorDim::VT_SIZE, Some(0)).unwrap() }
     }
     /// Name of the dimension, optional
     #[inline]
@@ -95,21 +95,21 @@ impl flatbuffers::Verifiable for TensorDim<'_> {
     ) -> Result<(), flatbuffers::InvalidFlatbuffer> {
         use flatbuffers::Verifiable;
         v.visit_table(pos)?
-            .visit_field::<i64>("size_", Self::VT_SIZE_, false)?
+            .visit_field::<i64>("size", Self::VT_SIZE, false)?
             .visit_field::<flatbuffers::ForwardsUOffset<&str>>("name", Self::VT_NAME, false)?
             .finish();
         Ok(())
     }
 }
 pub struct TensorDimArgs<'a> {
-    pub size_: i64,
+    pub size: i64,
     pub name: Option<flatbuffers::WIPOffset<&'a str>>,
 }
 impl<'a> Default for TensorDimArgs<'a> {
     #[inline]
     fn default() -> Self {
         TensorDimArgs {
-            size_: 0,
+            size: 0,
             name: None,
         }
     }
@@ -121,8 +121,8 @@ pub struct TensorDimBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
 }
 impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TensorDimBuilder<'a, 'b, A> {
     #[inline]
-    pub fn add_size_(&mut self, size_: i64) {
-        self.fbb_.push_slot::<i64>(TensorDim::VT_SIZE_, size_, 0);
+    pub fn add_size(&mut self, size: i64) {
+        self.fbb_.push_slot::<i64>(TensorDim::VT_SIZE, size, 0);
     }
     #[inline]
     pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) {
@@ -147,7 +147,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> TensorDimBuilder<'a, 'b, A> {
 impl core::fmt::Debug for TensorDim<'_> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         let mut ds = f.debug_struct("TensorDim");
-        ds.field("size_", &self.size_());
+        ds.field("size", &self.size());
         ds.field("name", &self.name());
         ds.finish()
     }
@@ -164,7 +164,7 @@ impl<'a> flatbuffers::Follow<'a> for Tensor<'a> {
     #[inline]
     unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
         Self {
-            _tab: flatbuffers::Table::new(buf, loc),
+            _tab: unsafe { flatbuffers::Table::new(buf, loc) },
         }
     }
 }
@@ -1182,14 +1182,14 @@ pub fn size_prefixed_root_as_tensor_with_opts<'b, 'o>(
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid `Tensor`.
 pub unsafe fn root_as_tensor_unchecked(buf: &[u8]) -> Tensor {
-    flatbuffers::root_unchecked::<Tensor>(buf)
+    unsafe { flatbuffers::root_unchecked::<Tensor>(buf) }
 }
 #[inline]
 /// Assumes, without verification, that a buffer of bytes contains a size prefixed Tensor and returns it.
 /// # Safety
 /// Callers must trust the given bytes do indeed contain a valid size prefixed `Tensor`.
 pub unsafe fn size_prefixed_root_as_tensor_unchecked(buf: &[u8]) -> Tensor {
-    flatbuffers::size_prefixed_root_unchecked::<Tensor>(buf)
+    unsafe { flatbuffers::size_prefixed_root_unchecked::<Tensor>(buf) }
 }
 #[inline]
 pub fn finish_tensor_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
diff --git a/arrow-ipc/src/lib.rs b/arrow-ipc/src/lib.rs
index aa10031933c6..d25e23102242 100644
--- a/arrow-ipc/src/lib.rs
+++ b/arrow-ipc/src/lib.rs
@@ -42,7 +42,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 pub mod convert;
 pub mod reader;
@@ -50,6 +50,10 @@ pub mod writer;
 
 mod compression;
 
+#[cfg(test)]
+mod tests;
+
+#[allow(mismatched_lifetime_syntaxes)]
 #[allow(clippy::redundant_closure)]
 #[allow(clippy::needless_lifetimes)]
 #[allow(clippy::extra_unused_lifetimes)]
@@ -57,13 +61,13 @@ mod compression;
 #[allow(clippy::redundant_field_names)]
 #[allow(non_camel_case_types)]
 #[allow(missing_docs)] // Because this is autogenerated
-pub mod gen;
+pub mod r#gen;
 
-pub use self::gen::File::*;
-pub use self::gen::Message::*;
-pub use self::gen::Schema::*;
-pub use self::gen::SparseTensor::*;
-pub use self::gen::Tensor::*;
+pub use self::r#gen::File::*;
+pub use self::r#gen::Message::*;
+pub use self::r#gen::Schema::*;
+pub use self::r#gen::SparseTensor::*;
+pub use self::r#gen::Tensor::*;
 
 const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1'];
 const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 7f9b4b2937a9..4279090dbee7 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -25,9 +25,10 @@
 //! [`Seek`]: std::io::Seek
 
 mod stream;
-
 pub use stream::*;
 
+use arrow_select::concat;
+
 use flatbuffers::{VectorIter, VerifierOptions};
 use std::collections::{HashMap, VecDeque};
 use std::fmt;
@@ -35,12 +36,15 @@ use std::io::{BufReader, Read, Seek, SeekFrom};
 use std::sync::Arc;
 
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, ScalarBuffer};
+use arrow_buffer::{
+    ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, ScalarBuffer,
+};
 use arrow_data::{ArrayData, ArrayDataBuilder, UnsafeFlag};
 use arrow_schema::*;
 
 use crate::compression::CompressionCodec;
-use crate::{Block, FieldNode, Message, MetadataVersion, CONTINUATION_MARKER};
+use crate::r#gen::Message::{self};
+use crate::{Block, CONTINUATION_MARKER, FieldNode, MetadataVersion};
 use DataType::*;
 
 /// Read a buffer based on offset and length
@@ -112,13 +116,23 @@ impl RecordBatchDecoder<'_> {
                 let buffers = [self.next_buffer()?, self.next_buffer()?];
                 self.create_primitive_array(field_node, data_type, &buffers)
             }
-            List(ref list_field) | LargeList(ref list_field) | Map(ref list_field, _) => {
+            List(list_field) | LargeList(list_field) | Map(list_field, _) => {
                 let list_node = self.next_node(field)?;
                 let list_buffers = [self.next_buffer()?, self.next_buffer()?];
                 let values = self.create_array(list_field, variadic_counts)?;
                 self.create_list_array(list_node, data_type, &list_buffers, values)
             }
-            FixedSizeList(ref list_field, _) => {
+            ListView(list_field) | LargeListView(list_field) => {
+                let list_node = self.next_node(field)?;
+                let list_buffers = [
+                    self.next_buffer()?, // null buffer
+                    self.next_buffer()?, // offsets
+                    self.next_buffer()?, // sizes
+                ];
+                let values = self.create_array(list_field, variadic_counts)?;
+                self.create_list_view_array(list_node, data_type, &list_buffers, values)
+            }
+            FixedSizeList(list_field, _) => {
                 let list_node = self.next_node(field)?;
                 let list_buffers = [self.next_buffer()?];
                 let values = self.create_array(list_field, variadic_counts)?;
@@ -148,7 +162,9 @@ impl RecordBatchDecoder<'_> {
                     .len(run_array_length)
                     .offset(0)
                     .add_child_data(run_ends.into_data())
-                    .add_child_data(values.into_data());
+                    .add_child_data(values.into_data())
+                    .null_count(run_node.null_count() as usize);
+
                 self.create_array_from_builder(builder)
             }
             // Create dictionary array from RecordBatch
@@ -247,7 +263,7 @@ impl RecordBatchDecoder<'_> {
     ) -> Result<ArrayRef, ArrowError> {
         let length = field_node.length() as usize;
         let null_buffer = (field_node.null_count() > 0).then_some(buffers[0].clone());
-        let builder = match data_type {
+        let mut builder = match data_type {
             Utf8 | Binary | LargeBinary | LargeUtf8 => {
                 // read 3 buffers: null buffer (optional), offsets buffer and data buffer
                 ArrayData::builder(data_type.clone())
@@ -269,6 +285,8 @@ impl RecordBatchDecoder<'_> {
             t => unreachable!("Data type {:?} either unsupported or not primitive", t),
         };
 
+        builder = builder.null_count(field_node.null_count() as usize);
+
         self.create_array_from_builder(builder)
     }
 
@@ -294,7 +312,7 @@ impl RecordBatchDecoder<'_> {
         let null_buffer = (field_node.null_count() > 0).then_some(buffers[0].clone());
         let length = field_node.length() as usize;
         let child_data = child_array.into_data();
-        let builder = match data_type {
+        let mut builder = match data_type {
             List(_) | LargeList(_) | Map(_, _) => ArrayData::builder(data_type.clone())
                 .len(length)
                 .add_buffer(buffers[1].clone())
@@ -309,9 +327,35 @@ impl RecordBatchDecoder<'_> {
             _ => unreachable!("Cannot create list or map array from {:?}", data_type),
         };
 
+        builder = builder.null_count(field_node.null_count() as usize);
+
         self.create_array_from_builder(builder)
     }
 
+    fn create_list_view_array(
+        &self,
+        field_node: &FieldNode,
+        data_type: &DataType,
+        buffers: &[Buffer],
+        child_array: ArrayRef,
+    ) -> Result<ArrayRef, ArrowError> {
+        assert!(matches!(data_type, ListView(_) | LargeListView(_)));
+
+        let null_buffer = (field_node.null_count() > 0).then_some(buffers[0].clone());
+        let length = field_node.length() as usize;
+        let child_data = child_array.into_data();
+
+        self.create_array_from_builder(
+            ArrayData::builder(data_type.clone())
+                .len(length)
+                .add_buffer(buffers[1].clone()) // offsets
+                .add_buffer(buffers[2].clone()) // sizes
+                .add_child_data(child_data)
+                .null_bit_buffer(null_buffer)
+                .null_count(field_node.null_count() as usize),
+        )
+    }
+
     fn create_struct_array(
         &self,
         struct_node: &FieldNode,
@@ -321,15 +365,38 @@ impl RecordBatchDecoder<'_> {
     ) -> Result<ArrayRef, ArrowError> {
         let null_count = struct_node.null_count() as usize;
         let len = struct_node.length() as usize;
+        let skip_validation = self.skip_validation.get();
+
+        let nulls = if null_count > 0 {
+            let validity_buffer = BooleanBuffer::new(null_buffer, 0, len);
+            let null_buffer = if skip_validation {
+                // safety: flag can only be set via unsafe code
+                unsafe { NullBuffer::new_unchecked(validity_buffer, null_count) }
+            } else {
+                let null_buffer = NullBuffer::new(validity_buffer);
+
+                if null_buffer.null_count() != null_count {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "null_count value ({}) doesn't match actual number of nulls in array ({})",
+                        null_count,
+                        null_buffer.null_count()
+                    )));
+                }
 
-        let nulls = (null_count > 0).then(|| BooleanBuffer::new(null_buffer, 0, len).into());
+                null_buffer
+            };
+
+            Some(null_buffer)
+        } else {
+            None
+        };
         if struct_arrays.is_empty() {
             // `StructArray::from` can't infer the correct row count
             // if we have zero fields
             return Ok(Arc::new(StructArray::new_empty_fields(len, nulls)));
         }
 
-        let struct_array = if self.skip_validation.get() {
+        let struct_array = if skip_validation {
             // safety: flag can only be set via unsafe code
             unsafe { StructArray::new_unchecked(struct_fields.clone(), struct_arrays, nulls) }
         } else {
@@ -354,7 +421,8 @@ impl RecordBatchDecoder<'_> {
                 .len(field_node.length() as usize)
                 .add_buffer(buffers[1].clone())
                 .add_child_data(value_array.into_data())
-                .null_bit_buffer(null_buffer);
+                .null_bit_buffer(null_buffer)
+                .null_count(field_node.null_count() as usize);
             self.create_array_from_builder(builder)
         } else {
             unreachable!("Cannot create dictionary array from {:?}", data_type)
@@ -366,7 +434,8 @@ impl RecordBatchDecoder<'_> {
 /// [`RecordBatch`]
 ///
 /// [IPC RecordBatch]: crate::RecordBatch
-struct RecordBatchDecoder<'a> {
+///
+pub struct RecordBatchDecoder<'a> {
     /// The flatbuffers encoded record batch
     batch: crate::RecordBatch<'a>,
     /// The output schema
@@ -534,7 +603,10 @@ impl<'a> RecordBatchDecoder<'a> {
     }
 
     fn next_buffer(&mut self) -> Result<Buffer, ArrowError> {
-        read_buffer(self.buffers.next().unwrap(), self.data, self.compression)
+        let buffer = self.buffers.next().ok_or_else(|| {
+            ArrowError::IpcError("Buffer count mismatched with metadata".to_string())
+        })?;
+        read_buffer(buffer, self.data, self.compression)
     }
 
     fn skip_buffer(&mut self) {
@@ -544,8 +616,7 @@ impl<'a> RecordBatchDecoder<'a> {
     fn next_node(&mut self, field: &Field) -> Result<&'a FieldNode, ArrowError> {
         self.nodes.next().ok_or_else(|| {
             ArrowError::SchemaError(format!(
-                "Invalid data for schema. {} refers to node not found in schema",
-                field
+                "Invalid data for schema. {field} refers to node not found in schema",
             ))
         })
     }
@@ -679,12 +750,72 @@ fn read_dictionary_impl(
     require_alignment: bool,
     skip_validation: UnsafeFlag,
 ) -> Result<(), ArrowError> {
-    if batch.isDelta() {
-        return Err(ArrowError::InvalidArgumentError(
-            "delta dictionary batches not supported".to_string(),
-        ));
-    }
+    let id = batch.id();
+
+    let dictionary_values = get_dictionary_values(
+        buf,
+        batch,
+        schema,
+        dictionaries_by_id,
+        metadata,
+        require_alignment,
+        skip_validation,
+    )?;
 
+    update_dictionaries(dictionaries_by_id, batch.isDelta(), id, dictionary_values)?;
+
+    Ok(())
+}
+
+/// Updates the `dictionaries_by_id` with the provided dictionary values and id.
+///
+/// # Errors
+/// - If `is_delta` is true and there is no existing dictionary for the given
+///   `dict_id`
+/// - If `is_delta` is true and the concatenation of the existing and new
+///   dictionary fails. This usually signals a type mismatch between the old and
+///   new values.
+fn update_dictionaries(
+    dictionaries_by_id: &mut HashMap<i64, ArrayRef>,
+    is_delta: bool,
+    dict_id: i64,
+    dict_values: ArrayRef,
+) -> Result<(), ArrowError> {
+    if !is_delta {
+        // We don't currently record the isOrdered field. This could be general
+        // attributes of arrays.
+        // Add (possibly multiple) array refs to the dictionaries array.
+        dictionaries_by_id.insert(dict_id, dict_values.clone());
+        return Ok(());
+    }
+
+    let existing = dictionaries_by_id.get(&dict_id).ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!(
+            "No existing dictionary for delta dictionary with id '{dict_id}'"
+        ))
+    })?;
+
+    let combined = concat::concat(&[existing, &dict_values]).map_err(|e| {
+        ArrowError::InvalidArgumentError(format!("Failed to concat delta dictionary: {e}"))
+    })?;
+
+    dictionaries_by_id.insert(dict_id, combined);
+
+    Ok(())
+}
+
+/// Given a dictionary batch IPC message/body along with the full state of a
+/// stream including schema, dictionary cache, metadata, and other flags, this
+/// function will parse the buffer into an array of dictionary values.
+fn get_dictionary_values(
+    buf: &Buffer,
+    batch: crate::DictionaryBatch,
+    schema: &Schema,
+    dictionaries_by_id: &mut HashMap<i64, ArrayRef>,
+    metadata: &MetadataVersion,
+    require_alignment: bool,
+    skip_validation: UnsafeFlag,
+) -> Result<ArrayRef, ArrowError> {
     let id = batch.id();
     #[allow(deprecated)]
     let fields_using_this_dictionary = schema.fields_with_dict_id(id);
@@ -696,7 +827,7 @@ fn read_dictionary_impl(
     // values array, we need to retrieve this from the schema.
     // Get an array representing this dictionary's values.
     let dictionary_values: ArrayRef = match first_field.data_type() {
-        DataType::Dictionary(_, ref value_type) => {
+        DataType::Dictionary(_, value_type) => {
             // Make a fake schema for the dictionary batch.
             let value = value_type.as_ref().clone();
             let schema = Schema::new(vec![Field::new("", value, true)]);
@@ -720,12 +851,7 @@ fn read_dictionary_impl(
         ArrowError::InvalidArgumentError(format!("dictionary id {id} not found in schema"))
     })?;
 
-    // We don't currently record the isOrdered field. This could be general
-    // attributes of arrays.
-    // Add (possibly multiple) array refs to the dictionaries array.
-    dictionaries_by_id.insert(id, dictionary_values.clone());
-
-    Ok(())
+    Ok(dictionary_values)
 }
 
 /// Read the data for a given block
@@ -743,7 +869,7 @@ fn read_block<R: Read + Seek>(mut reader: R, block: &Block) -> Result<Buffer, Ar
 /// Parse an encapsulated message
 ///
 /// <https://arrow.apache.org/docs/format/Columnar.html#encapsulated-message-format>
-fn parse_message(buf: &[u8]) -> Result<Message, ArrowError> {
+fn parse_message(buf: &[u8]) -> Result<Message::Message<'_>, ArrowError> {
     let buf = match buf[..4] == CONTINUATION_MARKER {
         true => &buf[8..],
         false => &buf[4..],
@@ -890,11 +1016,11 @@ impl FileDecoder {
     /// For example, some programs may wish to trust reading IPC files written
     /// by the same process that created the files.
     pub unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self {
-        self.skip_validation.set(skip_validation);
+        unsafe { self.skip_validation.set(skip_validation) };
         self
     }
 
-    fn read_message<'a>(&self, buf: &'a [u8]) -> Result<Message<'a>, ArrowError> {
+    fn read_message<'a>(&self, buf: &'a [u8]) -> Result<Message::Message<'a>, ArrowError> {
         let message = parse_message(buf)?;
 
         // some old test data's footer metadata is not set, so we account for that
@@ -1197,7 +1323,7 @@ impl<R: Read + Seek> FileReader<R> {
     /// Try to create a new file reader.
     ///
     /// There is no internal buffering. If buffered reads are needed you likely want to use
-    /// [`FileReader::try_new_buffered`] instead.    
+    /// [`FileReader::try_new_buffered`] instead.
     ///
     /// # Errors
     ///
@@ -1271,7 +1397,7 @@ impl<R: Read + Seek> FileReader<R> {
     ///
     /// See [`FileDecoder::with_skip_validation`]
     pub unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self {
-        self.decoder = self.decoder.with_skip_validation(skip_validation);
+        self.decoder = unsafe { self.decoder.with_skip_validation(skip_validation) };
         self
     }
 }
@@ -1330,7 +1456,7 @@ impl<R: Read + Seek> RecordBatchReader for FileReader<R> {
 /// [IPC Streaming Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
 pub struct StreamReader<R> {
     /// Stream reader
-    reader: R,
+    reader: MessageReader<R>,
 
     /// The schema that is read from the stream's first message
     schema: SchemaRef,
@@ -1388,32 +1514,28 @@ impl<R: Read> StreamReader<R> {
     /// An ['Err'](Result::Err) may be returned if the reader does not encounter a schema
     /// as the first message in the stream.
     pub fn try_new(
-        mut reader: R,
+        reader: R,
         projection: Option<Vec<usize>>,
     ) -> Result<StreamReader<R>, ArrowError> {
-        // determine metadata length
-        let mut meta_size: [u8; 4] = [0; 4];
-        reader.read_exact(&mut meta_size)?;
-        let meta_len = {
-            // If a continuation marker is encountered, skip over it and read
-            // the size from the next four bytes.
-            if meta_size == CONTINUATION_MARKER {
-                reader.read_exact(&mut meta_size)?;
-            }
-            i32::from_le_bytes(meta_size)
+        let mut msg_reader = MessageReader::new(reader);
+        let message = msg_reader.maybe_next()?;
+        let Some((message, _)) = message else {
+            return Err(ArrowError::IpcError(
+                "Expected schema message, found empty stream.".to_string(),
+            ));
         };
 
-        let mut meta_buffer = vec![0; meta_len as usize];
-        reader.read_exact(&mut meta_buffer)?;
+        if message.header_type() != Message::MessageHeader::Schema {
+            return Err(ArrowError::IpcError(format!(
+                "Expected a schema as the first message in the stream, got: {:?}",
+                message.header_type()
+            )));
+        }
 
-        let message = crate::root_as_message(meta_buffer.as_slice()).map_err(|err| {
-            ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
+        let schema = message.header_as_schema().ok_or_else(|| {
+            ArrowError::ParseError("Failed to parse schema from message header".to_string())
         })?;
-        // message header is a Schema, so read it
-        let ipc_schema: crate::Schema = message.header_as_schema().ok_or_else(|| {
-            ArrowError::ParseError("Unable to read IPC message as schema".to_string())
-        })?;
-        let schema = crate::convert::fb_to_schema(ipc_schema);
+        let schema = crate::convert::fb_to_schema(schema);
 
         // Create an array of optional dictionary value arrays, one per field.
         let dictionaries_by_id = HashMap::new();
@@ -1425,8 +1547,9 @@ impl<R: Read> StreamReader<R> {
             }
             _ => None,
         };
+
         Ok(Self {
-            reader,
+            reader: msg_reader,
             schema: Arc::new(schema),
             finished: false,
             dictionaries_by_id,
@@ -1458,114 +1581,127 @@ impl<R: Read> StreamReader<R> {
         if self.finished {
             return Ok(None);
         }
-        // determine metadata length
-        let mut meta_size: [u8; 4] = [0; 4];
 
-        match self.reader.read_exact(&mut meta_size) {
-            Ok(()) => (),
-            Err(e) => {
-                return if e.kind() == std::io::ErrorKind::UnexpectedEof {
-                    // Handle EOF without the "0xFFFFFFFF 0x00000000"
-                    // valid according to:
-                    // https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
-                    self.finished = true;
-                    Ok(None)
-                } else {
-                    Err(ArrowError::from(e))
-                };
-            }
-        }
-
-        let meta_len = {
-            // If a continuation marker is encountered, skip over it and read
-            // the size from the next four bytes.
-            if meta_size == CONTINUATION_MARKER {
-                self.reader.read_exact(&mut meta_size)?;
-            }
-            i32::from_le_bytes(meta_size)
-        };
+        // Read messages until we get a record batch or end of stream
+        loop {
+            let message = self.next_ipc_message()?;
+            let Some(message) = message else {
+                // If the message is None, we have reached the end of the stream.
+                self.finished = true;
+                return Ok(None);
+            };
 
-        if meta_len == 0 {
-            // the stream has ended, mark the reader as finished
-            self.finished = true;
-            return Ok(None);
+            match message {
+                IpcMessage::Schema(_) => {
+                    return Err(ArrowError::IpcError(
+                        "Expected a record batch, but found a schema".to_string(),
+                    ));
+                }
+                IpcMessage::RecordBatch(record_batch) => {
+                    return Ok(Some(record_batch));
+                }
+                IpcMessage::DictionaryBatch { .. } => {
+                    continue;
+                }
+            };
         }
+    }
 
-        let mut meta_buffer = vec![0; meta_len as usize];
-        self.reader.read_exact(&mut meta_buffer)?;
-
-        let vecs = &meta_buffer.to_vec();
-        let message = crate::root_as_message(vecs).map_err(|err| {
-            ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
-        })?;
+    /// Reads and fully parses the next IPC message from the stream. Whereas
+    /// [`Self::maybe_next`] is a higher level method focused on reading
+    /// `RecordBatch`es, this method returns the individual fully parsed IPC
+    /// messages from the underlying stream.
+    ///
+    /// This is useful primarily for testing reader/writer behaviors as it
+    /// allows a full view into the messages that have been written to a stream.
+    pub(crate) fn next_ipc_message(&mut self) -> Result<Option<IpcMessage>, ArrowError> {
+        let message = self.reader.maybe_next()?;
+        let Some((message, body)) = message else {
+            // If the message is None, we have reached the end of the stream.
+            return Ok(None);
+        };
 
-        match message.header_type() {
-            crate::MessageHeader::Schema => Err(ArrowError::IpcError(
-                "Not expecting a schema when messages are read".to_string(),
-            )),
-            crate::MessageHeader::RecordBatch => {
+        let ipc_message = match message.header_type() {
+            Message::MessageHeader::Schema => {
+                let schema = message.header_as_schema().ok_or_else(|| {
+                    ArrowError::ParseError("Failed to parse schema from message header".to_string())
+                })?;
+                let arrow_schema = crate::convert::fb_to_schema(schema);
+                IpcMessage::Schema(arrow_schema)
+            }
+            Message::MessageHeader::RecordBatch => {
                 let batch = message.header_as_record_batch().ok_or_else(|| {
                     ArrowError::IpcError("Unable to read IPC message as record batch".to_string())
                 })?;
-                // read the block that makes up the record batch into a buffer
-                let mut buf = MutableBuffer::from_len_zeroed(message.bodyLength() as usize);
-                self.reader.read_exact(&mut buf)?;
 
-                RecordBatchDecoder::try_new(
-                    &buf.into(),
+                let version = message.version();
+                let schema = self.schema.clone();
+                let record_batch = RecordBatchDecoder::try_new(
+                    &body.into(),
                     batch,
-                    self.schema(),
+                    schema,
                     &self.dictionaries_by_id,
-                    &message.version(),
+                    &version,
                 )?
                 .with_projection(self.projection.as_ref().map(|x| x.0.as_ref()))
                 .with_require_alignment(false)
                 .with_skip_validation(self.skip_validation.clone())
-                .read_record_batch()
-                .map(Some)
+                .read_record_batch()?;
+                IpcMessage::RecordBatch(record_batch)
             }
-            crate::MessageHeader::DictionaryBatch => {
-                let batch = message.header_as_dictionary_batch().ok_or_else(|| {
-                    ArrowError::IpcError(
-                        "Unable to read IPC message as dictionary batch".to_string(),
+            Message::MessageHeader::DictionaryBatch => {
+                let dict = message.header_as_dictionary_batch().ok_or_else(|| {
+                    ArrowError::ParseError(
+                        "Failed to parse dictionary batch from message header".to_string(),
                     )
                 })?;
-                // read the block that makes up the dictionary batch into a buffer
-                let mut buf = MutableBuffer::from_len_zeroed(message.bodyLength() as usize);
-                self.reader.read_exact(&mut buf)?;
 
-                read_dictionary_impl(
-                    &buf.into(),
-                    batch,
+                let version = message.version();
+                let dict_values = get_dictionary_values(
+                    &body.into(),
+                    dict,
                     &self.schema,
                     &mut self.dictionaries_by_id,
-                    &message.version(),
+                    &version,
                     false,
                     self.skip_validation.clone(),
                 )?;
 
-                // read the next message until we encounter a RecordBatch
-                self.maybe_next()
+                update_dictionaries(
+                    &mut self.dictionaries_by_id,
+                    dict.isDelta(),
+                    dict.id(),
+                    dict_values.clone(),
+                )?;
+
+                IpcMessage::DictionaryBatch {
+                    id: dict.id(),
+                    is_delta: (dict.isDelta()),
+                    values: (dict_values),
+                }
             }
-            crate::MessageHeader::NONE => Ok(None),
-            t => Err(ArrowError::InvalidArgumentError(format!(
-                "Reading types other than record batches not yet supported, unable to read {t:?} "
-            ))),
-        }
+            x => {
+                return Err(ArrowError::ParseError(format!(
+                    "Unsupported message header type in IPC stream: '{x:?}'"
+                )));
+            }
+        };
+
+        Ok(Some(ipc_message))
     }
 
     /// Gets a reference to the underlying reader.
     ///
     /// It is inadvisable to directly read from the underlying reader.
     pub fn get_ref(&self) -> &R {
-        &self.reader
+        self.reader.inner()
     }
 
     /// Gets a mutable reference to the underlying reader.
     ///
     /// It is inadvisable to directly read from the underlying reader.
     pub fn get_mut(&mut self) -> &mut R {
-        &mut self.reader
+        self.reader.inner_mut()
     }
 
     /// Specifies if validation should be skipped when reading data (defaults to `false`)
@@ -1574,7 +1710,7 @@ impl<R: Read> StreamReader<R> {
     ///
     /// See [`FileDecoder::with_skip_validation`]
     pub unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self {
-        self.skip_validation.set(skip_validation);
+        unsafe { self.skip_validation.set(skip_validation) };
         self
     }
 }
@@ -1593,11 +1729,129 @@ impl<R: Read> RecordBatchReader for StreamReader<R> {
     }
 }
 
+/// Representation of a fully parsed IpcMessage from the underlying stream.
+/// Parsing this kind of message is done by higher level constructs such as
+/// [`StreamReader`], because fully interpreting the messages into a record
+/// batch or dictionary batch requires access to stream state such as schema
+/// and the full dictionary cache.
+#[derive(Debug)]
+#[allow(dead_code)]
+pub(crate) enum IpcMessage {
+    Schema(arrow_schema::Schema),
+    RecordBatch(RecordBatch),
+    DictionaryBatch {
+        id: i64,
+        is_delta: bool,
+        values: ArrayRef,
+    },
+}
+
+/// A low-level construct that reads [`Message::Message`]s from a reader while
+/// re-using a buffer for metadata. This is composed into [`StreamReader`].
+struct MessageReader<R> {
+    reader: R,
+    buf: Vec<u8>,
+}
+
+impl<R: Read> MessageReader<R> {
+    fn new(reader: R) -> Self {
+        Self {
+            reader,
+            buf: Vec::new(),
+        }
+    }
+
+    /// Reads the entire next message from the underlying reader which includes
+    /// the metadata length, the metadata, and the body.
+    ///
+    /// # Returns
+    /// - `Ok(None)` if the reader signals the end of stream, either with
+    ///   EOF on the first read or with a zero metadata length
+    /// - `Err(_)` if the reader returns any error other than EOF on the
+    ///   first read, or if the metadata length or message is invalid
+    /// - `Ok(Some(_))` with the Message and a buffer containing the
+    ///   body bytes otherwise.
+    fn maybe_next(&mut self) -> Result<Option<(Message::Message<'_>, MutableBuffer)>, ArrowError> {
+        let meta_len = self.read_meta_len()?;
+        let Some(meta_len) = meta_len else {
+            return Ok(None);
+        };
+
+        self.buf.resize(meta_len, 0);
+        self.reader.read_exact(&mut self.buf)?;
+
+        let message = crate::root_as_message(self.buf.as_slice()).map_err(|err| {
+            ArrowError::ParseError(format!("Unable to get root as message: {err:?}"))
+        })?;
+
+        let mut buf = MutableBuffer::from_len_zeroed(message.bodyLength() as usize);
+        self.reader.read_exact(&mut buf)?;
+
+        Ok(Some((message, buf)))
+    }
+
+    /// Get a mutable reference to the underlying reader.
+    fn inner_mut(&mut self) -> &mut R {
+        &mut self.reader
+    }
+
+    /// Get an immutable reference to the underlying reader.
+    fn inner(&self) -> &R {
+        &self.reader
+    }
+
+    /// Read the metadata length for the next message from the underlying stream.
+    ///
+    /// # Returns
+    /// - `Ok(None)` if the reader signals the end of stream, either with
+    ///   EOF on the first read or with a zero metadata length
+    /// - `Err(_)` if the reader returns any error other than EOF on the
+    ///   first read, or if the metadata length is less than 0.
+    /// - `Ok(Some(_))` with the length otherwise.
+    pub fn read_meta_len(&mut self) -> Result<Option<usize>, ArrowError> {
+        let mut meta_len: [u8; 4] = [0; 4];
+        match self.reader.read_exact(&mut meta_len) {
+            Ok(_) => {}
+            Err(e) => {
+                return if e.kind() == std::io::ErrorKind::UnexpectedEof {
+                    // Handle EOF without the "0xFFFFFFFF 0x00000000"
+                    // valid according to:
+                    // https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
+                    Ok(None)
+                } else {
+                    Err(ArrowError::from(e))
+                };
+            }
+        };
+
+        let meta_len = {
+            // If a continuation marker is encountered, skip over it and read
+            // the size from the next four bytes.
+            if meta_len == CONTINUATION_MARKER {
+                self.reader.read_exact(&mut meta_len)?;
+            }
+
+            i32::from_le_bytes(meta_len)
+        };
+
+        if meta_len == 0 {
+            return Ok(None);
+        }
+
+        let meta_len = usize::try_from(meta_len)
+            .map_err(|_| ArrowError::ParseError(format!("Invalid metadata length: {meta_len}")))?;
+
+        Ok(Some(meta_len))
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use std::io::Cursor;
+
     use crate::convert::fb_to_schema;
     use crate::writer::{
-        unslice_run_array, write_message, DictionaryTracker, IpcDataGenerator, IpcWriteOptions,
+        DictionaryTracker, IpcDataGenerator, IpcWriteOptions, unslice_run_array, write_message,
     };
 
     use super::*;
@@ -1615,13 +1869,10 @@ mod tests {
         let fixed_size_list_data_type =
             DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, false)), 3);
 
-        let union_fields = UnionFields::new(
-            vec![0, 1],
-            vec![
-                Field::new("a", DataType::Int32, false),
-                Field::new("b", DataType::Float64, false),
-            ],
-        );
+        let union_fields = UnionFields::from_fields(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Float64, false),
+        ]);
 
         let union_data_type = DataType::Union(union_fields, UnionMode::Dense);
 
@@ -1741,6 +1992,98 @@ mod tests {
         .unwrap()
     }
 
+    #[test]
+    fn test_negative_meta_len_start_stream() {
+        let bytes = i32::to_le_bytes(-1);
+        let mut buf = vec![];
+        buf.extend(CONTINUATION_MARKER);
+        buf.extend(bytes);
+
+        let reader_err = StreamReader::try_new(Cursor::new(buf), None).err();
+        assert!(reader_err.is_some());
+        assert_eq!(
+            reader_err.unwrap().to_string(),
+            "Parser error: Invalid metadata length: -1"
+        );
+    }
+
+    #[test]
+    fn test_negative_meta_len_mid_stream() {
+        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let mut buf = Vec::new();
+        {
+            let mut writer = crate::writer::StreamWriter::try_new(&mut buf, &schema).unwrap();
+            let batch =
+                RecordBatch::try_new(Arc::new(schema), vec![Arc::new(Int32Array::from(vec![1]))])
+                    .unwrap();
+            writer.write(&batch).unwrap();
+        }
+
+        let bytes = i32::to_le_bytes(-1);
+        buf.extend(CONTINUATION_MARKER);
+        buf.extend(bytes);
+
+        let mut reader = StreamReader::try_new(Cursor::new(buf), None).unwrap();
+        // Read the valid value
+        assert!(reader.maybe_next().is_ok());
+        // Read the invalid meta len
+        let batch_err = reader.maybe_next().err();
+        assert!(batch_err.is_some());
+        assert_eq!(
+            batch_err.unwrap().to_string(),
+            "Parser error: Invalid metadata length: -1"
+        );
+    }
+
+    #[test]
+    fn test_missing_buffer_metadata_error() {
+        use crate::r#gen::Message::*;
+        use flatbuffers::FlatBufferBuilder;
+
+        let schema = Arc::new(Schema::new(vec![Field::new("col", DataType::Int32, true)]));
+
+        // create RecordBatch buffer metadata with invalid buffer count
+        // Int32Array needs 2 buffers (validity + data) but we provide only 1
+        let mut fbb = FlatBufferBuilder::new();
+        let nodes = fbb.create_vector(&[FieldNode::new(2, 0)]);
+        let buffers = fbb.create_vector(&[crate::Buffer::new(0, 8)]);
+        let batch_offset = RecordBatch::create(
+            &mut fbb,
+            &RecordBatchArgs {
+                length: 2,
+                nodes: Some(nodes),
+                buffers: Some(buffers),
+                compression: None,
+                variadicBufferCounts: None,
+            },
+        );
+        fbb.finish_minimal(batch_offset);
+        let batch_bytes = fbb.finished_data().to_vec();
+        let batch = flatbuffers::root::<RecordBatch>(&batch_bytes).unwrap();
+
+        let data_buffer = Buffer::from(vec![0u8; 8]);
+        let dictionaries: HashMap<i64, ArrayRef> = HashMap::new();
+        let metadata = MetadataVersion::V5;
+
+        let decoder = RecordBatchDecoder::try_new(
+            &data_buffer,
+            batch,
+            schema.clone(),
+            &dictionaries,
+            &metadata,
+        )
+        .unwrap();
+
+        let result = decoder.read_record_batch();
+
+        match result {
+            Err(ArrowError::IpcError(msg)) => {
+                assert_eq!(msg, "Buffer count mismatched with metadata");
+            }
+            other => panic!("unexpected error: {other:?}"),
+        }
+    }
+
     #[test]
     fn test_projection_array_values() {
         // define schema
@@ -2008,8 +2351,7 @@ mod tests {
         let mut writer = crate::writer::FileWriter::try_new_with_options(
             &mut buf,
             batch.schema_ref(),
-            #[allow(deprecated)]
-            IpcWriteOptions::default().with_preserve_dict_id(false),
+            IpcWriteOptions::default(),
         )
         .unwrap();
         writer.write(&batch).unwrap();
@@ -2440,11 +2782,15 @@ mod tests {
         )])
         .unwrap();
 
-        let gen = IpcDataGenerator {};
-        #[allow(deprecated)]
-        let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true);
-        let (_, encoded) = gen
-            .encoded_batch(&batch, &mut dict_tracker, &Default::default())
+        let r#gen = IpcDataGenerator {};
+        let mut dict_tracker = DictionaryTracker::new(false);
+        let (_, encoded) = r#gen
+            .encode(
+                &batch,
+                &mut dict_tracker,
+                &Default::default(),
+                &mut Default::default(),
+            )
             .unwrap();
 
         let message = root_as_message(&encoded.ipc_message).unwrap();
@@ -2479,11 +2825,15 @@ mod tests {
         )])
         .unwrap();
 
-        let gen = IpcDataGenerator {};
-        #[allow(deprecated)]
-        let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true);
-        let (_, encoded) = gen
-            .encoded_batch(&batch, &mut dict_tracker, &Default::default())
+        let r#gen = IpcDataGenerator {};
+        let mut dict_tracker = DictionaryTracker::new(false);
+        let (_, encoded) = r#gen
+            .encode(
+                &batch,
+                &mut dict_tracker,
+                &Default::default(),
+                &mut Default::default(),
+            )
             .unwrap();
 
         let message = root_as_message(&encoded.ipc_message).unwrap();
@@ -2692,8 +3042,7 @@ mod tests {
             let mut writer = crate::writer::StreamWriter::try_new_with_options(
                 &mut buf,
                 batch.schema().as_ref(),
-                #[allow(deprecated)]
-                crate::writer::IpcWriteOptions::default().with_preserve_dict_id(false),
+                crate::writer::IpcWriteOptions::default(),
             )
             .expect("Failed to create StreamWriter");
             writer.write(&batch).expect("Failed to write RecordBatch");
@@ -2721,7 +3070,7 @@ mod tests {
 
         expect_ipc_validation_error(
             Arc::new(array),
-            "Invalid argument error: Offset invariant failure: offset at position 2 out of bounds: 4 > 2"
+            "Invalid argument error: Offset invariant failure: offset at position 2 out of bounds: 4 > 2",
         );
     }
 
@@ -2743,7 +3092,7 @@ mod tests {
         };
         expect_ipc_validation_error(
             Arc::new(array),
-            "Invalid argument error: Invalid UTF8 sequence at string index 3 (3..45): invalid utf-8 sequence of 1 bytes from index 38"
+            "Invalid argument error: Invalid UTF8 sequence at string index 3 (3..45): invalid utf-8 sequence of 1 bytes from index 38",
         );
     }
 
@@ -2766,7 +3115,7 @@ mod tests {
         };
         expect_ipc_validation_error(
             Arc::new(array),
-            "Invalid argument error: Encountered non-UTF-8 data at index 3: invalid utf-8 sequence of 1 bytes from index 38"
+            "Invalid argument error: Encountered non-UTF-8 data at index 3: invalid utf-8 sequence of 1 bytes from index 38",
         );
     }
 
@@ -2789,13 +3138,14 @@ mod tests {
     #[test]
     fn test_validation_of_invalid_union_array() {
         let array = unsafe {
-            let fields = UnionFields::new(
+            let fields = UnionFields::try_new(
                 vec![1, 3], // typeids : type id 2 is not valid
                 vec![
                     Field::new("a", DataType::Int32, false),
                     Field::new("b", DataType::Utf8, false),
                 ],
-            );
+            )
+            .unwrap();
             let type_ids = ScalarBuffer::from(vec![1i8, 2, 3]); // 2 is invalid
             let offsets = None;
             let children: Vec<ArrayRef> = vec![
@@ -2876,4 +3226,15 @@ mod tests {
 
         assert_eq!(schema, new_schema);
     }
+
+    #[test]
+    fn test_negative_meta_len() {
+        let bytes = i32::to_le_bytes(-1);
+        let mut buf = vec![];
+        buf.extend(CONTINUATION_MARKER);
+        buf.extend(bytes);
+
+        let reader = StreamReader::try_new(Cursor::new(buf), None);
+        assert!(reader.is_err());
+    }
 }
diff --git a/arrow-ipc/src/reader/stream.rs b/arrow-ipc/src/reader/stream.rs
index f3aab9a82b04..d0d833b471b8 100644
--- a/arrow-ipc/src/reader/stream.rs
+++ b/arrow-ipc/src/reader/stream.rs
@@ -25,8 +25,8 @@ use arrow_data::UnsafeFlag;
 use arrow_schema::{ArrowError, SchemaRef};
 
 use crate::convert::MessageBuffer;
-use crate::reader::{read_dictionary_impl, RecordBatchDecoder};
-use crate::{MessageHeader, CONTINUATION_MARKER};
+use crate::reader::{RecordBatchDecoder, read_dictionary_impl};
+use crate::{CONTINUATION_MARKER, MessageHeader};
 
 /// A low-level interface for reading [`RecordBatch`] data from a stream of bytes
 ///
@@ -260,12 +260,12 @@ impl StreamDecoder {
                         t => {
                             return Err(ArrowError::IpcError(format!(
                                 "Message type unsupported by StreamDecoder: {t:?}"
-                            )))
+                            )));
                         }
                     }
                 }
                 DecoderState::Finished => {
-                    return Err(ArrowError::IpcError("Unexpected EOS".to_string()))
+                    return Err(ArrowError::IpcError("Unexpected EOS".to_string()));
                 }
             }
         }
@@ -293,7 +293,7 @@ mod tests {
     use super::*;
     use crate::writer::{IpcWriteOptions, StreamWriter};
     use arrow_array::{
-        types::Int32Type, DictionaryArray, Int32Array, Int64Array, RecordBatch, RunArray,
+        DictionaryArray, Int32Array, Int64Array, RecordBatch, RunArray, types::Int32Type,
     };
     use arrow_schema::{DataType, Field, Schema};
 
@@ -395,8 +395,7 @@ mod tests {
             let mut writer = StreamWriter::try_new_with_options(
                 &mut buffer,
                 &schema,
-                #[allow(deprecated)]
-                IpcWriteOptions::default().with_preserve_dict_id(false),
+                IpcWriteOptions::default(),
             )
             .expect("Failed to create StreamWriter");
             writer.write(&batch).expect("Failed to write RecordBatch");
@@ -408,7 +407,7 @@ mod tests {
         while let Some(batch) = decoder
             .decode(buf)
             .map_err(|e| {
-                ArrowError::ExternalError(format!("Failed to decode record batch: {}", e).into())
+                ArrowError::ExternalError(format!("Failed to decode record batch: {e}").into())
             })
             .expect("Failed to decode record batch")
         {
diff --git a/arrow-ipc/src/tests/delta_dictionary.rs b/arrow-ipc/src/tests/delta_dictionary.rs
new file mode 100644
index 000000000000..dfd8cd33e550
--- /dev/null
+++ b/arrow-ipc/src/tests/delta_dictionary.rs
@@ -0,0 +1,479 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    reader::IpcMessage,
+    writer::{DictionaryHandling, IpcWriteOptions, StreamWriter},
+};
+use crate::{
+    reader::{FileReader, StreamReader},
+    writer::FileWriter,
+};
+use arrow_array::{
+    Array, ArrayRef, DictionaryArray, RecordBatch, StringArray, builder::StringDictionaryBuilder,
+    types::Int32Type,
+};
+use arrow_schema::{DataType, Field, Schema};
+use std::io::Cursor;
+use std::sync::Arc;
+
+#[test]
+fn test_zero_row_dict() {
+    let batches: &[&[&str]] = &[&[], &["A"], &[], &["B", "C"], &[]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(vec![]),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["B", "C"])),
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(vec![]),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[test]
+fn test_mixed_delta() {
+    let batches: &[&[&str]] = &[
+        &["A"],
+        &["A", "B"],
+        &["C"],
+        &["D", "E"],
+        &["A", "B", "C", "D", "E"],
+    ];
+
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["B"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["C"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["D", "E"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C", "D", "E"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[test]
+fn test_disjoint_delta() {
+    let batches: &[&[&str]] = &[&["A"], &["B"], &["C", "E"]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["B"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["C", "E"])),
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C", "E"])),
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[test]
+fn test_increasing_delta() {
+    let batches: &[&[&str]] = &[&["A"], &["A", "B"], &["A", "B", "C"]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["B"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["C"])),
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[test]
+fn test_single_delta() {
+    let batches: &[&[&str]] = &[&["A", "B", "C"], &["D"]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+            MessageType::DeltaDict(str_vec(&["D"])),
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+            MessageType::Dict(str_vec(&["A", "B", "C", "D"])),
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[test]
+fn test_single_same_value_sequence() {
+    let batches: &[&[&str]] = &[&["A"], &["A"], &["A"], &["A"]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+        ],
+    );
+
+    run_resend_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A"])),
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+fn str_vec(strings: &[&str]) -> Vec<String> {
+    strings.iter().map(|s| s.to_string()).collect()
+}
+
+#[test]
+fn test_multi_same_value_sequence() {
+    let batches: &[&[&str]] = &[&["A", "B", "C"], &["A", "B", "C"]];
+    run_delta_sequence_test(
+        batches,
+        &[
+            MessageType::Dict(str_vec(&["A", "B", "C"])),
+            MessageType::RecordBatch,
+        ],
+    );
+}
+
+#[derive(Debug, PartialEq)]
+enum MessageType {
+    Schema,
+    Dict(Vec<String>),
+    DeltaDict(Vec<String>),
+    RecordBatch,
+}
+
+fn run_resend_sequence_test(batches: &[&[&str]], sequence: &[MessageType]) {
+    let opts = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Resend);
+    run_sequence_test(batches, sequence, opts);
+}
+
+fn run_delta_sequence_test(batches: &[&[&str]], sequence: &[MessageType]) {
+    let opts = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+    run_sequence_test(batches, sequence, opts); // checks `sequence` against a Delta-mode stream
+}
+
+/// Writes `batches` to an IPC stream using `options`, reads the raw messages
+/// back and checks each one against the same-position entry of `sequence`.
+///
+/// Messages and expectations are paired with `zip`, so the comparison ends
+/// as soon as either side runs out; surplus entries on the longer side are
+/// not checked. Any mismatch panics and thereby fails the calling test.
+fn run_sequence_test(batches: &[&[&str]], sequence: &[MessageType], options: IpcWriteOptions) {
+    let encoded = write_all_to_stream(options.clone(), batches);
+    let messages = get_ipc_message_stream(encoded);
+
+    for (actual, want) in messages.iter().zip(sequence) {
+        match actual {
+            IpcMessage::Schema(_) => {
+                assert_eq!(want, &MessageType::Schema, "Expected schema message");
+            }
+            IpcMessage::RecordBatch(_) => {
+                assert_eq!(
+                    want,
+                    &MessageType::RecordBatch,
+                    "Expected record batch message"
+                );
+            }
+            IpcMessage::DictionaryBatch {
+                id: _,
+                is_delta,
+                values,
+            } => {
+                let want_values = match (*is_delta, want) {
+                    (true, MessageType::DeltaDict(v)) => v,
+                    (true, _) => panic!("Expected DeltaDict message type"),
+                    (false, MessageType::Dict(v)) => v,
+                    (false, _) => panic!("Expected Dict message type"),
+                };
+
+                // Null dictionary entries are compared as empty strings.
+                let strings = values.as_any().downcast_ref::<StringArray>().unwrap();
+                let got_values: Vec<String> = strings
+                    .iter()
+                    .map(|entry| entry.unwrap_or_default().to_string())
+                    .collect();
+
+                assert_eq!(*want_values, got_values)
+            }
+        }
+    }
+}
+
+/// Reads every message from the IPC stream in `buf` via
+/// `StreamReader::next_ipc_message`; panics on any read error.
+fn get_ipc_message_stream(buf: Vec<u8>) -> Vec<IpcMessage> {
+    let mut reader = StreamReader::try_new(Cursor::new(buf), None).unwrap();
+    let mut messages = Vec::new();
+    // `Ok(None)` marks the end of the stream.
+    while let Some(message) = reader
+        .next_ipc_message()
+        .unwrap_or_else(|e| panic!("Error reading IPC message: {e:?}"))
+    {
+        messages.push(message);
+    }
+    messages
+}
+
+/// Second batch keeps only "A" from the first and brings five new values.
+#[test]
+fn test_replace_same_length() {
+    run_parity_test(&[
+        &["A", "B", "C", "D", "E", "F"],
+        &["A", "G", "H", "I", "J", "K"],
+    ]);
+}
+
+/// Batches of varying size that mostly introduce values not seen before.
+#[test]
+fn test_sparse_deltas() {
+    run_parity_test(&[
+        &["A"],
+        &["C"],
+        &["E", "F", "D"],
+        &["FOO"],
+        &["parquet", "B"],
+        &["123", "B", "C"],
+    ]);
+}
+
+/// Parity check where the third batch shares no values with the earlier ones.
+#[test]
+fn test_deltas_with_reset() {
+    // Dictionary resets at ["C", "D"]
+    run_parity_test(&[&["A"], &["A", "B"], &["C", "D"], &["A", "B", "C", "D"]]);
+}
+
+/// `FileWriter` tolerates only very specific delta patterns because the file
+/// format cannot replace or reset a dictionary once it has been written.
+#[test]
+fn test_deltas_with_file() {
+    let growing: &[&[&str]] = &[&["A"], &["A", "B"], &["A", "B", "C"], &["A", "B", "C", "D"]];
+    run_parity_test(growing);
+}
+
+/// Round-trips `batches` through three encodings and checks that each
+/// decoded batch matches both the input and the other encodings:
+///
+/// - stream format with `DictionaryHandling::Delta`
+/// - stream format with `DictionaryHandling::Resend`
+/// - file format with `DictionaryHandling::Delta` (the file format does not
+///   allow replacement dictionaries)
+fn run_parity_test(batches: &[&[&str]]) {
+    let delta = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+    let resend = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Resend);
+
+    // Encode all three variants before any of them is read back.
+    let delta_stream = write_all_to_stream(delta.clone(), batches);
+    let resend_stream = write_all_to_stream(resend, batches);
+    let delta_file = write_all_to_file(delta, batches);
+
+    // The delta stream is the reference; the other two are checked against it.
+    let reference = get_stream_batches(delta_stream);
+    let mut others = [
+        get_stream_batches(resend_stream),
+        get_file_batches(delta_file),
+    ];
+
+    for (idx, batch) in reference.enumerate() {
+        let reference_dict = extract_dictionary(batch);
+        // The decoded values must equal what was written for this batch.
+        let want = batches[idx];
+        assert_eq!(want, &dict_to_vec(reference_dict.clone()));
+
+        // Every other encoding must agree with the input and with the reference.
+        for other in others.iter_mut() {
+            let other_batch = other
+                .next()
+                .expect("All streams should yield same number of elements");
+            let other_dict = extract_dictionary(other_batch);
+            assert_eq!(want, &dict_to_vec(other_dict.clone()));
+            assert_eq!(reference_dict, other_dict);
+        }
+    }
+
+    // Once the reference is exhausted, the others must be exhausted too.
+    for other in others.iter_mut() {
+        assert!(
+            other.next().is_none(),
+            "All streams should yield same number of elements"
+        );
+    }
+}
+
+/// Collects the items yielded by the typed (`StringArray`) view of `dict`,
+/// mapping null items to empty strings.
+fn dict_to_vec(dict: DictionaryArray<Int32Type>) -> Vec<String> {
+    let typed = dict.downcast_dict::<StringArray>().unwrap();
+    let as_string = |value: Option<&str>| value.map(str::to_string).unwrap_or_default();
+    typed.into_iter().map(as_string).collect()
+}
+
+/// Opens `buf` as an IPC stream and collects every read result up front.
+/// Each result is unwrapped only when the returned iterator reaches it.
+fn get_stream_batches(buf: Vec<u8>) -> Box<dyn Iterator<Item = RecordBatch>> {
+    let reader = StreamReader::try_new(Cursor::new(buf), None).unwrap();
+    // Drain the reader completely before handing anything back.
+    let results: Vec<Result<_, _>> = reader.collect();
+    // A failed read therefore panics during iteration, not here.
+    Box::new(results.into_iter().map(Result::unwrap))
+}
+
+/// Opens `buf` as an IPC file and collects every read result up front.
+/// Each result is unwrapped only when the returned iterator reaches it.
+fn get_file_batches(buf: Vec<u8>) -> Box<dyn Iterator<Item = RecordBatch>> {
+    let reader = FileReader::try_new(Cursor::new(buf), None).unwrap();
+    // Drain the reader completely before handing anything back.
+    let results: Vec<Result<_, _>> = reader.collect();
+    // A failed read therefore panics during iteration, not here.
+    Box::new(results.into_iter().map(Result::unwrap))
+}
+
+/// Returns a clone of column 0 of `batch` as an `Int32`-keyed dictionary
+/// array; panics if that column is of any other array type.
+fn extract_dictionary(batch: RecordBatch) -> DictionaryArray<arrow_array::types::Int32Type> {
+    type Int32Dict = DictionaryArray<arrow_array::types::Int32Type>;
+    let first_column = batch.column(0).as_any();
+    let typed: Option<&Int32Dict> = first_column.downcast_ref();
+    typed.unwrap().clone()
+}
+
+/// Encodes `vals` as an IPC file written with `options`, returning its bytes.
+/// Panics if `vals` is empty, because the schema comes from the first batch.
+fn write_all_to_file(options: IpcWriteOptions, vals: &[&[&str]]) -> Vec<u8> {
+    let batches = build_batches(vals);
+    let schema = batches[0].schema();
+    let mut bytes: Vec<u8> = Vec::new();
+    let mut writer = FileWriter::try_new_with_options(&mut bytes, &schema, options).unwrap();
+    batches.iter().try_for_each(|b| writer.write(b)).unwrap();
+    writer.finish().unwrap();
+    bytes
+}
+
+/// Encodes `vals` as an IPC stream written with `options` and returns the
+/// resulting bytes.
+///
+/// Panics if `vals` is empty, because the schema is taken from the first batch.
+fn write_all_to_stream(options: IpcWriteOptions, vals: &[&[&str]]) -> Vec<u8> {
+    let batches = build_batches(vals);
+    let schema = batches[0].schema();
+
+    let mut bytes: Vec<u8> = Vec::new();
+    let mut writer = StreamWriter::try_new_with_options(&mut bytes, &schema, options).unwrap();
+    batches.iter().try_for_each(|b| writer.write(b)).unwrap();
+    writer.finish().unwrap();
+    bytes
+}
+
+fn build_batches(vals: &[&[&str]]) -> Vec<RecordBatch> {
+    let mut builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    vals.iter().map(|v| build_batch(v, &mut builder)).collect() // same builder for every batch
+}
+
+/// Appends `vals` to `builder` and wraps the finished array in a single-column
+/// ("dict") record batch. The array is taken with `finish_preserve_values`.
+fn build_batch(
+    vals: &[&str],
+    builder: &mut StringDictionaryBuilder<arrow_array::types::Int32Type>,
+) -> RecordBatch {
+    vals.iter().for_each(|&val| {
+        builder.append_value(val);
+    });
+    let column: ArrayRef = Arc::new(builder.finish_preserve_values());
+
+    // Nullable `Dictionary(Int32, Utf8)` field named "dict".
+    let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
+    let fields = vec![Field::new("dict", dict_type, true)];
+    let schema = Arc::new(Schema::new(fields));
+
+    RecordBatch::try_new(schema, vec![column]).unwrap()
+}
diff --git a/arrow-ipc/src/tests/mod.rs b/arrow-ipc/src/tests/mod.rs
new file mode 100644
index 000000000000..e98b28de1482
--- /dev/null
+++ b/arrow-ipc/src/tests/mod.rs
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/*!
+This module contains cross-functional tests for various IPC components. Some
+tests rely on functionality that is not public and so they're placed here rather
+than in integration tests or unit tests for a specific module.
+*/
+mod delta_dictionary;
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index c800ddd29005..86376c8e5e84 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -38,12 +38,13 @@ use arrow_array::types::{Int16Type, Int32Type, Int64Type, RunEndIndexType};
 use arrow_array::*;
 use arrow_buffer::bit_util;
 use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
-use arrow_data::{layout, ArrayData, ArrayDataBuilder, BufferSpec};
+use arrow_data::{ArrayData, ArrayDataBuilder, BufferSpec, layout};
 use arrow_schema::*;
 
+use crate::CONTINUATION_MARKER;
 use crate::compression::CompressionCodec;
+pub use crate::compression::CompressionContext;
 use crate::convert::IpcSchemaEncoder;
-use crate::CONTINUATION_MARKER;
 
 /// IPC write options used to control the behaviour of the [`IpcDataGenerator`]
 #[derive(Debug, Clone)]
@@ -65,15 +66,8 @@ pub struct IpcWriteOptions {
     /// Compression, if desired. Will result in a runtime error
     /// if the corresponding feature is not enabled
     batch_compression_type: Option<crate::CompressionType>,
-    /// Flag indicating whether the writer should preserve the dictionary IDs defined in the
-    /// schema or generate unique dictionary IDs internally during encoding.
-    ///
-    /// Defaults to `false`
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
-    )]
-    preserve_dict_id: bool,
+    /// How to handle updating dictionaries in IPC messages
+    dictionary_handling: DictionaryHandling,
 }
 
 impl IpcWriteOptions {
@@ -122,7 +116,7 @@ impl IpcWriteOptions {
                 write_legacy_ipc_format,
                 metadata_version,
                 batch_compression_type: None,
-                preserve_dict_id: false,
+                dictionary_handling: DictionaryHandling::default(),
             }),
             crate::MetadataVersion::V5 => {
                 if write_legacy_ipc_format {
@@ -130,13 +124,12 @@ impl IpcWriteOptions {
                         "Legacy IPC format only supported on metadata version 4".to_string(),
                     ))
                 } else {
-                    #[allow(deprecated)]
                     Ok(Self {
                         alignment,
                         write_legacy_ipc_format,
                         metadata_version,
                         batch_compression_type: None,
-                        preserve_dict_id: false,
+                        dictionary_handling: DictionaryHandling::default(),
                     })
                 }
             }
@@ -146,44 +139,21 @@ impl IpcWriteOptions {
         }
     }
 
-    /// Return whether the writer is configured to preserve the dictionary IDs
-    /// defined in the schema
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
-    )]
-    pub fn preserve_dict_id(&self) -> bool {
-        #[allow(deprecated)]
-        self.preserve_dict_id
-    }
-
-    /// Set whether the IPC writer should preserve the dictionary IDs in the schema
-    /// or auto-assign unique dictionary IDs during encoding (defaults to true)
-    ///
-    /// If this option is true,  the application must handle assigning ids
-    /// to the dictionary batches in order to encode them correctly
-    ///
-    /// The default will change to `false`  in future releases
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
-    )]
-    #[allow(deprecated)]
-    pub fn with_preserve_dict_id(mut self, preserve_dict_id: bool) -> Self {
-        self.preserve_dict_id = preserve_dict_id;
+    /// Configure how dictionaries are handled in IPC messages
+    pub fn with_dictionary_handling(mut self, dictionary_handling: DictionaryHandling) -> Self {
+        self.dictionary_handling = dictionary_handling;
         self
     }
 }
 
 impl Default for IpcWriteOptions {
     fn default() -> Self {
-        #[allow(deprecated)]
         Self {
             alignment: 64,
             write_legacy_ipc_format: false,
             metadata_version: crate::MetadataVersion::V5,
             batch_compression_type: None,
-            preserve_dict_id: false,
+            dictionary_handling: DictionaryHandling::default(),
         }
     }
 }
@@ -198,7 +168,7 @@ impl Default for IpcWriteOptions {
 /// # use std::sync::Arc;
 /// # use arrow_array::UInt64Array;
 /// # use arrow_array::RecordBatch;
-/// # use arrow_ipc::writer::{DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
+/// # use arrow_ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator, IpcWriteOptions};
 ///
 /// // Create a record batch
 /// let batch = RecordBatch::try_from_iter(vec![
@@ -210,11 +180,13 @@ impl Default for IpcWriteOptions {
 /// let options = IpcWriteOptions::default();
 /// let mut dictionary_tracker = DictionaryTracker::new(error_on_replacement);
 ///
+/// let mut compression_context = CompressionContext::default();
+///
 /// // encode the batch into zero or more encoded dictionaries
 /// // and the data for the actual array.
 /// let data_gen = IpcDataGenerator::default();
 /// let (encoded_dictionaries, encoded_message) = data_gen
-///   .encoded_batch(&batch, &mut dictionary_tracker, &options)
+///   .encode(&batch, &mut dictionary_tracker, &options, &mut compression_context)
 ///   .unwrap();
 /// # }
 /// ```
@@ -224,10 +196,7 @@ pub struct IpcDataGenerator {}
 
 impl IpcDataGenerator {
     /// Converts a schema to an IPC message along with `dictionary_tracker`
-    /// and returns it encoded inside [EncodedData] as a flatbuffer
-    ///
-    /// Preferred method over [IpcDataGenerator::schema_to_bytes] since it's
-    /// deprecated since Arrow v54.0.0
+    /// and returns it encoded inside [EncodedData] as a flatbuffer.
     pub fn schema_to_bytes_with_dictionary_tracker(
         &self,
         schema: &Schema,
@@ -258,36 +227,6 @@ impl IpcDataGenerator {
         }
     }
 
-    #[deprecated(
-        since = "54.0.0",
-        note = "Use `schema_to_bytes_with_dictionary_tracker` instead. This function signature of `schema_to_bytes_with_dictionary_tracker` in the next release."
-    )]
-    /// Converts a schema to an IPC message and returns it encoded inside [EncodedData] as a flatbuffer
-    pub fn schema_to_bytes(&self, schema: &Schema, write_options: &IpcWriteOptions) -> EncodedData {
-        let mut fbb = FlatBufferBuilder::new();
-        let schema = {
-            #[allow(deprecated)]
-            // This will be replaced with the IpcSchemaConverter in the next release.
-            let fb = crate::convert::schema_to_fb_offset(&mut fbb, schema);
-            fb.as_union_value()
-        };
-
-        let mut message = crate::MessageBuilder::new(&mut fbb);
-        message.add_version(write_options.metadata_version);
-        message.add_header_type(crate::MessageHeader::Schema);
-        message.add_bodyLength(0);
-        message.add_header(schema);
-        // TODO: custom metadata
-        let data = message.finish();
-        fbb.finish(data, None);
-
-        let data = fbb.finished_data();
-        EncodedData {
-            ipc_message: data.to_vec(),
-            arrow_data: vec![],
-        }
-    }
-
     fn _encode_dictionaries<I: Iterator<Item = i64>>(
         &self,
         column: &ArrayRef,
@@ -295,6 +234,7 @@ impl IpcDataGenerator {
         dictionary_tracker: &mut DictionaryTracker,
         write_options: &IpcWriteOptions,
         dict_id: &mut I,
+        compression_context: &mut CompressionContext,
     ) -> Result<(), ArrowError> {
         match column.data_type() {
             DataType::Struct(fields) => {
@@ -307,6 +247,7 @@ impl IpcDataGenerator {
                         dictionary_tracker,
                         write_options,
                         dict_id,
+                        compression_context,
                     )?;
                 }
             }
@@ -328,6 +269,7 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
                 )?;
             }
             DataType::List(field) => {
@@ -339,6 +281,7 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
                 )?;
             }
             DataType::LargeList(field) => {
@@ -350,6 +293,31 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
+                )?;
+            }
+            DataType::ListView(field) => {
+                let list = column.as_list_view::<i32>();
+                self.encode_dictionaries(
+                    field,
+                    list.values(),
+                    encoded_dictionaries,
+                    dictionary_tracker,
+                    write_options,
+                    dict_id,
+                    compression_context,
+                )?;
+            }
+            DataType::LargeListView(field) => {
+                let list = column.as_list_view::<i64>();
+                self.encode_dictionaries(
+                    field,
+                    list.values(),
+                    encoded_dictionaries,
+                    dictionary_tracker,
+                    write_options,
+                    dict_id,
+                    compression_context,
                 )?;
             }
             DataType::FixedSizeList(field, _) => {
@@ -364,6 +332,7 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
                 )?;
             }
             DataType::Map(field, _) => {
@@ -382,6 +351,7 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
                 )?;
 
                 // values
@@ -392,6 +362,7 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id,
+                    compression_context,
                 )?;
             }
             DataType::Union(fields, _) => {
@@ -405,6 +376,7 @@ impl IpcDataGenerator {
                         dictionary_tracker,
                         write_options,
                         dict_id,
+                        compression_context,
                     )?;
                 }
             }
@@ -414,6 +386,7 @@ impl IpcDataGenerator {
         Ok(())
     }
 
+    #[allow(clippy::too_many_arguments)]
     fn encode_dictionaries<I: Iterator<Item = i64>>(
         &self,
         field: &Field,
@@ -422,6 +395,7 @@ impl IpcDataGenerator {
         dictionary_tracker: &mut DictionaryTracker,
         write_options: &IpcWriteOptions,
         dict_id_seq: &mut I,
+        compression_context: &mut CompressionContext,
     ) -> Result<(), ArrowError> {
         match column.data_type() {
             DataType::Dictionary(_key_type, _value_type) => {
@@ -436,27 +410,40 @@ impl IpcDataGenerator {
                     dictionary_tracker,
                     write_options,
                     dict_id_seq,
+                    compression_context,
                 )?;
 
-                // It's importnat to only take the dict_id at this point, because the dict ID
+                // It's important to only take the dict_id at this point, because the dict ID
                 // sequence is assigned depth-first, so we need to first encode children and have
                 // them take their assigned dict IDs before we take the dict ID for this field.
-                #[allow(deprecated)]
-                let dict_id = dict_id_seq
-                    .next()
-                    .or_else(|| field.dict_id())
-                    .ok_or_else(|| {
-                        ArrowError::IpcError(format!("no dict id for field {}", field.name()))
-                    })?;
-
-                let emit = dictionary_tracker.insert(dict_id, column)?;
-
-                if emit {
-                    encoded_dictionaries.push(self.dictionary_batch_to_bytes(
-                        dict_id,
-                        dict_values,
-                        write_options,
-                    )?);
+                let dict_id = dict_id_seq.next().ok_or_else(|| {
+                    ArrowError::IpcError(format!("no dict id for field {}", field.name()))
+                })?;
+
+                match dictionary_tracker.insert_column(
+                    dict_id,
+                    column,
+                    write_options.dictionary_handling,
+                )? {
+                    DictionaryUpdate::None => {}
+                    DictionaryUpdate::New | DictionaryUpdate::Replaced => {
+                        encoded_dictionaries.push(self.dictionary_batch_to_bytes(
+                            dict_id,
+                            dict_values,
+                            write_options,
+                            false,
+                            compression_context,
+                        )?);
+                    }
+                    DictionaryUpdate::Delta(data) => {
+                        encoded_dictionaries.push(self.dictionary_batch_to_bytes(
+                            dict_id,
+                            &data,
+                            write_options,
+                            true,
+                            compression_context,
+                        )?);
+                    }
                 }
             }
             _ => self._encode_dictionaries(
@@ -465,6 +452,7 @@ impl IpcDataGenerator {
                 dictionary_tracker,
                 write_options,
                 dict_id_seq,
+                compression_context,
             )?,
         }
 
@@ -474,11 +462,12 @@ impl IpcDataGenerator {
     /// Encodes a batch to a number of [EncodedData] items (dictionary batches + the record batch).
     /// The [DictionaryTracker] keeps track of dictionaries with new `dict_id`s  (so they are only sent once)
     /// Make sure the [DictionaryTracker] is initialized at the start of the stream.
-    pub fn encoded_batch(
+    pub fn encode(
         &self,
         batch: &RecordBatch,
         dictionary_tracker: &mut DictionaryTracker,
         write_options: &IpcWriteOptions,
+        compression_context: &mut CompressionContext,
     ) -> Result<(Vec<EncodedData>, EncodedData), ArrowError> {
         let schema = batch.schema();
         let mut encoded_dictionaries = Vec::with_capacity(schema.flattened_fields().len());
@@ -494,19 +483,40 @@ impl IpcDataGenerator {
                 dictionary_tracker,
                 write_options,
                 &mut dict_id,
+                compression_context,
             )?;
         }
 
-        let encoded_message = self.record_batch_to_bytes(batch, write_options)?;
+        let encoded_message =
+            self.record_batch_to_bytes(batch, write_options, compression_context)?;
         Ok((encoded_dictionaries, encoded_message))
     }
 
+    /// Encodes a batch to a number of [EncodedData] items (dictionary batches + the
+    /// record batch), like [`Self::encode`], but without a caller-supplied
+    /// [`CompressionContext`]: a default context is created for this call only.
+    ///
+    /// The [DictionaryTracker] keeps track of dictionaries that were already written;
+    /// make sure it is initialized at the start of the stream.
+    #[deprecated(since = "57.0.0", note = "Use `encode` instead")]
+    pub fn encoded_batch(
+        &self,
+        batch: &RecordBatch,
+        dictionary_tracker: &mut DictionaryTracker,
+        write_options: &IpcWriteOptions,
+    ) -> Result<(Vec<EncodedData>, EncodedData), ArrowError> {
+        // The context is dropped again as soon as `encode` returns.
+        let mut context = CompressionContext::default();
+        self.encode(batch, dictionary_tracker, write_options, &mut context)
+    }
+
     /// Write a `RecordBatch` into two sets of bytes, one for the header (crate::Message) and the
     /// other for the batch's data
     fn record_batch_to_bytes(
         &self,
         batch: &RecordBatch,
         write_options: &IpcWriteOptions,
+        compression_context: &mut CompressionContext,
     ) -> Result<EncodedData, ArrowError> {
         let mut fbb = FlatBufferBuilder::new();
 
@@ -541,6 +551,7 @@ impl IpcDataGenerator {
                 array.len(),
                 array.null_count(),
                 compression_codec,
+                compression_context,
                 write_options,
             )?;
 
@@ -598,6 +609,8 @@ impl IpcDataGenerator {
         dict_id: i64,
         array_data: &ArrayData,
         write_options: &IpcWriteOptions,
+        is_delta: bool,
+        compression_context: &mut CompressionContext,
     ) -> Result<EncodedData, ArrowError> {
         let mut fbb = FlatBufferBuilder::new();
 
@@ -628,6 +641,7 @@ impl IpcDataGenerator {
             array_data.len(),
             array_data.null_count(),
             compression_codec,
+            compression_context,
             write_options,
         )?;
 
@@ -666,6 +680,7 @@ impl IpcDataGenerator {
             let mut batch_builder = crate::DictionaryBatchBuilder::new(&mut fbb);
             batch_builder.add_id(dict_id);
             batch_builder.add_data(root);
+            batch_builder.add_isDelta(is_delta);
             batch_builder.finish().as_union_value()
         };
 
@@ -779,6 +794,34 @@ fn into_zero_offset_run_array<R: RunEndIndexType>(
     Ok(array_data.into())
 }
 
+/// Controls how dictionaries are handled in Arrow IPC messages
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum DictionaryHandling {
+    /// Send the entire dictionary again whenever it differs from the one last written (default)
+    #[default]
+    Resend,
+    /// Send only new dictionary values since the last batch (delta encoding)
+    ///
+    /// When a dictionary is first encountered, the entire dictionary is sent.
+    /// For subsequent batches, only values that are new (not previously sent)
+    /// are transmitted with the `isDelta` flag set to true.
+    Delta,
+}
+
+/// Describes what kind of update took place after a call to [`DictionaryTracker::insert_column`].
+#[derive(Debug, Clone)]
+pub enum DictionaryUpdate {
+    /// No dictionary was written, the dictionary was identical to what was already
+    /// in the tracker.
+    None,
+    /// No dictionary with this ID was present in the tracker
+    New,
+    /// Dictionary was replaced with the new data
+    Replaced,
+    /// Dictionary was updated, ArrayData is the delta between old and new
+    Delta(ArrayData),
+}
+
 /// Keeps track of dictionaries that have been written, to avoid emitting the same dictionary
 /// multiple times.
 ///
@@ -789,11 +832,6 @@ pub struct DictionaryTracker {
     written: HashMap<i64, ArrayData>,
     dict_ids: Vec<i64>,
     error_on_replacement: bool,
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
-    )]
-    preserve_dict_id: bool,
 }
 
 impl DictionaryTracker {
@@ -802,63 +840,23 @@ impl DictionaryTracker {
     /// If `error_on_replacement`
     /// is true, an error will be generated if an update to an
     /// existing dictionary is attempted.
-    ///
-    /// If `preserve_dict_id` is true, the dictionary ID defined in the schema
-    /// is used, otherwise a unique dictionary ID will be assigned by incrementing
-    /// the last seen dictionary ID (or using `0` if no other dictionary IDs have been
-    /// seen)
     pub fn new(error_on_replacement: bool) -> Self {
         #[allow(deprecated)]
         Self {
             written: HashMap::new(),
             dict_ids: Vec::new(),
             error_on_replacement,
-            preserve_dict_id: false,
-        }
-    }
-
-    /// Create a new [`DictionaryTracker`].
-    ///
-    /// If `error_on_replacement`
-    /// is true, an error will be generated if an update to an
-    /// existing dictionary is attempted.
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
-    )]
-    pub fn new_with_preserve_dict_id(error_on_replacement: bool, preserve_dict_id: bool) -> Self {
-        #[allow(deprecated)]
-        Self {
-            written: HashMap::new(),
-            dict_ids: Vec::new(),
-            error_on_replacement,
-            preserve_dict_id,
         }
     }
 
-    /// Set the dictionary ID for `field`.
-    ///
-    /// If `preserve_dict_id` is true, this will return the `dict_id` in `field` (or panic if `field` does
-    /// not have a `dict_id` defined).
-    ///
-    /// If `preserve_dict_id` is false, this will return the value of the last `dict_id` assigned incremented by 1
-    /// or 0 in the case where no dictionary IDs have yet been assigned
-    #[deprecated(
-        since = "54.0.0",
-        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
-    )]
-    pub fn set_dict_id(&mut self, field: &Field) -> i64 {
-        #[allow(deprecated)]
-        let next = if self.preserve_dict_id {
-            #[allow(deprecated)]
-            field.dict_id().expect("no dict_id in field")
-        } else {
-            self.dict_ids
-                .last()
-                .copied()
-                .map(|i| i + 1)
-                .unwrap_or_default()
-        };
+    /// Record and return the next dictionary ID.
+    pub fn next_dict_id(&mut self) -> i64 {
+        let next = self
+            .dict_ids
+            .last()
+            .copied()
+            .map(|i| i + 1)
+            .unwrap_or_default();
 
         self.dict_ids.push(next);
         next
@@ -879,6 +877,7 @@ impl DictionaryTracker {
     /// * If the tracker has not been configured to error on replacement or this dictionary
     ///   has never been seen before, return `Ok(true)` to indicate that the dictionary was just
     ///   inserted.
+    #[deprecated(since = "56.1.0", note = "Use `insert_column` instead")]
     pub fn insert(&mut self, dict_id: i64, column: &ArrayRef) -> Result<bool, ArrowError> {
         let dict_data = column.to_data();
         let dict_values = &dict_data.child_data()[0];
@@ -907,6 +906,124 @@ impl DictionaryTracker {
         self.written.insert(dict_id, dict_data);
         Ok(true)
     }
+
+    /// Keep track of the dictionary with the given ID and values. The return
+    /// value indicates what, if any, update to the internal map took place
+    /// and how it should be interpreted based on the `dict_handling` parameter.
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(DictionaryUpdate::New)` - If the dictionary was not previously written
+    /// * `Ok(DictionaryUpdate::None)` - If an identical dictionary was already written
+    /// * `Ok(DictionaryUpdate::Replaced)` - If the dictionary was previously written
+    ///   with different data, or the new data is a delta of the existing data but
+    ///   `dict_handling` is set to `DictionaryHandling::Resend`
+    /// * `Ok(DictionaryUpdate::Delta)` - If the new data is a delta of the existing
+    ///   data and `dict_handling` is set to `DictionaryHandling::Delta`
+    /// * `Err(e)` - If the dictionary would have to be replaced (see `Replaced`)
+    ///   while `error_on_replacement` is set to `true`.
+    pub fn insert_column(
+        &mut self,
+        dict_id: i64,
+        column: &ArrayRef,
+        dict_handling: DictionaryHandling,
+    ) -> Result<DictionaryUpdate, ArrowError> {
+        let new_data = column.to_data();
+        let new_values = &new_data.child_data()[0];
+
+        // If there is no existing dictionary with this ID, we always insert
+        let Some(old) = self.written.get(&dict_id) else {
+            self.written.insert(dict_id, new_data);
+            return Ok(DictionaryUpdate::New);
+        };
+
+        // Fast path - If the array data points to the same buffer as the
+        // existing then they're the same.
+        let old_values = &old.child_data()[0];
+        if ArrayData::ptr_eq(old_values, new_values) {
+            return Ok(DictionaryUpdate::None);
+        }
+
+        // Slow path - Compare the dictionaries value by value
+        let comparison = compare_dictionaries(old_values, new_values);
+        if matches!(comparison, DictionaryComparison::Equal) {
+            return Ok(DictionaryUpdate::None);
+        }
+
+        const REPLACEMENT_ERROR: &str = "Dictionary replacement detected when writing IPC file format. \
+                 Arrow IPC files only support a single dictionary for a given field \
+                 across all batches.";
+
+        match comparison {
+            DictionaryComparison::NotEqual => {
+                if self.error_on_replacement {
+                    return Err(ArrowError::InvalidArgumentError(
+                        REPLACEMENT_ERROR.to_string(),
+                    ));
+                }
+
+                self.written.insert(dict_id, new_data);
+                Ok(DictionaryUpdate::Replaced)
+            }
+            DictionaryComparison::Delta => match dict_handling {
+                DictionaryHandling::Resend => {
+                    if self.error_on_replacement {
+                        return Err(ArrowError::InvalidArgumentError(
+                            REPLACEMENT_ERROR.to_string(),
+                        ));
+                    }
+
+                    self.written.insert(dict_id, new_data);
+                    Ok(DictionaryUpdate::Replaced)
+                }
+                DictionaryHandling::Delta => {
+                    let delta =
+                        new_values.slice(old_values.len(), new_values.len() - old_values.len());
+                    self.written.insert(dict_id, new_data);
+                    Ok(DictionaryUpdate::Delta(delta))
+                }
+            },
+            DictionaryComparison::Equal => unreachable!("Already checked equal case"),
+        }
+    }
+}
+
+/// Describes how two dictionary arrays compare to each other.
+#[derive(Debug, Clone)]
+enum DictionaryComparison {
+    /// Neither a delta, nor an exact match
+    NotEqual,
+    /// Exact element-wise match
+    Equal,
+    /// The second array is a dictionary delta of the first, meaning the first
+    /// is a strict prefix of the second.
+    Delta,
+}
+
+/// Compares the `old` and `new` dictionary values and returns a [`DictionaryComparison`].
+fn compare_dictionaries(old: &ArrayData, new: &ArrayData) -> DictionaryComparison {
+    // Check for exact match
+    let existing_len = old.len();
+    let new_len = new.len();
+    if existing_len == new_len {
+        if *old == *new {
+            return DictionaryComparison::Equal;
+        } else {
+            return DictionaryComparison::NotEqual;
+        }
+    }
+
+    // Can't be a delta if the new is shorter than the existing
+    if new_len < existing_len {
+        return DictionaryComparison::NotEqual;
+    }
+
+    // Check for delta
+    if new.slice(0, existing_len) == *old {
+        return DictionaryComparison::Delta;
+    }
+
+    DictionaryComparison::NotEqual
 }
 
 /// Arrow File Writer
@@ -952,6 +1069,8 @@ pub struct FileWriter<W> {
     custom_metadata: HashMap<String, String>,
 
     data_gen: IpcDataGenerator,
+
+    compression_context: CompressionContext,
 }
 
 impl<W: Write> FileWriter<BufWriter<W>> {
@@ -995,11 +1114,7 @@ impl<W: Write> FileWriter<W> {
         writer.write_all(&super::ARROW_MAGIC)?;
         writer.write_all(&PADDING[..pad_len])?;
         // write the schema, set the written bytes to the schema + header
-        #[allow(deprecated)]
-        let preserve_dict_id = write_options.preserve_dict_id;
-        #[allow(deprecated)]
-        let mut dictionary_tracker =
-            DictionaryTracker::new_with_preserve_dict_id(true, preserve_dict_id);
+        let mut dictionary_tracker = DictionaryTracker::new(true);
         let encoded_message = data_gen.schema_to_bytes_with_dictionary_tracker(
             schema,
             &mut dictionary_tracker,
@@ -1017,6 +1132,7 @@ impl<W: Write> FileWriter<W> {
             dictionary_tracker,
             custom_metadata: HashMap::new(),
             data_gen,
+            compression_context: CompressionContext::default(),
         })
     }
 
@@ -1033,10 +1149,11 @@ impl<W: Write> FileWriter<W> {
             ));
         }
 
-        let (encoded_dictionaries, encoded_message) = self.data_gen.encoded_batch(
+        let (encoded_dictionaries, encoded_message) = self.data_gen.encode(
             batch,
             &mut self.dictionary_tracker,
             &self.write_options,
+            &mut self.compression_context,
         )?;
 
         for encoded_dictionary in encoded_dictionaries {
@@ -1049,6 +1166,7 @@ impl<W: Write> FileWriter<W> {
         }
 
         let (meta, data) = write_message(&mut self.writer, encoded_message, &self.write_options)?;
+
         // add a record block for the footer
         let block = crate::Block::new(
             self.block_offsets as i64,
@@ -1074,11 +1192,7 @@ impl<W: Write> FileWriter<W> {
         let mut fbb = FlatBufferBuilder::new();
         let dictionaries = fbb.create_vector(&self.dictionary_blocks);
         let record_batches = fbb.create_vector(&self.record_blocks);
-        #[allow(deprecated)]
-        let preserve_dict_id = self.write_options.preserve_dict_id;
-        #[allow(deprecated)]
-        let mut dictionary_tracker =
-            DictionaryTracker::new_with_preserve_dict_id(true, preserve_dict_id);
+        let mut dictionary_tracker = DictionaryTracker::new(true);
         let schema = IpcSchemaEncoder::new()
             .with_dictionary_tracker(&mut dictionary_tracker)
             .schema_to_fb_offset(&mut fbb, &self.schema);
@@ -1168,7 +1282,7 @@ impl<W: Write> RecordBatchWriter for FileWriter<W> {
 ///
 /// * [`FileWriter`] for writing IPC Files
 ///
-/// # Example
+/// # Example - Basic usage
 /// ```
 /// # use arrow_array::record_batch;
 /// # use arrow_ipc::writer::StreamWriter;
@@ -1181,7 +1295,57 @@ impl<W: Write> RecordBatchWriter for FileWriter<W> {
 /// // When all batches are written, call finish to flush all buffers
 /// writer.finish().unwrap();
 /// ```
+/// # Example - Efficient delta dictionaries
+/// ```
+/// # use arrow_array::record_batch;
+/// # use arrow_ipc::writer::{StreamWriter, IpcWriteOptions};
+/// # use arrow_ipc::writer::DictionaryHandling;
+/// # use arrow_schema::{DataType, Field, Schema, SchemaRef};
+/// # use arrow_array::{
+/// #    builder::StringDictionaryBuilder, types::Int32Type, Array, ArrayRef, DictionaryArray,
+/// #    RecordBatch, StringArray,
+/// # };
+/// # use std::sync::Arc;
+///
+/// let schema = Arc::new(Schema::new(vec![Field::new(
+///    "col1",
+///    DataType::Dictionary(Box::from(DataType::Int32), Box::from(DataType::Utf8)),
+///    true,
+/// )]));
+///
+/// let mut builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+///
+/// // `finish_preserve_values` will keep the dictionary values along with their
+/// // key assignments so that they can be re-used in the next batch.
+/// builder.append("a").unwrap();
+/// builder.append("b").unwrap();
+/// let array1 = builder.finish_preserve_values();
+/// let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(array1) as ArrayRef]).unwrap();
+///
+/// // In this batch, 'a' will have the same dictionary key as 'a' in the previous batch,
+/// // and 'd' will take the next available key.
+/// builder.append("a").unwrap();
+/// builder.append("d").unwrap();
+/// let array2 = builder.finish_preserve_values();
+/// let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(array2) as ArrayRef]).unwrap();
+///
+/// let mut stream = vec![];
+/// // You must set `.with_dictionary_handling(DictionaryHandling::Delta)` to
+/// // enable delta dictionaries in the writer
+/// let options = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+/// let mut writer = StreamWriter::try_new_with_options(&mut stream, &schema, options).unwrap();
 ///
+/// // When writing the first batch, a dictionary message with 'a' and 'b' will be written
+/// // prior to the record batch.
+/// writer.write(&batch1).unwrap();
+/// // With the second batch only a delta dictionary with 'd' will be written
+/// // prior to the record batch. This is only possible with `finish_preserve_values`.
+/// // Without it, 'a' and 'd' in this batch would have different keys than the
+/// // first batch and so we'd have to send a replacement dictionary with new keys
+/// // for both.
+/// writer.write(&batch2).unwrap();
+/// writer.finish().unwrap();
+/// ```
 /// [IPC Streaming Format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
 pub struct StreamWriter<W> {
     /// The object to write to
@@ -1194,6 +1358,8 @@ pub struct StreamWriter<W> {
     dictionary_tracker: DictionaryTracker,
 
     data_gen: IpcDataGenerator,
+
+    compression_context: CompressionContext,
 }
 
 impl<W: Write> StreamWriter<BufWriter<W>> {
@@ -1229,11 +1395,7 @@ impl<W: Write> StreamWriter<W> {
         write_options: IpcWriteOptions,
     ) -> Result<Self, ArrowError> {
         let data_gen = IpcDataGenerator::default();
-        #[allow(deprecated)]
-        let preserve_dict_id = write_options.preserve_dict_id;
-        #[allow(deprecated)]
-        let mut dictionary_tracker =
-            DictionaryTracker::new_with_preserve_dict_id(false, preserve_dict_id);
+        let mut dictionary_tracker = DictionaryTracker::new(false);
 
         // write the schema, set the written bytes to the schema
         let encoded_message = data_gen.schema_to_bytes_with_dictionary_tracker(
@@ -1248,6 +1410,7 @@ impl<W: Write> StreamWriter<W> {
             finished: false,
             dictionary_tracker,
             data_gen,
+            compression_context: CompressionContext::default(),
         })
     }
 
@@ -1261,7 +1424,12 @@ impl<W: Write> StreamWriter<W> {
 
         let (encoded_dictionaries, encoded_message) = self
             .data_gen
-            .encoded_batch(batch, &mut self.dictionary_tracker, &self.write_options)
+            .encode(
+                batch,
+                &mut self.dictionary_tracker,
+                &self.write_options,
+                &mut self.compression_context,
+            )
             .expect("StreamWriter is configured to not error on dictionary replacement");
 
         for encoded_dictionary in encoded_dictionaries {
@@ -1561,6 +1729,58 @@ fn get_list_array_buffers<O: OffsetSizeTrait>(data: &ArrayData) -> (Buffer, Arra
     (offsets, child_data)
 }
 
+/// Returns the offsets, sizes, and child data buffers for a ListView array.
+///
+/// Unlike List arrays, ListView arrays store both offsets and sizes explicitly,
+/// and offsets can be non-monotonic. When slicing, we simply pass through the
+/// offsets and sizes without re-encoding, and do not slice the child data.
+fn get_list_view_array_buffers<O: OffsetSizeTrait>(
+    data: &ArrayData,
+) -> (Buffer, Buffer, ArrayData) {
+    if data.is_empty() {
+        return (
+            MutableBuffer::new(0).into(),
+            MutableBuffer::new(0).into(),
+            data.child_data()[0].slice(0, 0),
+        );
+    }
+
+    let offsets = &data.buffers()[0];
+    let sizes = &data.buffers()[1];
+
+    let element_size = std::mem::size_of::<O>();
+    let offsets_slice =
+        offsets.slice_with_length(data.offset() * element_size, data.len() * element_size);
+    let sizes_slice =
+        sizes.slice_with_length(data.offset() * element_size, data.len() * element_size);
+
+    let child_data = data.child_data()[0].clone();
+
+    (offsets_slice, sizes_slice, child_data)
+}
+
+/// Returns the bytes of the first buffer of `array_data`, truncated to the
+/// array's offset and length when `buffer_need_truncate` reports it is needed.
+///
+/// It is used for the views buffer of BinaryView/Utf8View arrays as well as for
+/// arrays with a single values buffer. This helps reduce the encoded size of
+/// sliced arrays.
+fn get_or_truncate_buffer(array_data: &ArrayData) -> &[u8] {
+    let buffer = &array_data.buffers()[0];
+    let layout = layout(array_data.data_type());
+    let spec = &layout.buffers[0];
+
+    let byte_width = get_buffer_element_width(spec);
+    let min_length = array_data.len() * byte_width;
+    if buffer_need_truncate(array_data.offset(), buffer, spec, min_length) {
+        let byte_offset = array_data.offset() * byte_width;
+        let buffer_length = min(min_length, buffer.len() - byte_offset);
+        &buffer.as_slice()[byte_offset..(byte_offset + buffer_length)]
+    } else {
+        buffer.as_slice()
+    }
+}
+
 /// Write array data to a vector of bytes
 #[allow(clippy::too_many_arguments)]
 fn write_array_data(
@@ -1572,6 +1792,7 @@ fn write_array_data(
     num_rows: usize,
     null_count: usize,
     compression_codec: Option<CompressionCodec>,
+    compression_context: &mut CompressionContext,
     write_options: &IpcWriteOptions,
 ) -> Result<i64, ArrowError> {
     let mut offset = offset;
@@ -1601,6 +1822,7 @@ fn write_array_data(
             arrow_data,
             offset,
             compression_codec,
+            compression_context,
             write_options.alignment,
         )?;
     }
@@ -1615,6 +1837,7 @@ fn write_array_data(
                 arrow_data,
                 offset,
                 compression_codec,
+                compression_context,
                 write_options.alignment,
             )?;
         }
@@ -1625,13 +1848,25 @@ fn write_array_data(
         // Current implementation just serialize the raw arrays as given and not try to optimize anything.
         // If users wants to "compact" the arrays prior to sending them over IPC,
         // they should consider the gc API suggested in #5513
-        for buffer in array_data.buffers() {
+        let views = get_or_truncate_buffer(array_data);
+        offset = write_buffer(
+            views,
+            buffers,
+            arrow_data,
+            offset,
+            compression_codec,
+            compression_context,
+            write_options.alignment,
+        )?;
+
+        for buffer in array_data.buffers().iter().skip(1) {
             offset = write_buffer(
                 buffer.as_slice(),
                 buffers,
                 arrow_data,
                 offset,
                 compression_codec,
+                compression_context,
                 write_options.alignment,
             )?;
         }
@@ -1644,6 +1879,7 @@ fn write_array_data(
                 arrow_data,
                 offset,
                 compression_codec,
+                compression_context,
                 write_options.alignment,
             )?;
         }
@@ -1657,25 +1893,14 @@ fn write_array_data(
         // Truncate values
         assert_eq!(array_data.buffers().len(), 1);
 
-        let buffer = &array_data.buffers()[0];
-        let layout = layout(data_type);
-        let spec = &layout.buffers[0];
-
-        let byte_width = get_buffer_element_width(spec);
-        let min_length = array_data.len() * byte_width;
-        let buffer_slice = if buffer_need_truncate(array_data.offset(), buffer, spec, min_length) {
-            let byte_offset = array_data.offset() * byte_width;
-            let buffer_length = min(min_length, buffer.len() - byte_offset);
-            &buffer.as_slice()[byte_offset..(byte_offset + buffer_length)]
-        } else {
-            buffer.as_slice()
-        };
+        let buffer = get_or_truncate_buffer(array_data);
         offset = write_buffer(
-            buffer_slice,
+            buffer,
             buffers,
             arrow_data,
             offset,
             compression_codec,
+            compression_context,
             write_options.alignment,
         )?;
     } else if matches!(data_type, DataType::Boolean) {
@@ -1691,6 +1916,7 @@ fn write_array_data(
             arrow_data,
             offset,
             compression_codec,
+            compression_context,
             write_options.alignment,
         )?;
     } else if matches!(
@@ -1713,6 +1939,7 @@ fn write_array_data(
             arrow_data,
             offset,
             compression_codec,
+            compression_context,
             write_options.alignment,
         )?;
         offset = write_array_data(
@@ -1724,6 +1951,53 @@ fn write_array_data(
             sliced_child_data.len(),
             sliced_child_data.null_count(),
             compression_codec,
+            compression_context,
+            write_options,
+        )?;
+        return Ok(offset);
+    } else if matches!(
+        data_type,
+        DataType::ListView(_) | DataType::LargeListView(_)
+    ) {
+        assert_eq!(array_data.buffers().len(), 2); // offsets + sizes
+        assert_eq!(array_data.child_data().len(), 1);
+
+        let (offsets, sizes, child_data) = match data_type {
+            DataType::ListView(_) => get_list_view_array_buffers::<i32>(array_data),
+            DataType::LargeListView(_) => get_list_view_array_buffers::<i64>(array_data),
+            _ => unreachable!(),
+        };
+
+        offset = write_buffer(
+            offsets.as_slice(),
+            buffers,
+            arrow_data,
+            offset,
+            compression_codec,
+            compression_context,
+            write_options.alignment,
+        )?;
+
+        offset = write_buffer(
+            sizes.as_slice(),
+            buffers,
+            arrow_data,
+            offset,
+            compression_codec,
+            compression_context,
+            write_options.alignment,
+        )?;
+
+        offset = write_array_data(
+            &child_data,
+            buffers,
+            arrow_data,
+            nodes,
+            offset,
+            child_data.len(),
+            child_data.null_count(),
+            compression_codec,
+            compression_context,
             write_options,
         )?;
         return Ok(offset);
@@ -1744,6 +2018,7 @@ fn write_array_data(
             child_data.len(),
             child_data.null_count(),
             compression_codec,
+            compression_context,
             write_options,
         )?;
         return Ok(offset);
@@ -1755,6 +2030,7 @@ fn write_array_data(
                 arrow_data,
                 offset,
                 compression_codec,
+                compression_context,
                 write_options.alignment,
             )?;
         }
@@ -1777,6 +2053,7 @@ fn write_array_data(
                     data_ref.len(),
                     data_ref.null_count(),
                     compression_codec,
+                    compression_context,
                     write_options,
                 )?;
             }
@@ -1794,6 +2071,7 @@ fn write_array_data(
                     data_ref.len(),
                     data_ref.null_count(),
                     compression_codec,
+                    compression_context,
                     write_options,
                 )?;
             }
@@ -1820,10 +2098,11 @@ fn write_buffer(
     arrow_data: &mut Vec<u8>,         // output stream
     offset: i64,                      // current output stream offset
     compression_codec: Option<CompressionCodec>,
+    compression_context: &mut CompressionContext,
     alignment: u8,
 ) -> Result<i64, ArrowError> {
     let len: i64 = match compression_codec {
-        Some(compressor) => compressor.compress_to_vec(buffer, arrow_data)?,
+        Some(compressor) => compressor.compress_to_vec(buffer, arrow_data, compression_context)?,
         None => {
             arrow_data.extend_from_slice(buffer);
             buffer.len()
@@ -1862,16 +2141,19 @@ mod tests {
     use arrow_array::builder::Float32Builder;
     use arrow_array::builder::Int64Builder;
     use arrow_array::builder::MapBuilder;
+    use arrow_array::builder::StringViewBuilder;
     use arrow_array::builder::UnionBuilder;
-    use arrow_array::builder::{GenericListBuilder, ListBuilder, StringBuilder};
+    use arrow_array::builder::{
+        GenericListBuilder, GenericListViewBuilder, ListBuilder, StringBuilder,
+    };
     use arrow_array::builder::{PrimitiveRunBuilder, UInt32Builder};
     use arrow_array::types::*;
     use arrow_buffer::ScalarBuffer;
 
+    use crate::MetadataVersion;
     use crate::convert::fb_to_schema;
     use crate::reader::*;
     use crate::root_as_footer;
-    use crate::MetadataVersion;
 
     use super::*;
 
@@ -2141,7 +2423,7 @@ mod tests {
 
         // Dict field with id 2
         #[allow(deprecated)]
-        let dctfield = Field::new_dict("dict", array.data_type().clone(), false, 2, false);
+        let dctfield = Field::new_dict("dict", array.data_type().clone(), false, 0, false);
         let union_fields = [(0, Arc::new(dctfield))].into_iter().collect();
 
         let types = [0, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
@@ -2155,17 +2437,28 @@ mod tests {
             false,
         )]));
 
+        let r#gen = IpcDataGenerator::default();
+        let mut dict_tracker = DictionaryTracker::new(false);
+        r#gen.schema_to_bytes_with_dictionary_tracker(
+            &schema,
+            &mut dict_tracker,
+            &IpcWriteOptions::default(),
+        );
+
         let batch = RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap();
 
-        let gen = IpcDataGenerator {};
-        #[allow(deprecated)]
-        let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true);
-        gen.encoded_batch(&batch, &mut dict_tracker, &Default::default())
+        r#gen
+            .encode(
+                &batch,
+                &mut dict_tracker,
+                &Default::default(),
+                &mut Default::default(),
+            )
             .unwrap();
 
         // The encoder will assign dict IDs itself to ensure uniqueness and ignore the dict ID in the schema
         // so we expect the dict will be keyed to 0
-        assert!(dict_tracker.written.contains_key(&2));
+        assert!(dict_tracker.written.contains_key(&0));
     }
 
     #[test]
@@ -2193,15 +2486,26 @@ mod tests {
             false,
         )]));
 
+        let r#gen = IpcDataGenerator::default();
+        let mut dict_tracker = DictionaryTracker::new(false);
+        r#gen.schema_to_bytes_with_dictionary_tracker(
+            &schema,
+            &mut dict_tracker,
+            &IpcWriteOptions::default(),
+        );
+
         let batch = RecordBatch::try_new(schema, vec![struct_array]).unwrap();
 
-        let gen = IpcDataGenerator {};
-        #[allow(deprecated)]
-        let mut dict_tracker = DictionaryTracker::new_with_preserve_dict_id(false, true);
-        gen.encoded_batch(&batch, &mut dict_tracker, &Default::default())
+        r#gen
+            .encode(
+                &batch,
+                &mut dict_tracker,
+                &Default::default(),
+                &mut Default::default(),
+            )
             .unwrap();
 
-        assert!(dict_tracker.written.contains_key(&2));
+        assert!(dict_tracker.written.contains_key(&0));
     }
 
     fn write_union_file(options: IpcWriteOptions) {
@@ -2503,13 +2807,9 @@ mod tests {
 
     #[test]
     fn test_large_slice_uint32() {
-        ensure_roundtrip(Arc::new(UInt32Array::from_iter((0..8000).map(|i| {
-            if i % 2 == 0 {
-                Some(i)
-            } else {
-                None
-            }
-        }))));
+        ensure_roundtrip(Arc::new(UInt32Array::from_iter(
+            (0..8000).map(|i| if i % 2 == 0 { Some(i) } else { None }),
+        )));
     }
 
     #[test]
@@ -2517,7 +2817,7 @@ mod tests {
         let strings: Vec<_> = (0..8000)
             .map(|i| {
                 if i % 2 == 0 {
-                    Some(format!("value{}", i))
+                    Some(format!("value{i}"))
                 } else {
                     None
                 }
@@ -2739,6 +3039,40 @@ mod tests {
         ls.finish()
     }
 
+    fn generate_utf8view_list_data<O: OffsetSizeTrait>() -> GenericListArray<O> {
+        let mut ls = GenericListBuilder::<O, _>::new(StringViewBuilder::new());
+
+        for i in 0..100_000 {
+            for value in [
+                format!("value{}", i),
+                format!("value{}", i),
+                format!("value{}", i),
+            ] {
+                ls.values().append_value(&value);
+            }
+            ls.append(true)
+        }
+
+        ls.finish()
+    }
+
+    fn generate_string_list_data<O: OffsetSizeTrait>() -> GenericListArray<O> {
+        let mut ls = GenericListBuilder::<O, _>::new(StringBuilder::new());
+
+        for i in 0..100_000 {
+            for value in [
+                format!("value{}", i),
+                format!("value{}", i),
+                format!("value{}", i),
+            ] {
+                ls.values().append_value(&value);
+            }
+            ls.append(true)
+        }
+
+        ls.finish()
+    }
+
     fn generate_nested_list_data<O: OffsetSizeTrait>() -> GenericListArray<O> {
         let mut ls =
             GenericListBuilder::<O, _>::new(GenericListBuilder::<O, _>::new(UInt32Builder::new()));
@@ -2898,6 +3232,49 @@ mod tests {
         roundtrip_ensure_sliced_smaller(in_batch, 1000);
     }
 
+    #[test]
+    fn encode_large_lists_non_zero_offset() {
+        let val_inner = Field::new_list_field(DataType::UInt32, true);
+        let val_list_field = Field::new("val", DataType::LargeList(Arc::new(val_inner)), false);
+        let schema = Arc::new(Schema::new(vec![val_list_field]));
+
+        let values = Arc::new(generate_list_data::<i64>());
+
+        check_sliced_list_array(schema, values);
+    }
+
+    #[test]
+    fn encode_large_lists_string_non_zero_offset() {
+        let val_inner = Field::new_list_field(DataType::Utf8, true);
+        let val_list_field = Field::new("val", DataType::LargeList(Arc::new(val_inner)), false);
+        let schema = Arc::new(Schema::new(vec![val_list_field]));
+
+        let values = Arc::new(generate_string_list_data::<i64>());
+
+        check_sliced_list_array(schema, values);
+    }
+
+    #[test]
+    fn encode_large_list_string_view_non_zero_offset() {
+        let val_inner = Field::new_list_field(DataType::Utf8View, true);
+        let val_list_field = Field::new("val", DataType::LargeList(Arc::new(val_inner)), false);
+        let schema = Arc::new(Schema::new(vec![val_list_field]));
+
+        let values = Arc::new(generate_utf8view_list_data::<i64>());
+
+        check_sliced_list_array(schema, values);
+    }
+
+    fn check_sliced_list_array(schema: Arc<Schema>, values: Arc<GenericListArray<i64>>) {
+        for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)] {
+            let in_batch = RecordBatch::try_new(schema.clone(), vec![values.clone()])
+                .unwrap()
+                .slice(offset, len);
+            let out_batch = deserialize_file(serialize_file(&in_batch));
+            assert_eq!(in_batch, out_batch);
+        }
+    }
+
     #[test]
     fn encode_nested_lists() {
         let inner_int = Arc::new(Field::new_list_field(DataType::UInt32, true));
@@ -2937,6 +3314,453 @@ mod tests {
         roundtrip_ensure_sliced_smaller(in_batch, 1000);
     }
 
+    fn generate_list_view_data<O: OffsetSizeTrait>() -> GenericListViewArray<O> {
+        let mut builder = GenericListViewBuilder::<O, _>::new(UInt32Builder::new());
+
+        for i in 0u32..100_000 {
+            if i.is_multiple_of(10_000) {
+                builder.append(false);
+                continue;
+            }
+            for value in [i, i, i] {
+                builder.values().append_value(value);
+            }
+            builder.append(true);
+        }
+
+        builder.finish()
+    }
+
+    #[test]
+    fn encode_list_view_arrays() {
+        let val_inner = Field::new_list_field(DataType::UInt32, true);
+        let val_field = Field::new("val", DataType::ListView(Arc::new(val_inner)), true);
+        let schema = Arc::new(Schema::new(vec![val_field]));
+
+        let values = Arc::new(generate_list_view_data::<i32>());
+
+        let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+        let out_batch = deserialize_file(serialize_file(&in_batch));
+        assert_eq!(in_batch, out_batch);
+    }
+
+    #[test]
+    fn encode_large_list_view_arrays() {
+        let val_inner = Field::new_list_field(DataType::UInt32, true);
+        let val_field = Field::new("val", DataType::LargeListView(Arc::new(val_inner)), true);
+        let schema = Arc::new(Schema::new(vec![val_field]));
+
+        let values = Arc::new(generate_list_view_data::<i64>());
+
+        let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+        let out_batch = deserialize_file(serialize_file(&in_batch));
+        assert_eq!(in_batch, out_batch);
+    }
+
+    #[test]
+    fn check_sliced_list_view_array() {
+        let inner = Field::new_list_field(DataType::UInt32, true);
+        let field = Field::new("val", DataType::ListView(Arc::new(inner)), true);
+        let schema = Arc::new(Schema::new(vec![field]));
+        let values = Arc::new(generate_list_view_data::<i32>());
+
+        for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)] {
+            let in_batch = RecordBatch::try_new(schema.clone(), vec![values.clone()])
+                .unwrap()
+                .slice(offset, len);
+            let out_batch = deserialize_file(serialize_file(&in_batch));
+            assert_eq!(in_batch, out_batch);
+        }
+    }
+
+    #[test]
+    fn check_sliced_large_list_view_array() {
+        let inner = Field::new_list_field(DataType::UInt32, true);
+        let field = Field::new("val", DataType::LargeListView(Arc::new(inner)), true);
+        let schema = Arc::new(Schema::new(vec![field]));
+        let values = Arc::new(generate_list_view_data::<i64>());
+
+        for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13, 13)] {
+            let in_batch = RecordBatch::try_new(schema.clone(), vec![values.clone()])
+                .unwrap()
+                .slice(offset, len);
+            let out_batch = deserialize_file(serialize_file(&in_batch));
+            assert_eq!(in_batch, out_batch);
+        }
+    }
+
+    fn generate_nested_list_view_data<O: OffsetSizeTrait>() -> GenericListViewArray<O> {
+        let inner_builder = UInt32Builder::new();
+        let middle_builder = GenericListViewBuilder::<O, _>::new(inner_builder);
+        let mut outer_builder = GenericListViewBuilder::<O, _>::new(middle_builder);
+
+        for i in 0u32..10_000 {
+            if i.is_multiple_of(1_000) {
+                outer_builder.append(false);
+                continue;
+            }
+
+            for _ in 0..3 {
+                for value in [i, i + 1, i + 2] {
+                    outer_builder.values().values().append_value(value);
+                }
+                outer_builder.values().append(true);
+            }
+            outer_builder.append(true);
+        }
+
+        outer_builder.finish()
+    }
+
+    #[test]
+    fn encode_nested_list_views() {
+        let inner_int = Arc::new(Field::new_list_field(DataType::UInt32, true));
+        let inner_list_field = Arc::new(Field::new_list_field(DataType::ListView(inner_int), true));
+        let list_field = Field::new("val", DataType::ListView(inner_list_field), true);
+        let schema = Arc::new(Schema::new(vec![list_field]));
+
+        let values = Arc::new(generate_nested_list_view_data::<i32>());
+
+        let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+        let out_batch = deserialize_file(serialize_file(&in_batch));
+        assert_eq!(in_batch, out_batch);
+    }
+
+    fn test_roundtrip_list_view_of_dict_impl<OffsetSize: OffsetSizeTrait, U: ArrowNativeType>(
+        list_data_type: DataType,
+        offsets: &[U; 5],
+        sizes: &[U; 4],
+    ) {
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+        let dict_data = dict_array.to_data();
+
+        let value_offsets = Buffer::from_slice_ref(offsets);
+        let value_sizes = Buffer::from_slice_ref(sizes);
+
+        let list_data = ArrayData::builder(list_data_type)
+            .len(4)
+            .add_buffer(value_offsets)
+            .add_buffer(value_sizes)
+            .add_child_data(dict_data)
+            .build()
+            .unwrap();
+        let list_view_array = GenericListViewArray::<OffsetSize>::from(list_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "f1",
+            list_view_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(list_view_array)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_list_view_of_dict() {
+        #[allow(deprecated)]
+        let list_data_type = DataType::ListView(Arc::new(Field::new_dict(
+            "item",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            true,
+            1,
+            false,
+        )));
+        let offsets: &[i32; 5] = &[0, 2, 4, 4, 7];
+        let sizes: &[i32; 4] = &[2, 2, 0, 3];
+        test_roundtrip_list_view_of_dict_impl::<i32, i32>(list_data_type, offsets, sizes);
+    }
+
+    #[test]
+    fn test_roundtrip_large_list_view_of_dict() {
+        #[allow(deprecated)]
+        let list_data_type = DataType::LargeListView(Arc::new(Field::new_dict(
+            "item",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            true,
+            2,
+            false,
+        )));
+        let offsets: &[i64; 5] = &[0, 2, 4, 4, 7];
+        let sizes: &[i64; 4] = &[2, 2, 0, 3];
+        test_roundtrip_list_view_of_dict_impl::<i64, i64>(list_data_type, offsets, sizes);
+    }
+
+    #[test]
+    fn test_roundtrip_sliced_list_view_of_dict() {
+        #[allow(deprecated)]
+        let list_data_type = DataType::ListView(Arc::new(Field::new_dict(
+            "item",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            true,
+            3,
+            false,
+        )));
+
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2, 1, 0, 3, 2, 1]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+        let dict_data = dict_array.to_data();
+
+        let offsets: &[i32; 7] = &[0, 2, 4, 4, 7, 9, 12];
+        let sizes: &[i32; 6] = &[2, 2, 0, 3, 2, 3];
+        let value_offsets = Buffer::from_slice_ref(offsets);
+        let value_sizes = Buffer::from_slice_ref(sizes);
+
+        let list_data = ArrayData::builder(list_data_type)
+            .len(6)
+            .add_buffer(value_offsets)
+            .add_buffer(value_sizes)
+            .add_child_data(dict_data)
+            .build()
+            .unwrap();
+        let list_view_array = GenericListViewArray::<i32>::from(list_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "f1",
+            list_view_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(list_view_array)]).unwrap();
+
+        let sliced_batch = input_batch.slice(1, 4);
+
+        let output_batch = deserialize_file(serialize_file(&sliced_batch));
+        assert_eq!(sliced_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&sliced_batch));
+        assert_eq!(sliced_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_dense_union_of_dict() {
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+
+        #[allow(deprecated)]
+        let dict_field = Arc::new(Field::new_dict(
+            "dict",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            true,
+            1,
+            false,
+        ));
+        let int_field = Arc::new(Field::new("int", DataType::Int32, false));
+        let union_fields = UnionFields::try_new(vec![0, 1], vec![dict_field, int_field]).unwrap();
+
+        let types = ScalarBuffer::from(vec![0i8, 0, 1, 0, 1, 0, 0]);
+        let offsets = ScalarBuffer::from(vec![0i32, 1, 0, 2, 1, 3, 4]);
+
+        let int_array = Int32Array::from(vec![100, 200]);
+
+        let union = UnionArray::try_new(
+            union_fields.clone(),
+            types,
+            Some(offsets),
+            vec![Arc::new(dict_array), Arc::new(int_array)],
+        )
+        .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "union",
+            DataType::Union(union_fields, UnionMode::Dense),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_sparse_union_of_dict() {
+        let values = StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]);
+        let keys = Int32Array::from_iter_values([0, 0, 1, 2, 3, 0, 2]);
+        let dict_array = DictionaryArray::new(keys, Arc::new(values));
+
+        #[allow(deprecated)]
+        let dict_field = Arc::new(Field::new_dict(
+            "dict",
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            true,
+            2,
+            false,
+        ));
+        let int_field = Arc::new(Field::new("int", DataType::Int32, false));
+        let union_fields = UnionFields::try_new(vec![0, 1], vec![dict_field, int_field]).unwrap();
+
+        let types = ScalarBuffer::from(vec![0i8, 0, 1, 0, 1, 0, 0]);
+
+        let int_array = Int32Array::from(vec![0, 0, 100, 0, 200, 0, 0]);
+
+        let union = UnionArray::try_new(
+            union_fields.clone(),
+            types,
+            None,
+            vec![Arc::new(dict_array), Arc::new(int_array)],
+        )
+        .unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "union",
+            DataType::Union(union_fields, UnionMode::Sparse),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(union)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_map_with_dict_keys() {
+        // Building a map array is a bit involved. We first build a struct array that has a key and
+        // value field and then use that to build the actual map array.
+        let key_values = StringArray::from(vec!["key_a", "key_b", "key_c"]);
+        let keys = Int32Array::from_iter_values([0, 1, 2, 0, 1, 0]);
+        let dict_keys = DictionaryArray::new(keys, Arc::new(key_values));
+
+        let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);
+
+        #[allow(deprecated)]
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new_dict(
+                        "key",
+                        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                        false,
+                        1,
+                        false,
+                    ),
+                    Field::new("value", DataType::Int32, true),
+                ]
+                .into(),
+            ),
+            false,
+        ));
+
+        let entries = StructArray::from(vec![
+            (
+                Arc::new(Field::new(
+                    "key",
+                    DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                    false,
+                )),
+                Arc::new(dict_keys) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("value", DataType::Int32, true)),
+                Arc::new(values) as ArrayRef,
+            ),
+        ]);
+
+        let offsets = Buffer::from_slice_ref([0i32, 2, 4, 6]);
+
+        let map_data = ArrayData::builder(DataType::Map(entries_field, false))
+            .len(3)
+            .add_buffer(offsets)
+            .add_child_data(entries.into_data())
+            .build()
+            .unwrap();
+        let map_array = MapArray::from(map_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "map",
+            map_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(map_array)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
+    #[test]
+    fn test_roundtrip_map_with_dict_values() {
+        // Building a map array is a bit involved. We first build a struct array that has a key and
+        // value field and then use that to build the actual map array.
+        let keys = StringArray::from(vec!["a", "b", "c", "d", "e", "f"]);
+
+        let value_values = StringArray::from(vec!["val_x", "val_y", "val_z"]);
+        let value_keys = Int32Array::from_iter_values([0, 1, 2, 0, 1, 0]);
+        let dict_values = DictionaryArray::new(value_keys, Arc::new(value_values));
+
+        #[allow(deprecated)]
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new_dict(
+                        "value",
+                        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                        true,
+                        2,
+                        false,
+                    ),
+                ]
+                .into(),
+            ),
+            false,
+        ));
+
+        let entries = StructArray::from(vec![
+            (
+                Arc::new(Field::new("key", DataType::Utf8, false)),
+                Arc::new(keys) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new(
+                    "value",
+                    DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                    true,
+                )),
+                Arc::new(dict_values) as ArrayRef,
+            ),
+        ]);
+
+        let offsets = Buffer::from_slice_ref([0i32, 2, 4, 6]);
+
+        let map_data = ArrayData::builder(DataType::Map(entries_field, false))
+            .len(3)
+            .add_buffer(offsets)
+            .add_child_data(entries.into_data())
+            .build()
+            .unwrap();
+        let map_array = MapArray::from(map_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "map",
+            map_array.data_type().clone(),
+            false,
+        )]));
+        let input_batch = RecordBatch::try_new(schema, vec![Arc::new(map_array)]).unwrap();
+
+        let output_batch = deserialize_file(serialize_file(&input_batch));
+        assert_eq!(input_batch, output_batch);
+
+        let output_batch = deserialize_stream(serialize_stream(&input_batch));
+        assert_eq!(input_batch, output_batch);
+    }
+
     #[test]
     fn test_decimal128_alignment16_is_sufficient() {
         const IPC_ALIGNMENT: usize = 16;
@@ -2951,7 +3775,7 @@ mod tests {
             let mut fields = Vec::new();
             let mut arrays = Vec::new();
             for i in 0..num_cols {
-                let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true);
+                let field = Field::new(format!("col_{i}"), DataType::Decimal128(38, 10), true);
                 let array = Decimal128Array::from(vec![num_cols as i128; num_rows]);
                 fields.push(field);
                 arrays.push(Arc::new(array) as Arc<dyn Array>);
@@ -3006,7 +3830,7 @@ mod tests {
         let mut fields = Vec::new();
         let mut arrays = Vec::new();
         for i in 0..num_cols {
-            let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true);
+            let field = Field::new(format!("col_{i}"), DataType::Decimal128(38, 10), true);
             let array = Decimal128Array::from(vec![num_cols as i128; num_rows]);
             fields.push(field);
             arrays.push(Arc::new(array) as Arc<dyn Array>);
@@ -3029,7 +3853,6 @@ mod tests {
         let trailer_start = buffer.len() - 10;
         let footer_len = read_footer_length(buffer[trailer_start..].try_into().unwrap()).unwrap();
         let footer = root_as_footer(&buffer[trailer_start - footer_len..trailer_start]).unwrap();
-
         let schema = fb_to_schema(footer.schema().unwrap());
 
         // Importantly we set `require_alignment`, otherwise the error later is suppressed due to copying
@@ -3061,7 +3884,7 @@ mod tests {
         let mut fields = Vec::new();
         let options = IpcWriteOptions::try_new(8, false, MetadataVersion::V5).unwrap();
         for i in 0..num_cols {
-            let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true);
+            let field = Field::new(format!("col_{i}"), DataType::Decimal128(38, 10), true);
             fields.push(field);
         }
         let schema = Schema::new(fields);
@@ -3325,7 +4148,7 @@ mod tests {
             // Set metadata on both the schema and a field within it.
             let schema = Arc::new(
                 Schema::new(vec![
-                    Field::new("a", DataType::Int64, true).with_metadata(metadata.clone())
+                    Field::new("a", DataType::Int64, true).with_metadata(metadata.clone()),
                 ])
                 .with_metadata(metadata)
                 .clone(),
diff --git a/arrow-ipc/tests/test_delta_dictionary.rs b/arrow-ipc/tests/test_delta_dictionary.rs
new file mode 100644
index 000000000000..bd4fbf831214
--- /dev/null
+++ b/arrow-ipc/tests/test_delta_dictionary.rs
@@ -0,0 +1,590 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{
+    Array, ArrayRef, DictionaryArray, ListArray, RecordBatch, StringArray,
+    builder::{ListBuilder, PrimitiveDictionaryBuilder, StringDictionaryBuilder},
+};
+use arrow_ipc::reader::StreamReader;
+use arrow_ipc::writer::{DictionaryHandling, IpcWriteOptions, StreamWriter};
+use arrow_schema::{ArrowError, DataType, Field, Schema};
+use std::io::Cursor;
+use std::sync::Arc;
+
+#[test]
+fn test_dictionary_handling_option() {
+    // Test that DictionaryHandling can be set
+    let _options = IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+
+    // The configured value is not inspected here (the field is private), so nothing is asserted;
+    // this test only checks that the `with_dictionary_handling` builder API exists and compiles.
+}
+
+#[test]
+fn test_nested_dictionary_with_delta() -> Result<(), ArrowError> {
+    // Test writing nested dictionaries with delta option
+    // Create a simple nested structure for testing
+
+    // Create dictionary arrays
+    let mut dict_builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    dict_builder.append_value("hello");
+    dict_builder.append_value("world");
+    let dict_array = dict_builder.finish();
+
+    // Create a list of dictionaries
+    let mut list_builder =
+        ListBuilder::new(StringDictionaryBuilder::<arrow_array::types::Int32Type>::new());
+    list_builder.values().append_value("item1");
+    list_builder.values().append_value("item2");
+    list_builder.append(true);
+    list_builder.values().append_value("item3");
+    list_builder.append(true);
+    let list_array = list_builder.finish();
+
+    // Create schema with nested dictionaries
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("dict", dict_array.data_type().clone(), true),
+        Field::new("list_of_dict", list_array.data_type().clone(), true),
+    ]));
+
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(dict_array) as ArrayRef,
+            Arc::new(list_array) as ArrayRef,
+        ],
+    )?;
+
+    // Write with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        writer.write(&batch)?;
+        writer.finish()?;
+    }
+
+    // Read back and verify
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches: Result<Vec<_>, _> = reader.collect();
+    let read_batches = read_batches?;
+    assert_eq!(read_batches.len(), 1);
+
+    let read_batch = &read_batches[0];
+    assert_eq!(read_batch.num_columns(), 2);
+    assert_eq!(read_batch.num_rows(), 2);
+    let dict_array = read_batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+    let dict_values = dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    assert_eq!(dict_values.len(), 2);
+    assert_eq!(dict_values.value(0), "hello");
+    assert_eq!(dict_values.value(1), "world");
+    let list_array = read_batch
+        .column(1)
+        .as_any()
+        .downcast_ref::<ListArray>()
+        .unwrap();
+    let list_dict_array = list_array
+        .values()
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+    let list_values = list_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    assert_eq!(list_values.len(), 3);
+    assert_eq!(list_values.value(0), "item1");
+    assert_eq!(list_values.value(1), "item2");
+    assert_eq!(list_values.value(2), "item3");
+
+    Ok(())
+}
+
+#[test]
+fn test_complex_nested_dictionaries() -> Result<(), ArrowError> {
+    // Test nested structure with dictionaries at multiple levels
+
+    // The batch has two columns: a top-level Dictionary and a List whose items are a Dictionary
+
+    // Dictionary field matching the list's item type (not referenced again in this test)
+    let _inner_dict_field = Field::new(
+        "inner_item",
+        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+        true,
+    );
+
+    // Create a list of dictionaries
+    let mut list_builder =
+        ListBuilder::new(StringDictionaryBuilder::<arrow_array::types::Int32Type>::new());
+
+    // First list
+    list_builder.values().append_value("inner_a");
+    list_builder.values().append_value("inner_b");
+    list_builder.append(true);
+
+    // Second list
+    list_builder.values().append_value("inner_c");
+    list_builder.values().append_value("inner_d");
+    list_builder.append(true);
+
+    let list_array = list_builder.finish();
+
+    // Create a separate top-level string dictionary column (it does not contain the list)
+    let mut outer_dict_builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    outer_dict_builder.append_value("outer_1");
+    outer_dict_builder.append_value("outer_2");
+    let outer_dict = outer_dict_builder.finish();
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("outer_dict", outer_dict.data_type().clone(), true),
+        Field::new("nested_list", list_array.data_type().clone(), true),
+    ]));
+
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(outer_dict) as ArrayRef,
+            Arc::new(list_array) as ArrayRef,
+        ],
+    )?;
+
+    // Write with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        writer.write(&batch)?;
+        writer.finish()?;
+    }
+
+    // Verify it writes without error
+    assert!(!buffer.is_empty());
+
+    // Read back and verify
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches: Result<Vec<_>, _> = reader.collect();
+    let read_batches = read_batches?;
+
+    assert_eq!(read_batches.len(), 1);
+
+    let read_batch = &read_batches[0];
+    assert_eq!(read_batch.num_columns(), 2);
+    assert_eq!(read_batch.num_rows(), 2);
+    let outer_dict_array = read_batch
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+    let outer_dict_values = outer_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    assert_eq!(outer_dict_values.len(), 2);
+    assert_eq!(outer_dict_values.value(0), "outer_1");
+    assert_eq!(outer_dict_values.value(1), "outer_2");
+
+    let nested_list_array = read_batch
+        .column(1)
+        .as_any()
+        .downcast_ref::<ListArray>()
+        .unwrap();
+    let nested_dict_array = nested_list_array
+        .values()
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+    let nested_dict_values = nested_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    assert_eq!(nested_dict_values.len(), 4);
+    assert_eq!(nested_dict_values.value(0), "inner_a");
+    assert_eq!(nested_dict_values.value(1), "inner_b");
+    assert_eq!(nested_dict_values.value(2), "inner_c");
+    assert_eq!(nested_dict_values.value(3), "inner_d");
+
+    Ok(())
+}
+
+#[test]
+fn test_multiple_dictionary_types() -> Result<(), ArrowError> {
+    // Test different dictionary value types in one schema
+
+    // String dictionary
+    let mut string_dict_builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    string_dict_builder.append_value("apple");
+    string_dict_builder.append_value("banana");
+    string_dict_builder.append_value("apple");
+    let string_dict = string_dict_builder.finish();
+
+    // Integer dictionary
+    let mut int_dict_builder = PrimitiveDictionaryBuilder::<
+        arrow_array::types::Int32Type,
+        arrow_array::types::Int64Type,
+    >::new();
+    int_dict_builder.append_value(100);
+    int_dict_builder.append_value(200);
+    int_dict_builder.append_value(100);
+    let int_dict = int_dict_builder.finish();
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("string_dict", string_dict.data_type().clone(), true),
+        Field::new("int_dict", int_dict.data_type().clone(), true),
+    ]));
+
+    let batch1 = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(string_dict) as ArrayRef,
+            Arc::new(int_dict) as ArrayRef,
+        ],
+    )?;
+
+    // Create second batch with extended dictionaries
+    let mut string_dict_builder2 = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    string_dict_builder2.append_value("apple");
+    string_dict_builder2.append_value("banana");
+    string_dict_builder2.append_value("cherry"); // new
+    string_dict_builder2.append_value("date"); // new
+    let string_dict2 = string_dict_builder2.finish();
+
+    let mut int_dict_builder2 = PrimitiveDictionaryBuilder::<
+        arrow_array::types::Int32Type,
+        arrow_array::types::Int64Type,
+    >::new();
+    int_dict_builder2.append_value(100);
+    int_dict_builder2.append_value(200);
+    int_dict_builder2.append_value(300); // new
+    int_dict_builder2.append_value(400); // new
+    let int_dict2 = int_dict_builder2.finish();
+
+    let batch2 = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(string_dict2) as ArrayRef,
+            Arc::new(int_dict2) as ArrayRef,
+        ],
+    )?;
+
+    // Write with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        writer.write(&batch1)?;
+        writer.write(&batch2)?;
+        writer.finish()?;
+    }
+
+    // Read back and verify
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches: Result<Vec<_>, _> = reader.collect();
+    let read_batches = read_batches?;
+
+    assert_eq!(read_batches.len(), 2);
+
+    // Check string dictionary in second batch
+    let read_batch2 = &read_batches[1];
+    let string_dict_array = read_batch2
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+
+    let string_values = string_dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    // Should have all 4 string values
+    assert_eq!(string_values.len(), 4);
+    assert_eq!(string_values.value(0), "apple");
+    assert_eq!(string_values.value(1), "banana");
+    assert_eq!(string_values.value(2), "cherry");
+    assert_eq!(string_values.value(3), "date");
+
+    Ok(())
+}
+
+#[test]
+fn test_empty_dictionary_delta() -> Result<(), ArrowError> {
+    // Edge case: the first dictionary written to the stream contains no values
+    type Keys = arrow_array::types::Int32Type;
+
+    // First batch: only null keys are appended, no dictionary values
+    let mut nulls_only = StringDictionaryBuilder::<Keys>::new();
+    nulls_only.append_null();
+    nulls_only.append_null();
+    let empty_dict = nulls_only.finish();
+
+    // Second batch: two distinct values
+    let mut with_values = StringDictionaryBuilder::<Keys>::new();
+    with_values.append_value("first");
+    with_values.append_value("second");
+    let filled_dict = with_values.finish();
+
+    // Both batches use the same single nullable dictionary column
+    let dict_type = empty_dict.data_type().clone();
+    let schema = Arc::new(Schema::new(vec![Field::new("dict", dict_type, true)]));
+
+    let first: ArrayRef = Arc::new(empty_dict);
+    let batch1 = RecordBatch::try_new(schema.clone(), vec![first])?;
+
+    let second: ArrayRef = Arc::new(filled_dict);
+    let batch2 = RecordBatch::try_new(schema.clone(), vec![second])?;
+
+    // Write both batches with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        for batch in [&batch1, &batch2] {
+            writer.write(batch)?;
+        }
+        writer.finish()?;
+    }
+
+    // Read the stream back
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches = reader.collect::<Result<Vec<_>, _>>()?;
+    assert_eq!(read_batches.len(), 2);
+
+    // The second batch must expose exactly the two values appended above
+    let dict_array = read_batches[1]
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<Keys>>()
+        .unwrap();
+
+    let dict_values = dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    let actual: Vec<&str> = (0..dict_values.len())
+        .map(|i| dict_values.value(i))
+        .collect();
+    assert_eq!(actual, ["first", "second"]);
+
+    Ok(())
+}
+
+#[test]
+fn test_delta_with_shared_dictionary_data() -> Result<(), ArrowError> {
+    // Test delta detection when the second dictionary starts with the first one's values
+
+    // Create initial dictionary
+    let mut builder = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    builder.append_value("alpha");
+    builder.append_value("beta");
+    let dict1 = builder.finish();
+
+    // Create a dictionary that extends the first one. The existing values are copied into a
+    // fresh builder (no buffers are shared between the two arrays), which simulates a common
+    // pattern where dictionaries are built incrementally.
+    // The downcast does not depend on the loop index, so it is done once up front.
+    let dict1_values = dict1
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+    let mut builder2 = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    // First, add the existing values
+    for i in 0..dict1_values.len() {
+        builder2.append_value(dict1_values.value(i));
+    }
+    // Then add new values
+    builder2.append_value("gamma");
+    builder2.append_value("delta");
+    let dict2 = builder2.finish();
+
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "dict",
+        dict1.data_type().clone(),
+        true,
+    )]));
+
+    let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(dict1) as ArrayRef])?;
+
+    let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(dict2) as ArrayRef])?;
+
+    // Write with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        writer.write(&batch1)?;
+        writer.write(&batch2)?;
+        writer.finish()?;
+    }
+
+    // Read back and verify the second batch still decodes to the full dictionary
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches: Result<Vec<_>, _> = reader.collect();
+    let read_batches = read_batches?;
+
+    assert_eq!(read_batches.len(), 2);
+
+    // Verify second batch has all values
+    let read_batch2 = &read_batches[1];
+    let dict_array = read_batch2
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+
+    let dict_values = dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    assert_eq!(dict_values.len(), 4);
+    assert_eq!(dict_values.value(0), "alpha");
+    assert_eq!(dict_values.value(1), "beta");
+    assert_eq!(dict_values.value(2), "gamma");
+    assert_eq!(dict_values.value(3), "delta");
+
+    Ok(())
+}
+
+#[test]
+fn test_large_dictionary_delta_performance() -> Result<(), ArrowError> {
+    // Test delta dictionary with large dictionaries to ensure efficiency
+
+    // Create a large initial dictionary
+    let mut builder1 = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    for i in 0..1000 {
+        builder1.append_value(format!("value_{i}"));
+    }
+    let dict1 = builder1.finish();
+
+    // Create extended dictionary
+    let mut builder2 = StringDictionaryBuilder::<arrow_array::types::Int32Type>::new();
+    for i in 0..1000 {
+        builder2.append_value(format!("value_{i}"));
+    }
+    // Add just a few new values
+    for i in 1000..1005 {
+        builder2.append_value(format!("value_{i}"));
+    }
+    let dict2 = builder2.finish();
+
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "dict",
+        dict1.data_type().clone(),
+        true,
+    )]));
+
+    let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(dict1) as ArrayRef])?;
+
+    let batch2 = RecordBatch::try_new(schema.clone(), vec![Arc::new(dict2) as ArrayRef])?;
+
+    // Write with delta dictionary handling
+    let mut buffer = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Delta);
+        let mut writer = StreamWriter::try_new_with_options(&mut buffer, &schema, options)?;
+        writer.write(&batch1)?;
+        writer.write(&batch2)?;
+        writer.finish()?;
+    }
+
+    // The buffer should be relatively small since we only sent 5 new values
+    // as delta instead of resending all 1005 values
+    let buffer_size = buffer.len();
+
+    // Write without delta for comparison
+    let mut buffer_no_delta = Vec::new();
+    {
+        let options =
+            IpcWriteOptions::default().with_dictionary_handling(DictionaryHandling::Resend);
+        let mut writer =
+            StreamWriter::try_new_with_options(&mut buffer_no_delta, &schema, options)?;
+        writer.write(&batch1)?;
+        writer.write(&batch2)?;
+        writer.finish()?;
+    }
+
+    let buffer_no_delta_size = buffer_no_delta.len();
+
+    // Report both sizes so a failure of the assertions below is easy to diagnose
+    println!("Delta buffer size: {buffer_size}");
+    println!("Non-delta buffer size: {buffer_no_delta_size}");
+
+    // Delta encoding should result in smaller output
+    assert!(
+        buffer_size < buffer_no_delta_size,
+        "Delta buffer ({buffer_size}) should be smaller than non-delta buffer ({buffer_no_delta_size})"
+    );
+
+    // The delta should save approximately the size of the second dictionary minus the delta
+    // We sent 5 values instead of 1005, saving ~99.5% on the second dictionary
+    // (the subtraction cannot underflow: the assertion above guarantees the ordering)
+    let savings_ratio = (buffer_no_delta_size - buffer_size) as f64 / buffer_no_delta_size as f64;
+    println!("Space savings: {:.1}%", savings_ratio * 100.0);
+    // We should save at least 30% (conservative estimate accounting for metadata overhead)
+    assert!(
+        savings_ratio > 0.30,
+        "Delta encoding should provide significant space savings (got {:.1}%)",
+        savings_ratio * 100.0
+    );
+
+    // Verify correctness
+    let reader = StreamReader::try_new(Cursor::new(buffer), None)?;
+    let read_batches: Result<Vec<_>, _> = reader.collect();
+    let read_batches = read_batches?;
+
+    assert_eq!(read_batches.len(), 2);
+
+    let read_batch2 = &read_batches[1];
+    let dict_array = read_batch2
+        .column(0)
+        .as_any()
+        .downcast_ref::<DictionaryArray<arrow_array::types::Int32Type>>()
+        .unwrap();
+
+    let dict_values = dict_array
+        .values()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap();
+
+    assert_eq!(dict_values.len(), 1005);
+    assert_eq!(dict_values.value(1004), "value_1004");
+
+    Ok(())
+}
diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index cae0e173b445..5fcde480eb6d 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -43,13 +43,15 @@ arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 half = { version = "2.1", default-features = false }
 indexmap = { version = "2.0", default-features = false, features = ["std"] }
-num = { version = "0.4", default-features = false, features = ["std"] }
-serde = { version = "1.0", default-features = false }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
+serde_core = { version = "1.0", default-features = false }
 serde_json = { version = "1.0", default-features = false, features = ["std"] }
 chrono = { workspace = true }
 lexical-core = { version = "1.0", default-features = false}
 memchr = "2.7.4"
-simdutf8 = "0.1.5"
+simdutf8 = { workspace = true }
+ryu = "1.0"
+itoa = "1.0"
 
 [dev-dependencies]
 flate2 = { version = "1", default-features = false, features = ["rust_backend"] }
@@ -57,9 +59,13 @@ serde = { version = "1.0", default-features = false, features = ["derive"] }
 futures = "0.3"
 tokio = { version = "1.27", default-features = false, features = ["io-util"] }
 bytes = "1.4"
-criterion = { version = "0.5", default-features = false }
+criterion = { workspace = true, default-features = false }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 
 [[bench]]
 name = "serde"
 harness = false
+
+[[bench]]
+name = "json-reader"
+harness = false
diff --git a/arrow-json/benches/json-reader.rs b/arrow-json/benches/json-reader.rs
new file mode 100644
index 000000000000..504839f8ffe2
--- /dev/null
+++ b/arrow-json/benches/json-reader.rs
@@ -0,0 +1,250 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_json::ReaderBuilder;
+use arrow_json::reader::Decoder;
+use arrow_schema::{DataType, Field, Schema};
+use criterion::{
+    BenchmarkId, Criterion, SamplingMode, Throughput, criterion_group, criterion_main,
+};
+use serde_json::{Map, Number, Value};
+use std::fmt::Write;
+use std::hint::black_box;
+use std::sync::Arc;
+
+const ROWS: usize = 1 << 17; // 128K rows
+const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
+
+const WIDE_FIELDS: usize = 64;
+const BINARY_BYTES: usize = 64;
+const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3
+
+/// Feeds `data` to `decoder` until all of it is consumed or no progress is made,
+fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) {
+    let mut remaining = data;
+    while !remaining.is_empty() {
+        match decoder.decode(black_box(remaining)).unwrap() {
+            0 => break,
+            consumed => remaining = &remaining[consumed..],
+        }
+        while let Some(_batch) = decoder.flush().unwrap() {}
+    }
+}
+
+/// Builds a schema with `field_count` fields named `f0..f{field_count-1}`.
+/// Every field is Int64 and non-nullable.
+fn build_schema(field_count: usize) -> Arc<Schema> {
+    let int64_field = |idx: usize| Field::new(format!("f{idx}"), DataType::Int64, false);
+    let fields: Vec<Field> = (0..field_count).map(int64_field).collect();
+    Arc::new(Schema::new(fields))
+}
+
+/// Builds a schema with one non-nullable Int64 field `f{i}` for each `i` in `indices`,
+/// in the order given by `indices`.
+fn build_projection_schema(indices: &[usize]) -> Arc<Schema> {
+    let int64_field = |idx: &usize| Field::new(format!("f{idx}"), DataType::Int64, false);
+    let fields: Vec<Field> = indices.iter().map(int64_field).collect();
+    Arc::new(Schema::new(fields))
+}
+
+/// Builds newline-delimited JSON objects with a "wide" schema of `fields` columns.
+///
+/// Column `f{i}` of row `r` holds the integer `r + i`. Example (rows=2, fields=3):
+/// {"f0":0,"f1":1,"f2":2}
+/// {"f0":1,"f1":2,"f2":3}
+fn build_wide_json(rows: usize, fields: usize) -> Vec<u8> {
+    // Capacity estimate: 12 bytes per `"fN":value` entry
+    let mut json = String::with_capacity(rows * fields * 12);
+    for row in 0..rows {
+        json.push('{');
+        for col in 0..fields {
+            // Every entry except the first is preceded by a comma
+            let sep = if col == 0 { "" } else { "," };
+            let value = row as i64 + col as i64;
+            write!(&mut json, "{sep}\"f{col}\":{value}").unwrap();
+        }
+        json.push_str("}\n");
+    }
+    json.into_bytes()
+}
+
+/// Mirrors `build_wide_json` but returns structured `serde_json::Value` objects.
+fn build_wide_values(rows: usize, fields: usize) -> Vec<Value> {
+    (0..rows)
+        .map(|row| {
+            let mut object = Map::with_capacity(fields);
+            for col in 0..fields {
+                // Cell value is the row index plus the column index
+                let number = Number::from((row + col) as i64);
+                object.insert(format!("f{col}"), Value::Number(number));
+            }
+            Value::Object(object)
+        })
+        .collect()
+}
+
+/// Benchmarks decoding wide newline-delimited JSON (`WIDE_FIELDS` Int64 columns per row).
+fn bench_decode_wide_object(c: &mut Criterion) {
+    let (json, schema) = (build_wide_json(ROWS, WIDE_FIELDS), build_schema(WIDE_FIELDS));
+    let decode_all = || {
+        let mut decoder = ReaderBuilder::new(schema.clone())
+            .with_batch_size(BATCH_SIZE)
+            .build_decoder()
+            .unwrap();
+        decode_and_flush(&mut decoder, &json);
+    };
+    c.bench_function("decode_wide_object_i64_json", |bencher| {
+        bencher.iter(&decode_all)
+    });
+}
+
+/// Benchmarks `Decoder::serialize` on pre-built `serde_json::Value` rows that have the
+/// same wide layout (`WIDE_FIELDS` Int64 columns per row).
+fn bench_serialize_wide_object(c: &mut Criterion) {
+    let (values, schema) = (build_wide_values(ROWS, WIDE_FIELDS), build_schema(WIDE_FIELDS));
+    let serialize_all = || {
+        let mut decoder = ReaderBuilder::new(schema.clone())
+            .with_batch_size(BATCH_SIZE)
+            .build_decoder()
+            .unwrap();
+        decoder.serialize(&values).unwrap();
+        while let Some(_batch) = decoder.flush().unwrap() {}
+    };
+    c.bench_function("decode_wide_object_i64_serialize", |bencher| {
+        bencher.iter(&serialize_all)
+    });
+}
+
+/// Registers benchmark `name`, which decodes `data` with a decoder built from `field`.
+fn bench_decode_binary(c: &mut Criterion, name: &str, data: &[u8], field: Arc<Field>) {
+    let decode_all = || {
+        let mut decoder = ReaderBuilder::new_with_field(field.clone())
+            .with_batch_size(BATCH_SIZE)
+            .build_decoder()
+            .unwrap();
+        decode_and_flush(&mut decoder, data);
+    };
+    c.bench_function(name, |bencher| bencher.iter(&decode_all));
+}
+
+/// Appends the two lowercase hex digits of `byte` to `buf`, high nibble first.
+#[inline]
+fn append_hex_byte(buf: &mut String, byte: u8) {
+    const DIGITS: &[u8; 16] = b"0123456789abcdef";
+    buf.extend([byte >> 4, byte & 0x0f].map(|nibble| DIGITS[nibble as usize] as char));
+}
+
+/// Builds `rows` lines, each a JSON string holding `bytes_per_row` hex-encoded bytes.
+fn build_hex_lines(rows: usize, bytes_per_row: usize) -> Vec<u8> {
+    // Per line: two hex digits per byte, two quotes and a newline
+    let mut lines = String::with_capacity(rows * (bytes_per_row * 2 + 3));
+    for row in 0..rows {
+        lines.push('"');
+        for offset in 0..bytes_per_row {
+            append_hex_byte(&mut lines, ((row + offset) & 0xff) as u8);
+        }
+        lines.push_str("\"\n");
+    }
+    lines.into_bytes()
+}
+
+/// Benchmarks decoding the same hex-encoded input into `Binary`, `FixedSizeBinary` and
+/// `BinaryView` columns.
+fn bench_binary_hex(c: &mut Criterion) {
+    let hex_json = build_hex_lines(ROWS, BINARY_BYTES);
+
+    // (benchmark name, target data type), run in this order
+    let cases = [
+        ("decode_binary_hex_json", DataType::Binary),
+        ("decode_fixed_binary_hex_json", DataType::FixedSizeBinary(BINARY_BYTES as i32)),
+        ("decode_binary_view_hex_json", DataType::BinaryView),
+    ];
+    for (name, data_type) in cases {
+        let field = Arc::new(Field::new("item", data_type, false));
+        bench_decode_binary(c, name, &hex_json, field);
+    }
+}
+
+/// Benchmarks decoding `data` with `schema` inside its own benchmark group named `name`.
+fn bench_decode_schema(c: &mut Criterion, name: &str, data: &[u8], schema: Arc<Schema>) {
+    let mut group = c.benchmark_group(name);
+    group.throughput(Throughput::Bytes(data.len() as u64));
+    group.sample_size(50).sampling_mode(SamplingMode::Flat);
+    group.warm_up_time(std::time::Duration::from_secs(2));
+    group.measurement_time(std::time::Duration::from_secs(5));
+    group.bench_function(BenchmarkId::from_parameter(ROWS), |bencher| {
+        bencher.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decode_and_flush(&mut decoder, data);
+        })
+    });
+    group.finish();
+}
+
+/// Builds `rows` identical newline-delimited JSON objects with fields
+/// `f0..f{total_fields-1}`, where field `f{i}` holds `i` zero-padded to 7 digits.
+fn build_wide_projection_json(rows: usize, total_fields: usize) -> Vec<u8> {
+    // Rows do not depend on the row index, so render one row and repeat it.
+    // Estimate: each field ~15 bytes ("fXX":VVVVVVV,), total ~15*100 + overhead
+    let mut row = String::with_capacity(total_fields * 15 + 10);
+    row.push('{');
+    for i in 0..total_fields {
+        if i > 0 {
+            row.push(',');
+        }
+        // Use fixed-width values for stable benchmarks: 7 digits.
+        // NOTE(review): zero-padded numbers are not strict JSON (RFC 8259 forbids leading
+        // zeros); this presumably relies on the decoder being lenient -- confirm.
+        write!(row, "\"f{i}\":{i:07}").unwrap();
+    }
+    row.push_str("}\n");
+    row.repeat(rows).into_bytes()
+}
+
+/// Wide projection workload: tests overhead of parsing unused fields.
+///
+/// One input with `WIDE_PROJECTION_TOTAL_FIELDS` fields per row is decoded twice:
+/// - `decode_wide_projection_full_json`: the schema names every field
+/// - `decode_wide_projection_narrow_json`: the schema names only three fields
+///
+/// Both benchmarks go through `bench_decode_schema`, so they share the same group
+/// settings and differ only in the schema handed to the decoder.
+fn bench_wide_projection(c: &mut Criterion) {
+    // Both runs decode exactly the same bytes
+    let json = build_wide_projection_json(ROWS, WIDE_PROJECTION_TOTAL_FIELDS);
+
+    // Full schema: f0..f{WIDE_PROJECTION_TOTAL_FIELDS-1}
+    let full = build_schema(WIDE_PROJECTION_TOTAL_FIELDS);
+    bench_decode_schema(c, "decode_wide_projection_full_json", &json, full);
+
+    // Projected schema: only 3 fields (f0, f10, f50) out of 100
+    let narrow = build_projection_schema(&[0, 10, 50]);
+    bench_decode_schema(c, "decode_wide_projection_narrow_json", &json, narrow);
+
+}
+
+criterion_group!(
+    benches,
+    bench_decode_wide_object,
+    bench_serialize_wide_object,
+    bench_binary_hex,
+    bench_wide_projection
+);
+criterion_main!(benches);
diff --git a/arrow-json/benches/serde.rs b/arrow-json/benches/serde.rs
index 7baaac458f86..282f2e7c76d0 100644
--- a/arrow-json/benches/serde.rs
+++ b/arrow-json/benches/serde.rs
@@ -18,16 +18,18 @@
 use arrow_json::ReaderBuilder;
 use arrow_schema::{DataType, Field, Schema};
 use criterion::*;
-use rand::{rng, Rng};
+use rand::{Rng, rng};
 use serde::Serialize;
 use std::sync::Arc;
 
+const ROWS: usize = 1 << 18;
+
 #[allow(deprecated)]
 fn do_bench<R: Serialize>(c: &mut Criterion, name: &str, rows: &[R], schema: &Schema) {
     let schema = Arc::new(schema.clone());
     c.bench_function(name, |b| {
         b.iter(|| {
-            let builder = ReaderBuilder::new(schema.clone()).with_batch_size(64);
+            let builder = ReaderBuilder::new(schema.clone()).with_batch_size(8192);
             let mut decoder = builder.build_decoder().unwrap();
             decoder.serialize(rows)
         })
@@ -37,26 +39,26 @@ fn do_bench<R: Serialize>(c: &mut Criterion, name: &str, rows: &[R], schema: &Sc
 fn criterion_benchmark(c: &mut Criterion) {
     let mut rng = rng();
     let schema = Schema::new(vec![Field::new("i32", DataType::Int32, false)]);
-    let v: Vec<i32> = (0..2048).map(|_| rng.random_range(0..10000)).collect();
+    let v: Vec<i32> = (0..ROWS).map(|_| rng.random_range(0..10000)).collect();
 
     do_bench(c, "small_i32", &v, &schema);
-    let v: Vec<i32> = (0..2048).map(|_| rng.random()).collect();
+    let v: Vec<i32> = (0..ROWS).map(|_| rng.random()).collect();
     do_bench(c, "large_i32", &v, &schema);
 
     let schema = Schema::new(vec![Field::new("i64", DataType::Int64, false)]);
-    let v: Vec<i64> = (0..2048).map(|_| rng.random_range(0..10000)).collect();
+    let v: Vec<i64> = (0..ROWS).map(|_| rng.random_range(0..10000)).collect();
     do_bench(c, "small_i64", &v, &schema);
-    let v: Vec<i64> = (0..2048)
+    let v: Vec<i64> = (0..ROWS)
         .map(|_| rng.random_range(0..i32::MAX as _))
         .collect();
     do_bench(c, "medium_i64", &v, &schema);
-    let v: Vec<i64> = (0..2048).map(|_| rng.random()).collect();
+    let v: Vec<i64> = (0..ROWS).map(|_| rng.random()).collect();
     do_bench(c, "large_i64", &v, &schema);
 
     let schema = Schema::new(vec![Field::new("f32", DataType::Float32, false)]);
-    let v: Vec<f32> = (0..2048).map(|_| rng.random_range(0.0..10000.)).collect();
+    let v: Vec<f32> = (0..ROWS).map(|_| rng.random_range(0.0..10000.)).collect();
     do_bench(c, "small_f32", &v, &schema);
-    let v: Vec<f32> = (0..2048).map(|_| rng.random_range(0.0..f32::MAX)).collect();
+    let v: Vec<f32> = (0..ROWS).map(|_| rng.random_range(0.0..f32::MAX)).collect();
     do_bench(c, "large_f32", &v, &schema);
 }
 
diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs
index 6d7ab4400b6e..1b18e0094708 100644
--- a/arrow-json/src/lib.rs
+++ b/arrow-json/src/lib.rs
@@ -20,18 +20,28 @@
 //! See the module level documentation for the
 //! [`reader`] and [`writer`] for usage examples.
 //!
-//! # Binary Data
+//! # Binary Data uses `Base16` Encoding
 //!
-//! As per [RFC7159] JSON cannot encode arbitrary binary data. A common approach to workaround
-//! this is to use a [binary-to-text encoding] scheme, such as base64, to encode the
-//! input data and then decode it on output.
+//! As per [RFC7159] JSON cannot encode arbitrary binary data. This crate works around that
+//! limitation by encoding/decoding binary data as a [hexadecimal] string (i.e.
+//! [`Base16` encoding]).
+//!
+//! Note that `Base16` only has 50% space efficiency (i.e., the encoded data is twice as large
+//! as the original). If that is an issue, we recommend converting binary data to/from a
+//! different encoding format such as `Base64` instead. See the following example for details.
+//!
+//! ## `Base64` Encoding Example
+//!
+//! [`Base64`] is a common [binary-to-text encoding] scheme with a space efficiency of 75%. The
+//! following example shows how to use the [`arrow_cast`] crate to encode binary data to `Base64`
+//! before converting it to JSON and how to decode it back.
 //!
 //! ```
 //! # use std::io::Cursor;
 //! # use std::sync::Arc;
 //! # use arrow_array::{BinaryArray, RecordBatch, StringArray};
 //! # use arrow_array::cast::AsArray;
-//! # use arrow_cast::base64::{b64_decode, b64_encode, BASE64_STANDARD};
+//! use arrow_cast::base64::{b64_decode, b64_encode, BASE64_STANDARD};
 //! # use arrow_json::{LineDelimitedWriter, ReaderBuilder};
 //! #
 //! // The data we want to write
@@ -61,13 +71,15 @@
 //!
 //! [RFC7159]: https://datatracker.ietf.org/doc/html/rfc7159#section-8.1
 //! [binary-to-text encoding]: https://en.wikipedia.org/wiki/Binary-to-text_encoding
-//!
+//! [hexadecimal]: https://en.wikipedia.org/wiki/Hexadecimal
+//! [`Base16` encoding]: https://en.wikipedia.org/wiki/Base16#Base16
+//! [`Base64`]: https://en.wikipedia.org/wiki/Base64
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![deny(rustdoc::broken_intra_doc_links)]
 #![warn(missing_docs)]
 
@@ -87,7 +99,7 @@ use serde_json::{Number, Value};
 ///
 /// This enum controls which form(s) the Reader will accept and which form the
 /// Writer will produce. For example, if the RecordBatch Schema is
-/// `[("a", Int32), ("r", Struct([("b", Boolean), ("c", Utf8)]))]`
+/// `[("a", Int32), ("r", Struct("b": Boolean, "c": Utf8))]`
 /// then a Reader with [`StructMode::ObjectOnly`] would read rows of the form
 /// `{"a": 1, "r": {"b": true, "c": "cat"}}` while with ['StructMode::ListOnly']
 /// would read rows of the form `[1, [true, "cat"]]`. A Writer would produce
@@ -167,8 +179,16 @@ impl JsonSerializable for f64 {
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
+    use crate::writer::JsonArray;
+
     use super::*;
 
+    use arrow_array::{
+        ArrayRef, GenericBinaryArray, GenericByteViewArray, RecordBatch, RecordBatchWriter,
+        builder::FixedSizeBinaryBuilder, types::BinaryViewType,
+    };
     use serde_json::Value::{Bool, Number as VNumber, String as VString};
 
     #[test]
@@ -261,4 +281,75 @@ mod tests {
             assert_eq!(list_input, &list_output);
         }
     }
+
+    /// Round-trips binary values (including a null and non-UTF-8 bytes) through JSON.
+    #[test]
+    #[allow(invalid_from_utf8)]
+    fn test_json_roundtrip_binary() {
+        let not_utf8: &[u8] = b"Not UTF8 \xa0\xa1!";
+        assert!(str::from_utf8(not_utf8).is_err());
+        let values: &[Option<&[u8]>] = &[
+            Some(b"Ned Flanders" as &[u8]),
+            None,
+            Some(b"Troy McClure" as &[u8]),
+            Some(not_utf8),
+        ];
+        // Binary (`i32` type parameter):
+        assert_binary_json(Arc::new(GenericBinaryArray::<i32>::from_iter(values)));
+
+        // LargeBinary (`i64` type parameter):
+        assert_binary_json(Arc::new(GenericBinaryArray::<i64>::from_iter(values)));
+
+        // FixedSizeBinary (width 12 = byte length of every non-null value above):
+        assert_binary_json(build_array_fixed_size_binary(12, values));
+
+        // BinaryView:
+        assert_binary_json(Arc::new(GenericByteViewArray::<BinaryViewType>::from_iter(
+            values,
+        )));
+    }
+
+    /// Builds an array with `FixedSizeBinaryBuilder::new(byte_width)`; `None` becomes null.
+    fn build_array_fixed_size_binary(byte_width: i32, values: &[Option<&[u8]>]) -> ArrayRef {
+        let mut fixed = FixedSizeBinaryBuilder::new(byte_width);
+        // `append_value` is unwrapped, so an append error panics
+        values.iter().for_each(|entry| match entry {
+            Some(bytes) => fixed.append_value(bytes).unwrap(),
+            None => fixed.append_null(),
+        });
+        Arc::new(fixed.finish())
+    }
+
+    /// Runs the JSON round-trip check on `array` with explicit nulls enabled, then disabled.
+    fn assert_binary_json(array: ArrayRef) {
+        // encode and check JSON with and without explicit nulls
+        for explicit_nulls in [true, false] {
+            let builder = WriterBuilder::new().with_explicit_nulls(explicit_nulls);
+            assert_binary_json_with_writer(array.clone(), builder);
+        }
+    }
+
+    /// Round-trips `array` through JSON (written with `builder`) and asserts nothing changed.
+    fn assert_binary_json_with_writer(array: ArrayRef, builder: WriterBuilder) {
+        let batch = RecordBatch::try_from_iter([("bytes", array)]).unwrap();
+        // Encode the batch as a JSON array, then parse the written bytes with serde_json
+        let mut buf = Vec::new();
+        let json_value: Value = {
+            let mut writer = builder.build::<_, JsonArray>(&mut buf);
+            writer.write(&batch).unwrap();
+            writer.close().unwrap();
+            serde_json::from_slice(&buf).unwrap()
+        };
+        let json_array = json_value.as_array().unwrap();
+        // Decode the parsed rows again using the original batch's schema
+        let decoded = {
+            let mut decoder = ReaderBuilder::new(batch.schema().clone())
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(json_array).unwrap();
+            decoder.flush().unwrap().unwrap()
+        };
+
+        assert_eq!(batch, decoded);
+    }
 }
diff --git a/arrow-json/src/reader/binary_array.rs b/arrow-json/src/reader/binary_array.rs
new file mode 100644
index 000000000000..712eb6bb4db9
--- /dev/null
+++ b/arrow-json/src/reader/binary_array.rs
@@ -0,0 +1,256 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::builder::{BinaryViewBuilder, FixedSizeBinaryBuilder, GenericBinaryBuilder};
+use arrow_array::{Array, GenericStringArray, OffsetSizeTrait};
+use arrow_data::ArrayData;
+use arrow_schema::ArrowError;
+use std::io::Write;
+use std::marker::PhantomData;
+
+use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
+
+/// Converts one ASCII hex digit (`0-9`, `a-f` or `A-F`) to its 4-bit value.
+///
+/// Returns `None` for every other byte.
+#[inline]
+fn decode_hex_digit(byte: u8) -> Option<u8> {
+    // Radix-16 `to_digit` accepts exactly `0-9`, `a-f` and `A-F` and never yields more
+    // than 15, so narrowing the result to `u8` is lossless.
+    char::from(byte).to_digit(16).map(|nibble| nibble as u8)
+}
+
+/// Builds the `JsonError` reported when `byte`, at offset `index`, is not a hex digit.
+fn invalid_hex_error_at(index: usize, byte: u8) -> ArrowError {
+    ArrowError::JsonError(format!(
+        "invalid hex encoding in binary data: invalid digit 0x{byte:02x} at position {index}"
+    ))
+}
+
+/// Decodes the hex digits of `hex_string` and writes the resulting bytes to `writer`.
+///
+/// Digits are read in pairs, high nibble first; a trailing unpaired digit is accepted and
+/// written as its own byte (`"0f0"` gives `[0x0f, 0x00]`). Bytes are staged in a 64-byte
+/// buffer, so `writer` may already have received some output when an error is returned.
+///
+/// Returns `ArrowError::JsonError` if a character is not a hex digit or if `writer`
+/// reports an error.
+fn decode_hex_to_writer<W: Write>(hex_string: &str, writer: &mut W) -> Result<(), ArrowError> {
+    let write_error =
+        |e: std::io::Error| ArrowError::JsonError(format!("failed to write binary data: {e}"));
+    let mut staged = [0u8; 64];
+    let mut filled = 0;
+
+    for (chunk_index, chunk) in hex_string.as_bytes().chunks(2).enumerate() {
+        let offset = chunk_index * 2;
+        let nibble = |at: usize| {
+            decode_hex_digit(chunk[at]).ok_or_else(|| invalid_hex_error_at(offset + at, chunk[at]))
+        };
+        // Only the final chunk can be short; its lone digit becomes the low nibble.
+        staged[filled] = match chunk.len() {
+            2 => (nibble(0)? << 4) | nibble(1)?,
+            _ => nibble(0)?,
+        };
+        filled += 1;
+
+        if filled == staged.len() {
+            writer.write_all(&staged).map_err(write_error)?;
+            filled = 0;
+        }
+    }
+
+    if filled > 0 {
+        writer.write_all(&staged[..filled]).map_err(write_error)?;
+    }
+
+    Ok(())
+}
+
+/// Decodes hex-encoded JSON strings into a `Binary` (`O = i32`) or `LargeBinary`
+/// (`O = i64`) array; JSON `null` becomes a null slot.
+#[derive(Default)]
+pub struct BinaryArrayDecoder<O: OffsetSizeTrait> {
+    phantom: PhantomData<O>,
+}
+
+impl<O: OffsetSizeTrait> ArrayDecoder for BinaryArrayDecoder<O> {
+    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+        // Also rejects any tape element that is neither a string nor null.
+        let data_capacity = estimate_data_capacity(tape, pos)?;
+        if O::from_usize(data_capacity).is_none() {
+            return Err(ArrowError::JsonError(format!(
+                "offset overflow decoding {}",
+                arrow_array::GenericBinaryArray::<O>::DATA_TYPE
+            )));
+        }
+
+        let mut builder = GenericBinaryBuilder::<O>::with_capacity(pos.len(), data_capacity);
+        for p in pos {
+            match tape.get(*p) {
+                TapeElement::String(idx) => {
+                    // Decode directly into the builder, then let the empty `append_value`
+                    // complete the value. On error the local builder is simply dropped.
+                    decode_hex_to_writer(tape.get_string(idx), &mut builder)?;
+                    builder.append_value(b"");
+                }
+                TapeElement::Null => builder.append_null(),
+                _ => unreachable!(),
+            }
+        }
+
+        Ok(builder.finish().into_data())
+    }
+}
+
+/// Decodes hex-encoded JSON strings into a `FixedSizeBinary` array of width `len`.
+#[derive(Default)]
+pub struct FixedSizeBinaryArrayDecoder {
+    len: i32,
+}
+
+impl FixedSizeBinaryArrayDecoder {
+    /// Creates a decoder for values that are `len` bytes wide.
+    pub fn new(len: i32) -> Self {
+        Self { len }
+    }
+}
+
+impl ArrayDecoder for FixedSizeBinaryArrayDecoder {
+    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+        let mut builder = FixedSizeBinaryBuilder::with_capacity(pos.len(), self.len);
+        // Reused per row. Rows the builder accepts are `len` bytes wide, so this is enough.
+        let mut scratch = Vec::with_capacity(self.len as usize);
+        for p in pos {
+            match tape.get(*p) {
+                TapeElement::String(idx) => {
+                    scratch.clear();
+                    decode_hex_to_writer(tape.get_string(idx), &mut scratch)?;
+                    builder.append_value(&scratch)?;
+                }
+                TapeElement::Null => builder.append_null(),
+                // Nothing pre-validates the tape here, so a non-string is a user error.
+                _ => return Err(tape.error(*p, "binary data encoded as string")),
+            }
+        }
+
+        Ok(builder.finish().into_data())
+    }
+}
+
+/// Decodes hex-encoded JSON strings into a `BinaryView` array.
+#[derive(Default)]
+pub struct BinaryViewDecoder {}
+
+impl ArrayDecoder for BinaryViewDecoder {
+    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+        estimate_data_capacity(tape, pos)?; // validation only, see `unreachable!` below
+        // `with_capacity` counts values (views), not bytes, so size it by the row count.
+        let mut builder = BinaryViewBuilder::with_capacity(pos.len());
+        let mut scratch = Vec::new();
+        for p in pos {
+            match tape.get(*p) {
+                TapeElement::String(idx) => {
+                    let string = tape.get_string(idx);
+                    scratch.clear();
+                    scratch.reserve(string.len().div_ceil(2));
+                    decode_hex_to_writer(string, &mut scratch)?;
+                    builder.append_value(&scratch);
+                }
+                TapeElement::Null => builder.append_null(),
+                _ => unreachable!(), // estimate_data_capacity rejected everything else
+            }
+        }
+        Ok(builder.finish().into_data())
+    }
+}
+
+/// Sums the decoded byte lengths of the hex strings found at `pos`; null elements
+/// contribute nothing.
+///
+/// # Errors
+///
+/// Returns a tape error at the first element that is neither a string nor null, so a
+/// successful call also guarantees that `pos` contains nothing else.
+fn estimate_data_capacity(tape: &Tape<'_>, pos: &[u32]) -> Result<usize, ArrowError> {
+    pos.iter().try_fold(0, |total, p| {
+        let decoded_len = match tape.get(*p) {
+            // two hex characters represent one byte; an unpaired digit takes a full byte
+            TapeElement::String(idx) => tape.get_string(idx).len().div_ceil(2),
+            TapeElement::Null => 0,
+            _ => return Err(tape.error(*p, "binary data encoded as string")),
+        };
+        Ok(total + decoded_len)
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ReaderBuilder;
+    use arrow_schema::{DataType, Field};
+    use std::io::Cursor;
+
+    /// Runs `decode_hex_to_writer` on `hex` and returns the bytes it wrote.
+    fn decode_to_vec(hex: &str) -> Result<Vec<u8>, ArrowError> {
+        let mut decoded = Vec::new();
+        decode_hex_to_writer(hex, &mut decoded)?;
+        Ok(decoded)
+    }
+
+    #[test]
+    fn test_decode_hex_to_writer_empty() {
+        // An empty string is valid and produces no bytes.
+        assert!(decode_to_vec("").unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_decode_hex_to_writer_odd_length() {
+        // A trailing unpaired digit becomes a byte of its own.
+        assert_eq!(decode_to_vec("0f0").unwrap(), vec![0x0f, 0x00]);
+        // The same holds when the unpaired digit is the whole input.
+        assert_eq!(decode_to_vec("a").unwrap(), vec![0x0a]);
+    }
+
+    #[test]
+    fn test_decode_hex_to_writer_invalid() {
+        // `g` sits at byte offset 3 of the input.
+        let ArrowError::JsonError(msg) = decode_to_vec("0f0g").unwrap_err() else {
+            panic!("expected JsonError");
+        };
+        assert!(msg.contains("invalid hex encoding in binary data"));
+        assert!(msg.contains("position 3"));
+    }
+
+    #[test]
+    fn test_binary_reader_invalid_hex_is_terminal() {
+        let field = Field::new("item", DataType::Binary, false);
+        // Two rows: the first contains the invalid digit `g`, the second is valid hex.
+        let input = Cursor::new(b"\"0f0g\"\n\"0f00\"\n");
+        let mut reader = ReaderBuilder::new_with_field(field).build(input).unwrap();
+
+        // The first batch must surface the hex error.
+        let first = reader.next().unwrap().unwrap_err().to_string();
+        assert!(first.contains("invalid hex encoding in binary data"));
+
+        // Afterwards the reader may end or keep failing, but must never yield a batch.
+        assert!(
+            !matches!(reader.next(), Some(Ok(_))),
+            "expected terminal error after invalid hex"
+        );
+    }
+}
diff --git a/arrow-json/src/reader/boolean_array.rs b/arrow-json/src/reader/boolean_array.rs
index 9094391cd7dd..cb2587edcb85 100644
--- a/arrow-json/src/reader/boolean_array.rs
+++ b/arrow-json/src/reader/boolean_array.rs
@@ -15,13 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_array::builder::BooleanBuilder;
 use arrow_array::Array;
+use arrow_array::builder::BooleanBuilder;
 use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 
 #[derive(Default)]
 pub struct BooleanArrayDecoder {}
diff --git a/arrow-json/src/reader/decimal_array.rs b/arrow-json/src/reader/decimal_array.rs
index d56afcfe807a..07a5e182a354 100644
--- a/arrow-json/src/reader/decimal_array.rs
+++ b/arrow-json/src/reader/decimal_array.rs
@@ -17,15 +17,15 @@
 
 use std::marker::PhantomData;
 
+use arrow_array::Array;
 use arrow_array::builder::PrimitiveBuilder;
 use arrow_array::types::DecimalType;
-use arrow_array::Array;
 use arrow_cast::parse::parse_decimal;
 use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 
 pub struct DecimalArrayDecoder<D: DecimalType> {
     precision: u8,
diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs
index 1a1dee6a23d4..e74fef79178a 100644
--- a/arrow-json/src/reader/list_array.rs
+++ b/arrow-json/src/reader/list_array.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{make_decoder, ArrayDecoder};
 use crate::StructMode;
-use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
+use crate::reader::tape::{Tape, TapeElement};
+use crate::reader::{ArrayDecoder, make_decoder};
 use arrow_array::OffsetSizeTrait;
+use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
 use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
diff --git a/arrow-json/src/reader/map_array.rs b/arrow-json/src/reader/map_array.rs
index ee78373a551e..c2068577a094 100644
--- a/arrow-json/src/reader/map_array.rs
+++ b/arrow-json/src/reader/map_array.rs
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{make_decoder, ArrayDecoder};
 use crate::StructMode;
+use crate::reader::tape::{Tape, TapeElement};
+use crate::reader::{ArrayDecoder, make_decoder};
 use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
-use arrow_buffer::buffer::NullBuffer;
 use arrow_buffer::ArrowNativeType;
+use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 
@@ -43,14 +43,14 @@ impl MapArrayDecoder {
             DataType::Map(_, true) => {
                 return Err(ArrowError::NotYetImplemented(
                     "Decoding MapArray with sorted fields".to_string(),
-                ))
+                ));
             }
             DataType::Map(f, _) => match f.data_type() {
                 DataType::Struct(fields) if fields.len() == 2 => fields,
                 d => {
                     return Err(ArrowError::InvalidArgumentError(format!(
                         "MapArray must contain struct with two fields, got {d}"
-                    )))
+                    )));
                 }
             },
             _ => unreachable!(),
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index cd33e337be08..f5fd1a8e7c38 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -134,15 +134,18 @@
 //!
 
 use crate::StructMode;
+use crate::reader::binary_array::{
+    BinaryArrayDecoder, BinaryViewDecoder, FixedSizeBinaryArrayDecoder,
+};
 use std::io::BufRead;
 use std::sync::Arc;
 
 use chrono::Utc;
-use serde::Serialize;
+use serde_core::Serialize;
 
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
-use arrow_array::{downcast_integer, make_array, RecordBatch, RecordBatchReader, StructArray};
+use arrow_array::{RecordBatch, RecordBatchReader, StructArray, downcast_integer, make_array};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType, FieldRef, Schema, SchemaRef, TimeUnit};
 pub use schema::*;
@@ -159,6 +162,7 @@ use crate::reader::struct_array::StructArrayDecoder;
 use crate::reader::tape::{Tape, TapeDecoder};
 use crate::reader::timestamp_array::TimestampArrayDecoder;
 
+mod binary_array;
 mod boolean_array;
 mod decimal_array;
 mod list_array;
@@ -613,6 +617,8 @@ impl Decoder {
     /// ```
     ///
     /// Note: this ignores any batch size setting, and always decodes all rows
+    ///
+    /// [serde]: https://docs.rs/serde/latest/serde/
     pub fn serialize<S: Serialize>(&mut self, rows: &[S]) -> Result<(), ArrowError> {
         self.tape_decoder.serialize(rows)
     }
@@ -730,6 +736,8 @@ fn make_decoder(
         DataType::Duration(TimeUnit::Microsecond) => primitive_decoder!(DurationMicrosecondType, data_type),
         DataType::Duration(TimeUnit::Millisecond) => primitive_decoder!(DurationMillisecondType, data_type),
         DataType::Duration(TimeUnit::Second) => primitive_decoder!(DurationSecondType, data_type),
+        DataType::Decimal32(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal32Type>::new(p, s))),
+        DataType::Decimal64(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal64Type>::new(p, s))),
         DataType::Decimal128(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal128Type>::new(p, s))),
         DataType::Decimal256(p, s) => Ok(Box::new(DecimalArrayDecoder::<Decimal256Type>::new(p, s))),
         DataType::Boolean => Ok(Box::<BooleanArrayDecoder>::default()),
@@ -739,9 +747,10 @@ fn make_decoder(
         DataType::List(_) => Ok(Box::new(ListArrayDecoder::<i32>::new(data_type, coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
         DataType::LargeList(_) => Ok(Box::new(ListArrayDecoder::<i64>::new(data_type, coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
         DataType::Struct(_) => Ok(Box::new(StructArrayDecoder::new(data_type, coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
-        DataType::Binary | DataType::LargeBinary | DataType::FixedSizeBinary(_) => {
-            Err(ArrowError::JsonError(format!("{data_type} is not supported by JSON")))
-        }
+        DataType::Binary => Ok(Box::new(BinaryArrayDecoder::<i32>::default())),
+        DataType::LargeBinary => Ok(Box::new(BinaryArrayDecoder::<i64>::default())),
+        DataType::FixedSizeBinary(len) => Ok(Box::new(FixedSizeBinaryArrayDecoder::new(len))),
+        DataType::BinaryView => Ok(Box::new(BinaryViewDecoder::default())),
         DataType::Map(_, _) => Ok(Box::new(MapArrayDecoder::new(data_type, coerce_primitive, strict_mode, is_nullable, struct_mode)?)),
         d => Err(ArrowError::NotYetImplemented(format!("Support for {d} in JSON reader")))
     }
@@ -948,9 +957,7 @@ mod tests {
         // (The actual buffer may be larger than expected due to rounding or internal allocation strategies.)
         assert!(
             data_buffer >= expected_capacity,
-            "Data buffer length ({}) should be at least {}",
-            data_buffer,
-            expected_capacity
+            "Data buffer length ({data_buffer}) should be at least {expected_capacity}",
         );
 
         // Additionally, verify that the decoded values are correct.
@@ -994,9 +1001,7 @@ mod tests {
         let data_buffer = string_view_array.to_data().buffers()[0].len();
         assert!(
             data_buffer >= expected_capacity,
-            "Data buffer length ({}) should be at least {}",
-            data_buffer,
-            expected_capacity
+            "Data buffer length ({data_buffer}) should be at least {expected_capacity}",
         );
 
         // Verify that the converted string values are correct.
@@ -1349,6 +1354,8 @@ mod tests {
 
     #[test]
     fn test_decimals() {
+        test_decimal::<Decimal32Type>(DataType::Decimal32(8, 2));
+        test_decimal::<Decimal64Type>(DataType::Decimal64(10, 2));
         test_decimal::<Decimal128Type>(DataType::Decimal128(10, 2));
         test_decimal::<Decimal256Type>(DataType::Decimal256(10, 2));
     }
diff --git a/arrow-json/src/reader/null_array.rs b/arrow-json/src/reader/null_array.rs
index 4270045fb3c2..aa16678c0a9c 100644
--- a/arrow-json/src/reader/null_array.rs
+++ b/arrow-json/src/reader/null_array.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 
diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs
index 257c216cf5f6..fa8464aa3251 100644
--- a/arrow-json/src/reader/primitive_array.rs
+++ b/arrow-json/src/reader/primitive_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use num::NumCast;
+use num_traits::NumCast;
 use std::marker::PhantomData;
 
 use arrow_array::builder::PrimitiveBuilder;
@@ -25,8 +25,8 @@ use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
 use half::f16;
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 
 /// A trait for JSON-specific primitive parsing logic
 ///
diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs
index 07eb40106de0..fb7d93a85e12 100644
--- a/arrow-json/src/reader/schema.rs
+++ b/arrow-json/src/reader/schema.rs
@@ -250,6 +250,15 @@ pub fn infer_json_schema_from_seekable<R: BufRead + Seek>(
 /// original file's cursor. This function is useful when the `reader`'s cursor is not available
 /// (does not implement [`Seek`]), such is the case for compressed streams decoders.
 ///
+///
+/// Note that JSON is not able to represent all Arrow data types exactly. So the inferred schema
+/// might be different from the schema of the original data that was encoded as JSON. For example,
+/// JSON does not have different integer types, so all integers are inferred as `Int64`. Another
+/// example is binary data, which is encoded as a [Base16] string in JSON and therefore inferred
+/// as String type by this function.
+///
+/// [Base16]: https://en.wikipedia.org/wiki/Base16#Base16
+///
 /// # Examples
 /// ```
 /// use std::fs::File;
@@ -655,8 +664,7 @@ mod tests {
         let bigger_than_i64_max = (i64::MAX as i128) + 1;
         let smaller_than_i64_min = (i64::MIN as i128) - 1;
         let json = format!(
-            "{{ \"bigger_than_i64_max\": {}, \"smaller_than_i64_min\": {} }}",
-            bigger_than_i64_max, smaller_than_i64_min
+            "{{ \"bigger_than_i64_max\": {bigger_than_i64_max}, \"smaller_than_i64_min\": {smaller_than_i64_min} }}",
         );
         let mut buf_reader = BufReader::new(json.as_bytes());
         let (inferred_schema, _) = infer_json_schema(&mut buf_reader, Some(1)).unwrap();
diff --git a/arrow-json/src/reader/serializer.rs b/arrow-json/src/reader/serializer.rs
index 95068af67833..5d004fbb5c9b 100644
--- a/arrow-json/src/reader/serializer.rs
+++ b/arrow-json/src/reader/serializer.rs
@@ -17,10 +17,10 @@
 
 use crate::reader::tape::TapeElement;
 use lexical_core::FormattedSize;
-use serde::ser::{
+use serde_core::ser::{
     Impossible, SerializeMap, SerializeSeq, SerializeStruct, SerializeTuple, SerializeTupleStruct,
 };
-use serde::{Serialize, Serializer};
+use serde_core::{Serialize, Serializer};
 
 #[derive(Debug)]
 pub struct SerializerError(String);
@@ -33,7 +33,7 @@ impl std::fmt::Display for SerializerError {
     }
 }
 
-impl serde::ser::Error for SerializerError {
+impl serde_core::ser::Error for SerializerError {
     fn custom<T>(msg: T) -> Self
     where
         T: std::fmt::Display,
diff --git a/arrow-json/src/reader/string_array.rs b/arrow-json/src/reader/string_array.rs
index 03d07ad8c8b3..7ab5d343c9d6 100644
--- a/arrow-json/src/reader/string_array.rs
+++ b/arrow-json/src/reader/string_array.rs
@@ -21,8 +21,11 @@ use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 use std::marker::PhantomData;
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
+
+use itoa;
+use ryu;
 
 const TRUE: &str = "true";
 const FALSE: &str = "false";
@@ -85,6 +88,9 @@ impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
 
         let mut builder = GenericStringBuilder::<O>::with_capacity(pos.len(), data_capacity);
 
+        let mut float_formatter = ryu::Buffer::new();
+        let mut int_formatter = itoa::Buffer::new();
+
         for p in pos {
             match tape.get(*p) {
                 TapeElement::String(idx) => {
@@ -103,20 +109,20 @@ impl<O: OffsetSizeTrait> ArrayDecoder for StringArrayDecoder<O> {
                 TapeElement::I64(high) if coerce_primitive => match tape.get(p + 1) {
                     TapeElement::I32(low) => {
                         let val = ((high as i64) << 32) | (low as u32) as i64;
-                        builder.append_value(val.to_string());
+                        builder.append_value(int_formatter.format(val));
                     }
                     _ => unreachable!(),
                 },
                 TapeElement::I32(n) if coerce_primitive => {
-                    builder.append_value(n.to_string());
+                    builder.append_value(int_formatter.format(n));
                 }
                 TapeElement::F32(n) if coerce_primitive => {
-                    builder.append_value(n.to_string());
+                    builder.append_value(int_formatter.format(n));
                 }
                 TapeElement::F64(high) if coerce_primitive => match tape.get(p + 1) {
                     TapeElement::F32(low) => {
                         let val = f64::from_bits(((high as u64) << 32) | low as u64);
-                        builder.append_value(val.to_string());
+                        builder.append_value(float_formatter.format_finite(val));
                     }
                     _ => unreachable!(),
                 },
diff --git a/arrow-json/src/reader/string_view_array.rs b/arrow-json/src/reader/string_view_array.rs
index 8aeb1c805899..dbc27e9c50a0 100644
--- a/arrow-json/src/reader/string_view_array.rs
+++ b/arrow-json/src/reader/string_view_array.rs
@@ -15,15 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use arrow_array::Array;
 use arrow_array::builder::GenericByteViewBuilder;
 use arrow_array::types::StringViewType;
-use arrow_array::Array;
 use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 use std::fmt::Write;
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 
 const TRUE: &str = "true";
 const FALSE: &str = "false";
@@ -131,26 +131,26 @@ impl ArrayDecoder for StringViewArrayDecoder {
                         let val = ((high as i64) << 32) | (low as u32) as i64;
                         tmp_buf.clear();
                         // Reuse the temporary buffer instead of allocating a new String
-                        write!(&mut tmp_buf, "{}", val).unwrap();
+                        write!(&mut tmp_buf, "{val}").unwrap();
                         builder.append_value(&tmp_buf);
                     }
                     _ => unreachable!(),
                 },
                 TapeElement::I32(n) if coerce => {
                     tmp_buf.clear();
-                    write!(&mut tmp_buf, "{}", n).unwrap();
+                    write!(&mut tmp_buf, "{n}").unwrap();
                     builder.append_value(&tmp_buf);
                 }
                 TapeElement::F32(n) if coerce => {
                     tmp_buf.clear();
-                    write!(&mut tmp_buf, "{}", n).unwrap();
+                    write!(&mut tmp_buf, "{n}").unwrap();
                     builder.append_value(&tmp_buf);
                 }
                 TapeElement::F64(high) if coerce => match tape.get(p + 1) {
                     TapeElement::F32(low) => {
                         let val = f64::from_bits(((high as u64) << 32) | (low as u64));
                         tmp_buf.clear();
-                        write!(&mut tmp_buf, "{}", val).unwrap();
+                        write!(&mut tmp_buf, "{val}").unwrap();
                         builder.append_value(&tmp_buf);
                     }
                     _ => unreachable!(),
diff --git a/arrow-json/src/reader/struct_array.rs b/arrow-json/src/reader/struct_array.rs
index b9408df77a43..262097ace396 100644
--- a/arrow-json/src/reader/struct_array.rs
+++ b/arrow-json/src/reader/struct_array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::reader::tape::{Tape, TapeElement};
-use crate::reader::{make_decoder, ArrayDecoder, StructMode};
+use crate::reader::{ArrayDecoder, StructMode, make_decoder};
 use arrow_array::builder::BooleanBufferBuilder;
 use arrow_buffer::buffer::NullBuffer;
 use arrow_data::{ArrayData, ArrayDataBuilder};
@@ -106,8 +106,7 @@ impl ArrayDecoder for StructArrayDecoder {
                             None => {
                                 if self.strict_mode {
                                     return Err(ArrowError::JsonError(format!(
-                                        "column '{}' missing from schema",
-                                        field_name
+                                        "column '{field_name}' missing from schema",
                                     )));
                                 }
                             }
diff --git a/arrow-json/src/reader/tape.rs b/arrow-json/src/reader/tape.rs
index 26236960a735..89ee3f778765 100644
--- a/arrow-json/src/reader/tape.rs
+++ b/arrow-json/src/reader/tape.rs
@@ -18,7 +18,7 @@
 use crate::reader::serializer::TapeSerializer;
 use arrow_schema::ArrowError;
 use memchr::memchr2;
-use serde::Serialize;
+use serde_core::Serialize;
 use std::fmt::Write;
 
 /// We decode JSON to a flattened tape representation,
@@ -567,7 +567,10 @@ impl TapeDecoder {
         }
 
         if self.offsets.len() >= u32::MAX as usize {
-            return Err(ArrowError::JsonError(format!("Encountered more than {} bytes of string data, consider using a smaller batch size", u32::MAX)));
+            return Err(ArrowError::JsonError(format!(
+                "Encountered more than {} bytes of string data, consider using a smaller batch size",
+                u32::MAX
+            )));
         }
 
         if self.offsets.len() >= u32::MAX as usize {
diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs
index ee9018702920..79f2b04eeba8 100644
--- a/arrow-json/src/reader/timestamp_array.rs
+++ b/arrow-json/src/reader/timestamp_array.rs
@@ -18,15 +18,15 @@
 use chrono::TimeZone;
 use std::marker::PhantomData;
 
+use arrow_array::Array;
 use arrow_array::builder::PrimitiveBuilder;
 use arrow_array::types::ArrowTimestampType;
-use arrow_array::Array;
 use arrow_cast::parse::string_to_datetime;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType, TimeUnit};
 
-use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::ArrayDecoder;
+use crate::reader::tape::{Tape, TapeElement};
 
 /// A specialized [`ArrayDecoder`] for timestamps
 pub struct TimestampArrayDecoder<P: ArrowTimestampType, Tz: TimeZone> {
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index d9481cc484b9..d52af66fcef9 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -26,7 +26,7 @@ use arrow_cast::display::{ArrayFormatter, FormatOptions};
 use arrow_schema::{ArrowError, DataType, FieldRef};
 use half::f16;
 use lexical_core::FormattedSize;
-use serde::Serializer;
+use serde_core::Serializer;
 
 /// Configuration options for the JSON encoder.
 #[derive(Debug, Clone, Default)]
@@ -37,6 +37,16 @@ pub struct EncoderOptions {
     struct_mode: StructMode,
     /// An optional hook for customizing encoding behavior.
     encoder_factory: Option<Arc<dyn EncoderFactory>>,
+    /// Optional date format for date arrays
+    date_format: Option<String>,
+    /// Optional datetime format for datetime arrays
+    datetime_format: Option<String>,
+    /// Optional timestamp format for timestamp arrays
+    timestamp_format: Option<String>,
+    /// Optional timestamp format for timestamp with timezone arrays
+    timestamp_tz_format: Option<String>,
+    /// Optional time format for time arrays
+    time_format: Option<String>,
 }
 
 impl EncoderOptions {
@@ -72,6 +82,61 @@ impl EncoderOptions {
     pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
         self.encoder_factory.as_ref()
     }
+
+    /// Sets the format used for date arrays; returns `self` so that calls can be chained
+    pub fn with_date_format(mut self, format: String) -> Self {
+        self.date_format = Some(format);
+        self
+    }
+
+    /// Returns the date format if one was set; `None` means the default (RFC3339) is used
+    pub fn date_format(&self) -> Option<&str> {
+        self.date_format.as_deref()
+    }
+
+    /// Sets the format used for datetime arrays; returns `self` so that calls can be chained
+    pub fn with_datetime_format(mut self, format: String) -> Self {
+        self.datetime_format = Some(format);
+        self
+    }
+
+    /// Returns the datetime format if one was set; `None` means the default (RFC3339) is used
+    pub fn datetime_format(&self) -> Option<&str> {
+        self.datetime_format.as_deref()
+    }
+
+    /// Sets the format used for time arrays; returns `self` so that calls can be chained
+    pub fn with_time_format(mut self, format: String) -> Self {
+        self.time_format = Some(format);
+        self
+    }
+
+    /// Returns the time format if one was set; `None` means the default (RFC3339) is used
+    pub fn time_format(&self) -> Option<&str> {
+        self.time_format.as_deref()
+    }
+
+    /// Sets the format used for timestamp arrays; returns `self` so that calls can be chained
+    pub fn with_timestamp_format(mut self, format: String) -> Self {
+        self.timestamp_format = Some(format);
+        self
+    }
+
+    /// Returns the timestamp format if one was set; `None` means the default (RFC3339) is used
+    pub fn timestamp_format(&self) -> Option<&str> {
+        self.timestamp_format.as_deref()
+    }
+
+    /// Sets the format used for timestamp-with-timezone arrays; returns `self` for chaining
+    pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+        self.timestamp_tz_format = Some(tz_format);
+        self
+    }
+
+    /// Returns the timestamp-with-timezone format if set; `None` means the RFC3339 default
+    pub fn timestamp_tz_format(&self) -> Option<&str> {
+        self.timestamp_tz_format.as_deref()
+    }
 }
 
 /// A trait to create custom encoders for specific data types.
@@ -281,6 +346,10 @@ pub fn make_encoder<'a>(
             let array = array.as_string_view();
             NullableEncoder::new(Box::new(StringViewEncoder(array)), array.nulls().cloned())
         }
+        DataType::BinaryView => {
+            let array = array.as_binary_view();
+            NullableEncoder::new(Box::new(BinaryViewEncoder(array)), array.nulls().cloned())
+        }
         DataType::List(_) => {
             let array = array.as_list::<i32>();
             NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned())
@@ -339,7 +408,7 @@ pub fn make_encoder<'a>(
             let nulls = array.nulls().cloned();
             NullableEncoder::new(Box::new(encoder) as Box<dyn Encoder + 'a>, nulls)
         }
-        DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
+        DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
             let options = FormatOptions::new().with_display_error(true);
             let formatter = JsonArrayFormatter::new(ArrayFormatter::try_new(array, &options)?);
             NullableEncoder::new(Box::new(RawArrayFormatter(formatter)) as Box<dyn Encoder + 'a>, nulls)
@@ -350,14 +419,19 @@ pub fn make_encoder<'a>(
                 // characters that would need to be escaped within a JSON string, e.g. `'"'`.
                 // If support for user-provided format specifications is added, this assumption
                 // may need to be revisited
-                let options = FormatOptions::new().with_display_error(true);
-                let formatter = ArrayFormatter::try_new(array, &options)?;
+                let fops = FormatOptions::new().with_display_error(true)
+                .with_date_format(options.date_format.as_deref())
+                .with_datetime_format(options.datetime_format.as_deref())
+                .with_timestamp_format(options.timestamp_format.as_deref())
+                .with_timestamp_tz_format(options.timestamp_tz_format.as_deref())
+                .with_time_format(options.time_format.as_deref());
+
+                let formatter = ArrayFormatter::try_new(array, &fops)?;
                 let formatter = JsonArrayFormatter::new(formatter);
                 NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 'a>, nulls)
             }
             false => return Err(ArrowError::JsonError(format!(
-                "Unsupported data type for JSON encoding: {:?}",
-                d
+                "Unsupported data type for JSON encoding: {d:?}",
             )))
         }
     };
@@ -370,6 +444,14 @@ fn encode_string(s: &str, out: &mut Vec<u8>) {
     serializer.serialize_str(s).unwrap();
 }
 
+fn encode_binary(bytes: &[u8], out: &mut Vec<u8>) {
+    out.push(b'"');
+    for byte in bytes {
+        write!(out, "{byte:02x}").unwrap();
+    }
+    out.push(b'"');
+}
+
 struct FieldEncoder<'a> {
     field: FieldRef,
     encoder: NullableEncoder<'a>,
@@ -539,6 +621,14 @@ impl Encoder for StringViewEncoder<'_> {
     }
 }
 
+struct BinaryViewEncoder<'a>(&'a BinaryViewArray);
+
+impl Encoder for BinaryViewEncoder<'_> {
+    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
+        encode_binary(self.0.value(idx), out);
+    }
+}
+
 struct ListEncoder<'a, O: OffsetSizeTrait> {
     offsets: OffsetBuffer<O>,
     encoder: NullableEncoder<'a>,
@@ -714,7 +804,10 @@ impl<'a> MapEncoder<'a> {
         let values = array.values();
         let keys = array.keys();
 
-        if !matches!(keys.data_type(), DataType::Utf8 | DataType::LargeUtf8) {
+        if !matches!(
+            keys.data_type(),
+            DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
+        ) {
             return Err(ArrowError::JsonError(format!(
                 "Only UTF8 keys supported by JSON MapArray Writer: got {:?}",
                 keys.data_type()
diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index ee1b5fabe538..4d480b576a59 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -112,7 +112,7 @@ use crate::StructMode;
 use arrow_array::*;
 use arrow_schema::*;
 
-pub use encoder::{make_encoder, Encoder, EncoderFactory, EncoderOptions, NullableEncoder};
+pub use encoder::{Encoder, EncoderFactory, EncoderOptions, NullableEncoder, make_encoder};
 
 /// This trait defines how to format a sequence of JSON objects to a
 /// byte stream.
@@ -279,6 +279,36 @@ impl WriterBuilder {
         self
     }
 
+    /// Set the JSON file's date format
+    pub fn with_date_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_date_format(format);
+        self
+    }
+
+    /// Set the JSON file's datetime format
+    pub fn with_datetime_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_datetime_format(format);
+        self
+    }
+
+    /// Set the JSON file's time format
+    pub fn with_time_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_time_format(format);
+        self
+    }
+
+    /// Set the JSON file's timestamp format
+    pub fn with_timestamp_format(mut self, format: String) -> Self {
+        self.0 = self.0.with_timestamp_format(format);
+        self
+    }
+
+    /// Set the JSON file's timestamp tz format
+    pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
+        self.0 = self.0.with_timestamp_tz_format(tz_format);
+        self
+    }
+
     /// Create a new `Writer` with specified `JsonFormat` and builder options.
     pub fn build<W, F>(self, writer: W) -> Writer<W, F>
     where
@@ -413,6 +443,19 @@ where
         Ok(())
     }
 
+    /// Gets a reference to the underlying writer.
+    pub fn get_ref(&self) -> &W {
+        &self.writer
+    }
+
+    /// Gets a mutable reference to the underlying writer.
+    ///
+    /// Writing to the underlying writer must be done with care
+    /// to avoid corrupting the output JSON.
+    pub fn get_mut(&mut self) -> &mut W {
+        &mut self.writer
+    }
+
     /// Unwraps this `Writer<W>`, returning the underlying writer
     pub fn into_inner(self) -> W {
         self.writer
@@ -437,18 +480,18 @@ where
 mod tests {
     use core::str;
     use std::collections::HashMap;
-    use std::fs::{read_to_string, File};
+    use std::fs::{File, read_to_string};
     use std::io::{BufReader, Seek};
     use std::sync::Arc;
 
     use arrow_array::cast::AsArray;
-    use serde_json::{json, Value};
+    use serde_json::{Value, json};
 
     use super::LineDelimited;
     use super::{Encoder, WriterBuilder};
     use arrow_array::builder::*;
     use arrow_array::types::*;
-    use arrow_buffer::{i256, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, ToByteSlice};
+    use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer, ToByteSlice, i256};
     use arrow_data::ArrayData;
 
     use crate::reader::*;
@@ -711,6 +754,21 @@ mod tests {
             &buf,
             r#"{"micros":"2018-11-13T17:11:10.011375","millis":"2018-11-13T17:11:10.011","name":"a","nanos":"2018-11-13T17:11:10.011375885","secs":"2018-11-13T17:11:10"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_timestamp_format("%m-%d-%Y".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"nanos":"11-13-2018","micros":"11-13-2018","millis":"11-13-2018","secs":"11-13-2018","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -772,6 +830,21 @@ mod tests {
             &buf,
             r#"{"micros":"2018-11-13T17:11:10.011375Z","millis":"2018-11-13T17:11:10.011Z","name":"a","nanos":"2018-11-13T17:11:10.011375885Z","secs":"2018-11-13T17:11:10Z"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_timestamp_tz_format("%m-%d-%Y %Z".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"nanos":"11-13-2018 +00:00","micros":"11-13-2018 +00:00","millis":"11-13-2018 +00:00","secs":"11-13-2018 +00:00","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -819,6 +892,22 @@ mod tests {
             &buf,
             r#"{"date32":"2018-11-13","date64":"2018-11-13T17:11:10.011","name":"a"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_date_format("%m-%d-%Y".to_string())
+                .with_datetime_format("%m-%d-%Y %Mmin %Ssec %Hhour".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"date32":"11-13-2018","date64":"11-13-2018 11min 10sec 17hour","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -862,6 +951,21 @@ mod tests {
             &buf,
             r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
 {"name":"b"}
+"#,
+        );
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = WriterBuilder::new()
+                .with_time_format("%H-%M-%S %f".to_string())
+                .build::<_, LineDelimited>(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"time32sec":"00-02-00 000000000","time32msec":"00-00-00 120000000","time64usec":"00-00-00 000120000","time64nsec":"00-00-00 000000120","name":"a"}
+{"name":"b"}
 "#,
         );
     }
@@ -1285,16 +1389,14 @@ mod tests {
         );
     }
 
-    #[test]
-    fn json_writer_map() {
-        let keys_array = super::StringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+    fn run_json_writer_map_with_keys(keys_array: ArrayRef) {
         let values_array = super::Int64Array::from(vec![10, 20, 30, 40, 50]);
 
-        let keys = Arc::new(Field::new("keys", DataType::Utf8, false));
-        let values = Arc::new(Field::new("values", DataType::Int64, false));
+        let keys_field = Arc::new(Field::new("keys", keys_array.data_type().clone(), false));
+        let values_field = Arc::new(Field::new("values", DataType::Int64, false));
         let entry_struct = StructArray::from(vec![
-            (keys, Arc::new(keys_array) as ArrayRef),
-            (values, Arc::new(values_array) as ArrayRef),
+            (keys_field, keys_array.clone()),
+            (values_field, Arc::new(values_array) as ArrayRef),
         ]);
 
         let map_data_type = DataType::Map(
@@ -1343,6 +1445,21 @@ mod tests {
         );
     }
 
+    #[test]
+    fn json_writer_map() {
+        // Utf8 (StringArray)
+        let keys_utf8 = super::StringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+        run_json_writer_map_with_keys(Arc::new(keys_utf8) as ArrayRef);
+
+        // LargeUtf8 (LargeStringArray)
+        let keys_large = super::LargeStringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+        run_json_writer_map_with_keys(Arc::new(keys_large) as ArrayRef);
+
+        // Utf8View (StringViewArray)
+        let keys_view = super::StringViewArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
+        run_json_writer_map_with_keys(Arc::new(keys_view) as ArrayRef);
+    }
+
     #[test]
     fn test_write_single_batch() {
         let test_file = "test/data/basic.json";
@@ -1647,17 +1764,13 @@ mod tests {
         Ok(())
     }
 
-    fn binary_encoding_test<O: OffsetSizeTrait>() {
-        // set up schema
+    fn build_array_binary<O: OffsetSizeTrait>(values: &[Option<&[u8]>]) -> RecordBatch {
         let schema = SchemaRef::new(Schema::new(vec![Field::new(
             "bytes",
             GenericBinaryType::<O>::DATA_TYPE,
             true,
         )]));
-
-        // build record batch:
         let mut builder = GenericByteBuilder::<GenericBinaryType<O>>::new();
-        let values = [Some(b"Ned Flanders"), None, Some(b"Troy McClure")];
         for value in values {
             match value {
                 Some(v) => builder.append_value(v),
@@ -1665,8 +1778,27 @@ mod tests {
             }
         }
         let array = Arc::new(builder.finish()) as ArrayRef;
-        let batch = RecordBatch::try_new(schema, vec![array]).unwrap();
+        RecordBatch::try_new(schema, vec![array]).unwrap()
+    }
+
+    fn build_array_binary_view(values: &[Option<&[u8]>]) -> RecordBatch {
+        let schema = SchemaRef::new(Schema::new(vec![Field::new(
+            "bytes",
+            DataType::BinaryView,
+            true,
+        )]));
+        let mut builder = BinaryViewBuilder::new();
+        for value in values {
+            match value {
+                Some(v) => builder.append_value(v),
+                None => builder.append_null(),
+            }
+        }
+        let array = Arc::new(builder.finish()) as ArrayRef;
+        RecordBatch::try_new(schema, vec![array]).unwrap()
+    }
 
+    fn assert_binary_json(batch: &RecordBatch) {
         // encode and check JSON with explicit nulls:
         {
             let mut buf = Vec::new();
@@ -1674,7 +1806,7 @@ mod tests {
                 let mut writer = WriterBuilder::new()
                     .with_explicit_nulls(true)
                     .build::<_, JsonArray>(&mut buf);
-                writer.write(&batch).unwrap();
+                writer.write(batch).unwrap();
                 writer.close().unwrap();
                 serde_json::from_slice(&buf).unwrap()
             };
@@ -1702,20 +1834,16 @@ mod tests {
                 // explicit nulls are off by default, so we don't need
                 // to set that when creating the writer:
                 let mut writer = ArrayWriter::new(&mut buf);
-                writer.write(&batch).unwrap();
+                writer.write(batch).unwrap();
                 writer.close().unwrap();
                 serde_json::from_slice(&buf).unwrap()
             };
 
             assert_eq!(
                 json!([
-                    {
-                        "bytes": "4e656420466c616e64657273"
-                    },
-                    {}, // empty because nulls are omitted
-                    {
-                        "bytes": "54726f79204d63436c757265"
-                    }
+                    { "bytes": "4e656420466c616e64657273" },
+                    {},
+                    { "bytes": "54726f79204d63436c757265" }
                 ]),
                 json_value
             );
@@ -1724,10 +1852,25 @@ mod tests {
 
     #[test]
     fn test_writer_binary() {
+        let values: [Option<&[u8]>; 3] = [
+            Some(b"Ned Flanders" as &[u8]),
+            None,
+            Some(b"Troy McClure" as &[u8]),
+        ];
         // Binary:
-        binary_encoding_test::<i32>();
+        {
+            let batch = build_array_binary::<i32>(&values);
+            assert_binary_json(&batch);
+        }
         // LargeBinary:
-        binary_encoding_test::<i64>();
+        {
+            let batch = build_array_binary::<i64>(&values);
+            assert_binary_json(&batch);
+        }
+        {
+            let batch = build_array_binary_view(&values);
+            assert_binary_json(&batch);
+        }
     }
 
     #[test]
@@ -1916,6 +2059,54 @@ mod tests {
         )
     }
 
+    #[test]
+    fn test_decimal32_encoder() {
+        let array = Decimal32Array::from_iter_values([1234, 5678, 9012])
+            .with_precision_and_scale(8, 2)
+            .unwrap();
+        let field = Arc::new(Field::new("decimal", array.data_type().clone(), true));
+        let schema = Schema::new(vec![field]);
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"decimal":12.34}
+{"decimal":56.78}
+{"decimal":90.12}
+"#,
+        );
+    }
+
+    #[test]
+    fn test_decimal64_encoder() {
+        let array = Decimal64Array::from_iter_values([1234, 5678, 9012])
+            .with_precision_and_scale(10, 2)
+            .unwrap();
+        let field = Arc::new(Field::new("decimal", array.data_type().clone(), true));
+        let schema = Schema::new(vec![field]);
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"decimal":12.34}
+{"decimal":56.78}
+{"decimal":90.12}
+"#,
+        );
+    }
+
     #[test]
     fn test_decimal128_encoder() {
         let array = Decimal128Array::from_iter_values([1234, 5678, 9012])
@@ -2078,7 +2269,7 @@ mod tests {
                     None => out.extend_from_slice(b"null"),
                     Some(UnionValue::Int32(v)) => out.extend_from_slice(v.to_string().as_bytes()),
                     Some(UnionValue::String(v)) => {
-                        out.extend_from_slice(format!("\"{}\"", v).as_bytes())
+                        out.extend_from_slice(format!("\"{v}\"").as_bytes())
                     }
                 }
             }
diff --git a/arrow-ord/src/cmp.rs b/arrow-ord/src/cmp.rs
index 46cab1bb8e4c..30943ede4ac0 100644
--- a/arrow-ord/src/cmp.rs
+++ b/arrow-ord/src/cmp.rs
@@ -26,13 +26,14 @@
 use arrow_array::cast::AsArray;
 use arrow_array::types::{ByteArrayType, ByteViewType};
 use arrow_array::{
-    downcast_primitive_array, AnyDictionaryArray, Array, ArrowNativeTypeOp, BooleanArray, Datum,
-    FixedSizeBinaryArray, GenericByteArray, GenericByteViewArray,
+    AnyDictionaryArray, Array, ArrowNativeTypeOp, BooleanArray, Datum, FixedSizeBinaryArray,
+    GenericByteArray, GenericByteViewArray, downcast_primitive_array,
 };
 use arrow_buffer::bit_util::ceil;
-use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer};
+use arrow_buffer::{BooleanBuffer, NullBuffer};
 use arrow_schema::ArrowError;
 use arrow_select::take::take;
+use std::cmp::Ordering;
 use std::ops::Not;
 
 #[derive(Debug, Copy, Clone)]
@@ -272,7 +273,7 @@ fn compare_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<BooleanArray,
                     let r = r.inner().bit_chunks().iter_padded();
                     let ne = values.bit_chunks().iter_padded();
 
-                    let c = |((l, r), n)| ((l ^ r) | (l & r & n));
+                    let c = |((l, r), n)| (l ^ r) | (l & r & n);
                     let buffer = l.zip(r).zip(ne).map(c).collect();
                     BooleanBuffer::new(buffer, 0, len).into()
                 }
@@ -389,14 +390,14 @@ fn take_bits(v: &dyn AnyDictionaryArray, buffer: BooleanBuffer) -> BooleanBuffer
 
 /// Invokes `f` with values `0..len` collecting the boolean results into a new `BooleanBuffer`
 ///
-/// This is similar to [`MutableBuffer::collect_bool`] but with
+/// This is similar to [`arrow_buffer::MutableBuffer::collect_bool`] but with
 /// the option to efficiently negate the result
 fn collect_bool(len: usize, neg: bool, f: impl Fn(usize) -> bool) -> BooleanBuffer {
-    let mut buffer = MutableBuffer::new(ceil(len, 64) * 8);
+    let mut buffer = Vec::with_capacity(ceil(len, 64));
 
     let chunks = len / 64;
     let remainder = len % 64;
-    for chunk in 0..chunks {
+    buffer.extend((0..chunks).map(|chunk| {
         let mut packed = 0;
         for bit_idx in 0..64 {
             let i = bit_idx + chunk * 64;
@@ -406,9 +407,8 @@ fn collect_bool(len: usize, neg: bool, f: impl Fn(usize) -> bool) -> BooleanBuff
             packed = !packed
         }
 
-        // SAFETY: Already allocated sufficient capacity
-        unsafe { buffer.push_unchecked(packed) }
-    }
+        packed
+    }));
 
     if remainder != 0 {
         let mut packed = 0;
@@ -420,8 +420,7 @@ fn collect_bool(len: usize, neg: bool, f: impl Fn(usize) -> bool) -> BooleanBuff
             packed = !packed
         }
 
-        // SAFETY: Already allocated sufficient capacity
-        unsafe { buffer.push_unchecked(packed) }
+        buffer.push(packed);
     }
     BooleanBuffer::new(buffer.into(), 0, len)
 }
@@ -508,7 +507,7 @@ impl ArrayOrd for &BooleanArray {
     }
 
     unsafe fn value_unchecked(&self, idx: usize) -> Self::Item {
-        BooleanArray::value_unchecked(self, idx)
+        unsafe { BooleanArray::value_unchecked(self, idx) }
     }
 
     fn is_eq(l: Self::Item, r: Self::Item) -> bool {
@@ -528,7 +527,7 @@ impl<T: ArrowNativeTypeOp> ArrayOrd for &[T] {
     }
 
     unsafe fn value_unchecked(&self, idx: usize) -> Self::Item {
-        *self.get_unchecked(idx)
+        unsafe { *self.get_unchecked(idx) }
     }
 
     fn is_eq(l: Self::Item, r: Self::Item) -> bool {
@@ -548,7 +547,7 @@ impl<'a, T: ByteArrayType> ArrayOrd for &'a GenericByteArray<T> {
     }
 
     unsafe fn value_unchecked(&self, idx: usize) -> Self::Item {
-        GenericByteArray::value_unchecked(self, idx).as_ref()
+        unsafe { GenericByteArray::value_unchecked(self, idx).as_ref() }
     }
 
     fn is_eq(l: Self::Item, r: Self::Item) -> bool {
@@ -571,7 +570,7 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
         let r_view = unsafe { r.0.views().get_unchecked(r.1) };
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             // For eq case, we can directly compare the inlined bytes
-            return l_view.cmp(r_view).is_eq();
+            return l_view == r_view;
         }
 
         let l_len = *l_view as u32;
@@ -581,6 +580,9 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
         if l_len != r_len {
             return false;
         }
+        if l_len == 0 && r_len == 0 {
+            return true;
+        }
 
         // # Safety
         // The index is within bounds as it is checked in value()
@@ -589,15 +591,15 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
 
     #[inline(always)]
     fn is_lt(l: Self::Item, r: Self::Item) -> bool {
+        // If both arrays use only the inline buffer
         if l.0.data_buffers().is_empty() && r.0.data_buffers().is_empty() {
             let l_view = unsafe { l.0.views().get_unchecked(l.1) };
             let r_view = unsafe { r.0.views().get_unchecked(r.1) };
-            let l_len = *l_view as u32 as usize;
-            let r_len = *r_view as u32 as usize;
-            let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-            let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-            return l_bytes.cmp(r_bytes).is_lt();
+            return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+                < GenericByteViewArray::<T>::inline_key_fast(*r_view);
         }
+
+        // Fall back to the generic, unchecked comparison for non-inline cases
         // # Safety
         // The index is within bounds as it is checked in value()
         unsafe { GenericByteViewArray::compare_unchecked(l.0, l.1, r.0, r.1).is_lt() }
@@ -620,7 +622,7 @@ impl<'a> ArrayOrd for &'a FixedSizeBinaryArray {
     }
 
     unsafe fn value_unchecked(&self, idx: usize) -> Self::Item {
-        FixedSizeBinaryArray::value_unchecked(self, idx)
+        unsafe { FixedSizeBinaryArray::value_unchecked(self, idx) }
     }
 
     fn is_eq(l: Self::Item, r: Self::Item) -> bool {
@@ -639,85 +641,18 @@ pub fn compare_byte_view<T: ByteViewType>(
     left_idx: usize,
     right: &GenericByteViewArray<T>,
     right_idx: usize,
-) -> std::cmp::Ordering {
+) -> Ordering {
     assert!(left_idx < left.len());
     assert!(right_idx < right.len());
     if left.data_buffers().is_empty() && right.data_buffers().is_empty() {
         let l_view = unsafe { left.views().get_unchecked(left_idx) };
         let r_view = unsafe { right.views().get_unchecked(right_idx) };
-        let l_len = *l_view as u32 as usize;
-        let r_len = *r_view as u32 as usize;
-        let l_bytes = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len) };
-        let r_bytes = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len) };
-        return l_bytes.cmp(r_bytes);
+        return GenericByteViewArray::<T>::inline_key_fast(*l_view)
+            .cmp(&GenericByteViewArray::<T>::inline_key_fast(*r_view));
     }
     unsafe { GenericByteViewArray::compare_unchecked(left, left_idx, right, right_idx) }
 }
 
-/// Comparing two [`GenericByteViewArray`] at index `left_idx` and `right_idx`
-///
-/// Comparing two ByteView types are non-trivial.
-/// It takes a bit of patience to understand why we don't just compare two &[u8] directly.
-///
-/// ByteView types give us the following two advantages, and we need to be careful not to lose them:
-/// (1) For string/byte smaller than 12 bytes, the entire data is inlined in the view.
-///     Meaning that reading one array element requires only one memory access
-///     (two memory access required for StringArray, one for offset buffer, the other for value buffer).
-///
-/// (2) For string/byte larger than 12 bytes, we can still be faster than (for certain operations) StringArray/ByteArray,
-///     thanks to the inlined 4 bytes.
-///     Consider equality check:
-///     If the first four bytes of the two strings are different, we can return false immediately (with just one memory access).
-///
-/// If we directly compare two &[u8], we materialize the entire string (i.e., make multiple memory accesses), which might be unnecessary.
-/// - Most of the time (eq, ord), we only need to look at the first 4 bytes to know the answer,
-///   e.g., if the inlined 4 bytes are different, we can directly return unequal without looking at the full string.
-///
-/// # Order check flow
-/// (1) if both string are smaller than 12 bytes, we can directly compare the data inlined to the view.
-/// (2) if any of the string is larger than 12 bytes, we need to compare the full string.
-///     (2.1) if the inlined 4 bytes are different, we can return the result immediately.
-///     (2.2) o.w., we need to compare the full string.
-///
-/// # Safety
-/// The left/right_idx must within range of each array
-#[deprecated(
-    since = "52.2.0",
-    note = "Use `GenericByteViewArray::compare_unchecked` instead"
-)]
-pub unsafe fn compare_byte_view_unchecked<T: ByteViewType>(
-    left: &GenericByteViewArray<T>,
-    left_idx: usize,
-    right: &GenericByteViewArray<T>,
-    right_idx: usize,
-) -> std::cmp::Ordering {
-    let l_view = left.views().get_unchecked(left_idx);
-    let l_len = *l_view as u32;
-
-    let r_view = right.views().get_unchecked(right_idx);
-    let r_len = *r_view as u32;
-
-    if l_len <= 12 && r_len <= 12 {
-        let l_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, l_len as usize) };
-        let r_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, r_len as usize) };
-        return l_data.cmp(r_data);
-    }
-
-    // one of the string is larger than 12 bytes,
-    // we then try to compare the inlined data first
-    let l_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(l_view, 4) };
-    let r_inlined_data = unsafe { GenericByteViewArray::<T>::inline_value(r_view, 4) };
-    if r_inlined_data != l_inlined_data {
-        return l_inlined_data.cmp(r_inlined_data);
-    }
-
-    // unfortunately, we need to compare the full data
-    let l_full_data: &[u8] = unsafe { left.value_unchecked(left_idx).as_ref() };
-    let r_full_data: &[u8] = unsafe { right.value_unchecked(right_idx).as_ref() };
-
-    l_full_data.cmp(r_full_data)
-}
-
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index bb82f54d4918..3aff2c6234be 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -26,7 +26,7 @@
 use arrow_array::cast::*;
 
 use arrow_array::*;
-use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer};
+use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer, bit_util};
 use arrow_schema::ArrowError;
 
 /// Checks if a [`GenericListArray`] contains a value in the [`PrimitiveArray`]
@@ -119,7 +119,7 @@ mod tests {
         ListBuilder, PrimitiveDictionaryBuilder, StringBuilder, StringDictionaryBuilder,
     };
     use arrow_array::types::*;
-    use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano};
+    use arrow_buffer::{ArrowNativeType, Buffer, IntervalDayTime, IntervalMonthDayNano, i256};
     use arrow_data::ArrayData;
     use arrow_schema::{DataType, Field};
     use half::f16;
@@ -3059,6 +3059,120 @@ mod tests {
         );
     }
 
+    fn create_decimal_array<T: DecimalType>(data: Vec<Option<T::Native>>) -> PrimitiveArray<T> {
+        data.into_iter().collect::<PrimitiveArray<T>>()
+    }
+
+    fn test_cmp_dict_decimal<T: DecimalType>(
+        values1: Vec<Option<T::Native>>,
+        values2: Vec<Option<T::Native>>,
+    ) {
+        let values = create_decimal_array::<T>(values1);
+        let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+        let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+        let values = create_decimal_array::<T>(values2);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_decimal32() {
+        test_cmp_dict_decimal::<Decimal32Type>(
+            vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
+            vec![Some(7), Some(-3), Some(4), Some(3), Some(5)],
+        );
+    }
+
+    #[test]
+    fn test_cmp_dict_non_dict_decimal32() {
+        let array1: Decimal32Array = Decimal32Array::from_iter_values([1, 2, 5, 4, 3, 0]);
+
+        let values = Decimal32Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_decimal64() {
+        let values = Decimal64Array::from_iter_values([0, 1, 2, 3, 4, 5]);
+        let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+        let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+        let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_non_dict_decimal64() {
+        let array1: Decimal64Array = Decimal64Array::from_iter_values([1, 2, 5, 4, 3, 0]);
+
+        let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
     #[test]
     fn test_cmp_dict_decimal128() {
         let values = Decimal128Array::from_iter_values([0, 1, 2, 3, 4, 5]);
@@ -3163,6 +3277,103 @@ mod tests {
         assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
     }
 
+    #[test]
+    fn test_decimal32() {
+        let lhs = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+        let rhs = Decimal32Array::from_iter_values([7, -3, 4, 3]);
+        // eq: only the pair (4, 4) matches
+        let want = BooleanArray::from(vec![false, false, true, false]);
+        assert_eq!(want, crate::cmp::eq(&lhs, &rhs).unwrap());
+
+        // lt: only 1 < 7
+        let want = BooleanArray::from(vec![true, false, false, false]);
+        assert_eq!(want, crate::cmp::lt(&lhs, &rhs).unwrap());
+
+        // lt_eq: 1 <= 7 and 4 <= 4
+        let want = BooleanArray::from(vec![true, false, true, false]);
+        assert_eq!(want, crate::cmp::lt_eq(&lhs, &rhs).unwrap());
+
+        // gt: 2 > -3 and 5 > 3
+        let want = BooleanArray::from(vec![false, true, false, true]);
+        assert_eq!(want, crate::cmp::gt(&lhs, &rhs).unwrap());
+
+        // gt_eq: every pair except (1, 7)
+        let want = BooleanArray::from(vec![false, true, true, true]);
+        assert_eq!(want, crate::cmp::gt_eq(&lhs, &rhs).unwrap());
+    }
+
+    #[test]
+    fn test_decimal32_scalar() {
+        let array = Decimal32Array::from(vec![Some(1), Some(2), Some(3), None, Some(4), Some(5)]);
+        let scalar = Decimal32Array::new_scalar(3_i32);
+        // array == 3 (the null slot is expected to stay null in every result)
+        let want = BooleanArray::from(
+            vec![Some(false), Some(false), Some(true), None, Some(false), Some(false)],
+        );
+        let got = crate::cmp::eq(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+
+        // array != 3
+        let want = BooleanArray::from(
+            vec![Some(true), Some(true), Some(false), None, Some(true), Some(true)],
+        );
+        let got = crate::cmp::neq(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+
+        // array < 3
+        let want = BooleanArray::from(
+            vec![Some(true), Some(true), Some(false), None, Some(false), Some(false)],
+        );
+        let got = crate::cmp::lt(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+
+        // array <= 3
+        let want = BooleanArray::from(
+            vec![Some(true), Some(true), Some(true), None, Some(false), Some(false)],
+        );
+        let got = crate::cmp::lt_eq(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+
+        // array > 3
+        let want = BooleanArray::from(
+            vec![Some(false), Some(false), Some(false), None, Some(true), Some(true)],
+        );
+        let got = crate::cmp::gt(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+
+        // array >= 3
+        let want = BooleanArray::from(
+            vec![Some(false), Some(false), Some(true), None, Some(true), Some(true)],
+        );
+        let got = crate::cmp::gt_eq(&array, &scalar).unwrap();
+        assert_eq!(want, got);
+    }
+
+    #[test]
+    fn test_decimal64() {
+        let lhs = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+        let rhs = Decimal64Array::from_iter_values([7, -3, 4, 3]);
+        // eq: only the pair (4, 4) matches
+        let want = BooleanArray::from(vec![false, false, true, false]);
+        assert_eq!(want, crate::cmp::eq(&lhs, &rhs).unwrap());
+
+        // lt: only 1 < 7
+        let want = BooleanArray::from(vec![true, false, false, false]);
+        assert_eq!(want, crate::cmp::lt(&lhs, &rhs).unwrap());
+
+        // lt_eq: 1 <= 7 and 4 <= 4
+        let want = BooleanArray::from(vec![true, false, true, false]);
+        assert_eq!(want, crate::cmp::lt_eq(&lhs, &rhs).unwrap());
+
+        // gt: 2 > -3 and 5 > 3
+        let want = BooleanArray::from(vec![false, true, false, true]);
+        assert_eq!(want, crate::cmp::gt(&lhs, &rhs).unwrap());
+
+        // gt_eq: every pair except (1, 7)
+        let want = BooleanArray::from(vec![false, true, true, true]);
+        assert_eq!(want, crate::cmp::gt_eq(&lhs, &rhs).unwrap());
+    }
+
     #[test]
     fn test_decimal128() {
         let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);
diff --git a/arrow-ord/src/lib.rs b/arrow-ord/src/lib.rs
index 99b0451992cf..9388007826be 100644
--- a/arrow-ord/src/lib.rs
+++ b/arrow-ord/src/lib.rs
@@ -47,7 +47,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 pub mod cmp;
 #[doc(hidden)]
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index 55e397cd8aa4..b12a06732d42 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -21,8 +21,8 @@ use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::{ArrowNativeType, NullBuffer};
-use arrow_schema::{ArrowError, SortOptions};
-use std::cmp::Ordering;
+use arrow_schema::{ArrowError, DataType, SortOptions};
+use std::{cmp::Ordering, collections::HashMap};
 
 /// Compare the values at two arbitrary indices in two arrays.
 pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
@@ -233,6 +233,37 @@ fn compare_fixed_list(
     Ok(f)
 }
 
+/// Builds a comparator for two `MapArray`s.
+///
+/// A map is ordered like the list of its entries: entries are compared pairwise,
+/// in storage order, with a comparator built over the two entries arrays, and the
+/// first non-equal result decides. If every compared pair is equal, the entry
+/// counts are compared instead. `opts` is passed to `compare` for the maps
+/// themselves, while the entries comparator is built with `child_opts(opts)`.
+/// Errors from building the entries comparator are propagated.
+fn compare_map(
+    left: &dyn Array,
+    right: &dyn Array,
+    opts: SortOptions,
+) -> Result<DynComparator, ArrowError> {
+    let (left, right) = (left.as_map(), right.as_map());
+    let entry_cmp = make_comparator(left.entries(), right.entries(), child_opts(opts))?;
+    let (left_offsets, right_offsets) = (left.offsets().clone(), right.offsets().clone());
+
+    Ok(compare(left, right, opts, move |i, j| {
+        // Entry ranges `[start, end)` of the two maps being compared.
+        let (l_end, l_start) = (left_offsets[i + 1].as_usize(), left_offsets[i].as_usize());
+        let (r_end, r_start) = (right_offsets[j + 1].as_usize(), right_offsets[j].as_usize());
+
+        // The first differing pair of entries decides; otherwise compare the entry counts.
+        (l_start..l_end)
+            .zip(r_start..r_end)
+            .map(|(l, r)| entry_cmp(l, r))
+            .find(|ordering| ordering.is_ne())
+            .unwrap_or_else(|| (l_end - l_start).cmp(&(r_end - r_start)))
+    }))
+}
+
 fn compare_struct(
     left: &dyn Array,
     right: &dyn Array,
@@ -265,10 +296,76 @@ fn compare_struct(
     Ok(f)
 }
 
-#[deprecated(since = "52.0.0", note = "Use make_comparator")]
-#[doc(hidden)]
-pub fn build_compare(left: &dyn Array, right: &dyn Array) -> Result<DynComparator, ArrowError> {
-    make_comparator(left, right, SortOptions::default())
+/// Builds a comparator for two `UnionArray`s that share the same fields and mode.
+///
+/// Rows are ordered by type id first. When both rows have the same type id, the
+/// comparator built for that type id's children decides. The child position is
+/// read from the offsets buffer when one is present, and is the row index
+/// itself otherwise.
+///
+/// # Errors
+///
+/// Returns `ArrowError::InvalidArgumentError` when the two arrays differ in their
+/// union fields or union mode, and propagates any error raised while building a
+/// child comparator.
+fn compare_union(
+    left: &dyn Array,
+    right: &dyn Array,
+    opts: SortOptions,
+) -> Result<DynComparator, ArrowError> {
+    let (left, right) = (left.as_union(), right.as_union());
+
+    let DataType::Union(left_fields, left_mode) = left.data_type() else {
+        unreachable!()
+    };
+    let DataType::Union(right_fields, right_mode) = right.data_type() else {
+        unreachable!()
+    };
+
+    if left_fields != right_fields {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
+            left_fields, right_fields
+        )));
+    }
+
+    if left_mode != right_mode {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
+            left_mode, right_mode
+        )));
+    }
+
+    // One child comparator per type id, looked up again at comparison time.
+    let c_opts = child_opts(opts);
+    let field_comparators = left_fields
+        .iter()
+        .map(|(type_id, _field)| {
+            let (l_child, r_child) = (left.child(type_id), right.child(type_id));
+            make_comparator(l_child.as_ref(), r_child.as_ref(), c_opts).map(|cmp| (type_id, cmp))
+        })
+        .collect::<Result<HashMap<_, _>, ArrowError>>()?;
+
+    let (left_type_ids, right_type_ids) = (left.type_ids().clone(), right.type_ids().clone());
+    let (left_offsets, right_offsets) = (left.offsets().cloned(), right.offsets().cloned());
+
+    Ok(compare(left, right, opts, move |i, j| {
+        let (left_type_id, right_type_id) = (left_type_ids[i], right_type_ids[j]);
+
+        // Rows holding different variants are ordered by type id alone.
+        let by_type_id = left_type_id.cmp(&right_type_id);
+        if by_type_id.is_ne() {
+            return by_type_id;
+        }
+
+        // Same variant: compare the values held in the matching children.
+        let left_pos = left_offsets.as_ref().map_or(i, |o| o[i] as usize);
+        let right_pos = right_offsets.as_ref().map_or(j, |o| o[j] as usize);
+        let cmp = field_comparators
+            .get(&left_type_id)
+            .expect("type id not found in field_comparators");
+        cmp(left_pos, right_pos)
+    }))
 }
 
 /// Returns a comparison function that compares two values at two different positions
@@ -386,6 +483,8 @@ pub fn make_comparator(
                  _ => unreachable!()
              }
         },
+        (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
+        (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
         (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
             true => format!("The data type type {lhs:?} has no natural order"),
             false => "Can't compare arrays of different types".to_string(),
@@ -396,9 +495,9 @@ pub fn make_comparator(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::builder::{Int32Builder, ListBuilder};
-    use arrow_buffer::{i256, IntervalDayTime, OffsetBuffer};
-    use arrow_schema::{DataType, Field, Fields};
+    use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
+    use arrow_buffer::{IntervalDayTime, OffsetBuffer, ScalarBuffer, i256};
+    use arrow_schema::{DataType, Field, Fields, UnionFields};
     use half::f16;
     use std::sync::Arc;
 
@@ -549,7 +648,33 @@ mod tests {
     }
 
     #[test]
-    fn test_decimal() {
+    fn test_decimali32() {
+        // Decimal32(8, 6) array holding [5, 2, 3], compared against itself
+        let values: Decimal32Array = [Some(5_i32), Some(2_i32), Some(3_i32)].into_iter().collect();
+        let values = values.with_precision_and_scale(8, 6).unwrap();
+
+        // the comparator is built with the default sort options
+        let opts = SortOptions::default();
+        let cmp = make_comparator(&values, &values, opts).unwrap();
+        assert_eq!(Ordering::Less, cmp(1, 0)); // 2 < 5
+        assert_eq!(Ordering::Greater, cmp(0, 2)); // 5 > 3
+    }
+
+    #[test]
+    fn test_decimali64() {
+        // Decimal64(16, 6) array holding [5, 2, 3], compared against itself
+        let values: Decimal64Array = [Some(5_i64), Some(2_i64), Some(3_i64)].into_iter().collect();
+        let values = values.with_precision_and_scale(16, 6).unwrap();
+
+        // the comparator is built with the default sort options
+        let opts = SortOptions::default();
+        let cmp = make_comparator(&values, &values, opts).unwrap();
+        assert_eq!(Ordering::Less, cmp(1, 0)); // 2 < 5
+        assert_eq!(Ordering::Greater, cmp(0, 2)); // 5 > 3
+    }
+
+    #[test]
+    fn test_decimali128() {
         let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
             .into_iter()
             .collect::<Decimal128Array>()
@@ -921,4 +1046,459 @@ mod tests {
         assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
         assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
     }
+
+    #[test]
+    fn test_map() {
+        // Create first map array (the assertions below show keys deciding before values):
+        // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
+        let string_builder = StringBuilder::new();
+        let int_builder = Int32Builder::new();
+        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
+
+        // {"a": 100, "b": 1} - high value for "a", low value for "b"
+        map1_builder.keys().append_value("a");
+        map1_builder.values().append_value(100);
+        map1_builder.keys().append_value("b");
+        map1_builder.values().append_value(1);
+        map1_builder.append(true).unwrap();
+
+        // {"b": 999, "c": 1} - very high value for "b", low value for "c"
+        map1_builder.keys().append_value("b");
+        map1_builder.values().append_value(999);
+        map1_builder.keys().append_value("c");
+        map1_builder.values().append_value(1);
+        map1_builder.append(true).unwrap();
+
+        // {}
+        map1_builder.append(true).unwrap();
+
+        // {"x": 1}
+        map1_builder.keys().append_value("x");
+        map1_builder.values().append_value(1);
+        map1_builder.append(true).unwrap();
+
+        let map1 = map1_builder.finish();
+
+        // Create second map array:
+        // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
+        let string_builder = StringBuilder::new();
+        let int_builder = Int32Builder::new();
+        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
+
+        // {"a": 1, "c": 999} - low value for "a", high value for "c"
+        map2_builder.keys().append_value("a");
+        map2_builder.values().append_value(1);
+        map2_builder.keys().append_value("c");
+        map2_builder.values().append_value(999);
+        map2_builder.append(true).unwrap();
+
+        // {"b": 1, "d": 999} - low value for "b", high value for "d"
+        map2_builder.keys().append_value("b");
+        map2_builder.values().append_value(1);
+        map2_builder.keys().append_value("d");
+        map2_builder.values().append_value(999);
+        map2_builder.append(true).unwrap();
+
+        // {"a": 1}
+        map2_builder.keys().append_value("a");
+        map2_builder.values().append_value(1);
+        map2_builder.append(true).unwrap();
+
+        // None
+        map2_builder.append(false).unwrap();
+
+        let map2 = map2_builder.finish();
+
+        let opts = SortOptions {
+            descending: false,
+            nulls_first: true,
+        };
+        let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+        // Entries are compared pairwise, starting with the first entry of each map:
+        // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
+        // The first entries share the key "a", but 100 > 1, so Greater
+        assert_eq!(cmp(0, 0), Ordering::Greater);
+
+        // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
+        // The first entries share the key "b", but 999 > 1, so Greater
+        assert_eq!(cmp(1, 1), Ordering::Greater);
+
+        // Key comparison: "a" < "b", so map1[0] {"a": 100, "b": 1} < map2[1] {"b": 1, "d": 999}
+        assert_eq!(cmp(0, 1), Ordering::Less);
+
+        // Empty map vs non-empty
+        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
+
+        // Non-null vs null
+        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
+
+        // Key priority test: "x" > "a", regardless of values
+        assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999}
+
+        // Empty vs non-empty
+        assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
+
+        let opts = SortOptions {
+            descending: true,
+            nulls_first: true,
+        };
+        let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+        // With descending=true, the results for non-null maps are reversed
+        assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed)
+        assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed)
+        assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 1, "d": 999} (key order reversed)
+        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
+        assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
+
+        let opts = SortOptions {
+            descending: false,
+            nulls_first: false,
+        };
+        let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+        // Same key priority behavior with nulls_first=false
+        assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
+        assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
+        assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last)
+        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
+    }
+
+    #[test]
+    fn test_map_vs_list_consistency() {
+        // Build two map arrays, convert each to a list array, and check that both comparators agree
+        // First map array: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
+        let string_builder = StringBuilder::new();
+        let int_builder = Int32Builder::new();
+        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
+
+        // {"a": 1, "b": 2}
+        map1_builder.keys().append_value("a");
+        map1_builder.values().append_value(1);
+        map1_builder.keys().append_value("b");
+        map1_builder.values().append_value(2);
+        map1_builder.append(true).unwrap();
+
+        // {"x": 10}
+        map1_builder.keys().append_value("x");
+        map1_builder.values().append_value(10);
+        map1_builder.append(true).unwrap();
+
+        // {}
+        map1_builder.append(true).unwrap();
+
+        // {"c": 3}
+        map1_builder.keys().append_value("c");
+        map1_builder.values().append_value(3);
+        map1_builder.append(true).unwrap();
+
+        let map1 = map1_builder.finish();
+
+        // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
+        let string_builder = StringBuilder::new();
+        let int_builder = Int32Builder::new();
+        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
+
+        // {"a": 1, "b": 2}
+        map2_builder.keys().append_value("a");
+        map2_builder.values().append_value(1);
+        map2_builder.keys().append_value("b");
+        map2_builder.values().append_value(2);
+        map2_builder.append(true).unwrap();
+
+        // {"y": 20}
+        map2_builder.keys().append_value("y");
+        map2_builder.values().append_value(20);
+        map2_builder.append(true).unwrap();
+
+        // {"d": 4}
+        map2_builder.keys().append_value("d");
+        map2_builder.values().append_value(4);
+        map2_builder.append(true).unwrap();
+
+        // None
+        map2_builder.append(false).unwrap();
+
+        let map2 = map2_builder.finish();
+
+        // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs)
+        let list1: ListArray = map1.clone().into();
+        let list2: ListArray = map2.clone().into();
+
+        // Every combination of `descending` and `nulls_first`
+        let test_cases = [
+            SortOptions {
+                descending: false,
+                nulls_first: true,
+            },
+            SortOptions {
+                descending: true,
+                nulls_first: true,
+            },
+            SortOptions {
+                descending: false,
+                nulls_first: false,
+            },
+            SortOptions {
+                descending: true,
+                nulls_first: false,
+            },
+        ];
+
+        for opts in test_cases {
+            let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
+            let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
+
+            // Compare every (i, j) index pair under the current options
+            for i in 0..map1.len() {
+                for j in 0..map2.len() {
+                    let map_result = map_cmp(i, j);
+                    let list_result = list_cmp(i, j);
+                    assert_eq!(
+                        map_result, list_result,
+                        "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_dense_union() {
+        // create a dense union array with Int32 (type_id = 0) and Utf8 (type_id=1)
+        // the values are: [1, "b", 2, "a", 3]
+        //  type_ids are: [0,  1,  0,  1,  0]
+        //   offsets are: [0, 0, 1, 1, 2] from [1, 2, 3] and ["b", "a"]
+        let int_array = Int32Array::from(vec![1, 2, 3]);
+        let str_array = StringArray::from(vec!["b", "a"]);
+
+        let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect::<UnionFields>();
+
+        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
+
+        let array1 =
+            UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
+
+        // create a second array: [2, "a", 1, "c"]
+        //          type ids are: [0,  1,  0,  1]
+        //           offsets are: [0, 0, 1, 1] from [2, 1] and ["a", "c"]
+        let int_array2 = Int32Array::from(vec![2, 1]);
+        let str_array2 = StringArray::from(vec!["a", "c"]);
+        let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
+
+        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
+
+        let array2 =
+            UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
+
+        let opts = SortOptions {
+            descending: false,
+            nulls_first: true,
+        };
+
+        // comparing array1 (left index) against array2 (right index):
+        // [1, "b", 2, "a", 3]
+        // [2, "a", 1, "c"]
+        let cmp = make_comparator(&array1, &array2, opts).unwrap();
+
+        // array1[0] = (type_id=0, value=1)
+        // array2[0] = (type_id=0, value=2)
+        assert_eq!(cmp(0, 0), Ordering::Less); // 1 < 2
+
+        // array1[0] = (type_id=0, value=1)
+        // array2[1] = (type_id=1, value="a")
+        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
+
+        // array1[1] = (type_id=1, value="b")
+        // array2[1] = (type_id=1, value="a")
+        assert_eq!(cmp(1, 1), Ordering::Greater); // "b" > "a"
+
+        // array1[2] = (type_id=0, value=2)
+        // array2[0] = (type_id=0, value=2)
+        assert_eq!(cmp(2, 0), Ordering::Equal); // 2 == 2
+
+        // array1[3] = (type_id=1, value="a")
+        // array2[1] = (type_id=1, value="a")
+        assert_eq!(cmp(3, 1), Ordering::Equal); // "a" == "a"
+
+        // array1[1] = (type_id=1, value="b")
+        // array2[3] = (type_id=1, value="c")
+        assert_eq!(cmp(1, 3), Ordering::Less); // "b" < "c"
+
+        // same arrays, descending order: the results above are reversed
+        let opts_desc = SortOptions {
+            descending: true,
+            nulls_first: true,
+        };
+        let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
+
+        assert_eq!(cmp_desc(0, 0), Ordering::Greater); // 1 vs 2: Less, reversed to Greater
+        assert_eq!(cmp_desc(0, 1), Ordering::Greater); // type_id 0 < 1, reversed to Greater
+        assert_eq!(cmp_desc(1, 1), Ordering::Less); // "b" vs "a": Greater, reversed to Less
+    }
+
+    #[test]
+    fn test_sparse_union() {
+        // Sparse union over Int32 (type_id=0) and Utf8 (type_id=1) holding [1, "b", 3].
+        // Each child of a sparse union is as long as the union itself, so the slots
+        // that belong to the other variant are padded with nulls.
+        let ints = Int32Array::from(vec![Some(1), None, Some(3)]);
+        let strs = StringArray::from(vec![None, Some("b"), None]);
+        let children = vec![Arc::new(ints) as ArrayRef, Arc::new(strs)];
+
+        // one type id per row, selecting the child that holds the row's value
+        let type_ids = ScalarBuffer::<i8>::from_iter([0, 1, 0]);
+
+        let fields = UnionFields::from_iter([
+            (0, Arc::new(Field::new("a", DataType::Int32, false))),
+            (1, Arc::new(Field::new("b", DataType::Utf8, false))),
+        ]);
+
+        // no offsets buffer is supplied for the sparse layout
+        let union = UnionArray::try_new(fields, type_ids, None, children).unwrap();
+
+        let cmp = make_comparator(&union, &union, SortOptions::default()).unwrap();
+
+        // union[0] = (type_id=0, value=1), union[2] = (type_id=0, value=3)
+        assert_eq!(cmp(0, 2), Ordering::Less); // 1 < 3
+
+        // union[0] = (type_id=0, value=1), union[1] = (type_id=1, value="b")
+        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
+    }
+
+    #[test]
+    #[should_panic(expected = "index out of bounds")]
+    fn test_union_out_of_bounds() {
+        // Dense union with three rows: [1, "a", 2]
+        let ints = Int32Array::from(vec![1, 2]);
+        let strs = StringArray::from(vec!["a"]);
+        let children = vec![Arc::new(ints) as ArrayRef, Arc::new(strs)];
+
+        // per row: the type id picks the child, the offset picks the slot in it
+        let type_ids = ScalarBuffer::<i8>::from_iter([0, 1, 0]);
+        let offsets = ScalarBuffer::<i32>::from_iter([0, 0, 1]);
+
+        // Int32 is type id 0, Utf8 is type id 1
+        let fields = UnionFields::from_iter([
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
+        ]);
+
+        let union = UnionArray::try_new(fields, type_ids, Some(offsets), children).unwrap();
+
+        let cmp = make_comparator(&union, &union, SortOptions::default()).unwrap();
+
+        // The union has rows 0..=2, so row 3 on the right-hand side does not
+        // exist; the comparator is expected to panic with an
+        // index-out-of-bounds message rather than return an ordering.
+        cmp(0, 3);
+    }
+
+    #[test]
+    fn test_union_incompatible_fields() {
+        // create first union with Int32 and Utf8
+        let int_array1 = Int32Array::from(vec![1, 2]);
+        let str_array1 = StringArray::from(vec!["a", "b"]);
+
+        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
+
+        let union_fields1 = [
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect::<UnionFields>();
+
+        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
+
+        let array1 =
+            UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
+
+        // create second union with Int32 and Float64 (incompatible with first)
+        let int_array2 = Int32Array::from(vec![3, 4]);
+        let float_array2 = Float64Array::from(vec![1.0, 2.0]);
+
+        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
+
+        let union_fields2 = [
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("C", DataType::Float64, false))),
+        ]
+        .into_iter()
+        .collect::<UnionFields>();
+
+        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
+
+        let array2 =
+            UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
+
+        let opts = SortOptions::default();
+        // building the comparator must fail because the union fields differ
+        let Result::Err(ArrowError::InvalidArgumentError(out)) =
+            make_comparator(&array1, &array2, opts)
+        else {
+            panic!("expected error when making comparator of incompatible union arrays");
+        };
+
+        assert_eq!(
+            &out,
+            "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
+        );
+    }
+
+    #[test]
+    fn test_union_incompatible_modes() {
+        // create first union as Dense with Int32 and Utf8
+        let int_array1 = Int32Array::from(vec![1, 2]);
+        let str_array1 = StringArray::from(vec!["a", "b"]);
+
+        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
+
+        let union_fields1 = [
+            (0, Arc::new(Field::new("A", DataType::Int32, false))),
+            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect::<UnionFields>();
+
+        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
+
+        let array1 =
+            UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
+                .unwrap();
+
+        // create second union as Sparse with same fields (Int32 and Utf8)
+        let int_array2 = Int32Array::from(vec![Some(3), None]);
+        let str_array2 = StringArray::from(vec![None, Some("c")]);
+
+        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
+
+        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
+
+        let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
+
+        let opts = SortOptions::default();
+        // same fields but Dense vs Sparse: building the comparator must fail
+        let Result::Err(ArrowError::InvalidArgumentError(out)) =
+            make_comparator(&array1, &array2, opts)
+        else {
+            panic!("expected error when making comparator of union arrays with different modes");
+        };
+
+        assert_eq!(
+            &out,
+            "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
+        );
+    }
 }
diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs
index 1b0d2a7e6349..252a41a4daec 100644
--- a/arrow-ord/src/rank.rs
+++ b/arrow-ord/src/rank.rs
@@ -20,7 +20,7 @@
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::{
-    downcast_primitive_array, Array, ArrowNativeTypeOp, BooleanArray, GenericByteArray,
+    Array, ArrowNativeTypeOp, BooleanArray, GenericByteArray, downcast_primitive_array,
 };
 use arrow_buffer::NullBuffer;
 use arrow_schema::{ArrowError, DataType, SortOptions};
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 00606cc6e6c4..39d56f8fe9b2 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -17,14 +17,14 @@
 
 //! Defines sort kernel for `ArrayRef`
 
-use crate::ord::{make_comparator, DynComparator};
+use crate::ord::{DynComparator, make_comparator};
 use arrow_array::builder::BufferBuilder;
 use arrow_array::cast::*;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_buffer::BooleanBufferBuilder;
-use arrow_data::ArrayDataBuilder;
+use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
 use arrow_schema::{ArrowError, DataType};
 use arrow_select::take::take;
 use std::cmp::Ordering;
@@ -120,7 +120,7 @@ where
     }
 
     Ok(Arc::new(
-        PrimitiveArray::<T>::new(mutable_buffer.into(), null_bit_buffer)
+        PrimitiveArray::<T>::try_new(mutable_buffer.into(), null_bit_buffer)?
             .with_data_type(primitive_values.data_type().clone()),
     ))
 }
@@ -178,16 +178,66 @@ where
     }
 }
 
-// partition indices into valid and null indices
-fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
-    match array.null_count() {
-        // faster path
-        0 => ((0..(array.len() as u32)).collect(), vec![]),
-        _ => {
-            let indices = 0..(array.len() as u32);
-            indices.partition(|index| array.is_valid(*index as usize))
+/// Splits the positions of `array` into two index lists, returned as `(valid, nulls)`:
+/// - the first list holds the indices of the non-null elements
+/// - the second list holds the indices of the null elements
+///
+/// An array that reports zero nulls takes a shortcut and yields every index as
+/// valid; any other array is handed to `partition_validity_scan`.
+#[inline(always)]
+pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
+    let total = array.len();
+
+    match array.null_count() {
+        // Shortcut: nothing is null, so the valid list is simply 0..total
+        0 => {
+            let every_index: Vec<u32> = (0..total as u32).collect();
+            (every_index, Vec::new())
+        }
+
+        // Some slots are null: split the indices by reading the null bitmap
+        missing => partition_validity_scan(array, total, missing),
+    }
+}
+
+/// Splits the indices `0..len` of `array` into `(valid, nulls)` lists by reading its null bitmap.
+/// Callers pass `array.len()` as `len` and `array.null_count()` as `null_count`.
+/// Panics when `array` has no null bitmap, so it must only be called when nulls exist.
+#[inline(always)]
+fn partition_validity_scan(
+    array: &dyn Array,
+    len: usize,
+    null_count: usize,
+) -> (Vec<u32>, Vec<u32>) {
+    // Not an unsafe precondition: the caller only calls this with null_count > 0, so a bitmap is expected
+    let bitmap = array.nulls().unwrap();
+
+    // Reserve exactly as many slots as each list is going to hold
+    let mut valid = Vec::with_capacity(len - null_count);
+    let mut nulls = Vec::with_capacity(null_count);
+
+    unsafe {
+        // 1) Write valid indices (bits == 1); indexing panics rather than writing past capacity
+        let valid_slice = valid.spare_capacity_mut();
+        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
+            valid_slice[i].write(idx);
+        }
+
+        // 2) Write null indices: the set bits of the negated bitmap
+        let inv_buf = !bitmap.inner();
+        let null_slice = nulls.spare_capacity_mut();
+        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
+            null_slice[i].write(idx);
         }
+
+        // Finalize lengths. NOTE(review): sound only if both loops filled every slot — confirm
+        valid.set_len(len - null_count);
+        nulls.set_len(null_count);
     }
+
+    assert_eq!(valid.len(), len - null_count);
+    assert_eq!(nulls.len(), null_count);
+    (valid, nulls)
 }
 
 /// Whether `sort_to_indices` can sort an array of given data type.
@@ -254,7 +304,7 @@ pub fn sort_to_indices(
         },
         t => {
             return Err(ArrowError::ComputeError(format!(
-                "Sort not supported for data type {t:?}"
+                "Sort not supported for data type {t}"
             )));
         }
     })
@@ -295,12 +345,88 @@ fn sort_bytes<T: ByteArrayType>(
     options: SortOptions,
     limit: Option<usize>,
 ) -> UInt32Array {
-    let mut valids = value_indices
+    // Note: why do we use a 4-byte prefix?
+    // The first 4 bytes are packed into a u32 in big-endian order; shorter values
+    // are zero-padded on the right. Most byte sequences differ in their first few
+    // bytes, so comparing up to 4 bytes as a single u32 avoids the overhead of a
+    // full lexicographical compare for the vast majority of cases.
+
+    // 1. Build a vector of (index, prefix, length) tuples
+    let mut valids: Vec<(u32, u32, u64)> = value_indices
         .into_iter()
-        .map(|index| (index, values.value(index as usize).as_ref()))
-        .collect::<Vec<(u32, &[u8])>>();
+        .map(|idx| unsafe {
+            let slice: &[u8] = values.value_unchecked(idx as usize).as_ref();
+            let len = slice.len() as u64;
+            // Compute the 4-byte prefix in BE order, zero-padding on the right if shorter
+            let prefix = if slice.len() >= 4 {
+                let raw = std::ptr::read_unaligned(slice.as_ptr() as *const u32);
+                u32::from_be(raw)
+            } else if slice.is_empty() {
+                // Handle empty slice case to avoid shift overflow
+                0u32
+            } else {
+                let mut v = 0u32;
+                for &b in slice {
+                    v = (v << 8) | (b as u32);
+                }
+                // Safe shift: slice.len() is in range [1, 3], so shift is in range [8, 24]
+                v << (8 * (4 - slice.len()))
+            };
+            (idx, prefix, len)
+        })
+        .collect();
 
-    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
+    // 2. compute the number of non-null entries to partially sort
+    let vlimit = match (limit, options.nulls_first) {
+        (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
+        _ => valids.len(),
+    };
+
+    // 3. Comparator: compare prefix, then (when either slice is shorter than 4) length, otherwise full slice
+    let cmp_bytes = |a: &(u32, u32, u64), b: &(u32, u32, u64)| unsafe {
+        let (ia, pa, la) = *a;
+        let (ib, pb, lb) = *b;
+        // 3.1 prefix (first 4 bytes)
+        let ord = pa.cmp(&pb);
+        if ord != Ordering::Equal {
+            return ord;
+        }
+        // 3.2 only if at least one slice had length < 4 (so its prefix was padded)
+        if la < 4 || lb < 4 {
+            let ord = la.cmp(&lb);
+            if ord != Ordering::Equal {
+                return ord;
+            }
+        }
+        // 3.3 full lexicographical compare
+        let a_bytes: &[u8] = values.value_unchecked(ia as usize).as_ref();
+        let b_bytes: &[u8] = values.value_unchecked(ib as usize).as_ref();
+        a_bytes.cmp(b_bytes)
+    };
+
+    // 4. Partially sort according to ascending/descending
+    if !options.descending {
+        sort_unstable_by(&mut valids, vlimit, cmp_bytes);
+    } else {
+        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_bytes(x, y).reverse());
+    }
+
+    // 5. Assemble nulls and sorted indices into final output
+    let total = valids.len() + nulls.len();
+    let out_limit = limit.unwrap_or(total).min(total);
+    let mut out = Vec::with_capacity(out_limit);
+
+    if options.nulls_first {
+        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
+        let rem = out_limit - out.len();
+        out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
+    } else {
+        out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
+        let rem = out_limit - out.len();
+        out.extend_from_slice(&nulls[..rem]);
+    }
+
+    out.into()
 }
 
 fn sort_byte_view<T: ByteViewType>(
@@ -310,11 +436,92 @@ fn sort_byte_view<T: ByteViewType>(
     options: SortOptions,
     limit: Option<usize>,
 ) -> UInt32Array {
-    let mut valids = value_indices
-        .into_iter()
-        .map(|index| (index, values.value(index as usize).as_ref()))
-        .collect::<Vec<(u32, &[u8])>>();
-    sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
+    // 1. List of (index, key) pairs, filled in step 3; the key is the inline key or the raw view
+    let mut valids: Vec<_>;
+    // 2. Compute the number of non-null entries to partially sort
+    let vlimit: usize = match (limit, options.nulls_first) {
+        (Some(l), true) => l.saturating_sub(nulls.len()).min(value_indices.len()),
+        _ => value_indices.len(),
+    };
+    // 3.a Check if all views are inline (no data buffers)
+    if values.data_buffers().is_empty() {
+        valids = value_indices
+            .into_iter()
+            .map(|idx| {
+                // SAFETY: we know idx < values.len()
+                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
+                let inline_key = GenericByteViewArray::<T>::inline_key_fast(raw);
+                (idx, inline_key)
+            })
+            .collect();
+        let cmp_inline = |a: &(u32, u128), b: &(u32, u128)| a.1.cmp(&b.1);
+
+        // Partially sort according to ascending/descending
+        if !options.descending {
+            sort_unstable_by(&mut valids, vlimit, cmp_inline);
+        } else {
+            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_inline(x, y).reverse());
+        }
+    } else {
+        valids = value_indices
+            .into_iter()
+            .map(|idx| {
+                // SAFETY: we know idx < values.len()
+                let raw = unsafe { *values.views().get_unchecked(idx as usize) };
+                (idx, raw)
+            })
+            .collect();
+        // 3.b Mixed comparator: inline keys if both views are inline, else 4-byte prefix, then full bytes
+        let cmp_mixed = |a: &(u32, u128), b: &(u32, u128)| {
+            let (_, raw_a) = *a;
+            let (_, raw_b) = *b;
+            let len_a = raw_a as u32;
+            let len_b = raw_b as u32;
+            // 3.b.1 Both inline (≤12 bytes): compare full 128-bit key including length
+            if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
+                return GenericByteViewArray::<T>::inline_key_fast(raw_a)
+                    .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
+            }
+
+            // 3.b.2 Compare 4-byte prefix in big-endian order
+            let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
+            let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
+            if pref_a != pref_b {
+                return pref_a.cmp(&pref_b);
+            }
+
+            // 3.b.3 Fallback to full byte-slice comparison
+            let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
+            let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
+            full_a.cmp(full_b)
+        };
+
+        // 3.b.4 Partially sort according to ascending/descending
+        if !options.descending {
+            sort_unstable_by(&mut valids, vlimit, cmp_mixed);
+        } else {
+            sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
+        }
+    }
+
+    // 4. Assemble nulls and sorted indices into final output
+    let total = valids.len() + nulls.len();
+    let out_limit = limit.unwrap_or(total).min(total);
+    let mut out = Vec::with_capacity(total);
+
+    if options.nulls_first {
+        // Place null indices first
+        out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
+        let rem = out_limit - out.len();
+        out.extend(valids.iter().map(|&(i, _)| i).take(rem));
+    } else {
+        // Place non-null indices first
+        out.extend(valids.iter().map(|&(i, _)| i).take(out_limit));
+        let rem = out_limit - out.len();
+        out.extend_from_slice(&nulls[..rem]);
+    }
+
+    out.into()
 }
 
 fn sort_fixed_size_binary(
@@ -634,12 +841,12 @@ pub struct SortColumn {
 
 /// Sort a list of `ArrayRef` using `SortOptions` provided for each array.
 ///
-/// Performs a stable lexicographical sort on values and indices.
+/// Performs an unstable lexicographical sort on values and indices.
 ///
 /// Returns an `ArrowError::ComputeError(String)` if any of the array type is either unsupported by
 /// `lexsort_to_indices` or `take`.
 ///
-/// Example:
+/// # Example:
 ///
 /// ```
 /// # use std::convert::From;
@@ -648,7 +855,6 @@ pub struct SortColumn {
 /// # use arrow_array::types::Int64Type;
 /// # use arrow_array::cast::AsArray;
 /// # use arrow_ord::sort::{SortColumn, SortOptions, lexsort};
-///
 /// let sorted_columns = lexsort(&vec![
 ///     SortColumn {
 ///         values: Arc::new(PrimitiveArray::<Int64Type>::from(vec![
@@ -865,7 +1071,7 @@ mod tests {
         BooleanBuilder, FixedSizeListBuilder, GenericListBuilder, Int64Builder, ListBuilder,
         PrimitiveRunBuilder,
     };
-    use arrow_buffer::{i256, NullBuffer};
+    use arrow_buffer::{NullBuffer, i256};
     use arrow_schema::Field;
     use half::f16;
     use rand::rngs::StdRng;
@@ -1710,7 +1916,7 @@ mod tests {
                     None => {
                         builder
                             .values()
-                            .extend(std::iter::repeat(None).take(fixed_length as usize));
+                            .extend(std::iter::repeat_n(None, fixed_length as usize));
                         builder.append(false);
                     }
                 }
@@ -2100,6 +2306,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_sort_indices_decimal32() {
+        test_sort_indices_decimal::<Decimal32Type>(8, 3);
+    }
+
+    #[test]
+    fn test_sort_indices_decimal64() {
+        test_sort_indices_decimal::<Decimal64Type>(17, 5);
+    }
+
     #[test]
     fn test_sort_indices_decimal128() {
         test_sort_indices_decimal::<Decimal128Type>(23, 6);
@@ -2253,6 +2469,16 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_sort_decimal32() {
+        test_sort_decimal::<Decimal32Type>(8, 3);
+    }
+
+    #[test]
+    fn test_sort_decimal64() {
+        test_sort_decimal::<Decimal64Type>(17, 5);
+    }
+
     #[test]
     fn test_sort_decimal128() {
         test_sort_decimal::<Decimal128Type>(23, 6);
@@ -4575,11 +4801,13 @@ mod tests {
         ]);
 
         assert!(!can_sort_to_indices(struct_array.data_type()));
-        assert!(sort_to_indices(&struct_array, None, None)
-            .err()
-            .unwrap()
-            .to_string()
-            .contains("Sort not supported for data type"));
+        assert!(
+            sort_to_indices(&struct_array, None, None)
+                .err()
+                .unwrap()
+                .to_string()
+                .contains("Sort not supported for data type")
+        );
 
         let sort_columns = vec![SortColumn {
             values: Arc::new(struct_array.clone()) as ArrayRef,
@@ -4600,4 +4828,411 @@ mod tests {
 
         assert_eq!(&sorted[0], &expected_struct_array);
     }
+
+    /// Reference partitioner for the tests: slow, but checks each slot on its own.
+    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
+        let slots = array.len();
+        let mut present = Vec::with_capacity(slots);
+        let mut absent = Vec::with_capacity(slots);
+
+        for pos in 0..slots {
+            // Route the index to whichever list matches the slot's validity
+            let bucket = if array.is_valid(pos) { &mut present } else { &mut absent };
+            bucket.push(pos as u32);
+        }
+
+        (present, absent)
+    }
+
+    #[test]
+    fn fuzz_partition_validity() {
+        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE); // fixed seed
+        for _ in 0..1_000 {
+            // Build a random BooleanArray: 0..512 slots, each null with probability 0.2
+            let len = rng.random_range(0..512);
+            let mut builder = BooleanBuilder::new();
+            for _ in 0..len {
+                if rng.random_bool(0.2) {
+                    builder.append_null();
+                } else {
+                    builder.append_value(rng.random_bool(0.5));
+                }
+            }
+            let array = builder.finish();
+
+            // Cross-check the optimized partitioner against the naive reference on the full array
+            let (v1, n1) = partition_validity(&array);
+            let (v2, n2) = naive_partition(&array);
+            assert_eq!(v1, v2, "valid mismatch on full array");
+            assert_eq!(n1, n2, "null  mismatch on full array");
+
+            if len >= 8 {
+                // 1) Random slice: offset drawn from 0..=len-4, length from 1..=len-offset
+                let max_offset = len - 4;
+                let offset = rng.random_range(0..=max_offset);
+                let max_slice_len = len - offset;
+                let slice_len = rng.random_range(1..=max_slice_len);
+
+                // Hold the slice in a local so the downcast reference below can borrow from it
+                let sliced = array.slice(offset, slice_len);
+                let slice = sliced
+                    .as_any()
+                    .downcast_ref::<BooleanArray>()
+                    .expect("slice should be a BooleanArray");
+
+                let (sv1, sn1) = partition_validity(slice);
+                let (sv2, sn2) = naive_partition(slice);
+                assert_eq!(
+                    sv1, sv2,
+                    "valid mismatch on random slice at offset {offset} length {slice_len}",
+                );
+                assert_eq!(
+                    sn1, sn2,
+                    "null mismatch on random slice at offset {offset} length {slice_len}",
+                );
+
+                // 2) Also test slices whose offset is at least 65, i.e. past the first 64 slots
+                if len > 68 {
+                    let offset2 = rng.random_range(65..(len - 3));
+                    let len2 = rng.random_range(1..=(len - offset2));
+
+                    let sliced2 = array.slice(offset2, len2);
+                    let slice2 = sliced2
+                        .as_any()
+                        .downcast_ref::<BooleanArray>()
+                        .expect("slice2 should be a BooleanArray");
+
+                    let (sv3, sn3) = partition_validity(slice2);
+                    let (sv4, sn4) = naive_partition(slice2);
+                    assert_eq!(
+                        sv3, sv4,
+                        "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
+                    );
+                    assert_eq!(
+                        sn3, sn4,
+                        "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
+                    );
+                }
+            }
+        }
+    }
+
+    // Hand-written inputs whose expected partitions are known up front
+    #[test]
+    fn test_partition_edge_cases() {
+        // No nulls: every index lands in the valid list
+        let no_nulls = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
+        let (present, absent) = partition_validity(&no_nulls);
+        assert_eq!(present, vec![0, 1, 2]);
+        assert!(absent.is_empty());
+
+        // Only nulls: every index lands in the null list
+        let only_nulls = BooleanArray::from(vec![None, None, None]);
+        let (present, absent) = partition_validity(&only_nulls);
+        assert!(present.is_empty());
+        assert_eq!(absent, vec![0, 1, 2]);
+
+        // Valid and null slots interleaved
+        let interleaved = BooleanArray::from(vec![Some(true), None, Some(true), None]);
+        let (present, absent) = partition_validity(&interleaved);
+        assert_eq!(present, vec![0, 2]);
+        assert_eq!(absent, vec![1, 3]);
+    }
+
+    // Sorts hand-picked strings that exercise the 4-byte prefix logic of `sort_bytes`
+    #[test]
+    fn test_specific_edge_cases() {
+        let test_cases = vec![
+            // Key test cases for lengths 1-4 that test prefix padding
+            "a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda",
+            // Test cases where first 4 bytes are same but subsequent bytes differ
+            "abcd", "abcde", "abcdf", "abcdaaa", "abcdbbb",
+            // Lengths 1-5 with a shared leading "z"; the values shorter than 4 bytes need padding
+            "z", "za", "zaa", "zaaa", "zaaab",
+            "", // Empty string; the next row varies the length after the shared prefix "test"
+            "test", "test1", "test12", "test123", "test1234",
+        ];
+
+        // Use standard library sort as reference
+        let mut expected = test_cases.clone();
+        expected.sort();
+
+        // Run sort_bytes over the identity index list
+        let string_array = StringArray::from(test_cases.clone());
+        let indices: Vec<u32> = (0..test_cases.len() as u32).collect();
+        let result = sort_bytes(
+            &string_array,
+            indices,
+            vec![], // no nulls
+            SortOptions::default(),
+            None,
+        );
+
+        // Map the sorted indices back to strings and compare with the reference order
+        let sorted_strings: Vec<&str> = result
+            .values()
+            .iter()
+            .map(|&idx| test_cases[idx as usize])
+            .collect();
+
+        assert_eq!(sorted_strings, expected);
+    }
+
+    // Checks ordering across values of many lengths, with and without a shared prefix
+    #[test]
+    fn test_length_combinations() {
+        let labelled = vec![
+            // Each entry pairs a value with its byte length; lengths around 4 drive the padding logic
+            ("", 0),
+            ("a", 1),
+            ("ab", 2),
+            ("abc", 3),
+            ("abcd", 4),
+            ("abcde", 5),
+            ("b", 1),
+            ("ba", 2),
+            ("bab", 3),
+            ("babc", 4),
+            ("babcd", 5),
+            // Identical leading bytes, growing length
+            ("test", 4),
+            ("test1", 5),
+            ("test12", 6),
+            ("test123", 7),
+        ];
+
+        let inputs: Vec<&str> = labelled.iter().map(|&(text, _)| text).collect();
+        let mut expected = inputs.clone();
+        expected.sort();
+
+        let indices: Vec<u32> = (0..inputs.len() as u32).collect();
+        let array = StringArray::from(inputs.clone());
+        let result = sort_bytes(&array, indices, vec![], SortOptions::default(), None);
+
+        // Map each sorted index back to the string it refers to
+        let mut sorted_strings: Vec<&str> = Vec::new();
+        for &idx in result.values().iter() {
+            sorted_strings.push(inputs[idx as usize]);
+        }
+
+        assert_eq!(sorted_strings, expected);
+    }
+
+    // Multi-byte UTF-8 values must come out in the same order as the standard `sort`
+    #[test]
+    fn test_utf8_strings() {
+        let inputs = vec![
+            "a",
+            "你", // 3 bytes in UTF-8
+            "你好", // 6 bytes
+            "你好世界", // 12 bytes
+            "🎉", // 4-byte emoji
+            "🎉🎊", // 8 bytes
+            "café", // accented Latin letter
+            "naïve",
+            "Москва", // Cyrillic
+            "東京", // Japanese kanji
+            "한국", // Korean
+        ];
+
+        let mut expected = inputs.clone();
+        expected.sort();
+
+        let indices: Vec<u32> = (0..inputs.len() as u32).collect();
+        let array = StringArray::from(inputs.clone());
+        let result = sort_bytes(&array, indices, vec![], SortOptions::default(), None);
+
+        // Map each sorted index back to the string it refers to
+        let mut sorted_strings: Vec<&str> = Vec::new();
+        for &idx in result.values().iter() {
+            sorted_strings.push(inputs[idx as usize]);
+        }
+
+        assert_eq!(sorted_strings, expected);
+    }
+
+    // Randomized check: sort_bytes must order generated strings exactly like `sort`
+    #[test]
+    fn test_fuzz_random_strings() {
+        // Fixed seed for reproducibility
+        let mut rng = StdRng::seed_from_u64(42);
+
+        // 100 independent rounds
+        for _round in 0..100 {
+            // Each round first draws how many strings to build (20 to 50) ...
+            let num_strings = rng.random_range(20..=50);
+
+            // ... and then builds them one after another from the same generator
+            let test_strings: Vec<String> = (0..num_strings)
+                .map(|_| generate_random_string(&mut rng))
+                .collect();
+
+            // Expected order comes from the standard library sort
+            let mut expected = test_strings.clone();
+            expected.sort();
+
+            // Actual order comes from sort_bytes over the identity index list
+            let indices: Vec<u32> = (0..test_strings.len() as u32).collect();
+            let array = StringArray::from(test_strings.clone());
+            let result = sort_bytes(&array, indices, vec![], SortOptions::default(), None);
+
+            // Map each sorted index back to the string it refers to
+            let mut sorted_strings: Vec<String> = Vec::new();
+            for &idx in result.values().iter() {
+                sorted_strings.push(test_strings[idx as usize].clone());
+            }
+
+            // On failure the full input list is printed so the case can be replayed
+            assert_eq!(
+                sorted_strings, expected,
+                "Fuzz test failed with input: {test_strings:?}"
+            );
+        }
+    }
+
+    // Helper: builds a random UTF-8 string whose byte length is exactly the drawn target length
+    fn generate_random_string(rng: &mut StdRng) -> String {
+        // Draw the target byte length, biased towards short strings (0-4 bytes)
+        let length = if rng.random_bool(0.6) {
+            rng.random_range(0..=4) // 60% probability for 0-4 length strings
+        } else {
+            rng.random_range(5..=20) // 40% probability for longer strings
+        };
+
+        if length == 0 {
+            return String::new();
+        }
+
+        let mut result = String::new();
+        let mut current_len = 0;
+
+        while current_len < length {
+            let c = generate_random_char(rng);
+            let char_len = c.len_utf8();
+
+            // Append the character only if it still fits within the target byte length
+            if current_len + char_len <= length {
+                result.push(c);
+                current_len += char_len;
+            } else {
+                // Otherwise discard it and pad the remaining bytes with ASCII lowercase letters
+                let remaining = length - current_len;
+                for _ in 0..remaining {
+                    result.push(rng.random_range('a'..='z'));
+                    current_len += 1;
+                }
+                break;
+            }
+        }
+
+        result
+    }
+
+    // Draws one character: mostly ASCII, sometimes a multi-byte UTF-8 character
+    fn generate_random_char(rng: &mut StdRng) -> char {
+        // Fixed pools for the two non-ASCII classes
+        let chinese_chars = ['你', '好', '世', '界', '测', '试', '中', '文'];
+        let special_chars = ['é', 'ï', '🎉', '🎊', 'α', 'β', 'γ'];
+
+        // One roll in 0..10 selects the class; a second draw picks within that class
+        let roll = rng.random_range(0..10);
+
+        match roll {
+            // 10% each for the two fixed pools
+            8 => chinese_chars[rng.random_range(0..chinese_chars.len())],
+            9 => special_chars[rng.random_range(0..special_chars.len())],
+            7 => rng.random_range('0'..='9'), // 10% digits
+            6 => rng.random_range('A'..='Z'), // 10% ASCII uppercase
+            0..=5 => rng.random_range('a'..='z'), // 60% ASCII lowercase
+            _ => unreachable!(),
+        }
+    }
+
+    // Descending output must equal the standard ascending sort, reversed
+    #[test]
+    fn test_descending_sort() {
+        let inputs = vec!["a", "ab", "ba", "baa", "abba", "abbc", "abc", "cda"];
+
+        // Reference result: ascending standard sort, then reversed
+        let mut expected = inputs.clone();
+        expected.sort();
+        expected.reverse();
+
+        // Descending order; there are no nulls, so their placement does not matter here
+        let options = SortOptions {
+            descending: true,
+            nulls_first: false,
+        };
+
+        // Sort every position of the array
+        let indices: Vec<u32> = (0..inputs.len() as u32).collect();
+        let array = StringArray::from(inputs.clone());
+        let no_nulls = vec![];
+        let result = sort_bytes(&array, indices, no_nulls, options, None);
+
+        // Map each sorted index back to the string it refers to
+        let mut sorted_strings: Vec<&str> = Vec::new();
+        for &idx in result.values().iter() {
+            sorted_strings.push(inputs[idx as usize]);
+        }
+
+        assert_eq!(sorted_strings, expected);
+    }
+
+    // Stress case: 1000 values sharing the prefix "same", differing only after it
+    #[test]
+    fn test_same_prefix_stress() {
+        let prefix = "same";
+
+        // "same0000" through "same0999"
+        let inputs: Vec<String> = (0..1000)
+            .map(|i| format!("{prefix}{i:04}"))
+            .collect();
+
+        // Reference order from the standard library
+        let mut expected = inputs.clone();
+        expected.sort();
+
+        let indices: Vec<u32> = (0..inputs.len() as u32).collect();
+        let array = StringArray::from(inputs.clone());
+        let result = sort_bytes(&array, indices, vec![], SortOptions::default(), None);
+
+        // Map each sorted index back to the string it refers to
+        let mut sorted_strings: Vec<String> = Vec::new();
+        for &idx in result.values().iter() {
+            sorted_strings.push(inputs[idx as usize].clone());
+        }
+
+        assert_eq!(sorted_strings, expected);
+    }
+
+    // With a limit of k, only the k smallest values are returned, in sorted order
+    #[test]
+    fn test_with_limit() {
+        let limit = 3;
+        let inputs = vec!["z", "y", "x", "w", "v", "u", "t", "s"];
+
+        // Reference result: full standard sort cut down to the first `limit` entries
+        let mut expected = inputs.clone();
+        expected.sort();
+        expected.truncate(limit);
+
+        // Sort every position of the array with default options
+        let indices: Vec<u32> = (0..inputs.len() as u32).collect();
+        let array = StringArray::from(inputs.clone());
+        let options = SortOptions::default();
+        let no_nulls = vec![];
+        let result = sort_bytes(&array, indices, no_nulls, options, Some(limit));
+
+        // Map each returned index back to the string it refers to
+        let mut sorted_strings: Vec<&str> = Vec::new();
+        for &idx in result.values().iter() {
+            sorted_strings.push(inputs[idx as usize]);
+        }
+
+        // Both the contents and the number of returned entries must match
+        assert_eq!(sorted_strings, expected);
+
+        assert_eq!(sorted_strings.len(), limit);
+    }
 }
diff --git a/arrow-pyarrow-integration-testing/.cargo/config b/arrow-pyarrow-integration-testing/.cargo/config.toml
similarity index 99%
rename from arrow-pyarrow-integration-testing/.cargo/config
rename to arrow-pyarrow-integration-testing/.cargo/config.toml
index a127967f66c5..0b24f30cf908 100644
--- a/arrow-pyarrow-integration-testing/.cargo/config
+++ b/arrow-pyarrow-integration-testing/.cargo/config.toml
@@ -19,4 +19,4 @@
 rustflags = [
   "-C", "link-arg=-undefined",
   "-C", "link-arg=dynamic_lookup",
-]
\ No newline at end of file
+]
diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml
index 72603b5d527d..1fa4197f5157 100644
--- a/arrow-pyarrow-integration-testing/Cargo.toml
+++ b/arrow-pyarrow-integration-testing/Cargo.toml
@@ -23,9 +23,9 @@ homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
 license = "Apache-2.0"
-keywords = [ "arrow" ]
-edition = "2021"
-rust-version = "1.81"
+keywords = ["arrow"]
+edition = "2024"
+rust-version = "1.85"
 publish = false
 
 [lib]
@@ -34,4 +34,4 @@ crate-type = ["cdylib"]
 
 [dependencies]
 arrow = { path = "../arrow", features = ["pyarrow"] }
-pyo3 = { version = "0.24.1", features = ["extension-module"] }
+pyo3 = { version = "0.27.1", features = ["extension-module"] }
diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs
index d4908fff0897..a5690b307040 100644
--- a/arrow-pyarrow-integration-testing/src/lib.rs
+++ b/arrow-pyarrow-integration-testing/src/lib.rs
@@ -27,12 +27,12 @@ use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
 
-use arrow::array::{make_array, Array, ArrayData, ArrayRef, Int64Array};
+use arrow::array::{Array, ArrayData, ArrayRef, Int64Array, make_array};
 use arrow::compute::kernels;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow::error::ArrowError;
 use arrow::ffi_stream::ArrowArrayStreamReader;
-use arrow::pyarrow::{FromPyArrow, PyArrowException, PyArrowType, ToPyArrow};
+use arrow::pyarrow::{FromPyArrow, PyArrowException, PyArrowType, Table, ToPyArrow};
 use arrow::record_batch::RecordBatch;
 
 fn to_py_err(err: ArrowError) -> PyErr {
@@ -41,7 +41,8 @@ fn to_py_err(err: ArrowError) -> PyErr {
 
 /// Returns `array + array` of an int64 array.
 #[pyfunction]
-fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
+fn double<'py>(array: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyAny>> {
+    let py = array.py();
     // import
     let array = make_array(ArrayData::from_pyarrow_bound(array)?);
 
@@ -61,13 +62,13 @@ fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
 /// calls a lambda function that receives and returns an array
 /// whose result must be the array multiplied by two
 #[pyfunction]
-fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
+fn double_py(lambda: &Bound<PyAny>) -> PyResult<bool> {
     // create
     let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
     let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;
 
     // to py
-    let pyarray = array.to_data().to_pyarrow(py)?;
+    let pyarray = array.to_data().to_pyarrow(lambda.py())?;
     let pyarray = lambda.call1((pyarray,))?;
     let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);
 
@@ -75,7 +76,10 @@ fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
 }
 
 #[pyfunction]
-fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyObject> {
+fn make_empty_array<'py>(
+    datatype: PyArrowType<DataType>,
+    py: Python<'py>,
+) -> PyResult<Bound<'py, PyAny>> {
     let array = new_empty_array(&datatype.0);
 
     array.to_data().to_pyarrow(py)
@@ -95,7 +99,7 @@ fn substring(array: PyArrowType<ArrayData>, start: i64) -> PyResult<PyArrowType<
 
 /// Returns the concatenate
 #[pyfunction]
-fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject> {
+fn concatenate<'py>(array: PyArrowType<ArrayData>, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
     let array = make_array(array.0);
 
     // concat
@@ -136,6 +140,26 @@ fn round_trip_record_batch_reader(
     Ok(obj)
 }
 
+#[pyfunction]
+fn round_trip_table(obj: PyArrowType<Table>) -> PyResult<PyArrowType<Table>> {
+    Ok(obj)
+}
+
+/// Builds a Table from a list of RecordBatches and a Schema.
+#[pyfunction]
+pub fn build_table(
+    record_batches: Vec<PyArrowType<RecordBatch>>,
+    schema: PyArrowType<Schema>,
+) -> PyResult<PyArrowType<Table>> {
+    Ok(PyArrowType(
+        Table::try_new(
+            record_batches.into_iter().map(|rb| rb.0).collect(),
+            Arc::new(schema.0),
+        )
+        .map_err(to_py_err)?,
+    ))
+}
+
 #[pyfunction]
 fn reader_return_errors(obj: PyArrowType<ArrowArrayStreamReader>) -> PyResult<()> {
     // This makes sure we can correctly consume a RBR and return the error,
@@ -174,6 +198,8 @@ fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> PyResu
     m.add_wrapped(wrap_pyfunction!(round_trip_array))?;
     m.add_wrapped(wrap_pyfunction!(round_trip_record_batch))?;
     m.add_wrapped(wrap_pyfunction!(round_trip_record_batch_reader))?;
+    m.add_wrapped(wrap_pyfunction!(round_trip_table))?;
+    m.add_wrapped(wrap_pyfunction!(build_table))?;
     m.add_wrapped(wrap_pyfunction!(reader_return_errors))?;
     m.add_wrapped(wrap_pyfunction!(boxed_reader_roundtrip))?;
     Ok(())
diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py
index 3b46d5729a1f..b9b04ddee509 100644
--- a/arrow-pyarrow-integration-testing/tests/test_sql.py
+++ b/arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -20,6 +20,7 @@
 import datetime
 import decimal
 import string
+from typing import Union, Tuple, Protocol
 
 import pytest
 import pyarrow as pa
@@ -27,7 +28,9 @@
 
 import arrow_pyarrow_integration_testing as rust
 
-PYARROW_PRE_14 = int(pa.__version__.split('.')[0]) < 14
+PYARROW_MAJOR_VER = int(pa.__version__.split(".")[0])
+PYARROW_PRE_14 = PYARROW_MAJOR_VER < 14
+PYARROW_PRE_16 = PYARROW_MAJOR_VER < 16
 
 
 @contextlib.contextmanager
@@ -112,36 +115,66 @@ def assert_pyarrow_leak():
     ),
 ]
 
-_unsupported_pyarrow_types = [
-]
+if PYARROW_MAJOR_VER >= 16:
+    _supported_pyarrow_types.extend(
+        [
+            pa.list_view(pa.uint64()),
+            pa.large_list_view(pa.uint64()),
+            pa.list_view(pa.string()),
+            pa.large_list_view(pa.string()),
+        ]
+    )
+
 
 # As of pyarrow 14, pyarrow implements the Arrow PyCapsule interface
 # (https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html).
 # This defines that Arrow consumers should allow any object that has specific "dunder"
 # methods, `__arrow_c_*_`. These wrapper classes ensure that arrow-rs is able to handle
 # _any_ class, without pyarrow-specific handling.
-class SchemaWrapper:
-    def __init__(self, schema):
+
+
+class ArrowSchemaExportable(Protocol):
+    def __arrow_c_schema__(self) -> object: ...
+
+
+class ArrowArrayExportable(Protocol):
+    def __arrow_c_array__(
+        self,
+        requested_schema: Union[object, None] = None
+    ) -> Tuple[object, object]:
+        ...
+
+
+class ArrowStreamExportable(Protocol):
+    def __arrow_c_stream__(
+        self,
+        requested_schema: Union[object, None] = None
+    ) -> object:
+        ...
+
+
+class SchemaWrapper(ArrowSchemaExportable):
+    def __init__(self, schema: ArrowSchemaExportable) -> None:
         self.schema = schema
 
-    def __arrow_c_schema__(self):
+    def __arrow_c_schema__(self) -> object:
         return self.schema.__arrow_c_schema__()
 
 
-class ArrayWrapper:
-    def __init__(self, array):
+class ArrayWrapper(ArrowArrayExportable):
+    def __init__(self, array: ArrowArrayExportable) -> None:
         self.array = array
 
-    def __arrow_c_array__(self):
-        return self.array.__arrow_c_array__()
+    def __arrow_c_array__(self, requested_schema: Union[object, None] = None) -> Tuple[object, object]:
+        return self.array.__arrow_c_array__(requested_schema=requested_schema)
 
 
-class StreamWrapper:
-    def __init__(self, stream):
+class StreamWrapper(ArrowStreamExportable):
+    def __init__(self, stream: ArrowStreamExportable) -> None:
         self.stream = stream
 
-    def __arrow_c_stream__(self):
-        return self.stream.__arrow_c_stream__()
+    def __arrow_c_stream__(self, requested_schema: Union[object, None] = None) -> object:
+        return self.stream.__arrow_c_stream__(requested_schema=requested_schema)
 
 
 @pytest.mark.parametrize("pyarrow_type", _supported_pyarrow_types, ids=str)
@@ -158,12 +191,6 @@ def test_type_roundtrip_pycapsule(pyarrow_type):
     assert restored == pyarrow_type
     assert restored is not pyarrow_type
 
-
-@pytest.mark.parametrize("pyarrow_type", _unsupported_pyarrow_types, ids=str)
-def test_type_roundtrip_raises(pyarrow_type):
-    with pytest.raises(pa.ArrowException):
-        rust.round_trip_type(pyarrow_type)
-
 @pytest.mark.parametrize('pyarrow_type', _supported_pyarrow_types, ids=str)
 def test_field_roundtrip(pyarrow_type):
     pyarrow_field = pa.field("test", pyarrow_type, nullable=True)
@@ -337,6 +364,21 @@ def test_list_array():
     del a
     del b
 
+
+@pytest.mark.skipif(PYARROW_PRE_16, reason="requires pyarrow 16")
+def test_list_view_array():
+    """
+    Python -> Rust -> Python
+    """
+    a = pa.array([[], None, [1, 2], [4, 5, 6]], pa.list_view(pa.int64()))
+    b = rust.round_trip_array(a)
+    b.validate(full=True)
+    assert a.to_pylist() == b.to_pylist()
+    assert a.type == b.type
+    del a
+    del b
+
+
 def test_map_array():
     """
     Python -> Rust -> Python
@@ -485,7 +527,7 @@ def test_empty_recordbatch_with_row_count():
     """
 
     # Create an empty schema with no fields
-    batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3, 4]}).select([])
+    batch = pa.RecordBatch.from_pydict({"a": [1, 2, 3, 4]}, metadata={b'key1': b'value1'}).select([])
     num_rows = 4
     assert batch.num_rows == num_rows
     assert batch.num_columns == 0
@@ -503,7 +545,7 @@ def test_record_batch_reader():
     """
     Python -> Rust -> Python
     """
-    schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
     batches = [
         pa.record_batch([[[1], [2, 42]]], schema),
         pa.record_batch([[None, [], [5, 6]]], schema),
@@ -529,7 +571,7 @@ def test_record_batch_reader_pycapsule():
     """
     Python -> Rust -> Python
     """
-    schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
     batches = [
         pa.record_batch([[[1], [2, 42]]], schema),
         pa.record_batch([[None, [], [5, 6]]], schema),
@@ -579,7 +621,7 @@ def test_record_batch_pycapsule():
     """
     Python -> Rust -> Python
     """
-    schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
     batch = pa.record_batch([[[1], [2, 42]]], schema)
     wrapped = StreamWrapper(batch)
     b = rust.round_trip_record_batch_reader(wrapped)
@@ -598,7 +640,7 @@ def test_table_pycapsule():
     """
     Python -> Rust -> Python
     """
-    schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'})
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
     batches = [
         pa.record_batch([[[1], [2, 42]]], schema),
         pa.record_batch([[None, [], [5, 6]]], schema),
@@ -608,11 +650,76 @@ def test_table_pycapsule():
     b = rust.round_trip_record_batch_reader(wrapped)
     new_table = b.read_all()
 
+    assert table == new_table
     assert table.schema == new_table.schema
+    assert table.schema.metadata == new_table.schema.metadata
+    assert len(table.to_batches()) == len(new_table.to_batches())
+
+
+def test_table_empty():
+    """
+    Python -> Rust -> Python
+    """
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
+    table = pa.Table.from_batches([], schema=schema)
+    new_table = rust.build_table([], schema=schema)
+
     assert table == new_table
+    assert table.schema == new_table.schema
+    assert table.schema.metadata == new_table.schema.metadata
     assert len(table.to_batches()) == len(new_table.to_batches())
 
 
+def test_table_roundtrip():
+    """
+    Python -> Rust -> Python
+    """
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
+    batches = [
+        pa.record_batch([[[1], [2, 42]]], schema),
+        pa.record_batch([[None, [], [5, 6]]], schema),
+    ]
+    table = pa.Table.from_batches(batches, schema=schema)
+    new_table = rust.round_trip_table(table)
+
+    assert table == new_table
+    assert table.schema == new_table.schema
+    assert table.schema.metadata == new_table.schema.metadata
+    assert len(table.to_batches()) == len(new_table.to_batches())
+
+
+def test_table_from_batches():
+    """
+    Python -> Rust -> Python
+    """
+    schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
+    batches = [
+        pa.record_batch([[[1], [2, 42]]], schema),
+        pa.record_batch([[None, [], [5, 6]]], schema),
+    ]
+    table = pa.Table.from_batches(batches)
+    new_table = rust.build_table(batches, schema)
+
+    assert table == new_table
+    assert table.schema == new_table.schema
+    assert table.schema.metadata == new_table.schema.metadata
+    assert len(table.to_batches()) == len(new_table.to_batches())
+
+
+def test_table_error_inconsistent_schema():
+    """
+    Python -> Rust -> Python
+    """
+    schema_1 = pa.schema([('ints', pa.list_(pa.int32()))])
+    schema_2 = pa.schema([('floats', pa.list_(pa.float32()))])
+    batches = [
+        pa.record_batch([[[1], [2, 42]]], schema_1),
+        pa.record_batch([[None, [], [5.6, 6.4]]], schema_2),
+    ]
+    with pytest.raises(pa.ArrowException, match="Schema error: All record batches must have the same schema."):
+        rust.build_table(batches, schema_1)
+
+
 def test_reject_other_classes():
     # Arbitrary type that is not a PyArrow type
     not_pyarrow = ["hello"]
diff --git a/arrow-pyarrow-testing/Cargo.toml b/arrow-pyarrow-testing/Cargo.toml
new file mode 100644
index 000000000000..b38af3c3b49c
--- /dev/null
+++ b/arrow-pyarrow-testing/Cargo.toml
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Note: this package is not published to crates.io; it is only used for testing
+# the arrow-pyarrow crate in the arrow-rs repository.
+#
+# It is not part of the workspace so that `cargo test --all` does not require
+# a Python interpreter or the pyarrow package to be installed.
+#
+# It is used to run tests that require a Python interpreter and the pyarrow
+# package installed. It is not intended to be used as a library or a standalone
+# application.
+#
+# It is different from `arrow-pyarrow-integration-testing` in that it works
+# with a standard pyarrow installation, rather than building a dynamic library
+# that can be loaded by Python (which requires additional configuration of the
+# Python environment).
+
+[package]
+name = "arrow-pyarrow-testing"
+description = "Tests for arrow-pyarrow that require only a Python interpreter and pyarrow installed"
+version = "0.1.0"
+homepage = "https://github.com/apache/arrow-rs"
+repository = "https://github.com/apache/arrow-rs"
+authors = ["Apache Arrow <dev@arrow.apache.org>"]
+license = "Apache-2.0"
+keywords = ["arrow"]
+edition = "2024"
+rust-version = "1.85"
+publish = false
+
+
+[dependencies]
+# Note no dependency on arrow, to ensure arrow-pyarrow can be used by itself
+arrow-array = { path = "../arrow-array" }
+arrow-pyarrow = { path = "../arrow-pyarrow" }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow-testing/src/lib.rs b/arrow-pyarrow-testing/src/lib.rs
new file mode 100644
index 000000000000..80726b500bf2
--- /dev/null
+++ b/arrow-pyarrow-testing/src/lib.rs
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This crate exists to provide a test environment for the `arrow-pyarrow` crate.
+//! It is not intended to be used by itself. See comments in Cargo.toml for more
+//! details.
diff --git a/arrow-pyarrow/tests/pyarrow.rs b/arrow-pyarrow-testing/tests/pyarrow.rs
similarity index 74%
rename from arrow-pyarrow/tests/pyarrow.rs
rename to arrow-pyarrow-testing/tests/pyarrow.rs
index 8ed21f5d8ae4..4ca661b104d2 100644
--- a/arrow-pyarrow/tests/pyarrow.rs
+++ b/arrow-pyarrow-testing/tests/pyarrow.rs
@@ -15,6 +15,28 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Tests pyarrow bindings
+//!
+//! This test requires installing the `pyarrow` python package. If you do not
+//! have this package installed, you will see an error such as the following:
+//!
+//! ```text
+//! PyErr { type: <class 'ModuleNotFoundError'>, value: ModuleNotFoundError("No module named 'pyarrow'"), traceback: None }
+//! ```
+//!
+//! # Notes
+//!
+//! You cannot use a virtual environment to run these tests on macOS, as it will
+//! fail to find the pyarrow module due to <https://github.com/PyO3/pyo3/issues/1741>
+//!
+//! One way to run them is to install the `pyarrow` package in the system Python,
+//! which might break other packages, so use with caution:
+//!
+//! ```shell
+//! brew install pipx
+//! pip3 install --break-system-packages pyarrow
+//! ```
+
 use arrow_array::builder::{BinaryViewBuilder, StringViewBuilder};
 use arrow_array::{
     Array, ArrayRef, BinaryViewArray, Int32Array, RecordBatch, StringArray, StringViewArray,
@@ -25,20 +47,20 @@ use std::sync::Arc;
 
 #[test]
 fn test_to_pyarrow() {
-    pyo3::prepare_freethreaded_python();
+    Python::initialize();
 
     let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
     let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
     // The "very long string" will not be inlined, and force the creation of a data buffer.
     let c: ArrayRef = Arc::new(StringViewArray::from(vec!["short", "a very long string"]));
     let input = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap();
-    println!("input: {:?}", input);
+    println!("input: {input:?}");
 
-    let res = Python::with_gil(|py| {
+    let res = Python::attach(|py| {
         let py_input = input.to_pyarrow(py)?;
-        let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
+        let records = RecordBatch::from_pyarrow_bound(&py_input)?;
         let py_records = records.to_pyarrow(py)?;
-        RecordBatch::from_pyarrow_bound(py_records.bind(py))
+        RecordBatch::from_pyarrow_bound(&py_records)
     })
     .unwrap();
 
@@ -47,7 +69,7 @@ fn test_to_pyarrow() {
 
 #[test]
 fn test_to_pyarrow_byte_view() {
-    pyo3::prepare_freethreaded_python();
+    Python::initialize();
 
     for num_variadic_buffers in 0..=2 {
         let string_view: ArrayRef = Arc::new(string_view_column(num_variadic_buffers));
@@ -59,12 +81,12 @@ fn test_to_pyarrow_byte_view() {
         ])
         .unwrap();
 
-        println!("input: {:?}", input);
-        let res = Python::with_gil(|py| {
+        println!("input: {input:?}");
+        let res = Python::attach(|py| {
             let py_input = input.to_pyarrow(py)?;
-            let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
+            let records = RecordBatch::from_pyarrow_bound(&py_input)?;
             let py_records = records.to_pyarrow(py)?;
-            RecordBatch::from_pyarrow_bound(py_records.bind(py))
+            RecordBatch::from_pyarrow_bound(&py_records)
         })
         .unwrap();
 
diff --git a/arrow-pyarrow/Cargo.toml b/arrow-pyarrow/Cargo.toml
index e0dc3137d5f5..c508cabcfee8 100644
--- a/arrow-pyarrow/Cargo.toml
+++ b/arrow-pyarrow/Cargo.toml
@@ -39,4 +39,4 @@ all-features = true
 arrow-array = { workspace = true, features = ["ffi"] }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
-pyo3 = { version = "0.24.1", default-features = false }
+pyo3 = { version = "0.27.1", default-features = false }
diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs
index 566aa7402c6e..15951f8dcfbf 100644
--- a/arrow-pyarrow/src/lib.rs
+++ b/arrow-pyarrow/src/lib.rs
@@ -44,19 +44,23 @@
 //! | `pyarrow.Array`             | [ArrayData]                                                        |
 //! | `pyarrow.RecordBatch`       | [RecordBatch]                                                      |
 //! | `pyarrow.RecordBatchReader` | [ArrowArrayStreamReader] / `Box<dyn RecordBatchReader + Send>` (1) |
+//! | `pyarrow.Table`             | [Table] (2)                                                        |
 //!
 //! (1) `pyarrow.RecordBatchReader` can be imported as [ArrowArrayStreamReader]. Either
 //! [ArrowArrayStreamReader] or `Box<dyn RecordBatchReader + Send>` can be exported
 //! as `pyarrow.RecordBatchReader`. (`Box<dyn RecordBatchReader + Send>` is typically
 //! easier to create.)
 //!
-//! PyArrow has the notion of chunked arrays and tables, but arrow-rs doesn't
-//! have these same concepts. A chunked table is instead represented with
-//! `Vec<RecordBatch>`. A `pyarrow.Table` can be imported to Rust by calling
-//! [pyarrow.Table.to_reader()](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_reader)
-//! and then importing the reader as a [ArrowArrayStreamReader].
+//! (2) Although arrow-rs offers [Table], a convenience wrapper for [pyarrow.Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table)
+//! that internally holds `Vec<RecordBatch>`, it is meant primarily for use cases where you already
+//! have `Vec<RecordBatch>` on the Rust side and want to export that in bulk as a `pyarrow.Table`.
+//! In general, it is recommended to use streaming approaches instead of dealing with data in bulk.
+//! For example, a `pyarrow.Table` (or any other object that implements the ArrayStream PyCapsule
+//! interface) can be imported to Rust through `PyArrowType<ArrowArrayStreamReader>` instead of
+//! forcing eager reading into `Vec<RecordBatch>`.
 
 use std::convert::{From, TryFrom};
+use std::ffi::CStr;
 use std::ptr::{addr_of, addr_of_mut};
 use std::sync::Arc;
 
@@ -64,22 +68,26 @@ use arrow_array::ffi;
 use arrow_array::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
 use arrow_array::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
 use arrow_array::{
-    make_array, RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader,
-    StructArray,
+    RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, StructArray,
+    make_array,
 };
 use arrow_data::ArrayData;
-use arrow_schema::{ArrowError, DataType, Field, Schema};
+use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef};
 use pyo3::exceptions::{PyTypeError, PyValueError};
 use pyo3::ffi::Py_uintptr_t;
-use pyo3::import_exception;
 use pyo3::prelude::*;
 use pyo3::pybacked::PyBackedStr;
-use pyo3::types::{PyCapsule, PyList, PyTuple};
+use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple};
+use pyo3::{import_exception, intern};
 
 import_exception!(pyarrow, ArrowException);
 /// Represents an exception raised by PyArrow.
 pub type PyArrowException = ArrowException;
 
+const ARROW_ARRAY_STREAM_CAPSULE_NAME: &CStr = c"arrow_array_stream";
+const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c"arrow_schema";
+const ARROW_ARRAY_CAPSULE_NAME: &CStr = c"arrow_array";
+
 fn to_py_err(err: ArrowError) -> PyErr {
     PyArrowException::new_err(err.to_string())
 }
@@ -95,17 +103,17 @@ pub trait FromPyArrow: Sized {
 /// Create a new PyArrow object from a arrow-rs type.
 pub trait ToPyArrow {
     /// Convert the implemented type into a Python object without consuming it.
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject>;
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>>;
 }
 
 /// Convert an arrow-rs type into a PyArrow object.
 pub trait IntoPyArrow {
     /// Convert the implemented type into a Python object while consuming it.
-    fn into_pyarrow(self, py: Python) -> PyResult<PyObject>;
+    fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>>;
 }
 
 impl<T: ToPyArrow> IntoPyArrow for T {
-    fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
+    fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         self.to_pyarrow(py)
     }
 }
@@ -122,8 +130,7 @@ fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
             .extract::<PyBackedStr>()?;
         let found_name = found_class.getattr("__name__")?.extract::<PyBackedStr>()?;
         return Err(PyTypeError::new_err(format!(
-            "Expected instance of {}.{}, got {}.{}",
-            expected_module, expected_name, found_module, found_name
+            "Expected instance of {expected_module}.{expected_name}, got {found_module}.{found_name}",
         )));
     }
     Ok(())
@@ -137,11 +144,10 @@ fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
         ));
     }
 
-    let capsule_name = capsule_name.unwrap().to_str()?;
+    let capsule_name = unsafe { capsule_name.unwrap().as_cstr().to_str()? };
     if capsule_name != name {
         return Err(PyValueError::new_err(format!(
-            "Expected name '{}' in PyCapsule, instead got '{}'",
-            name, capsule_name
+            "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'",
         )));
     }
 
@@ -155,12 +161,16 @@ impl FromPyArrow for DataType {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let dtype = DataType::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(dtype);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let dtype = DataType::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(dtype);
+            }
         }
 
         validate_class("DataType", value)?;
@@ -174,13 +184,13 @@ impl FromPyArrow for DataType {
 }
 
 impl ToPyArrow for DataType {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
         let module = py.import("pyarrow")?;
         let class = module.getattr("DataType")?;
         let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
-        Ok(dtype.into())
+        Ok(dtype)
     }
 }
 
@@ -191,12 +201,16 @@ impl FromPyArrow for Field {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let field = Field::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(field);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let field = Field::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(field);
+            }
         }
 
         validate_class("Field", value)?;
@@ -210,13 +224,13 @@ impl FromPyArrow for Field {
 }
 
 impl ToPyArrow for Field {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
         let module = py.import("pyarrow")?;
         let class = module.getattr("Field")?;
         let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
-        Ok(dtype.into())
+        Ok(dtype)
     }
 }
 
@@ -227,12 +241,16 @@ impl FromPyArrow for Schema {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_schema__")? {
             let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_schema")?;
 
-            let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
-            let schema = Schema::try_from(schema_ptr).map_err(to_py_err)?;
-            return Ok(schema);
+            let schema_ptr = capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            unsafe {
+                let schema = Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?;
+                return Ok(schema);
+            }
         }
 
         validate_class("Schema", value)?;
@@ -246,13 +264,13 @@ impl FromPyArrow for Schema {
 }
 
 impl ToPyArrow for Schema {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
         let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
         let module = py.import("pyarrow")?;
         let class = module.getattr("Schema")?;
         let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
-        Ok(schema.into())
+        Ok(schema)
     }
 }
 
@@ -271,16 +289,25 @@ impl FromPyArrow for ArrayData {
             }
 
             let schema_capsule = tuple.get_item(0)?;
-            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
             let array_capsule = tuple.get_item(1)?;
-            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = array_capsule.cast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
 
-            let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
-            let array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer() as _) };
-            return unsafe { ffi::from_ffi(array, schema_ptr) }.map_err(to_py_err);
+            let schema_ptr = schema_capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            let array = unsafe {
+                FFI_ArrowArray::from_raw(
+                    array_capsule
+                        .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+                        .cast::<FFI_ArrowArray>()
+                        .as_ptr(),
+                )
+            };
+            return unsafe { ffi::from_ffi(array, schema_ptr.as_ref()) }.map_err(to_py_err);
         }
 
         validate_class("Array", value)?;
@@ -305,7 +332,7 @@ impl FromPyArrow for ArrayData {
 }
 
 impl ToPyArrow for ArrayData {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let array = FFI_ArrowArray::new(self);
         let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;
 
@@ -318,24 +345,24 @@ impl ToPyArrow for ArrayData {
                 addr_of!(schema) as Py_uintptr_t,
             ),
         )?;
-        Ok(array.unbind())
+        Ok(array)
     }
 }
 
 impl<T: FromPyArrow> FromPyArrow for Vec<T> {
     fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
-        let list = value.downcast::<PyList>()?;
+        let list = value.cast::<PyList>()?;
         list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
     }
 }
 
 impl<T: ToPyArrow> ToPyArrow for Vec<T> {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let values = self
             .iter()
             .map(|v| v.to_pyarrow(py))
             .collect::<PyResult<Vec<_>>>()?;
-        Ok(PyList::new(py, values)?.unbind().into())
+        Ok(PyList::new(py, values)?.into_any())
     }
 }
 
@@ -344,6 +371,7 @@ impl FromPyArrow for RecordBatch {
         // Newer versions of PyArrow as well as other libraries with Arrow data implement this
         // method, so prefer it over _export_to_c.
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
         if value.hasattr("__arrow_c_array__")? {
             let tuple = value.getattr("__arrow_c_array__")?.call0()?;
 
@@ -354,17 +382,22 @@ impl FromPyArrow for RecordBatch {
             }
 
             let schema_capsule = tuple.get_item(0)?;
-            let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
+            let schema_capsule = schema_capsule.cast::<PyCapsule>()?;
             let array_capsule = tuple.get_item(1)?;
-            let array_capsule = array_capsule.downcast::<PyCapsule>()?;
+            let array_capsule = array_capsule.cast::<PyCapsule>()?;
 
             validate_pycapsule(schema_capsule, "arrow_schema")?;
             validate_pycapsule(array_capsule, "arrow_array")?;
 
-            let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
-            let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) };
+            let schema_ptr = schema_capsule
+                .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))?
+                .cast::<FFI_ArrowSchema>();
+            let array_ptr = array_capsule
+                .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))?
+                .cast::<FFI_ArrowArray>();
+            let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_ptr.as_ptr()) };
             let mut array_data =
-                unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?;
+                unsafe { ffi::from_ffi(ffi_array, schema_ptr.as_ref()) }.map_err(to_py_err)?;
             if !matches!(array_data.data_type(), DataType::Struct(_)) {
                 return Err(PyTypeError::new_err(
                     "Expected Struct type from __arrow_c_array.",
@@ -379,7 +412,8 @@ impl FromPyArrow for RecordBatch {
             let array = StructArray::from(array_data);
             // StructArray does not embed metadata from schema. We need to override
             // the output schema with the schema from the capsule.
-            let schema = Arc::new(Schema::try_from(schema_ptr).map_err(to_py_err)?);
+            let schema =
+                unsafe { Arc::new(Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?) };
             let (_fields, columns, nulls) = array.into_parts();
             assert_eq!(
                 nulls.map(|n| n.null_count()).unwrap_or_default(),
@@ -396,7 +430,7 @@ impl FromPyArrow for RecordBatch {
 
         let arrays = value.getattr("columns")?;
         let arrays = arrays
-            .downcast::<PyList>()?
+            .cast::<PyList>()?
             .iter()
             .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
             .collect::<PyResult<_>>()?;
@@ -414,12 +448,12 @@ impl FromPyArrow for RecordBatch {
 }
 
 impl ToPyArrow for RecordBatch {
-    fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         // Workaround apache/arrow#37669 by returning RecordBatchIterator
         let reader = RecordBatchIterator::new(vec![Ok(self.clone())], self.schema());
         let reader: Box<dyn RecordBatchReader + Send> = Box::new(reader);
         let py_reader = reader.into_pyarrow(py)?;
-        py_reader.call_method0(py, "read_next_batch")
+        py_reader.call_method0("read_next_batch")
     }
 }
 
@@ -431,10 +465,17 @@ impl FromPyArrow for ArrowArrayStreamReader {
         // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
         if value.hasattr("__arrow_c_stream__")? {
             let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
-            let capsule = capsule.downcast::<PyCapsule>()?;
+            let capsule = capsule.cast::<PyCapsule>()?;
             validate_pycapsule(capsule, "arrow_array_stream")?;
 
-            let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
+            let stream = unsafe {
+                FFI_ArrowArrayStream::from_raw(
+                    capsule
+                        .pointer_checked(Some(ARROW_ARRAY_STREAM_CAPSULE_NAME))?
+                        .cast::<FFI_ArrowArrayStream>()
+                        .as_ptr(),
+                )
+            };
 
             let stream_reader = ArrowArrayStreamReader::try_new(stream)
                 .map_err(|err| PyValueError::new_err(err.to_string()))?;
@@ -465,7 +506,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
 impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
     // We can't implement `ToPyArrow` for `T: RecordBatchReader + Send` because
     // there is already a blanket implementation for `T: ToPyArrow`.
-    fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
+    fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let mut stream = FFI_ArrowArrayStream::new(self);
 
         let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
@@ -474,18 +515,112 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
         let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t])?;
         let reader = class.call_method1("_import_from_c", args)?;
 
-        Ok(PyObject::from(reader))
+        Ok(reader)
     }
 }
 
 /// Convert a [`ArrowArrayStreamReader`] into a `pyarrow.RecordBatchReader`.
 impl IntoPyArrow for ArrowArrayStreamReader {
-    fn into_pyarrow(self, py: Python) -> PyResult<PyObject> {
+    fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
         let boxed: Box<dyn RecordBatchReader + Send> = Box::new(self);
         boxed.into_pyarrow(py)
     }
 }
 
+/// This is a convenience wrapper around `Vec<RecordBatch>` that tries to simplify conversion from
+/// and to `pyarrow.Table`.
+///
+/// This could be used in circumstances where you either want to consume a `pyarrow.Table` directly
+/// (although technically, since `pyarrow.Table` implements the ArrayStreamReader PyCapsule
+/// interface, one could also consume a `PyArrowType<ArrowArrayStreamReader>` instead) or, more
+/// importantly, where one wants to export a `pyarrow.Table` from a `Vec<RecordBatch>` from the Rust
+/// side.
+///
+/// ```ignore
+/// #[pyfunction]
+/// fn return_table(...) -> PyResult<PyArrowType<Table>> {
+///     let batches: Vec<RecordBatch>;
+///     let schema: SchemaRef;
+///     Ok(PyArrowType(Table::try_new(batches, schema).map_err(|err| err.into_py_err(py))?))
+/// }
+/// ```
+#[derive(Clone)]
+pub struct Table {
+    record_batches: Vec<RecordBatch>,
+    schema: SchemaRef,
+}
+
+impl Table {
+    pub fn try_new(
+        record_batches: Vec<RecordBatch>,
+        schema: SchemaRef,
+    ) -> Result<Self, ArrowError> {
+        for record_batch in &record_batches {
+            if schema != record_batch.schema() {
+                return Err(ArrowError::SchemaError(format!(
+                    "All record batches must have the same schema. \
+                         Expected schema: {:?}, got schema: {:?}",
+                    schema,
+                    record_batch.schema()
+                )));
+            }
+        }
+        Ok(Self {
+            record_batches,
+            schema,
+        })
+    }
+
+    pub fn record_batches(&self) -> &[RecordBatch] {
+        &self.record_batches
+    }
+
+    pub fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+
+    pub fn into_inner(self) -> (Vec<RecordBatch>, SchemaRef) {
+        (self.record_batches, self.schema)
+    }
+}
+
+impl TryFrom<Box<dyn RecordBatchReader>> for Table {
+    type Error = ArrowError;
+
+    fn try_from(value: Box<dyn RecordBatchReader>) -> Result<Self, ArrowError> {
+        let schema = value.schema();
+        let batches = value.collect::<Result<Vec<_>, _>>()?;
+        Self::try_new(batches, schema)
+    }
+}
+
+/// Convert a `pyarrow.Table` (or any other ArrowArrayStream compliant object) into [`Table`]
+impl FromPyArrow for Table {
+    fn from_pyarrow_bound(ob: &Bound<PyAny>) -> PyResult<Self> {
+        let reader: Box<dyn RecordBatchReader> =
+            Box::new(ArrowArrayStreamReader::from_pyarrow_bound(ob)?);
+        Self::try_from(reader).map_err(|err| PyErr::new::<PyValueError, _>(err.to_string()))
+    }
+}
+
+/// Convert a [`Table`] into `pyarrow.Table`.
+impl IntoPyArrow for Table {
+    fn into_pyarrow(self, py: Python) -> PyResult<Bound<PyAny>> {
+        let module = py.import(intern!(py, "pyarrow"))?;
+        let class = module.getattr(intern!(py, "Table"))?;
+
+        let py_batches = PyList::new(py, self.record_batches.into_iter().map(PyArrowType))?;
+        let py_schema = PyArrowType(Arc::unwrap_or_clone(self.schema));
+
+        let kwargs = PyDict::new(py);
+        kwargs.set_item("schema", py_schema)?;
+
+        let reader = class.call_method("from_batches", (py_batches,), Some(&kwargs))?;
+
+        Ok(reader)
+    }
+}
+
 /// A newtype wrapper for types implementing [`FromPyArrow`] or [`IntoPyArrow`].
 ///
 /// When wrapped around a type `T: FromPyArrow`, it
@@ -494,9 +629,11 @@ impl IntoPyArrow for ArrowArrayStreamReader {
 #[derive(Debug)]
 pub struct PyArrowType<T>(pub T);
 
-impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
-    fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
-        Ok(Self(T::from_pyarrow_bound(value)?))
+impl<T: FromPyArrow> FromPyObject<'_, '_> for PyArrowType<T> {
+    type Error = PyErr;
+
+    fn extract(value: Borrowed<'_, '_, PyAny>) -> PyResult<Self> {
+        Ok(Self(T::from_pyarrow_bound(&value)?))
     }
 }
 
@@ -508,10 +645,7 @@ impl<'py, T: IntoPyArrow> IntoPyObject<'py> for PyArrowType<T> {
     type Error = PyErr;
 
     fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, PyErr> {
-        match self.0.into_pyarrow(py) {
-            Ok(obj) => Result::Ok(obj.into_bound(py)),
-            Err(err) => Result::Err(err),
-        }
+        self.0.into_pyarrow(py)
     }
 }
 
diff --git a/arrow-row/Cargo.toml b/arrow-row/Cargo.toml
index 7d136939b05c..cd854aa3d48f 100644
--- a/arrow-row/Cargo.toml
+++ b/arrow-row/Cargo.toml
@@ -47,4 +47,3 @@ half = { version = "2.1", default-features = false }
 arrow-cast = { workspace = true }
 arrow-ord = { workspace = true }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
-
diff --git a/arrow-row/src/fixed.rs b/arrow-row/src/fixed.rs
index 3d9920708f9b..493e674018ab 100644
--- a/arrow-row/src/fixed.rs
+++ b/arrow-row/src/fixed.rs
@@ -20,8 +20,8 @@ use crate::null_sentinel;
 use arrow_array::builder::BufferBuilder;
 use arrow_array::{ArrowPrimitiveType, BooleanArray, FixedSizeBinaryArray};
 use arrow_buffer::{
-    bit_util, i256, ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano,
-    MutableBuffer, NullBuffer,
+    ArrowNativeType, BooleanBuffer, Buffer, IntervalDayTime, IntervalMonthDayNano, MutableBuffer,
+    NullBuffer, bit_util, i256,
 };
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{DataType, SortOptions};
@@ -456,7 +456,7 @@ unsafe fn decode_fixed<T: FixedLengthEncoding + ArrowNativeType>(
         .null_bit_buffer(Some(nulls));
 
     // SAFETY: Buffers correct length
-    builder.build_unchecked()
+    unsafe { builder.build_unchecked() }
 }
 
 /// Decodes a `PrimitiveArray` from rows
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 7f8d2cd97cbe..28c65c5994bf 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -97,7 +97,7 @@
 //! assert_eq!(&c2_values, &["a", "f", "c", "e"]);
 //! ```
 //!
-//! # Lexsort
+//! # Lexicographic Sorts (lexsort)
 //!
 //! The row format can also be used to implement a fast multi-column / lexicographic sort
 //!
@@ -117,6 +117,33 @@
 //! }
 //! ```
 //!
+//! # Flattening Dictionaries
+//!
+//! For performance reasons, dictionary arrays are flattened ("hydrated") to their
+//! underlying values during row conversion. See [the issue] for more details.
+//!
+//! This means that the arrays that come out of [`RowConverter::convert_rows`]
+//! may not have the same data types as the input arrays. For example, encoding
+//! a `Dictionary<Int8, Utf8>` and then decoding it will produce a `Utf8` array.
+//!
+//! ```
+//! # use arrow_array::{Array, ArrayRef, DictionaryArray};
+//! # use arrow_array::types::Int8Type;
+//! # use arrow_row::{RowConverter, SortField};
+//! # use arrow_schema::DataType;
+//! # use std::sync::Arc;
+//! // Input is a Dictionary array
+//! let dict: DictionaryArray::<Int8Type> = ["a", "b", "c", "a", "b"].into_iter().collect();
+//! let sort_fields = vec![SortField::new(dict.data_type().clone())];
+//! let arrays = vec![Arc::new(dict) as ArrayRef];
+//! let converter = RowConverter::new(sort_fields).unwrap();
+//! // Convert to rows
+//! let rows = converter.convert_columns(&arrays).unwrap();
+//! let converted = converter.convert_rows(&rows).unwrap();
+//! // result was a Utf8 array, not a Dictionary array
+//! assert_eq!(converted[0].data_type(), &DataType::Utf8);
+//! ```
+//!
 //! [non-comparison sorts]: https://en.wikipedia.org/wiki/Sorting_algorithm#Non-comparison_sorts
 //! [radix sort]: https://en.wikipedia.org/wiki/Radix_sort
 //! [normalized for sorting]: http://wwwlgis.informatik.uni-kl.de/archiv/wwwdvs.informatik.uni-kl.de/courses/DBSREAL/SS2005/Vorlesungsunterlagen/Implementing_Sorting.pdf
@@ -124,26 +151,28 @@
 //! [`lexsort`]: https://docs.rs/arrow-ord/latest/arrow_ord/sort/fn.lexsort.html
 //! [compared]: PartialOrd
 //! [compare]: PartialOrd
+//! [the issue]: https://github.com/apache/arrow-rs/issues/4811
 
 #![doc(
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 use std::cmp::Ordering;
 use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 
 use arrow_array::cast::*;
-use arrow_array::types::ArrowDictionaryKeyType;
+use arrow_array::types::{ArrowDictionaryKeyType, ByteViewType};
 use arrow_array::*;
 use arrow_buffer::{ArrowNativeType, Buffer, OffsetBuffer, ScalarBuffer};
-use arrow_data::ArrayDataBuilder;
+use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::*;
 use variable::{decode_binary_view, decode_string_view};
 
 use crate::fixed::{decode_bool, decode_fixed_size_binary, decode_primitive};
+use crate::list::{compute_lengths_fixed_size_list, encode_fixed_size_list};
 use crate::variable::{decode_binary, decode_string};
 use arrow_array::types::{Int16Type, Int32Type, Int64Type};
 
@@ -346,6 +375,81 @@ mod variable;
 ///
 /// With `[]` represented by an empty byte array, and `null` a null byte array.
 ///
+/// ## Fixed Size List Encoding
+///
+/// Fixed Size Lists are encoded by first encoding all child elements to the row format.
+///
+/// A non-null list value is then encoded as 0x01 followed by the concatenation of each
+/// of the child elements. A null list value is encoded as a null marker.
+///
+/// For example given:
+///
+/// ```text
+/// [1_u8, 2_u8]
+/// [3_u8, null]
+/// null
+/// ```
+///
+/// The elements would be converted to:
+///
+/// ```text
+///     ┌──┬──┐     ┌──┬──┐     ┌──┬──┐        ┌──┬──┐
+///  1  │01│01│  2  │01│02│  3  │01│03│  null  │00│00│
+///     └──┴──┘     └──┴──┘     └──┴──┘        └──┴──┘
+///```
+///
+/// Which would be encoded as
+///
+/// ```text
+///                 ┌──┬──┬──┬──┬──┐
+///  [1_u8, 2_u8]   │01│01│01│01│02│
+///                 └──┴──┴──┴──┴──┘
+///                     └ 1 ┘ └ 2 ┘
+///                 ┌──┬──┬──┬──┬──┐
+///  [3_u8, null]   │01│01│03│00│00│
+///                 └──┴──┴──┴──┴──┘
+///                     └ 1 ┘ └null┘
+///                 ┌──┐
+///  null           │00│
+///                 └──┘
+///
+///```
+///
+/// ## Union Encoding
+///
+/// A union value is encoded as a single type-id byte followed by the row encoding of the selected child value.
+/// The type-id byte is always present; union arrays have no top-level null marker, so nulls are represented by the child encoding.
+///
+/// For example, given a union of Int32 (type_id = 0) and Utf8 (type_id = 1):
+///
+/// ```text
+///                           ┌──┬──────────────┐
+///  3                        │00│01│80│00│00│03│
+///                           └──┴──────────────┘
+///                            │  └─ signed integer encoding (non-null)
+///                            └──── type_id
+///
+///                           ┌──┬────────────────────────────────┐
+/// "abc"                     │01│02│'a'│'b'│'c'│00│00│00│00│00│03│
+///                           └──┴────────────────────────────────┘
+///                            │  └─ string encoding (non-null)
+///                            └──── type_id
+///
+///                           ┌──┬──────────────┐
+/// null Int32                │00│00│00│00│00│00│
+///                           └──┴──────────────┘
+///                            │  └─ signed integer encoding (null)
+///                            └──── type_id
+///
+///                           ┌──┬──┐
+/// null Utf8                 │01│00│
+///                           └──┴──┘
+///                            │  └─ string encoding (null)
+///                            └──── type_id
+/// ```
+///
+/// See [`UnionArray`] for more details on union types.
+///
 /// # Ordering
 ///
 /// ## Float Ordering
@@ -362,6 +466,12 @@ mod variable;
 /// The encoding described above will order nulls first, this can be inverted by representing
 /// nulls as `0xFF_u8` instead of `0_u8`
 ///
+/// ## Union Ordering
+///
+/// Values of the same type are ordered according to the ordering of that type.
+/// Values of different types are ordered by their type id.
+/// The type_id is negated when descending order is specified.
+///
 /// ## Reverse Column Ordering
 ///
 /// The order of a given column can be reversed by negating the encoded bytes of non-null values
@@ -389,6 +499,9 @@ enum Codec {
     List(RowConverter),
     /// A row converter for the values array of a run-end encoded array
     RunEndEncoded(RowConverter),
+    /// Row converters for each union field, in field order (the index is used as
+    /// the type_id), and the encoding of null rows for each field
+    Union(Vec<RowConverter>, Vec<OwnedRow>),
 }
 
 impl Codec {
@@ -433,6 +546,11 @@ impl Codec {
                 let converter = RowConverter::new(vec![field])?;
                 Ok(Self::List(converter))
             }
+            DataType::FixedSizeList(f, _) => {
+                let field = SortField::new_with_options(f.data_type().clone(), sort_field.options);
+                let converter = RowConverter::new(vec![field])?;
+                Ok(Self::List(converter))
+            }
             DataType::Struct(f) => {
                 let sort_fields = f
                     .iter()
@@ -450,6 +568,35 @@ impl Codec {
 
                 Ok(Self::Struct(converter, owned))
             }
+            DataType::Union(fields, _mode) => {
+                // similar to dictionaries and lists, we set descending to false and negate nulls_first
+                // since the encoded contents will be inverted if descending is set
+                let options = SortOptions {
+                    descending: false,
+                    nulls_first: sort_field.options.nulls_first != sort_field.options.descending,
+                };
+
+                let mut converters = Vec::with_capacity(fields.len());
+                let mut null_rows = Vec::with_capacity(fields.len());
+
+                for (_type_id, field) in fields.iter() {
+                    let sort_field =
+                        SortField::new_with_options(field.data_type().clone(), options);
+                    let converter = RowConverter::new(vec![sort_field])?;
+
+                    let null_array = new_null_array(field.data_type(), 1);
+                    let nulls = converter.convert_columns(&[null_array])?;
+                    let owned = OwnedRow {
+                        data: nulls.buffer.into(),
+                        config: nulls.config,
+                    };
+
+                    converters.push(converter);
+                    null_rows.push(owned);
+                }
+
+                Ok(Self::Union(converters, null_rows))
+            }
             _ => Err(ArrowError::NotYetImplemented(format!(
                 "not yet implemented: {:?}",
                 sort_field.data_type
@@ -472,11 +619,37 @@ impl Codec {
             }
             Codec::List(converter) => {
                 let values = match array.data_type() {
-                    DataType::List(_) => as_list_array(array).values(),
-                    DataType::LargeList(_) => as_large_list_array(array).values(),
+                    DataType::List(_) => {
+                        let list_array = as_list_array(array);
+                        let first_offset = list_array.offsets()[0] as usize;
+                        let last_offset =
+                            list_array.offsets()[list_array.offsets().len() - 1] as usize;
+
+                        // values can include more data than referenced in the ListArray, only encode
+                        // the referenced values.
+                        list_array
+                            .values()
+                            .slice(first_offset, last_offset - first_offset)
+                    }
+                    DataType::LargeList(_) => {
+                        let list_array = as_large_list_array(array);
+
+                        let first_offset = list_array.offsets()[0] as usize;
+                        let last_offset =
+                            list_array.offsets()[list_array.offsets().len() - 1] as usize;
+
+                        // values can include more data than referenced in the LargeListArray, only encode
+                        // the referenced values.
+                        list_array
+                            .values()
+                            .slice(first_offset, last_offset - first_offset)
+                    }
+                    DataType::FixedSizeList(_, _) => {
+                        as_fixed_size_list_array(array).values().clone()
+                    }
                     _ => unreachable!(),
                 };
-                let rows = converter.convert_columns(&[values.clone()])?;
+                let rows = converter.convert_columns(&[values])?;
                 Ok(Encoder::List(rows))
             }
             Codec::RunEndEncoded(converter) => {
@@ -489,9 +662,31 @@ impl Codec {
                     },
                     _ => unreachable!(),
                 };
-                let rows = converter.convert_columns(&[values.clone()])?;
+                let rows = converter.convert_columns(std::slice::from_ref(values))?;
                 Ok(Encoder::RunEndEncoded(rows))
             }
+            Codec::Union(converters, _) => {
+                let union_array = array
+                    .as_any()
+                    .downcast_ref::<UnionArray>()
+                    .expect("expected Union array");
+
+                let type_ids = union_array.type_ids().clone();
+                let offsets = union_array.offsets().cloned();
+
+                let mut child_rows = Vec::with_capacity(converters.len());
+                for (type_id, converter) in converters.iter().enumerate() {
+                    let child_array = union_array.child(type_id as i8);
+                    let rows = converter.convert_columns(std::slice::from_ref(child_array))?;
+                    child_rows.push(rows);
+                }
+
+                Ok(Encoder::Union {
+                    child_rows,
+                    type_ids,
+                    offsets,
+                })
+            }
         }
     }
 
@@ -502,6 +697,10 @@ impl Codec {
             Codec::Struct(converter, nulls) => converter.size() + nulls.data.len(),
             Codec::List(converter) => converter.size(),
             Codec::RunEndEncoded(converter) => converter.size(),
+            Codec::Union(converters, null_rows) => {
+                converters.iter().map(|c| c.size()).sum::<usize>()
+                    + null_rows.iter().map(|n| n.data.len()).sum::<usize>()
+            }
         }
     }
 }
@@ -522,6 +721,12 @@ enum Encoder<'a> {
     List(Rows),
     /// The row encoding of the values array
     RunEndEncoded(Rows),
+    /// The row encoding of each union field's child array, the type_ids buffer, and the offsets buffer (for Dense)
+    Union {
+        child_rows: Vec<Rows>,
+        type_ids: ScalarBuffer<i8>,
+        offsets: Option<ScalarBuffer<i32>>,
+    },
 }
 
 /// Configure the data type and sort order for a given column
@@ -576,11 +781,14 @@ impl RowConverter {
     fn supports_datatype(d: &DataType) -> bool {
         match d {
             _ if !d.is_nested() => true,
-            DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) => {
+            DataType::List(f) | DataType::LargeList(f) | DataType::FixedSizeList(f, _) => {
                 Self::supports_datatype(f.data_type())
             }
             DataType::Struct(f) => f.iter().all(|x| Self::supports_datatype(x.data_type())),
             DataType::RunEndEncoded(_, values) => Self::supports_datatype(values.data_type()),
+            DataType::Union(fs, _mode) => fs
+                .iter()
+                .all(|(_, f)| Self::supports_datatype(f.data_type())),
             _ => false,
         }
     }
@@ -589,6 +797,8 @@ impl RowConverter {
     ///
     /// See [`Row`] for information on when [`Row`] can be compared
     ///
+    /// See [`Self::convert_rows`] for converting [`Rows`] back into [`ArrayRef`]
+    ///
     /// # Panics
     ///
     /// Panics if the schema of `columns` does not match that provided to [`RowConverter::new`]
@@ -642,6 +852,15 @@ impl RowConverter {
                 columns.len()
             )));
         }
+        for colum in columns.iter().skip(1) {
+            if colum.len() != columns[0].len() {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "RowConverter columns must all have the same length, expected {} got {}",
+                    columns[0].len(),
+                    colum.len()
+                )));
+            }
+        }
 
         let encoders = columns
             .iter()
@@ -687,6 +906,8 @@ impl RowConverter {
 
     /// Convert [`Rows`] columns into [`ArrayRef`]
     ///
+    /// See [`Self::convert_columns`] for converting [`ArrayRef`] into [`Rows`]
+    ///
     /// # Panics
     ///
     /// Panics if the rows were not produced by this [`RowConverter`]
@@ -710,7 +931,20 @@ impl RowConverter {
         // SAFETY
         // We have validated that the rows came from this [`RowConverter`]
         // and therefore must be valid
-        unsafe { self.convert_raw(&mut rows, validate_utf8) }
+        let result = unsafe { self.convert_raw(&mut rows, validate_utf8) }?;
+
+        if cfg!(debug_assertions) {
+            for (i, row) in rows.iter().enumerate() {
+                if !row.is_empty() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Codecs {codecs:?} did not consume all bytes for row {i}, remaining bytes: {row:?}",
+                        codecs = &self.codecs
+                    )));
+                }
+            }
+        }
+
+        Ok(result)
     }
 
     /// Returns an empty [`Rows`] with capacity for `row_capacity` rows with
@@ -787,9 +1021,13 @@ impl RowConverter {
             0,
             "can't construct Rows instance from array with nulls"
         );
+        let (offsets, values, _) = array.into_parts();
+        let offsets = offsets.iter().map(|&i| i.as_usize()).collect();
+        // Try zero-copy, if it does not succeed, fall back to copying the values.
+        let buffer = values.into_vec().unwrap_or_else(|values| values.to_vec());
         Rows {
-            buffer: array.values().to_vec(),
-            offsets: array.offsets().iter().map(|&i| i.as_usize()).collect(),
+            buffer,
+            offsets,
             config: RowConfig {
                 fields: Arc::clone(&self.fields),
                 validate_utf8: true,
@@ -810,7 +1048,7 @@ impl RowConverter {
         self.fields
             .iter()
             .zip(&self.codecs)
-            .map(|(field, codec)| decode_column(field, rows, codec, validate_utf8))
+            .map(|(field, codec)| unsafe { decode_column(field, rows, codec, validate_utf8) })
             .collect()
     }
 
@@ -892,6 +1130,12 @@ impl Rows {
         self.offsets.push(self.buffer.len())
     }
 
+    /// Reserve capacity for `row_capacity` rows with a total length of `data_capacity`
+    pub fn reserve(&mut self, row_capacity: usize, data_capacity: usize) {
+        self.buffer.reserve(data_capacity);
+        self.offsets.reserve(row_capacity);
+    }
+
     /// Returns the row at index `row`
     pub fn row(&self, row: usize) -> Row<'_> {
         assert!(row + 1 < self.offsets.len());
@@ -934,8 +1178,8 @@ impl Rows {
     pub fn size(&self) -> usize {
         // Size of fields is accounted for as part of RowConverter
         std::mem::size_of::<Self>()
-            + self.buffer.len()
-            + self.offsets.len() * std::mem::size_of::<usize>()
+            + self.buffer.capacity()
+            + self.offsets.capacity() * std::mem::size_of::<usize>()
     }
 
     /// Create a [BinaryArray] from the [Rows] data without reallocating the
@@ -1311,11 +1555,7 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
                             .iter()
                             .map(|slice| variable::encoded_len(slice))
                     ),
-                    DataType::BinaryView => tracker.push_variable(
-                        array.as_binary_view()
-                            .iter()
-                            .map(|slice| variable::encoded_len(slice))
-                    ),
+                    DataType::BinaryView => push_byte_view_array_lengths(&mut tracker, array.as_binary_view()),
                     DataType::Utf8 => tracker.push_variable(
                         array.as_string::<i32>()
                             .iter()
@@ -1326,11 +1566,7 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
                             .iter()
                             .map(|slice| variable::encoded_len(slice.map(|x| x.as_bytes())))
                     ),
-                    DataType::Utf8View => tracker.push_variable(
-                        array.as_string_view()
-                            .iter()
-                            .map(|slice| variable::encoded_len(slice.map(|x| x.as_bytes())))
-                    ),
+                    DataType::Utf8View => push_byte_view_array_lengths(&mut tracker, array.as_string_view()),
                     DataType::FixedSizeBinary(len) => {
                         let len = len.to_usize().unwrap();
                         tracker.push_fixed(1 + len)
@@ -1365,6 +1601,11 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
                 DataType::LargeList(_) => {
                     list::compute_lengths(tracker.materialized(), rows, as_large_list_array(array))
                 }
+                DataType::FixedSizeList(_, _) => compute_lengths_fixed_size_list(
+                    &mut tracker,
+                    rows,
+                    as_fixed_size_list_array(array),
+                ),
                 _ => unreachable!(),
             },
             Encoder::RunEndEncoded(rows) => match array.data_type() {
@@ -1388,12 +1629,61 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> LengthTracker {
                 },
                 _ => unreachable!(),
             },
+            Encoder::Union {
+                child_rows,
+                type_ids,
+                offsets,
+            } => {
+                let union_array = array
+                    .as_any()
+                    .downcast_ref::<UnionArray>()
+                    .expect("expected UnionArray");
+
+                let lengths = (0..union_array.len()).map(|i| {
+                    let type_id = type_ids[i];
+                    let child_row_i = offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
+                    let child_row = child_rows[type_id as usize].row(child_row_i);
+
+                    // length: 1 byte type_id + child row bytes
+                    1 + child_row.as_ref().len()
+                });
+
+                tracker.push_variable(lengths);
+            }
         }
     }
 
     tracker
 }
 
+/// Add to [`LengthTracker`] the encoded length of each item in the [`GenericByteViewArray`], computed by `variable::padded_length` from `array.lengths()` (null items are passed as `None`)
+fn push_byte_view_array_lengths<T: ByteViewType>(
+    tracker: &mut LengthTracker,
+    array: &GenericByteViewArray<T>,
+) {
+    if let Some(nulls) = array.nulls().filter(|n| n.null_count() > 0) { // null-aware path: taken only when at least one item is null
+        tracker.push_variable(
+            array
+                .lengths()
+                .zip(nulls.iter()) // pair every length with its validity flag
+                .map(|(length, is_valid)| {
+                    if is_valid {
+                        Some(length as usize)
+                    } else {
+                        None // null item: its stored length is ignored
+                    }
+                })
+                .map(variable::padded_length),
+        )
+    } else { // no null buffer, or a null buffer with zero nulls: every item is treated as valid
+        tracker.push_variable(
+            array
+                .lengths()
+                .map(|len| variable::padded_length(Some(len as usize))),
+        )
+    }
+}
+
 /// Encodes a column to the provided [`Rows`] incrementing the offsets as it progresses
 fn encode_column(
     data: &mut [u8],
@@ -1421,24 +1711,22 @@ fn encode_column(
                     }
                 }
                 DataType::Binary => {
-                    variable::encode(data, offsets, as_generic_binary_array::<i32>(column).iter(), opts)
+                    variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i32>(column), opts)
                 }
                 DataType::BinaryView => {
                     variable::encode(data, offsets, column.as_binary_view().iter(), opts)
                 }
                 DataType::LargeBinary => {
-                    variable::encode(data, offsets, as_generic_binary_array::<i64>(column).iter(), opts)
+                    variable::encode_generic_byte_array(data, offsets, as_generic_binary_array::<i64>(column), opts)
                 }
-                DataType::Utf8 => variable::encode(
+                DataType::Utf8 => variable::encode_generic_byte_array(
                     data, offsets,
-                    column.as_string::<i32>().iter().map(|x| x.map(|x| x.as_bytes())),
+                    column.as_string::<i32>(),
                     opts,
                 ),
-                DataType::LargeUtf8 => variable::encode(
+                DataType::LargeUtf8 => variable::encode_generic_byte_array(
                     data, offsets,
-                    column.as_string::<i64>()
-                        .iter()
-                        .map(|x| x.map(|x| x.as_bytes())),
+                    column.as_string::<i64>(),
                     opts,
                 ),
                 DataType::Utf8View => variable::encode(
@@ -1482,6 +1770,9 @@ fn encode_column(
             DataType::LargeList(_) => {
                 list::encode(data, offsets, rows, opts, as_large_list_array(column))
             }
+            DataType::FixedSizeList(_, _) => {
+                encode_fixed_size_list(data, offsets, rows, opts, as_fixed_size_list_array(column))
+            }
             _ => unreachable!(),
         },
         Encoder::RunEndEncoded(rows) => match column.data_type() {
@@ -1499,6 +1790,36 @@ fn encode_column(
             },
             _ => unreachable!(),
         },
+        Encoder::Union {
+            child_rows,
+            type_ids,
+            offsets: offsets_buf,
+        } => {
+            offsets
+                .iter_mut()
+                .skip(1)
+                .enumerate()
+                .for_each(|(i, offset)| {
+                    let type_id = type_ids[i];
+
+                    let child_row_idx = offsets_buf.as_ref().map(|o| o[i] as usize).unwrap_or(i);
+                    let child_row = child_rows[type_id as usize].row(child_row_idx);
+                    let child_bytes = child_row.as_ref();
+
+                    let type_id_byte = if opts.descending {
+                        !(type_id as u8)
+                    } else {
+                        type_id as u8
+                    };
+                    data[*offset] = type_id_byte;
+
+                    let child_start = *offset + 1;
+                    let child_end = child_start + child_bytes.len();
+                    data[child_start..child_end].copy_from_slice(child_bytes);
+
+                    *offset = child_end;
+                });
+        }
     }
 }
 
@@ -1551,63 +1872,193 @@ unsafe fn decode_column(
                 DataType::LargeBinary => Arc::new(decode_binary::<i64>(rows, options)),
                 DataType::BinaryView => Arc::new(decode_binary_view(rows, options)),
                 DataType::FixedSizeBinary(size) => Arc::new(decode_fixed_size_binary(rows, size, options)),
-                DataType::Utf8 => Arc::new(decode_string::<i32>(rows, options, validate_utf8)),
-                DataType::LargeUtf8 => Arc::new(decode_string::<i64>(rows, options, validate_utf8)),
-                DataType::Utf8View => Arc::new(decode_string_view(rows, options, validate_utf8)),
-                _ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {}", data_type)))
+                DataType::Utf8 => Arc::new(unsafe{ decode_string::<i32>(rows, options, validate_utf8) }),
+                DataType::LargeUtf8 => Arc::new(unsafe { decode_string::<i64>(rows, options, validate_utf8) }),
+                DataType::Utf8View => Arc::new(unsafe { decode_string_view(rows, options, validate_utf8) }),
+                _ => return Err(ArrowError::NotYetImplemented(format!("unsupported data type: {data_type}" )))
             }
         }
         Codec::Dictionary(converter, _) => {
-            let cols = converter.convert_raw(rows, validate_utf8)?;
+            let cols = unsafe { converter.convert_raw(rows, validate_utf8) }?;
             cols.into_iter().next().unwrap()
         }
         Codec::Struct(converter, _) => {
             let (null_count, nulls) = fixed::decode_nulls(rows);
             rows.iter_mut().for_each(|row| *row = &row[1..]);
-            let children = converter.convert_raw(rows, validate_utf8)?;
+            let children = unsafe { converter.convert_raw(rows, validate_utf8) }?;
 
-            let child_data = children.iter().map(|c| c.to_data()).collect();
-            let builder = ArrayDataBuilder::new(field.data_type.clone())
+            let child_data: Vec<ArrayData> = children.iter().map(|c| c.to_data()).collect();
+            // Since RowConverter flattens certain data types (i.e. Dictionary),
+            // we need to use the updated data types instead of the original fields'
+            let corrected_fields: Vec<Field> = match &field.data_type {
+                DataType::Struct(struct_fields) => struct_fields
+                    .iter()
+                    .zip(child_data.iter())
+                    .map(|(orig_field, child_array)| {
+                        orig_field
+                            .as_ref()
+                            .clone()
+                            .with_data_type(child_array.data_type().clone())
+                    })
+                    .collect(),
+                _ => unreachable!("Only Struct types should be corrected here"),
+            };
+            let corrected_struct_type = DataType::Struct(corrected_fields.into());
+            let builder = ArrayDataBuilder::new(corrected_struct_type)
                 .len(rows.len())
                 .null_count(null_count)
                 .null_bit_buffer(Some(nulls))
                 .child_data(child_data);
 
-            Arc::new(StructArray::from(builder.build_unchecked()))
+            Arc::new(StructArray::from(unsafe { builder.build_unchecked() }))
         }
         Codec::List(converter) => match &field.data_type {
             DataType::List(_) => {
-                Arc::new(list::decode::<i32>(converter, rows, field, validate_utf8)?)
+                Arc::new(unsafe { list::decode::<i32>(converter, rows, field, validate_utf8) }?)
             }
             DataType::LargeList(_) => {
-                Arc::new(list::decode::<i64>(converter, rows, field, validate_utf8)?)
+                Arc::new(unsafe { list::decode::<i64>(converter, rows, field, validate_utf8) }?)
             }
-            _ => unreachable!(),
-        },
-        Codec::RunEndEncoded(converter) => match &field.data_type {
-            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
-                DataType::Int16 => Arc::new(run::decode::<Int16Type>(
-                    converter,
-                    rows,
-                    field,
-                    validate_utf8,
-                )?),
-                DataType::Int32 => Arc::new(run::decode::<Int32Type>(
-                    converter,
-                    rows,
-                    field,
-                    validate_utf8,
-                )?),
-                DataType::Int64 => Arc::new(run::decode::<Int64Type>(
+            DataType::FixedSizeList(_, value_length) => Arc::new(unsafe {
+                list::decode_fixed_size_list(
                     converter,
                     rows,
                     field,
                     validate_utf8,
-                )?),
+                    value_length.as_usize(),
+                )
+            }?),
+            _ => unreachable!(),
+        },
+        Codec::RunEndEncoded(converter) => match &field.data_type {
+            DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
+                DataType::Int16 => Arc::new(unsafe {
+                    run::decode::<Int16Type>(converter, rows, field, validate_utf8)
+                }?),
+                DataType::Int32 => Arc::new(unsafe {
+                    run::decode::<Int32Type>(converter, rows, field, validate_utf8)
+                }?),
+                DataType::Int64 => Arc::new(unsafe {
+                    run::decode::<Int64Type>(converter, rows, field, validate_utf8)
+                }?),
                 _ => unreachable!(),
             },
             _ => unreachable!(),
         },
+        Codec::Union(converters, null_rows) => {
+            let len = rows.len();
+
+            let DataType::Union(union_fields, mode) = &field.data_type else {
+                unreachable!()
+            };
+
+            let mut type_ids = Vec::with_capacity(len);
+            let mut rows_by_field: Vec<Vec<(usize, &[u8])>> = vec![Vec::new(); converters.len()];
+
+            for (idx, row) in rows.iter_mut().enumerate() {
+                let type_id_byte = {
+                    let id = row[0];
+                    if options.descending { !id } else { id }
+                };
+
+                let type_id = type_id_byte as i8;
+                type_ids.push(type_id);
+
+                let field_idx = type_id as usize;
+
+                let child_row = &row[1..];
+                rows_by_field[field_idx].push((idx, child_row));
+            }
+
+            let mut child_arrays: Vec<ArrayRef> = Vec::with_capacity(converters.len());
+            let mut offsets = (*mode == UnionMode::Dense).then(|| Vec::with_capacity(len));
+
+            for (field_idx, converter) in converters.iter().enumerate() {
+                let field_rows = &rows_by_field[field_idx];
+
+                match &mode {
+                    UnionMode::Dense => {
+                        if field_rows.is_empty() {
+                            let (_, field) = union_fields.iter().nth(field_idx).unwrap();
+                            child_arrays.push(arrow_array::new_empty_array(field.data_type()));
+                            continue;
+                        }
+
+                        let mut child_data = field_rows
+                            .iter()
+                            .map(|(_, bytes)| *bytes)
+                            .collect::<Vec<_>>();
+
+                        let child_array =
+                            unsafe { converter.convert_raw(&mut child_data, validate_utf8) }?;
+
+                        // advance row slices by the bytes consumed
+                        for ((row_idx, original_bytes), remaining_bytes) in
+                            field_rows.iter().zip(child_data)
+                        {
+                            let consumed_length = 1 + original_bytes.len() - remaining_bytes.len();
+                            rows[*row_idx] = &rows[*row_idx][consumed_length..];
+                        }
+
+                        child_arrays.push(child_array.into_iter().next().unwrap());
+                    }
+                    UnionMode::Sparse => {
+                        let mut sparse_data: Vec<&[u8]> = Vec::with_capacity(len);
+                        let mut field_row_iter = field_rows.iter().peekable();
+                        let null_row_bytes: &[u8] = &null_rows[field_idx].data;
+
+                        for idx in 0..len {
+                            if let Some((next_idx, bytes)) = field_row_iter.peek() {
+                                if *next_idx == idx {
+                                    sparse_data.push(*bytes);
+
+                                    field_row_iter.next();
+                                    continue;
+                                }
+                            }
+                            sparse_data.push(null_row_bytes);
+                        }
+
+                        let child_array =
+                            unsafe { converter.convert_raw(&mut sparse_data, validate_utf8) }?;
+
+                        // advance row slices by the bytes consumed for rows that belong to this field
+                        for (row_idx, child_row) in field_rows.iter() {
+                            let remaining_len = sparse_data[*row_idx].len();
+                            let consumed_length = 1 + child_row.len() - remaining_len;
+                            rows[*row_idx] = &rows[*row_idx][consumed_length..];
+                        }
+
+                        child_arrays.push(child_array.into_iter().next().unwrap());
+                    }
+                }
+            }
+
+            // build offsets for dense unions
+            if let Some(ref mut offsets_vec) = offsets {
+                let mut count = vec![0i32; converters.len()];
+                for type_id in &type_ids {
+                    let field_idx = *type_id as usize;
+                    offsets_vec.push(count[field_idx]);
+
+                    count[field_idx] += 1;
+                }
+            }
+
+            let type_ids_buffer = ScalarBuffer::from(type_ids);
+            let offsets_buffer = offsets.map(ScalarBuffer::from);
+
+            let union_array = UnionArray::try_new(
+                union_fields.clone(),
+                type_ids_buffer,
+                offsets_buffer,
+                child_arrays,
+            )?;
+
+            // note: union arrays don't support physical null buffers;
+            // nulls are represented logically through the child arrays
+            Arc::new(union_array)
+        }
     };
     Ok(array)
 }
@@ -1616,13 +2067,13 @@ unsafe fn decode_column(
 mod tests {
     use rand::distr::uniform::SampleUniform;
     use rand::distr::{Distribution, StandardUniform};
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
 
     use arrow_array::builder::*;
     use arrow_array::types::*;
     use arrow_array::*;
-    use arrow_buffer::{i256, NullBuffer};
     use arrow_buffer::{Buffer, OffsetBuffer};
+    use arrow_buffer::{NullBuffer, i256};
     use arrow_cast::display::{ArrayFormatter, FormatOptions};
     use arrow_ord::sort::{LexicographicalComparator, SortColumn};
 
@@ -1691,6 +2142,66 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_decimal32() { // Decimal32 column: row encoding must preserve sort order and round-trip
+        let converter = RowConverter::new(vec![SortField::new(DataType::Decimal32(
+            DECIMAL32_MAX_PRECISION,
+            7,
+        ))])
+        .unwrap();
+        let col = Arc::new(
+            Decimal32Array::from_iter([
+                None,
+                Some(i32::MIN),
+                Some(-13),
+                Some(46_i32),
+                Some(5456_i32),
+                Some(i32::MAX),
+            ])
+            .with_precision_and_scale(9, 7) // NOTE(review): 9 presumably equals DECIMAL32_MAX_PRECISION used for the converter - confirm
+            .unwrap(),
+        ) as ArrayRef;
+        // The input is listed as null followed by ascending values, so every row must compare strictly below the next one
+        let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+        for i in 0..rows.num_rows() - 1 {
+            assert!(rows.row(i) < rows.row(i + 1));
+        }
+        // Decoding the rows must yield a single column equal to the original
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        assert_eq!(col.as_ref(), back[0].as_ref())
+    }
+
+    #[test]
+    fn test_decimal64() { // Decimal64 column: row encoding must preserve sort order and round-trip
+        let converter = RowConverter::new(vec![SortField::new(DataType::Decimal64(
+            DECIMAL64_MAX_PRECISION,
+            7,
+        ))])
+        .unwrap();
+        let col = Arc::new(
+            Decimal64Array::from_iter([
+                None,
+                Some(i64::MIN),
+                Some(-13),
+                Some(46_i64),
+                Some(5456_i64),
+                Some(i64::MAX),
+            ])
+            .with_precision_and_scale(18, 7) // NOTE(review): 18 presumably equals DECIMAL64_MAX_PRECISION used for the converter - confirm
+            .unwrap(),
+        ) as ArrayRef;
+        // The input is listed as null followed by ascending values, so every row must compare strictly below the next one
+        let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+        for i in 0..rows.num_rows() - 1 {
+            assert!(rows.row(i) < rows.row(i + 1));
+        }
+        // Decoding the rows must yield a single column equal to the original
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        assert_eq!(col.as_ref(), back[0].as_ref())
+    }
+
     #[test]
     fn test_decimal128() {
         let converter = RowConverter::new(vec![SortField::new(DataType::Decimal128(
@@ -2039,6 +2550,177 @@ mod tests {
         back[0].to_data().validate_full().unwrap();
     }
 
+    #[test]
+    fn test_dictionary_in_struct() { // A struct with a dictionary child must round-trip, with the child coming back as a plain string column
+        let builder = StringDictionaryBuilder::<Int32Type>::new();
+        let mut struct_builder = StructBuilder::new(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Utf8,
+                true,
+            )],
+            vec![Box::new(builder)],
+        );
+        // Borrow the dictionary child builder (field 0) to append the child values
+        let dict_builder = struct_builder
+            .field_builder::<StringDictionaryBuilder<Int32Type>>(0)
+            .unwrap();
+
+        // Flattened: ["a", null, "a", "b"]
+        dict_builder.append_value("a");
+        dict_builder.append_null();
+        dict_builder.append_value("a");
+        dict_builder.append_value("b");
+        // Mark all four struct slots as valid; only the child carries a null
+        for _ in 0..4 {
+            struct_builder.append(true);
+        }
+
+        let s = Arc::new(struct_builder.finish()) as ArrayRef;
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+        // Round-trip: decode the rows back into exactly one array
+        let back = converter.convert_rows(&r).unwrap();
+        let [s2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary, so the decoded type differs from the input type
+        // s.ty = Struct("foo": Dictionary(Int32, Utf8)), s2.ty = Struct("foo": Utf8)
+        assert_ne!(&s.data_type(), &s2.data_type());
+        s2.to_data().validate_full().unwrap();
+
+        // Check that the logical data remains the same
+        // Keys: [0, null, 0, 1]
+        // Values: ["a", "b"]
+        let s1_struct = s.as_struct();
+        let s1_0 = s1_struct.column(0);
+        let s1_idx_0 = s1_0.as_dictionary::<Int32Type>();
+        let keys = s1_idx_0.keys();
+        let values = s1_idx_0.values().as_string::<i32>();
+        // Flattened: ["a", null, "a", "b"]
+        let s2_struct = s2.as_struct();
+        let s2_0 = s2_struct.column(0);
+        let s2_idx_0 = s2_0.as_string::<i32>();
+        // Resolve each original key through the dictionary and compare it with the decoded string at the same index
+        for i in 0..keys.len() {
+            if keys.is_null(i) {
+                assert!(s2_idx_0.is_null(i));
+            } else {
+                let dict_index = keys.value(i) as usize;
+                assert_eq!(values.value(dict_index), s2_idx_0.value(i));
+            }
+        }
+    }
+
+    #[test]
+    fn test_dictionary_in_struct_empty() { // Zero-length struct with a dictionary child must convert to rows and back without error
+        let ty = DataType::Struct(
+            vec![Field::new_dictionary(
+                "foo",
+                DataType::Int32,
+                DataType::Int32,
+                false,
+            )]
+            .into(),
+        );
+        let s = arrow_array::new_empty_array(&ty);
+        // Convert the empty array to rows using a converter built from its own data type
+        let sort_fields = vec![SortField::new(s.data_type().clone())];
+        let converter = RowConverter::new(sort_fields).unwrap();
+        let r = converter.convert_columns(&[Arc::clone(&s)]).unwrap();
+        // Round-trip: decode the rows back into exactly one array
+        let back = converter.convert_rows(&r).unwrap();
+        let [s2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary, so the decoded type differs from the input type
+        // s.ty = Struct("foo": Dictionary(Int32, Int32)), s2.ty = Struct("foo": Int32)
+        assert_ne!(&s.data_type(), &s2.data_type());
+        s2.to_data().validate_full().unwrap();
+        assert_eq!(s.len(), 0);
+        assert_eq!(s2.len(), 0);
+    }
+
+    #[test]
+    fn test_list_of_string_dictionary() { // A list of dictionary-encoded strings must round-trip, with the values coming back as plain strings
+        let mut builder = ListBuilder::<StringDictionaryBuilder<Int32Type>>::default();
+        // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)]
+        builder.values().append("a").unwrap();
+        builder.values().append("b").unwrap();
+        builder.values().append("zero").unwrap();
+        builder.values().append_null();
+        builder.values().append("c").unwrap();
+        builder.values().append("b").unwrap();
+        builder.values().append("d").unwrap();
+        builder.append(true);
+        // List[1] = null
+        builder.append(false);
+        // List[2] = ["e", "zero", "a" (dict)]
+        builder.values().append("e").unwrap();
+        builder.values().append("zero").unwrap();
+        builder.values().append("a").unwrap();
+        builder.append(true);
+
+        let a = Arc::new(builder.finish()) as ArrayRef;
+        let data_type = a.data_type().clone();
+        // Build a converter for the list type and encode the array into rows
+        let field = SortField::new(data_type.clone());
+        let converter = RowConverter::new(vec![field]).unwrap();
+        let rows = converter.convert_columns(&[Arc::clone(&a)]).unwrap();
+        // Round-trip: decode the rows back into exactly one array
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        let [a2] = back.try_into().unwrap();
+
+        // RowConverter flattens Dictionary, so the decoded type differs from the input type
+        // a.ty: List(Dictionary(Int32, Utf8)), a2.ty: List(Utf8)
+        assert_ne!(&a.data_type(), &a2.data_type());
+
+        a2.to_data().validate_full().unwrap();
+
+        let a2_list = a2.as_list::<i32>();
+        let a1_list = a.as_list::<i32>();
+
+        // Check that the logical data remains the same
+        // List[0] = ["a", "b", "zero", null, "c", "b", "d" (dict)]
+        let a1_0 = a1_list.value(0);
+        let a1_idx_0 = a1_0.as_dictionary::<Int32Type>();
+        let keys = a1_idx_0.keys();
+        let values = a1_idx_0.values().as_string::<i32>();
+        let a2_0 = a2_list.value(0);
+        let a2_idx_0 = a2_0.as_string::<i32>();
+        // Resolve each original key through the dictionary and compare it with the decoded string at the same index
+        for i in 0..keys.len() {
+            if keys.is_null(i) {
+                assert!(a2_idx_0.is_null(i));
+            } else {
+                let dict_index = keys.value(i) as usize;
+                assert_eq!(values.value(dict_index), a2_idx_0.value(i));
+            }
+        }
+
+        // List[1] = null
+        assert!(a1_list.is_null(1));
+        assert!(a2_list.is_null(1));
+
+        // List[2] = ["e", "zero", "a" (dict)]
+        let a1_2 = a1_list.value(2);
+        let a1_idx_2 = a1_2.as_dictionary::<Int32Type>();
+        let keys = a1_idx_2.keys();
+        let values = a1_idx_2.values().as_string::<i32>();
+        let a2_2 = a2_list.value(2);
+        let a2_idx_2 = a2_2.as_string::<i32>();
+        // Same element-wise comparison as for List[0]
+        for i in 0..keys.len() {
+            if keys.is_null(i) {
+                assert!(a2_idx_2.is_null(i));
+            } else {
+                let dict_index = keys.value(i) as usize;
+                assert_eq!(values.value(dict_index), a2_idx_2.value(i));
+            }
+        }
+    }
+
     #[test]
     fn test_primitive_dictionary() {
         let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
@@ -2062,6 +2744,10 @@ mod tests {
         assert!(rows.row(3) < rows.row(2));
         assert!(rows.row(6) < rows.row(2));
         assert!(rows.row(3) < rows.row(6));
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
     }
 
     #[test]
@@ -2090,15 +2776,28 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "Encountered non UTF-8 data")]
-    fn test_invalid_utf8() {
+    fn test_from_binary_shared_buffer() {
         let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
         let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
         let rows = converter.convert_columns(&[array]).unwrap();
-        let binary_row = rows.row(0);
+        let binary_rows = rows.try_into_binary().expect("known-small rows");
+        let _binary_rows_shared_buffer = binary_rows.clone();
 
-        let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
-        let parser = converter.parser();
+        let parsed = converter.from_binary(binary_rows);
+
+        converter.convert_rows(parsed.iter()).unwrap();
+    }
+
+    #[test]
+    #[should_panic(expected = "Encountered non UTF-8 data")]
+    fn test_invalid_utf8() {
+        let converter = RowConverter::new(vec![SortField::new(DataType::Binary)]).unwrap();
+        let array = Arc::new(BinaryArray::from_iter_values([&[0xFF]])) as _;
+        let rows = converter.convert_columns(&[array]).unwrap();
+        let binary_row = rows.row(0);
+
+        let converter = RowConverter::new(vec![SortField::new(DataType::Utf8)]).unwrap();
+        let parser = converter.parser();
         let utf8_row = parser.parse(binary_row.as_ref());
 
         converter.convert_rows(std::iter::once(utf8_row)).unwrap();
@@ -2197,6 +2896,9 @@ mod tests {
         builder.values().append_null();
         builder.append(true);
         builder.append(true);
+        builder.values().append_value(17); // MASKED
+        builder.values().append_null(); // MASKED
+        builder.append(false);
 
         let list = Arc::new(builder.finish()) as ArrayRef;
         let d = list.data_type().clone();
@@ -2205,11 +2907,12 @@ mod tests {
 
         let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
         assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12]
-        assert!(rows.row(2) < rows.row(1)); // [32, 42] < [32, 52, 12]
-        assert!(rows.row(3) < rows.row(2)); // null < [32, 42]
-        assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 42]
-        assert!(rows.row(5) < rows.row(2)); // [] < [32, 42]
+        assert!(rows.row(2) < rows.row(1)); // [32, 52] < [32, 52, 12]
+        assert!(rows.row(3) < rows.row(2)); // null < [32, 52]
+        assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 52]
+        assert!(rows.row(5) < rows.row(2)); // [] < [32, 52]
         assert!(rows.row(3) < rows.row(5)); // null < []
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
 
         let back = converter.convert_rows(&rows).unwrap();
         assert_eq!(back.len(), 1);
@@ -2222,11 +2925,12 @@ mod tests {
         let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
 
         assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12]
-        assert!(rows.row(2) < rows.row(1)); // [32, 42] < [32, 52, 12]
-        assert!(rows.row(3) > rows.row(2)); // null > [32, 42]
-        assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 42]
-        assert!(rows.row(5) < rows.row(2)); // [] < [32, 42]
+        assert!(rows.row(2) < rows.row(1)); // [32, 52] < [32, 52, 12]
+        assert!(rows.row(3) > rows.row(2)); // null > [32, 52]
+        assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 52]
+        assert!(rows.row(5) < rows.row(2)); // [] < [32, 52]
         assert!(rows.row(3) > rows.row(5)); // null > []
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
 
         let back = converter.convert_rows(&rows).unwrap();
         assert_eq!(back.len(), 1);
@@ -2239,11 +2943,12 @@ mod tests {
         let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
 
         assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12]
-        assert!(rows.row(2) > rows.row(1)); // [32, 42] > [32, 52, 12]
-        assert!(rows.row(3) > rows.row(2)); // null > [32, 42]
-        assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 42]
-        assert!(rows.row(5) > rows.row(2)); // [] > [32, 42]
+        assert!(rows.row(2) > rows.row(1)); // [32, 52] > [32, 52, 12]
+        assert!(rows.row(3) > rows.row(2)); // null > [32, 52]
+        assert!(rows.row(4) > rows.row(2)); // [32, null] > [32, 52]
+        assert!(rows.row(5) > rows.row(2)); // [] > [32, 52]
         assert!(rows.row(3) > rows.row(5)); // null > []
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
 
         let back = converter.convert_rows(&rows).unwrap();
         assert_eq!(back.len(), 1);
@@ -2256,16 +2961,33 @@ mod tests {
         let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
 
         assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12]
-        assert!(rows.row(2) > rows.row(1)); // [32, 42] > [32, 52, 12]
-        assert!(rows.row(3) < rows.row(2)); // null < [32, 42]
-        assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 42]
-        assert!(rows.row(5) > rows.row(2)); // [] > [32, 42]
+        assert!(rows.row(2) > rows.row(1)); // [32, 52] > [32, 52, 12]
+        assert!(rows.row(3) < rows.row(2)); // null < [32, 52]
+        assert!(rows.row(4) < rows.row(2)); // [32, null] < [32, 52]
+        assert!(rows.row(5) > rows.row(2)); // [] > [32, 52]
         assert!(rows.row(3) < rows.row(5)); // null < []
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
 
         let back = converter.convert_rows(&rows).unwrap();
         assert_eq!(back.len(), 1);
         back[0].to_data().validate_full().unwrap();
         assert_eq!(&back[0], &list);
+
+        let sliced_list = list.slice(1, 5);
+        let rows_on_sliced_list = converter
+            .convert_columns(&[Arc::clone(&sliced_list)])
+            .unwrap();
+
+        assert!(rows_on_sliced_list.row(1) > rows_on_sliced_list.row(0)); // [32, 52] > [32, 52, 12]
+        assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null < [32, 52]
+        assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null] < [32, 52]
+        assert!(rows_on_sliced_list.row(4) > rows_on_sliced_list.row(1)); // [] > [32, 52]
+        assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null < []
+
+        let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &sliced_list);
     }
 
     fn test_nested_list<O: OffsetSizeTrait>() {
@@ -2357,6 +3079,19 @@ mod tests {
         assert_eq!(back.len(), 1);
         back[0].to_data().validate_full().unwrap();
         assert_eq!(&back[0], &list);
+
+        let sliced_list = list.slice(1, 3);
+        let rows = converter
+            .convert_columns(&[Arc::clone(&sliced_list)])
+            .unwrap();
+
+        assert!(rows.row(0) < rows.row(1));
+        assert!(rows.row(1) < rows.row(2));
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &sliced_list);
     }
 
     #[test]
@@ -2371,6 +3106,305 @@ mod tests {
         test_nested_list::<i64>();
     }
 
+    #[test]
+    fn test_fixed_size_list() {
+        let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3);
+        builder.values().append_value(32);
+        builder.values().append_value(52);
+        builder.values().append_value(32);
+        builder.append(true);
+        builder.values().append_value(32);
+        builder.values().append_value(52);
+        builder.values().append_value(12);
+        builder.append(true);
+        builder.values().append_value(32);
+        builder.values().append_value(52);
+        builder.values().append_null();
+        builder.append(true);
+        builder.values().append_value(32); // MASKED
+        builder.values().append_value(52); // MASKED
+        builder.values().append_value(13); // MASKED
+        builder.append(false);
+        builder.values().append_value(32);
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.append(true);
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.values().append_null();
+        builder.append(true);
+        builder.values().append_value(17); // MASKED
+        builder.values().append_null(); // MASKED
+        builder.values().append_value(77); // MASKED
+        builder.append(false);
+
+        let list = Arc::new(builder.finish()) as ArrayRef;
+        let d = list.data_type().clone();
+
+        // Default sorting (ascending, nulls first)
+        let converter = RowConverter::new(vec![SortField::new(d.clone())]).unwrap();
+
+        let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+        assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12]
+        assert!(rows.row(2) < rows.row(1)); // [32, 52, null] < [32, 52, 12]
+        assert!(rows.row(3) < rows.row(2)); // null < [32, 52, null]
+        assert!(rows.row(4) < rows.row(2)); // [32, null, null] < [32, 52, null]
+        assert!(rows.row(5) < rows.row(2)); // [null, null, null] < [32, 52, null]
+        assert!(rows.row(3) < rows.row(5)); // null < [null, null, null]
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &list);
+
+        // Ascending, nulls last
+        let options = SortOptions::default().asc().with_nulls_first(false);
+        let field = SortField::new_with_options(d.clone(), options);
+        let converter = RowConverter::new(vec![field]).unwrap();
+        let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+        assert!(rows.row(0) > rows.row(1)); // [32, 52, 32] > [32, 52, 12]
+        assert!(rows.row(2) > rows.row(1)); // [32, 52, null] > [32, 52, 12]
+        assert!(rows.row(3) > rows.row(2)); // null > [32, 52, null]
+        assert!(rows.row(4) > rows.row(2)); // [32, null, null] > [32, 52, null]
+        assert!(rows.row(5) > rows.row(2)); // [null, null, null] > [32, 52, null]
+        assert!(rows.row(3) > rows.row(5)); // null > [null, null, null]
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &list);
+
+        // Descending, nulls last
+        let options = SortOptions::default().desc().with_nulls_first(false);
+        let field = SortField::new_with_options(d.clone(), options);
+        let converter = RowConverter::new(vec![field]).unwrap();
+        let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+        assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12]
+        assert!(rows.row(2) > rows.row(1)); // [32, 52, null] > [32, 52, 12]
+        assert!(rows.row(3) > rows.row(2)); // null > [32, 52, null]
+        assert!(rows.row(4) > rows.row(2)); // [32, null, null] > [32, 52, null]
+        assert!(rows.row(5) > rows.row(2)); // [null, null, null] > [32, 52, null]
+        assert!(rows.row(3) > rows.row(5)); // null > [null, null, null]
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &list);
+
+        // Descending, nulls first
+        let options = SortOptions::default().desc().with_nulls_first(true);
+        let field = SortField::new_with_options(d, options);
+        let converter = RowConverter::new(vec![field]).unwrap();
+        let rows = converter.convert_columns(&[Arc::clone(&list)]).unwrap();
+
+        assert!(rows.row(0) < rows.row(1)); // [32, 52, 32] < [32, 52, 12]
+        assert!(rows.row(2) < rows.row(1)); // [32, 52, null] < [32, 52, 12]
+        assert!(rows.row(3) < rows.row(2)); // null < [32, 52, null]
+        assert!(rows.row(4) < rows.row(2)); // [32, null, null] < [32, 52, null]
+        assert!(rows.row(5) < rows.row(2)); // [null, null, null] < [32, 52, null]
+        assert!(rows.row(3) < rows.row(5)); // null < [null, null, null]
+        assert_eq!(rows.row(3), rows.row(6)); // null = null (different masked values)
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &list);
+
+        let sliced_list = list.slice(1, 5);
+        let rows_on_sliced_list = converter
+            .convert_columns(&[Arc::clone(&sliced_list)])
+            .unwrap();
+
+        assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(1)); // null < [32, 52, null]
+        assert!(rows_on_sliced_list.row(3) < rows_on_sliced_list.row(1)); // [32, null, null] < [32, 52, null]
+        assert!(rows_on_sliced_list.row(4) < rows_on_sliced_list.row(1)); // [null, null, null] < [32, 52, null]
+        assert!(rows_on_sliced_list.row(2) < rows_on_sliced_list.row(4)); // null < [null, null, null]
+
+        let back = converter.convert_rows(&rows_on_sliced_list).unwrap();
+        assert_eq!(back.len(), 1);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &sliced_list);
+    }
+
+    #[test]
+    fn test_two_fixed_size_lists() {
+        let mut first = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
+        // 0: [100]
+        first.values().append_value(100);
+        first.append(true);
+        // 1: [101]
+        first.values().append_value(101);
+        first.append(true);
+        // 2: [102]
+        first.values().append_value(102);
+        first.append(true);
+        // 3: [null]
+        first.values().append_null();
+        first.append(true);
+        // 4: null
+        first.values().append_null(); // MASKED
+        first.append(false);
+        let first = Arc::new(first.finish()) as ArrayRef;
+        let first_type = first.data_type().clone();
+
+        let mut second = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
+        // 0: [200]
+        second.values().append_value(200);
+        second.append(true);
+        // 1: [201]
+        second.values().append_value(201);
+        second.append(true);
+        // 2: [202]
+        second.values().append_value(202);
+        second.append(true);
+        // 3: [null]
+        second.values().append_null();
+        second.append(true);
+        // 4: null
+        second.values().append_null(); // MASKED
+        second.append(false);
+        let second = Arc::new(second.finish()) as ArrayRef;
+        let second_type = second.data_type().clone();
+
+        let converter = RowConverter::new(vec![
+            SortField::new(first_type.clone()),
+            SortField::new(second_type.clone()),
+        ])
+        .unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
+            .unwrap();
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 2);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &first);
+        back[1].to_data().validate_full().unwrap();
+        assert_eq!(&back[1], &second);
+    }
+
+    #[test]
+    fn test_fixed_size_list_with_variable_width_content() {
+        let mut first = FixedSizeListBuilder::new(
+            StructBuilder::from_fields(
+                vec![
+                    Field::new(
+                        "timestamp",
+                        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("UTC"))),
+                        false,
+                    ),
+                    Field::new("offset_minutes", DataType::Int16, false),
+                    Field::new("time_zone", DataType::Utf8, false),
+                ],
+                1,
+            ),
+            1,
+        );
+        // 0: null
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_null();
+        first.values().append(false);
+        first.append(false);
+        // 1: [null]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_null();
+        first.values().append(false);
+        first.append(true);
+        // 2: [1970-01-01 00:00:00.000000 UTC]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_value(0);
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_value(0);
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_value("UTC");
+        first.values().append(true);
+        first.append(true);
+        // 3: [2005-09-10 13:30:00.123456 Europe/Warsaw]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_value(1126351800123456);
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_value(120);
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_value("Europe/Warsaw");
+        first.values().append(true);
+        first.append(true);
+        let first = Arc::new(first.finish()) as ArrayRef;
+        let first_type = first.data_type().clone();
+
+        let mut second = StringBuilder::new();
+        second.append_value("somewhere near");
+        second.append_null();
+        second.append_value("Greenwich");
+        second.append_value("Warsaw");
+        let second = Arc::new(second.finish()) as ArrayRef;
+        let second_type = second.data_type().clone();
+
+        let converter = RowConverter::new(vec![
+            SortField::new(first_type.clone()),
+            SortField::new(second_type.clone()),
+        ])
+        .unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
+            .unwrap();
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 2);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &first);
+        back[1].to_data().validate_full().unwrap();
+        assert_eq!(&back[1], &second);
+    }
+
     fn generate_primitive_array<K>(len: usize, valid_percent: f64) -> PrimitiveArray<K>
     where
         K: ArrowPrimitiveType,
@@ -2424,6 +3458,34 @@ mod tests {
             .collect()
     }
 
+    fn generate_fixed_stringview_column(len: usize) -> StringViewArray {
+        let edge_cases = vec![
+            Some("bar".to_string()),
+            Some("bar\0".to_string()),
+            Some("LongerThan12Bytes".to_string()),
+            Some("LongerThan12Bytez".to_string()),
+            Some("LongerThan12Bytes\0".to_string()),
+            Some("LongerThan12Byt".to_string()),
+            Some("backend one".to_string()),
+            Some("backend two".to_string()),
+            Some("a".repeat(257)),
+            Some("a".repeat(300)),
+        ];
+
+        // Fill up to `len` by repeating edge cases and trimming
+        let mut values = Vec::with_capacity(len);
+        for i in 0..len {
+            values.push(
+                edge_cases
+                    .get(i % edge_cases.len())
+                    .cloned()
+                    .unwrap_or(None),
+            );
+        }
+
+        StringViewArray::from(values)
+    }
+
     fn generate_dictionary<K>(
         values: ArrayRef,
         len: usize,
@@ -2504,7 +3566,7 @@ mod tests {
 
     fn generate_column(len: usize) -> ArrayRef {
         let mut rng = rng();
-        match rng.random_range(0..16) {
+        match rng.random_range(0..18) {
             0 => Arc::new(generate_primitive_array::<Int32Type>(len, 0.8)),
             1 => Arc::new(generate_primitive_array::<UInt32Type>(len, 0.8)),
             2 => Arc::new(generate_primitive_array::<Int64Type>(len, 0.8)),
@@ -2540,6 +3602,13 @@ mod tests {
             })),
             14 => Arc::new(generate_string_view(len, 0.8)),
             15 => Arc::new(generate_byte_view(len, 0.8)),
+            16 => Arc::new(generate_fixed_stringview_column(len)),
+            17 => Arc::new(
+                generate_list(len + 1000, 0.8, |values_len| {
+                    Arc::new(generate_primitive_array::<Int64Type>(values_len, 0.8))
+                })
+                .slice(500, len),
+            ),
             _ => unreachable!(),
         }
     }
@@ -2622,13 +3691,16 @@ mod tests {
                 }
             }
 
+            // Convert rows produced from convert_columns().
+            // Note: validate_utf8 is set to false since Row is initialized through empty_rows()
             let back = converter.convert_rows(&rows).unwrap();
             for (actual, expected) in back.iter().zip(&arrays) {
                 actual.to_data().validate_full().unwrap();
                 dictionary_eq(actual, expected)
             }
 
-            // Check that we can convert
+            // Check that we can convert rows into a binary array, then parse and convert them back to arrays
+            // Note: validate_utf8 is set to true since Row is initialized through RowParser
             let rows = rows.try_into_binary().expect("reasonable size");
             let parser = converter.parser();
             let back = converter
@@ -2659,7 +3731,9 @@ mod tests {
 
         for array in arrays.iter() {
             rows.clear();
-            converter.append(&mut rows, &[array.clone()]).unwrap();
+            converter
+                .append(&mut rows, std::slice::from_ref(array))
+                .unwrap();
             let back = converter.convert_rows(&rows).unwrap();
             assert_eq!(&back[0], array);
         }
@@ -2670,8 +3744,7 @@ mod tests {
         for (i, (actual, expected)) in rows.iter().zip(rows_expected.iter()).enumerate() {
             assert_eq!(
                 actual, expected,
-                "For row {}: expected {:?}, actual: {:?}",
-                i, expected, actual
+                "For row {i}: expected {expected:?}, actual: {actual:?}",
             );
         }
     }
@@ -2698,7 +3771,9 @@ mod tests {
 
         rows.clear();
         let array = Arc::new(dict_array) as ArrayRef;
-        converter.append(&mut rows, &[array.clone()]).unwrap();
+        converter
+            .append(&mut rows, std::slice::from_ref(&array))
+            .unwrap();
         let back = converter.convert_rows(&rows).unwrap();
 
         dictionary_eq(&back[0], &array);
@@ -2715,4 +3790,557 @@ mod tests {
         let rows = converter.convert_columns(&[Arc::new(a) as _]).unwrap();
         assert_eq!(rows.row(0).cmp(&rows.row(1)), Ordering::Less);
     }
+
+    #[test]
+    fn map_should_be_marked_as_unsupported() {
+        let map_data_type = Field::new_map(
+            "map",
+            "entries",
+            Field::new("key", DataType::Utf8, false),
+            Field::new("value", DataType::Utf8, true),
+            false,
+            true,
+        )
+        .data_type()
+        .clone();
+
+        let is_supported = RowConverter::supports_fields(&[SortField::new(map_data_type)]);
+
+        assert!(!is_supported, "Map should not be supported");
+    }
+
+    #[test]
+    fn should_fail_to_create_row_converter_for_unsupported_map_type() {
+        let map_data_type = Field::new_map(
+            "map",
+            "entries",
+            Field::new("key", DataType::Utf8, false),
+            Field::new("value", DataType::Utf8, true),
+            false,
+            true,
+        )
+        .data_type()
+        .clone();
+
+        let converter = RowConverter::new(vec![SortField::new(map_data_type)]);
+
+        match converter {
+            Err(ArrowError::NotYetImplemented(message)) => {
+                assert!(
+                    message.contains("Row format support not yet implemented for"),
+                    "Expected NotYetImplemented error for map data type, got: {message}",
+                );
+            }
+            Err(e) => panic!("Expected NotYetImplemented error, got: {e}"),
+            Ok(_) => panic!("Expected NotYetImplemented error for map data type"),
+        }
+    }
+
+    #[test]
+    fn test_values_buffer_smaller_when_utf8_validation_disabled() {
+        fn get_values_buffer_len(col: ArrayRef) -> (usize, usize) {
+            // 1. Convert cols into rows
+            let converter = RowConverter::new(vec![SortField::new(DataType::Utf8View)]).unwrap();
+
+            // 2a. Convert rows into cols (validate_utf8 = false)
+            let rows = converter.convert_columns(&[col]).unwrap();
+            let converted = converter.convert_rows(&rows).unwrap();
+            let unchecked_values_len = converted[0].as_string_view().data_buffers()[0].len();
+
+            // 2b. Convert rows into cols (validate_utf8 = true since Row is initialized through RowParser)
+            let rows = rows.try_into_binary().expect("reasonable size");
+            let parser = converter.parser();
+            let converted = converter
+                .convert_rows(rows.iter().map(|b| parser.parse(b.expect("valid bytes"))))
+                .unwrap();
+            let checked_values_len = converted[0].as_string_view().data_buffers()[0].len();
+            (unchecked_values_len, checked_values_len)
+        }
+
+        // Case1. StringViewArray with inline strings
+        let col = Arc::new(StringViewArray::from_iter([
+            Some("hello"), // short(5)
+            None,          // null
+            Some("short"), // short(5)
+            Some("tiny"),  // short(4)
+        ])) as ArrayRef;
+
+        let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
+        // Since there are no long (>12) strings, len of values buffer is 0
+        assert_eq!(unchecked_values_len, 0);
+        // When utf8 validation enabled, values buffer includes inline strings (5+5+4)
+        assert_eq!(checked_values_len, 14);
+
+        // Case2. StringViewArray with long(>12) strings
+        let col = Arc::new(StringViewArray::from_iter([
+            Some("this is a very long string over 12 bytes"),
+            Some("another long string to test the buffer"),
+        ])) as ArrayRef;
+
+        let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
+        // Since there are no inline strings, expected length of values buffer is the same
+        assert!(unchecked_values_len > 0);
+        assert_eq!(unchecked_values_len, checked_values_len);
+
+        // Case3. StringViewArray with both short and long strings
+        let col = Arc::new(StringViewArray::from_iter([
+            Some("tiny"),          // 4 (short)
+            Some("thisisexact13"), // 13 (long)
+            None,
+            Some("short"), // 5 (short)
+        ])) as ArrayRef;
+
+        let (unchecked_values_len, checked_values_len) = get_values_buffer_len(col);
+        // Since there is a single long string, len of values buffer is 13
+        assert_eq!(unchecked_values_len, 13);
+        assert!(checked_values_len > unchecked_values_len);
+    }
+
+    #[test]
+    fn test_sparse_union() {
+        // create a sparse union with Int32 (type_id = 0) and Utf8 (type_id = 1)
+        let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
+        let str_array = StringArray::from(vec![None, Some("b"), None, Some("d"), None]);
+
+        // [1, "b", 3, "d", 5]
+        let type_ids = vec![0, 1, 0, 1, 0].into();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("int", DataType::Int32, false))),
+            (1, Arc::new(Field::new("str", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect();
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            None,
+            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
+        )
+        .unwrap();
+
+        let union_type = union_array.data_type().clone();
+        let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::new(union_array.clone())])
+            .unwrap();
+
+        // round trip
+        let back = converter.convert_rows(&rows).unwrap();
+        let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
+
+        assert_eq!(union_array.len(), back_union.len());
+        for i in 0..union_array.len() {
+            assert_eq!(union_array.type_id(i), back_union.type_id(i));
+        }
+    }
+
+    #[test]
+    fn test_sparse_union_with_nulls() {
+        // create a sparse union with Int32 (type_id = 0) and Utf8 (type_id = 1)
+        let int_array = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5)]);
+        let str_array = StringArray::from(vec![None::<&str>; 5]);
+
+        // [1, null (both children null), 3, null (both children null), 5]
+        let type_ids = vec![0, 1, 0, 1, 0].into();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("int", DataType::Int32, true))),
+            (1, Arc::new(Field::new("str", DataType::Utf8, true))),
+        ]
+        .into_iter()
+        .collect();
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            None,
+            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
+        )
+        .unwrap();
+
+        let union_type = union_array.data_type().clone();
+        let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::new(union_array.clone())])
+            .unwrap();
+
+        // round trip
+        let back = converter.convert_rows(&rows).unwrap();
+        let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
+
+        assert_eq!(union_array.len(), back_union.len());
+        for i in 0..union_array.len() {
+            let expected_null = union_array.is_null(i);
+            let actual_null = back_union.is_null(i);
+            assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
+            if !expected_null {
+                assert_eq!(union_array.type_id(i), back_union.type_id(i));
+            }
+        }
+    }
+
+    #[test]
+    fn test_dense_union() {
+        // create a dense union with Int32 (type_id = 0) and Utf8 (type_id = 1)
+        let int_array = Int32Array::from(vec![1, 3, 5]);
+        let str_array = StringArray::from(vec!["a", "b"]);
+
+        let type_ids = vec![0, 1, 0, 1, 0].into();
+
+        // [1, "a", 3, "b", 5]
+        let offsets = vec![0, 0, 1, 1, 2].into();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("int", DataType::Int32, false))),
+            (1, Arc::new(Field::new("str", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect();
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            Some(offsets), // Dense mode
+            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
+        )
+        .unwrap();
+
+        let union_type = union_array.data_type().clone();
+        let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::new(union_array.clone())])
+            .unwrap();
+
+        // round trip
+        let back = converter.convert_rows(&rows).unwrap();
+        let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
+
+        assert_eq!(union_array.len(), back_union.len());
+        for i in 0..union_array.len() {
+            assert_eq!(union_array.type_id(i), back_union.type_id(i));
+        }
+    }
+
+    #[test]
+    fn test_dense_union_with_nulls() {
+        // create a dense union with Int32 (type_id = 0) and Utf8 (type_id = 1)
+        let int_array = Int32Array::from(vec![Some(1), None, Some(5)]);
+        let str_array = StringArray::from(vec![Some("a"), None]);
+
+        // [1, "a", 5, null (str null), null (int null)]
+        let type_ids = vec![0, 1, 0, 1, 0].into();
+        let offsets = vec![0, 0, 1, 1, 2].into();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("int", DataType::Int32, true))),
+            (1, Arc::new(Field::new("str", DataType::Utf8, true))),
+        ]
+        .into_iter()
+        .collect();
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            Some(offsets),
+            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
+        )
+        .unwrap();
+
+        let union_type = union_array.data_type().clone();
+        let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::new(union_array.clone())])
+            .unwrap();
+
+        // round trip
+        let back = converter.convert_rows(&rows).unwrap();
+        let back_union = back[0].as_any().downcast_ref::<UnionArray>().unwrap();
+
+        assert_eq!(union_array.len(), back_union.len());
+        for i in 0..union_array.len() {
+            let expected_null = union_array.is_null(i);
+            let actual_null = back_union.is_null(i);
+            assert_eq!(expected_null, actual_null, "Null mismatch at index {i}");
+            if !expected_null {
+                assert_eq!(union_array.type_id(i), back_union.type_id(i));
+            }
+        }
+    }
+
+    #[test]
+    fn test_union_ordering() {
+        let int_array = Int32Array::from(vec![100, 5, 20]);
+        let str_array = StringArray::from(vec!["z", "a"]);
+
+        // [100, "z", 5, "a", 20]
+        let type_ids = vec![0, 1, 0, 1, 0].into();
+        let offsets = vec![0, 0, 1, 1, 2].into();
+
+        let union_fields = [
+            (0, Arc::new(Field::new("int", DataType::Int32, false))),
+            (1, Arc::new(Field::new("str", DataType::Utf8, false))),
+        ]
+        .into_iter()
+        .collect();
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            Some(offsets),
+            vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)],
+        )
+        .unwrap();
+
+        let union_type = union_array.data_type().clone();
+        let converter = RowConverter::new(vec![SortField::new(union_type)]).unwrap();
+
+        let rows = converter.convert_columns(&[Arc::new(union_array)]).unwrap();
+
+        /*
+        expected ordering
+
+        row 2: 5    - type_id 0
+        row 4: 20   - type_id 0
+        row 0: 100  - type_id 0
+        row 3: "a"  - type_id 1
+        row 1: "z"  - type_id 1
+        */
+
+        // 5 < "z"
+        assert!(rows.row(2) < rows.row(1));
+
+        // 100 < "a"
+        assert!(rows.row(0) < rows.row(3));
+
+        // among ints
+        // 5 < 20
+        assert!(rows.row(2) < rows.row(4));
+        // 20 < 100
+        assert!(rows.row(4) < rows.row(0));
+
+        // among strings
+        // "a" < "z"
+        assert!(rows.row(3) < rows.row(1));
+    }
+
+    #[test]
+    fn test_row_converter_roundtrip_with_many_union_columns() {
+        // col 1: Union(Int32, Utf8) [67, "hello"]
+        let fields1 = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new("int", DataType::Int32, true),
+                Field::new("string", DataType::Utf8, true),
+            ],
+        )
+        .unwrap();
+
+        let int_array1 = Int32Array::from(vec![Some(67), None]);
+        let string_array1 = StringArray::from(vec![None::<&str>, Some("hello")]);
+        let type_ids1 = vec![0i8, 1].into();
+
+        let union_array1 = UnionArray::try_new(
+            fields1.clone(),
+            type_ids1,
+            None,
+            vec![
+                Arc::new(int_array1) as ArrayRef,
+                Arc::new(string_array1) as ArrayRef,
+            ],
+        )
+        .unwrap();
+
+        // col 2: Union(Int32, Utf8) [100, "world"]
+        let fields2 = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new("int", DataType::Int32, true),
+                Field::new("string", DataType::Utf8, true),
+            ],
+        )
+        .unwrap();
+
+        let int_array2 = Int32Array::from(vec![Some(100), None]);
+        let string_array2 = StringArray::from(vec![None::<&str>, Some("world")]);
+        let type_ids2 = vec![0i8, 1].into();
+
+        let union_array2 = UnionArray::try_new(
+            fields2.clone(),
+            type_ids2,
+            None,
+            vec![
+                Arc::new(int_array2) as ArrayRef,
+                Arc::new(string_array2) as ArrayRef,
+            ],
+        )
+        .unwrap();
+
+        // create a row converter with 2 union columns
+        let field1 = Field::new("col1", DataType::Union(fields1, UnionMode::Sparse), true);
+        let field2 = Field::new("col2", DataType::Union(fields2, UnionMode::Sparse), true);
+
+        let sort_field1 = SortField::new(field1.data_type().clone());
+        let sort_field2 = SortField::new(field2.data_type().clone());
+
+        let converter = RowConverter::new(vec![sort_field1, sort_field2]).unwrap();
+
+        let rows = converter
+            .convert_columns(&[
+                Arc::new(union_array1.clone()) as ArrayRef,
+                Arc::new(union_array2.clone()) as ArrayRef,
+            ])
+            .unwrap();
+
+        // roundtrip
+        let out = converter.convert_rows(&rows).unwrap();
+
+        let [col1, col2] = out.as_slice() else {
+            panic!("expected 2 columns")
+        };
+
+        let col1 = col1.as_any().downcast_ref::<UnionArray>().unwrap();
+        let col2 = col2.as_any().downcast_ref::<UnionArray>().unwrap();
+
+        for (expected, got) in [union_array1, union_array2].iter().zip([col1, col2]) {
+            assert_eq!(expected.len(), got.len());
+            assert_eq!(expected.type_ids(), got.type_ids());
+
+            for i in 0..expected.len() {
+                assert_eq!(expected.value(i).as_ref(), got.value(i).as_ref());
+            }
+        }
+    }
+
+    #[test]
+    fn test_row_converter_roundtrip_with_one_union_column() {
+        // Round-trips a single sparse union column through the row format and checks that
+        // its length, type ids and every value come back unchanged.
+
+        // Union(Int32, Utf8) column holding [67, "hello"]
+        let union_fields = UnionFields::try_new(
+            vec![0, 1],
+            vec![
+                Field::new("int", DataType::Int32, true),
+                Field::new("string", DataType::Utf8, true),
+            ],
+        )
+        .unwrap();
+
+        // Each child array covers both rows, with a null where the other type is selected
+        let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(67), None]));
+        let strings: ArrayRef = Arc::new(StringArray::from(vec![None::<&str>, Some("hello")]));
+        let type_ids = vec![0i8, 1].into();
+        let input = UnionArray::try_new(
+            union_fields.clone(),
+            type_ids,
+            None,
+            vec![ints, strings],
+        )
+        .unwrap();
+
+        let sort_field = SortField::new(DataType::Union(union_fields, UnionMode::Sparse));
+        let converter = RowConverter::new(vec![sort_field]).unwrap();
+
+        // Encode the column into rows ...
+        let input_ref: ArrayRef = Arc::new(input.clone());
+        let rows = converter.convert_columns(&[input_ref]).unwrap();
+
+        // ... and decode the rows back into columns
+        let columns = converter.convert_rows(&rows).unwrap();
+        let [column] = columns.as_slice() else {
+            panic!("expected 1 column")
+        };
+        let roundtripped = column.as_any().downcast_ref::<UnionArray>().unwrap();
+
+        // The decoded union must match the input slot for slot
+        assert_eq!(roundtripped.len(), input.len());
+        assert_eq!(roundtripped.type_ids(), input.type_ids());
+
+        for idx in 0..roundtripped.len() {
+            assert_eq!(roundtripped.value(idx).as_ref(), input.value(idx).as_ref());
+        }
+    }
+
+    #[test]
+    fn rows_size_should_count_for_capacity() {
+        // Every value measured here comes from a freshly created `empty_rows` result, so any
+        // difference in `size()` can only come from the capacity preallocated for it.
+
+        let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
+
+        // Size of a fresh `empty_rows` result; going by the names below, the first argument
+        // preallocates rows and the second preallocates data.
+        let empty_size = |row_capacity, data_capacity| {
+            row_converter.empty_rows(row_capacity, data_capacity).size()
+        };
+
+        let empty_rows_size_with_preallocate_rows_and_data = empty_size(1000, 1000);
+        let empty_rows_size_with_preallocate_rows = empty_size(1000, 0);
+        let empty_rows_size_with_preallocate_data = empty_size(0, 1000);
+        let empty_rows_size_without_preallocate = empty_size(0, 0);
+
+        // Preallocating both must be larger than preallocating rows only
+        assert!(
+            empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_rows,
+            "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_rows}"
+        );
+
+        // Preallocating both must be larger than preallocating data only
+        assert!(
+            empty_rows_size_with_preallocate_rows_and_data > empty_rows_size_with_preallocate_data,
+            "{empty_rows_size_with_preallocate_rows_and_data} should be larger than {empty_rows_size_with_preallocate_data}"
+        );
+
+        // Preallocating rows only must be larger than preallocating nothing
+        assert!(
+            empty_rows_size_with_preallocate_rows > empty_rows_size_without_preallocate,
+            "{empty_rows_size_with_preallocate_rows} should be larger than {empty_rows_size_without_preallocate}"
+        );
+
+        // Preallocating data only must be larger than preallocating nothing
+        assert!(
+            empty_rows_size_with_preallocate_data > empty_rows_size_without_preallocate,
+            "{empty_rows_size_with_preallocate_data} should be larger than {empty_rows_size_without_preallocate}"
+        );
+    }
+
+    #[test]
+    fn reserve_should_increase_capacity_to_the_requested_size() {
+        let converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
+        let mut rows = converter.empty_rows(0, 0);
+        rows.reserve(50, 50);
+        let reserved_size = rows.size();
+        // Repeating the request, or asking for less, fits in what is already reserved
+        rows.reserve(50, 50);
+        assert_eq!(
+            rows.size(),
+            reserved_size,
+            "Size should not change when reserving already reserved space"
+        );
+        rows.reserve(10, 20);
+        assert_eq!(
+            rows.size(),
+            reserved_size,
+            "Size should not change when already have space for the expected reserved data"
+        );
+        // A first argument above the 50 reserved so far must grow the reported size ...
+        rows.reserve(100, 20);
+        let grown_size = rows.size();
+        assert!(
+            grown_size > reserved_size,
+            "Size should increase when reserving more space than previously reserved"
+        );
+        // ... and so must a second argument above the 50 reserved so far
+        rows.reserve(20, 100);
+        assert!(
+            rows.size() > grown_size,
+            "Size should increase when reserving more space than previously reserved"
+        );
+    }
 }
diff --git a/arrow-row/src/list.rs b/arrow-row/src/list.rs
index 46cd0f3d3d81..e04aa70c528f 100644
--- a/arrow-row/src/list.rs
+++ b/arrow-row/src/list.rs
@@ -15,26 +15,28 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{null_sentinel, RowConverter, Rows, SortField};
-use arrow_array::{Array, GenericListArray, OffsetSizeTrait};
-use arrow_buffer::{Buffer, MutableBuffer};
+use crate::{LengthTracker, RowConverter, Rows, SortField, fixed, null_sentinel};
+use arrow_array::{Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, new_null_array};
+use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
 use arrow_data::ArrayDataBuilder;
-use arrow_schema::{ArrowError, SortOptions};
-use std::ops::Range;
+use arrow_schema::{ArrowError, DataType, SortOptions};
+use std::{ops::Range, sync::Arc};
 
 pub fn compute_lengths<O: OffsetSizeTrait>(
     lengths: &mut [usize],
     rows: &Rows,
     array: &GenericListArray<O>,
 ) {
+    let shift = array.value_offsets()[0].as_usize();
+
     let offsets = array.value_offsets().windows(2);
     lengths
         .iter_mut()
         .zip(offsets)
         .enumerate()
         .for_each(|(idx, (length, offsets))| {
-            let start = offsets[0].as_usize();
-            let end = offsets[1].as_usize();
+            let start = offsets[0].as_usize() - shift;
+            let end = offsets[1].as_usize() - shift;
             let range = array.is_valid(idx).then_some(start..end);
             *length += encoded_len(rows, range);
         });
@@ -61,14 +63,16 @@ pub fn encode<O: OffsetSizeTrait>(
     opts: SortOptions,
     array: &GenericListArray<O>,
 ) {
+    let shift = array.value_offsets()[0].as_usize();
+
     offsets
         .iter_mut()
         .skip(1)
         .zip(array.value_offsets().windows(2))
         .enumerate()
         .for_each(|(idx, (offset, offsets))| {
-            let start = offsets[0].as_usize();
-            let end = offsets[1].as_usize();
+            let start = offsets[0].as_usize() - shift;
+            let end = offsets[1].as_usize() - shift;
             let range = array.is_valid(idx).then_some(start..end);
             let out = &mut data[*offset..];
             *offset += encode_one(out, rows, range, opts)
@@ -97,7 +101,7 @@ fn encode_one(
     }
 }
 
-/// Decodes a string array from `rows` with the provided `options`
+/// Decodes an array from `rows` with the provided `options`
 ///
 /// # Safety
 ///
@@ -170,12 +174,30 @@ pub unsafe fn decode<O: OffsetSizeTrait>(
         })
         .collect();
 
-    let child = converter.convert_raw(&mut child_rows, validate_utf8)?;
+    let child = unsafe { converter.convert_raw(&mut child_rows, validate_utf8) }?;
     assert_eq!(child.len(), 1);
 
     let child_data = child[0].to_data();
 
-    let builder = ArrayDataBuilder::new(field.data_type.clone())
+    // Since RowConverter flattens certain data types (i.e. Dictionary),
+    // we need to use updated data type instead of original field
+    let corrected_type = match &field.data_type {
+        DataType::List(inner_field) => DataType::List(Arc::new(
+            inner_field
+                .as_ref()
+                .clone()
+                .with_data_type(child_data.data_type().clone()),
+        )),
+        DataType::LargeList(inner_field) => DataType::LargeList(Arc::new(
+            inner_field
+                .as_ref()
+                .clone()
+                .with_data_type(child_data.data_type().clone()),
+        )),
+        _ => unreachable!(),
+    };
+
+    let builder = ArrayDataBuilder::new(corrected_type)
         .len(rows.len())
         .null_count(null_count)
         .null_bit_buffer(Some(nulls.into()))
@@ -184,3 +206,120 @@ pub unsafe fn decode<O: OffsetSizeTrait>(
 
     Ok(GenericListArray::from(unsafe { builder.build_unchecked() }))
 }
+
+/// Pushes the encoded length of every element of `array` onto `tracker`.
+///
+/// Each element costs one byte, plus the lengths of its child rows in `rows` when it is valid.
+pub fn compute_lengths_fixed_size_list(
+    tracker: &mut LengthTracker,
+    rows: &Rows,
+    array: &FixedSizeListArray,
+) {
+    let value_length = array.value_length().as_usize();
+    tracker.push_variable((0..array.len()).map(|idx| {
+        if !array.is_valid(idx) {
+            return 1;
+        }
+        let children = (idx * value_length)..(idx + 1) * value_length;
+        1 + children.map(|child| rows.row(child).as_ref().len()).sum::<usize>()
+    }))
+}
+
+/// Encodes every element of `array` into `data`, writing at and advancing the element's
+/// entry in `offsets` (element `idx` uses `offsets[idx + 1]`).
+///
+/// `rows` should contain the encoded child elements, `value_length` consecutive rows per
+/// list element. A valid element is written as a `0x01` byte followed by its child rows;
+/// a null element is written as the single null sentinel byte for `opts`.
+pub fn encode_fixed_size_list(
+    data: &mut [u8],
+    offsets: &mut [usize],
+    rows: &Rows,
+    opts: SortOptions,
+    array: &FixedSizeListArray,
+) {
+    let null_sentinel = null_sentinel(opts);
+    // Loop-invariant: read the fixed child count once instead of once per element
+    let value_length = array.value_length().as_usize();
+
+    for (idx, offset) in offsets.iter_mut().skip(1).enumerate() {
+        if !array.is_valid(idx) {
+            // Null elements carry no child data
+            data[*offset] = null_sentinel;
+            *offset += 1;
+            continue;
+        }
+        // Valid marker, then the element's child rows copied back to back
+        data[*offset] = 0x01;
+        *offset += 1;
+        for child_idx in (idx * value_length)..(idx + 1) * value_length {
+            let row = rows.row(child_idx);
+            let end_offset = *offset + row.as_ref().len();
+            data[*offset..end_offset].copy_from_slice(row.as_ref());
+            *offset = end_offset;
+        }
+    }
+}
+
+/// Decodes a fixed size list array with `value_length` children per element from `rows`,
+/// advancing each row past the bytes consumed; errors if `field` is not a `FixedSizeList`.
+///
+/// # Safety
+/// `rows` must contain valid data for the provided `converter`
+pub unsafe fn decode_fixed_size_list(
+    converter: &RowConverter,
+    rows: &mut [&[u8]],
+    field: &SortField,
+    validate_utf8: bool,
+    value_length: usize,
+) -> Result<FixedSizeListArray, ArrowError> {
+    let list_type = &field.data_type;
+    let element_type = match list_type {
+        DataType::FixedSizeList(element_field, _) => element_field.data_type(),
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Expected FixedSizeListArray, found: {list_type}",
+            )));
+        }
+    };
+    let len = rows.len();
+    let (null_count, nulls) = fixed::decode_nulls(rows);
+    // Encode one null child so that null list entries can be padded with decodable child rows
+    let null_element_encoded = converter.convert_columns(&[new_null_array(element_type, 1)])?;
+    let null_element_encoded = null_element_encoded.row(0);
+    let null_element_slice = null_element_encoded.as_ref();
+    let mut child_rows = Vec::new();
+    for row in rows {
+        let valid = row[0] == 1;
+        let mut row_offset = 1;
+        if !valid {
+            for _ in 0..value_length {
+                child_rows.push(null_element_slice);
+            }
+        } else {
+            for _ in 0..value_length {
+                // Trial-decode one child only to learn its encoded length; the result is dropped
+                let mut temp_child_rows = vec![&row[row_offset..]];
+                unsafe { converter.convert_raw(&mut temp_child_rows, validate_utf8) }?;
+                let decoded_bytes = row.len() - row_offset - temp_child_rows[0].len();
+                let next_offset = row_offset + decoded_bytes;
+                child_rows.push(&row[row_offset..next_offset]);
+                row_offset = next_offset;
+            }
+        }
+        *row = &row[row_offset..]; // Update row for the next decoder
+    }
+
+    let children = unsafe { converter.convert_raw(&mut child_rows, validate_utf8) }?;
+    let child_data = children.iter().map(|c| c.to_data()).collect();
+    // NOTE(review): the sibling list `decode` patches the decoded child type into the list
+    // type before building; here `list_type` is used as declared — confirm this is intended
+    let builder = ArrayDataBuilder::new(list_type.clone())
+        .len(len)
+        .null_count(null_count)
+        .null_bit_buffer(Some(nulls))
+        .child_data(child_data);
+    Ok(FixedSizeListArray::from(unsafe {
+        builder.build_unchecked()
+    }))
+}
diff --git a/arrow-row/src/run.rs b/arrow-row/src/run.rs
index ff7c0ffe54eb..24eaaa18e018 100644
--- a/arrow-row/src/run.rs
+++ b/arrow-row/src/run.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::{variable, RowConverter, Rows, SortField};
+use crate::{RowConverter, Rows, SortField, variable};
 use arrow_array::types::RunEndIndexType;
 use arrow_array::{PrimitiveArray, RunArray};
 use arrow_buffer::{ArrowNativeType, ScalarBuffer};
@@ -97,8 +97,8 @@ pub unsafe fn decode<R: RunEndIndexType>(
     validate_utf8: bool,
 ) -> Result<RunArray<R>, ArrowError> {
     if rows.is_empty() {
-        let values = converter.convert_raw(&mut [], validate_utf8)?;
-        let run_ends_array = PrimitiveArray::<R>::new(ScalarBuffer::from(vec![]), None);
+        let values = unsafe { converter.convert_raw(&mut [], validate_utf8) }?;
+        let run_ends_array = PrimitiveArray::<R>::try_new(ScalarBuffer::from(vec![]), None)?;
         return RunArray::<R>::try_new(&run_ends_array, &values[0]);
     }
 
@@ -134,7 +134,11 @@ pub unsafe fn decode<R: RunEndIndexType>(
                 run_ends.push(R::Native::usize_as(idx));
             }
             unique_row_indices.push(decoded_values.len());
-            decoded_values.push(decoded_data.clone());
+            let capacity = decoded_data.capacity();
+            decoded_values.push(std::mem::replace(
+                &mut decoded_data,
+                Vec::with_capacity(capacity),
+            ));
         }
     }
     // Add the final run end
@@ -143,13 +147,13 @@ pub unsafe fn decode<R: RunEndIndexType>(
     // Convert the unique decoded values using the row converter
     let mut unique_rows: Vec<&[u8]> = decoded_values.iter().map(|v| v.as_slice()).collect();
     let values = if unique_rows.is_empty() {
-        converter.convert_raw(&mut [], validate_utf8)?
+        unsafe { converter.convert_raw(&mut [], validate_utf8) }?
     } else {
-        converter.convert_raw(&mut unique_rows, validate_utf8)?
+        unsafe { converter.convert_raw(&mut unique_rows, validate_utf8) }?
     };
 
     // Create run ends array
-    let run_ends_array = PrimitiveArray::<R>::new(ScalarBuffer::from(run_ends), None);
+    let run_ends_array = PrimitiveArray::<R>::try_new(ScalarBuffer::from(run_ends), None)?;
 
     // Create the RunEndEncodedArray
     RunArray::<R>::try_new(&run_ends_array, &values[0])
diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs
index 4d4bcddc0807..73e19b197f92 100644
--- a/arrow-row/src/variable.rs
+++ b/arrow-row/src/variable.rs
@@ -17,10 +17,11 @@
 
 use crate::null_sentinel;
 use arrow_array::builder::BufferBuilder;
+use arrow_array::types::ByteArrayType;
 use arrow_array::*;
 use arrow_buffer::bit_util::ceil;
-use arrow_buffer::MutableBuffer;
-use arrow_data::ArrayDataBuilder;
+use arrow_buffer::{ArrowNativeType, MutableBuffer};
+use arrow_data::{ArrayDataBuilder, MAX_INLINE_VIEW_LEN};
 use arrow_schema::{DataType, SortOptions};
 use builder::make_view;
 
@@ -84,6 +85,48 @@ pub fn encode<'a, I: Iterator<Item = Option<&'a [u8]>>>(
     }
 }
 
+/// Encodes `input_array` with [`encode`], feeding it value slices taken directly from the
+/// array's offsets and value buffer.
+///
+/// The per-item validity check is skipped when the array has no null buffer, or a null
+/// buffer that reports no nulls.
+pub(crate) fn encode_generic_byte_array<T: ByteArrayType>(
+    data: &mut [u8],
+    offsets: &mut [usize],
+    input_array: &GenericByteArray<T>,
+    opts: SortOptions,
+) {
+    let value_offsets = input_array.value_offsets();
+    let value_bytes = input_array.values().as_slice();
+
+    match input_array.nulls().filter(|nulls| nulls.null_count() > 0) {
+        Some(validity) => {
+            let items = value_offsets
+                .windows(2)
+                .zip(validity.iter())
+                .map(|(bounds, is_valid)| {
+                    is_valid.then(|| {
+                        let span = bounds[0].as_usize()..bounds[1].as_usize();
+                        // SAFETY: the offsets of the input are valid by construction
+                        // so it is ok to use unsafe here
+                        unsafe { value_bytes.get_unchecked(span) }
+                    })
+                });
+            encode(data, offsets, items, opts);
+        }
+        // Skip null checks: every item is passed on as valid
+        None => {
+            let items = value_offsets.windows(2).map(|bounds| {
+                let span = bounds[0].as_usize()..bounds[1].as_usize();
+                // SAFETY: the offsets of the input are valid by construction
+                // so it is ok to use unsafe here
+                Some(unsafe { value_bytes.get_unchecked(span) })
+            });
+            encode(data, offsets, items, opts);
+        }
+    }
+}
+
 pub fn encode_null(out: &mut [u8], opts: SortOptions) -> usize {
     out[0] = null_sentinel(opts);
     1
@@ -97,6 +140,7 @@ pub fn encode_empty(out: &mut [u8], opts: SortOptions) -> usize {
     1
 }
 
+#[inline]
 pub fn encode_one(out: &mut [u8], val: Option<&[u8]>, opts: SortOptions) -> usize {
     match val {
         None => encode_null(out, opts),
@@ -249,9 +293,10 @@ pub fn decode_binary<I: OffsetSizeTrait>(
 fn decode_binary_view_inner(
     rows: &mut [&[u8]],
     options: SortOptions,
-    check_utf8: bool,
+    validate_utf8: bool,
 ) -> BinaryViewArray {
     let len = rows.len();
+    let inline_str_max_len = MAX_INLINE_VIEW_LEN as usize;
 
     let mut null_count = 0;
 
@@ -261,13 +306,33 @@ fn decode_binary_view_inner(
         valid
     });
 
-    let values_capacity: usize = rows.iter().map(|row| decoded_len(row, options)).sum();
+    // If we are validating UTF-8, decode all string values (including short strings)
+    // into the values buffer and validate UTF-8 once. If not validating,
+    // we save memory by only copying long strings to the values buffer, as short strings
+    // will be inlined into the view and do not need to be stored redundantly.
+    let values_capacity = if validate_utf8 {
+        // Capacity for all long and short strings
+        rows.iter().map(|row| decoded_len(row, options)).sum()
+    } else {
+        // Capacity for all long strings plus room for one short string
+        rows.iter().fold(0, |acc, row| {
+            let len = decoded_len(row, options);
+            if len > inline_str_max_len {
+                acc + len
+            } else {
+                acc
+            }
+        }) + inline_str_max_len
+    };
     let mut values = MutableBuffer::new(values_capacity);
-    let mut views = BufferBuilder::<u128>::new(len);
 
+    let mut views = BufferBuilder::<u128>::new(len);
     for row in rows {
         let start_offset = values.len();
         let offset = decode_blocks(row, options, |b| values.extend_from_slice(b));
+        // Measure string length via change in values buffer.
+        // Used to check if decoded value should be truncated (short string) when validate_utf8 is false
+        let decoded_len = values.len() - start_offset;
         if row[0] == null_sentinel(options) {
             debug_assert_eq!(offset, 1);
             debug_assert_eq!(start_offset, values.len());
@@ -282,11 +347,16 @@ fn decode_binary_view_inner(
 
             let view = make_view(val, 0, start_offset as u32);
             views.append(view);
+
+            // truncate inline string in values buffer if validate_utf8 is false
+            if !validate_utf8 && decoded_len <= inline_str_max_len {
+                values.truncate(start_offset);
+            }
         }
         *row = &row[offset..];
     }
 
-    if check_utf8 {
+    if validate_utf8 {
         // the values contains all data, no matter if it is short or long
         // we can validate utf8 in one go.
         std::str::from_utf8(values.as_slice()).unwrap();
@@ -332,7 +402,7 @@ pub unsafe fn decode_string<I: OffsetSizeTrait>(
 
     // SAFETY:
     // Row data must have come from a valid UTF-8 array
-    GenericStringArray::from(builder.build_unchecked())
+    GenericStringArray::from(unsafe { builder.build_unchecked() })
 }
 
 /// Decodes a string view array from `rows` with the provided `options`
@@ -346,5 +416,5 @@ pub unsafe fn decode_string_view(
     validate_utf8: bool,
 ) -> StringViewArray {
     let view = decode_binary_view_inner(rows, options, validate_utf8);
-    view.to_string_view_unchecked()
+    unsafe { view.to_string_view_unchecked() }
 }
diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml
index 314c8f7a3515..fb6461a9e9ae 100644
--- a/arrow-schema/Cargo.toml
+++ b/arrow-schema/Cargo.toml
@@ -33,26 +33,29 @@ name = "arrow_schema"
 bench = false
 
 [dependencies]
-serde = { version = "1.0", default-features = false, features = [
-    "derive",
+serde_core = { version = "1.0", default-features = false, features = [
     "std",
     "rc",
 ], optional = true }
+serde = { version = "1.0", default-features = false, features = [
+    "derive",
+], optional = true }
 bitflags = { version = "2.0.0", default-features = false, optional = true }
 serde_json = { version = "1.0", optional = true }
 
 [features]
-canonical_extension_types = ["dep:serde", "dep:serde_json"]
+canonical_extension_types = ["dep:serde_core", "dep:serde_json"]
 # Enable ffi support
 ffi = ["bitflags"]
-serde = ["dep:serde"]
+serde = ["dep:serde_core", "dep:serde"]
 
 [package.metadata.docs.rs]
 all-features = true
 
 [dev-dependencies]
-bincode = { version = "1.3.3", default-features = false }
-criterion = { version = "0.5", default-features = false }
+criterion = { workspace = true, default-features = false }
+insta = "1.43.1"
+postcard = { version = "1.0.10", default-features = false, features = ["use-std"] }
 
 [[bench]]
 name = "ffi"
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index f22b6c52ba34..40c28649c25b 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
 
@@ -92,7 +91,7 @@ use crate::{ArrowError, Field, FieldRef, Fields, UnionFields};
 ///
 /// [`Schema.fbs`]: https://github.com/apache/arrow/blob/main/format/Schema.fbs
 /// [the physical memory layout of Apache Arrow]: https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub enum DataType {
     /// Null type
@@ -357,6 +356,34 @@ pub enum DataType {
     /// This type mostly used to represent low cardinality string
     /// arrays or a limited set of primitive types as integers.
     Dictionary(Box<DataType>, Box<DataType>),
+    /// Exact 32-bit width decimal value with precision and scale
+    ///
+    /// * precision is the total number of digits
+    /// * scale is the number of digits past the decimal
+    ///
+    /// For example the number 123.45 has precision 5 and scale 2.
+    ///
+    /// In certain situations, scale could be negative number. For
+    /// negative scale, it is the number of padding 0 to the right
+    /// of the digits.
+    ///
+    /// For example the number 12300 could be treated as a decimal
+    /// has precision 3 and scale -2.
+    Decimal32(u8, i8),
+    /// Exact 64-bit width decimal value with precision and scale
+    ///
+    /// * precision is the total number of digits
+    /// * scale is the number of digits past the decimal
+    ///
+    /// For example the number 123.45 has precision 5 and scale 2.
+    ///
+    /// In certain situations, scale could be negative number. For
+    /// negative scale, it is the number of padding 0 to the right
+    /// of the digits.
+    ///
+    /// For example the number 12300 could be treated as a decimal
+    /// has precision 3 and scale -2.
+    Decimal64(u8, i8),
     /// Exact 128-bit width decimal value with precision and scale
     ///
     /// * precision is the total number of digits
@@ -427,6 +454,17 @@ pub enum TimeUnit {
     Nanosecond,
 }
 
+impl std::fmt::Display for TimeUnit {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            TimeUnit::Second => "s",
+            TimeUnit::Millisecond => "ms",
+            TimeUnit::Microsecond => "µs",
+            TimeUnit::Nanosecond => "ns",
+        }) // each unit is written as its abbreviated suffix
+    }
+}
+
 /// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -456,27 +494,6 @@ pub enum UnionMode {
     Dense,
 }
 
-impl fmt::Display for DataType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match &self {
-            DataType::Struct(fields) => {
-                write!(f, "Struct(")?;
-                if !fields.is_empty() {
-                    let fields_str = fields
-                        .iter()
-                        .map(|f| format!("{} {}", f.name(), f.data_type()))
-                        .collect::<Vec<_>>()
-                        .join(", ");
-                    write!(f, "{}", fields_str)?;
-                }
-                write!(f, ")")?;
-                Ok(())
-            }
-            _ => write!(f, "{self:?}"),
-        }
-    }
-}
-
 /// Parses `str` into a `DataType`.
 ///
 /// This is the reverse of [`DataType`]'s `Display`
@@ -530,6 +547,8 @@ impl DataType {
                 | Float16
                 | Float32
                 | Float64
+                | Decimal32(_, _)
+                | Decimal64(_, _)
                 | Decimal128(_, _)
                 | Decimal256(_, _)
         )
@@ -572,6 +591,16 @@ impl DataType {
         matches!(self, UInt8 | UInt16 | UInt32 | UInt64)
     }
 
+    /// Returns true if this type is one of the decimal types:
+    /// `Decimal32`, `Decimal64`, `Decimal128` or `Decimal256`.
+    #[inline]
+    pub fn is_decimal(&self) -> bool {
+        matches!(
+            self,
+            Self::Decimal32(..) | Self::Decimal64(..) | Self::Decimal128(..) | Self::Decimal256(..)
+        )
+    }
+
     /// Returns true if this type is valid as a dictionary key
     #[inline]
     pub fn is_dictionary_key_type(&self) -> bool {
@@ -612,6 +641,13 @@ impl DataType {
         matches!(self, Null)
     }
 
+    /// Returns true if this type is one of the string types `Utf8`, `LargeUtf8` or
+    /// `Utf8View`.
+    #[inline]
+    pub fn is_string(&self) -> bool {
+        matches!(self, Self::Utf8 | Self::LargeUtf8 | Self::Utf8View)
+    }
+
     /// Compares the datatype with another, ignoring nested field names
     /// and metadata.
     pub fn equals_datatype(&self, other: &DataType) -> bool {
@@ -690,6 +726,8 @@ impl DataType {
             DataType::Interval(IntervalUnit::YearMonth) => Some(4),
             DataType::Interval(IntervalUnit::DayTime) => Some(8),
             DataType::Interval(IntervalUnit::MonthDayNano) => Some(16),
+            DataType::Decimal32(_, _) => Some(4),
+            DataType::Decimal64(_, _) => Some(8),
             DataType::Decimal128(_, _) => Some(16),
             DataType::Decimal256(_, _) => Some(32),
             DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None,
@@ -740,6 +778,8 @@ impl DataType {
                 | DataType::Utf8
                 | DataType::LargeUtf8
                 | DataType::Utf8View
+                | DataType::Decimal32(_, _)
+                | DataType::Decimal64(_, _)
                 | DataType::Decimal128(_, _)
                 | DataType::Decimal256(_, _) => 0,
                 DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(),
@@ -815,6 +855,18 @@ impl DataType {
     }
 }
 
+/// The maximum precision for [DataType::Decimal32] values
+pub const DECIMAL32_MAX_PRECISION: u8 = 9;
+
+/// The maximum scale for [DataType::Decimal32] values
+pub const DECIMAL32_MAX_SCALE: i8 = 9;
+
+/// The maximum precision for [DataType::Decimal64] values
+pub const DECIMAL64_MAX_PRECISION: u8 = 18;
+
+/// The maximum scale for [DataType::Decimal64] values
+pub const DECIMAL64_MAX_SCALE: i8 = 18;
+
 /// The maximum precision for [DataType::Decimal128] values
 pub const DECIMAL128_MAX_PRECISION: u8 = 38;
 
@@ -827,6 +879,12 @@ pub const DECIMAL256_MAX_PRECISION: u8 = 76;
 /// The maximum scale for [DataType::Decimal256] values
 pub const DECIMAL256_MAX_SCALE: i8 = 76;
 
+/// The default scale for [DataType::Decimal32] values
+pub const DECIMAL32_DEFAULT_SCALE: i8 = 2;
+
+/// The default scale for [DataType::Decimal64] values
+pub const DECIMAL64_DEFAULT_SCALE: i8 = 6;
+
 /// The default scale for [DataType::Decimal128] and [DataType::Decimal256]
 /// values
 pub const DECIMAL_DEFAULT_SCALE: i8 = 10;
@@ -953,53 +1011,58 @@ mod tests {
         assert!(!list_s.equals_datatype(&list_v));
 
         let union_a = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![1, 2],
                 vec![
                     Field::new("f1", DataType::Utf8, false),
                     Field::new("f2", DataType::UInt8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
         let union_b = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![1, 2],
                 vec![
                     Field::new("ff1", DataType::Utf8, false),
                     Field::new("ff2", DataType::UInt8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
         let union_c = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![2, 1],
                 vec![
                     Field::new("fff2", DataType::UInt8, false),
                     Field::new("fff1", DataType::Utf8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
         let union_d = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![2, 1],
                 vec![
                     Field::new("fff1", DataType::Int8, false),
                     Field::new("fff2", DataType::UInt8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
         let union_e = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![1, 2],
                 vec![
                     Field::new("f1", DataType::Utf8, true),
                     Field::new("f2", DataType::UInt8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         );
 
@@ -1101,12 +1164,29 @@ mod tests {
         assert!(!DataType::is_dictionary_key_type(&DataType::Float16));
     }
 
+    #[test]
+    fn test_string() {
+        assert!(DataType::is_string(&DataType::Utf8));
+        assert!(DataType::is_string(&DataType::LargeUtf8));
+        assert!(DataType::is_string(&DataType::Utf8View));
+        assert!(!DataType::is_string(&DataType::Int32));
+    }
+
     #[test]
     fn test_floating() {
         assert!(DataType::is_floating(&DataType::Float16));
         assert!(!DataType::is_floating(&DataType::Int32));
     }
 
+    #[test]
+    fn test_decimal() {
+        assert!(DataType::is_decimal(&DataType::Decimal32(4, 2)));
+        assert!(DataType::is_decimal(&DataType::Decimal64(4, 2)));
+        assert!(DataType::is_decimal(&DataType::Decimal128(4, 2)));
+        assert!(DataType::is_decimal(&DataType::Decimal256(4, 2)));
+        assert!(!DataType::is_decimal(&DataType::Float16));
+    }
+
     #[test]
     fn test_datatype_is_null() {
         assert!(DataType::is_null(&DataType::Null));
@@ -1123,13 +1203,14 @@ mod tests {
     fn test_union_with_duplicated_type_id() {
         let type_ids = vec![1, 1];
         let _union = DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 type_ids,
                 vec![
                     Field::new("f1", DataType::Int32, false),
                     Field::new("f2", DataType::Utf8, false),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         );
     }
@@ -1145,4 +1226,17 @@ mod tests {
         let data_type: DataType = "UInt64".parse().unwrap();
         assert_eq!(data_type, DataType::UInt64);
     }
+
+    #[test]
+    #[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
+    fn test_debug_format_field() {
+        // Make sure the `Debug` formatting of `DataType` is readable and not too long
+        insta::assert_debug_snapshot!(DataType::new_list(DataType::Int8, false), @r"
+        List(
+            Field {
+                data_type: Int8,
+            },
+        )
+        ");
+    }
 }
diff --git a/arrow-schema/src/datatype_display.rs b/arrow-schema/src/datatype_display.rs
new file mode 100644
index 000000000000..cca7cf254fc3
--- /dev/null
+++ b/arrow-schema/src/datatype_display.rs
@@ -0,0 +1,556 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::DataType;
+use std::fmt::Display;
+use std::{collections::HashMap, fmt};
+
+impl Display for DataType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fn format_metadata(metadata: &HashMap<String, String>) -> String {
+            format!("{}", FormatMetadata(metadata))
+        }
+
+        fn format_nullability(field: &crate::Field) -> &str {
+            if field.is_nullable() { "" } else { "non-null " }
+        }
+
+        fn format_field(field: &crate::Field) -> String {
+            let name = field.name();
+            let maybe_nullable = format_nullability(field);
+            let data_type = field.data_type();
+            let metadata_str = format_metadata(field.metadata());
+            format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
+        }
+
+        // A lot of these can still be improved a lot.
+        // _Some_ of these can be parsed with `FromStr`, but not all (YET!).
+        // The goal is that the formatting should always be
+        // * Terse and readable
+        // * Reversible (contain all necessary information to reverse it perfectly)
+
+        match &self {
+            Self::Null => write!(f, "Null"),
+            Self::Boolean => write!(f, "Boolean"),
+            Self::Int8 => write!(f, "Int8"),
+            Self::Int16 => write!(f, "Int16"),
+            Self::Int32 => write!(f, "Int32"),
+            Self::Int64 => write!(f, "Int64"),
+            Self::UInt8 => write!(f, "UInt8"),
+            Self::UInt16 => write!(f, "UInt16"),
+            Self::UInt32 => write!(f, "UInt32"),
+            Self::UInt64 => write!(f, "UInt64"),
+            Self::Float16 => write!(f, "Float16"),
+            Self::Float32 => write!(f, "Float32"),
+            Self::Float64 => write!(f, "Float64"),
+            Self::Timestamp(time_unit, timezone) => {
+                if let Some(timezone) = timezone {
+                    write!(f, "Timestamp({time_unit}, {timezone:?})")
+                } else {
+                    write!(f, "Timestamp({time_unit})")
+                }
+            }
+            Self::Date32 => write!(f, "Date32"),
+            Self::Date64 => write!(f, "Date64"),
+            Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
+            Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
+            Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
+            Self::Interval(interval_unit) => write!(f, "Interval({interval_unit:?})"),
+            Self::Binary => write!(f, "Binary"),
+            Self::FixedSizeBinary(bytes_per_value) => {
+                write!(f, "FixedSizeBinary({bytes_per_value:?})")
+            }
+            Self::LargeBinary => write!(f, "LargeBinary"),
+            Self::BinaryView => write!(f, "BinaryView"),
+            Self::Utf8 => write!(f, "Utf8"),
+            Self::LargeUtf8 => write!(f, "LargeUtf8"),
+            Self::Utf8View => write!(f, "Utf8View"),
+            Self::List(field)
+            | Self::LargeList(field)
+            | Self::ListView(field)
+            | Self::LargeListView(field) => {
+                let type_name = if matches!(self, Self::List(_)) {
+                    "List"
+                } else if matches!(self, Self::ListView(_)) {
+                    "ListView"
+                } else if matches!(self, Self::LargeList(_)) {
+                    "LargeList"
+                } else {
+                    "LargeListView"
+                };
+
+                let name = field.name();
+                let maybe_nullable = format_nullability(field);
+                let data_type = field.data_type();
+                let field_name_str = if name == "item" {
+                    String::default()
+                } else {
+                    format!(", field: '{name}'")
+                };
+                let metadata_str = format_metadata(field.metadata());
+
+                // e.g. `LargeList(non-null UInt32)`
+                write!(
+                    f,
+                    "{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
+                )
+            }
+            Self::FixedSizeList(field, size) => {
+                let name = field.name();
+                let maybe_nullable = format_nullability(field);
+                let data_type = field.data_type();
+                let field_name_str = if name == "item" {
+                    String::default()
+                } else {
+                    format!(", field: '{name}'")
+                };
+                let metadata_str = format_metadata(field.metadata());
+
+                write!(
+                    f,
+                    "FixedSizeList({size} x {maybe_nullable}{data_type}{field_name_str}{metadata_str})",
+                )
+            }
+            Self::Struct(fields) => {
+                write!(f, "Struct(")?;
+                if !fields.is_empty() {
+                    let fields_str = fields
+                        .iter()
+                        .map(|field| format_field(field))
+                        .collect::<Vec<_>>()
+                        .join(", ");
+                    write!(f, "{fields_str}")?;
+                }
+                write!(f, ")")?;
+                Ok(())
+            }
+            Self::Union(union_fields, union_mode) => {
+                write!(f, "Union({union_mode:?}")?;
+                if !union_fields.is_empty() {
+                    write!(f, ", ")?;
+                    let fields_str = union_fields
+                        .iter()
+                        .map(|v| {
+                            let type_id = v.0;
+                            let field_str = format_field(v.1);
+                            format!("{type_id:?}: ({field_str})")
+                        })
+                        .collect::<Vec<_>>()
+                        .join(", ");
+                    write!(f, "{fields_str}")?;
+                }
+                write!(f, ")")?;
+                Ok(())
+            }
+            Self::Dictionary(data_type, data_type1) => {
+                write!(f, "Dictionary({data_type}, {data_type1})")
+            }
+            Self::Decimal32(precision, scale) => write!(f, "Decimal32({precision}, {scale})"),
+            Self::Decimal64(precision, scale) => write!(f, "Decimal64({precision}, {scale})"),
+            Self::Decimal128(precision, scale) => write!(f, "Decimal128({precision}, {scale})"),
+            Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
+            Self::Map(field, sorted) => {
+                write!(f, "Map(")?;
+                let map_field_str = format_field(field);
+                let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
+
+                write!(f, "{map_field_str}, {keys_are_sorted})")?;
+                Ok(())
+            }
+            Self::RunEndEncoded(run_ends_field, values_field) => {
+                write!(f, "RunEndEncoded(")?;
+                let run_ends_str = format_field(run_ends_field);
+                let values_str = format_field(values_field);
+
+                write!(f, "{run_ends_str}, {values_str})")?;
+                Ok(())
+            }
+        }
+    }
+}
+
+/// Display adapter that renders field metadata as a `, metadata: {..}` suffix.
+struct FormatMetadata<'a>(&'a HashMap<String, String>);
+
+impl fmt::Display for FormatMetadata<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Empty metadata contributes nothing to the output.
+        if self.0.is_empty() {
+            return Ok(());
+        }
+        // Sort by key so the output does not depend on `HashMap` iteration order.
+        let mut sorted: Vec<(&String, &String)> = self.0.iter().collect();
+        sorted.sort_by(|lhs, rhs| lhs.0.cmp(rhs.0));
+        write!(f, ", metadata: ")?;
+        f.debug_map().entries(sorted).finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use std::sync::Arc;
+
+    use crate::Field;
+
+    use super::*;
+
+    #[test]
+    fn test_display_list() {
+        let list_data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
+        let list_data_type_string = list_data_type.to_string();
+        let expected_string = "List(Int32)";
+        assert_eq!(list_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_list_view() {
+        let list_view_data_type =
+            DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        let list_view_data_type_string = list_view_data_type.to_string();
+        let expected_string = "ListView(Int32)";
+        assert_eq!(list_view_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_list_with_named_field() {
+        let list_data_type = DataType::List(Arc::new(Field::new("foo", DataType::UInt64, false)));
+        let list_data_type_string = list_data_type.to_string();
+        let expected_string = "List(non-null UInt64, field: 'foo')";
+        assert_eq!(list_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_list_view_with_named_field() {
+        let list_view_data_type =
+            DataType::ListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
+        let list_view_data_type_string = list_view_data_type.to_string();
+        let expected_string = "ListView(non-null UInt64, field: 'bar')";
+        assert_eq!(list_view_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_nested_list() {
+        let nested_data_type = DataType::List(Arc::new(Field::new_list_field(
+            DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+            false,
+        )));
+        let nested_data_type_string = nested_data_type.to_string();
+        let nested_expected_string = "List(non-null List(non-null UInt64))";
+        assert_eq!(nested_data_type_string, nested_expected_string);
+    }
+
+    #[test]
+    fn test_display_nested_list_view() {
+        let nested_view_data_type = DataType::ListView(Arc::new(Field::new_list_field(
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+            false,
+        )));
+        let nested_view_data_type_string = nested_view_data_type.to_string();
+        let nested_view_expected_string = "ListView(non-null ListView(non-null UInt64))";
+        assert_eq!(nested_view_data_type_string, nested_view_expected_string);
+    }
+
+    #[test]
+    fn test_display_list_with_metadata() {
+        let mut field = Field::new_list_field(DataType::Int32, true);
+        let metadata = HashMap::from([("foo1".to_string(), "value1".to_string())]);
+        field.set_metadata(metadata);
+        let list_data_type = DataType::List(Arc::new(field));
+        let list_data_type_string = list_data_type.to_string();
+        let expected_string = "List(Int32, metadata: {\"foo1\": \"value1\"})";
+
+        assert_eq!(list_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_list_view_with_metadata() {
+        let mut field = Field::new_list_field(DataType::Int32, true);
+        let metadata = HashMap::from([("foo2".to_string(), "value2".to_string())]);
+        field.set_metadata(metadata);
+        let list_view_data_type = DataType::ListView(Arc::new(field));
+        let list_view_data_type_string = list_view_data_type.to_string();
+        let expected_string = "ListView(Int32, metadata: {\"foo2\": \"value2\"})";
+        assert_eq!(list_view_data_type_string, expected_string);
+    }
+
+    #[test]
+    fn test_display_large_list() {
+        let large_list_data_type =
+            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
+        let large_list_data_type_string = large_list_data_type.to_string();
+        let expected_string = "LargeList(Int32)";
+        assert_eq!(large_list_data_type_string, expected_string);
+
+        // Test with named field
+        let large_list_named =
+            DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64, false)));
+        let large_list_named_string = large_list_named.to_string();
+        let expected_named_string = "LargeList(non-null UInt64, field: 'bar')";
+        assert_eq!(large_list_named_string, expected_named_string);
+
+        // Test with metadata
+        let mut field = Field::new_list_field(DataType::Int32, true);
+        let metadata = HashMap::from([("key1".to_string(), "value1".to_string())]);
+        field.set_metadata(metadata);
+        let large_list_metadata = DataType::LargeList(Arc::new(field));
+        let large_list_metadata_string = large_list_metadata.to_string();
+        let expected_metadata_string = "LargeList(Int32, metadata: {\"key1\": \"value1\"})";
+        assert_eq!(large_list_metadata_string, expected_metadata_string);
+    }
+
+    #[test]
+    fn test_display_large_list_view() {
+        let large_list_view_data_type =
+            DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
+        let large_list_view_data_type_string = large_list_view_data_type.to_string();
+        let expected_string = "LargeListView(Int32)";
+        assert_eq!(large_list_view_data_type_string, expected_string);
+
+        // Test with named field
+        let large_list_view_named =
+            DataType::LargeListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
+        let large_list_view_named_string = large_list_view_named.to_string();
+        let expected_named_string = "LargeListView(non-null UInt64, field: 'bar')";
+        assert_eq!(large_list_view_named_string, expected_named_string);
+
+        // Test with metadata
+        let mut field = Field::new_list_field(DataType::Int32, true);
+        let metadata = HashMap::from([("key1".to_string(), "value1".to_string())]);
+        field.set_metadata(metadata);
+        let large_list_view_metadata = DataType::LargeListView(Arc::new(field));
+        let large_list_view_metadata_string = large_list_view_metadata.to_string();
+        let expected_metadata_string = "LargeListView(Int32, metadata: {\"key1\": \"value1\"})";
+        assert_eq!(large_list_view_metadata_string, expected_metadata_string);
+    }
+
+    #[test]
+    fn test_display_fixed_size_list() {
+        let fixed_size_list =
+            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 5);
+        let fixed_size_list_string = fixed_size_list.to_string();
+        let expected_string = "FixedSizeList(5 x Int32)";
+        assert_eq!(fixed_size_list_string, expected_string);
+
+        // Test with named field
+        let fixed_size_named =
+            DataType::FixedSizeList(Arc::new(Field::new("baz", DataType::UInt64, false)), 3);
+        let fixed_size_named_string = fixed_size_named.to_string();
+        let expected_named_string = "FixedSizeList(3 x non-null UInt64, field: 'baz')";
+        assert_eq!(fixed_size_named_string, expected_named_string);
+
+        // Test with metadata
+        let mut field = Field::new_list_field(DataType::Int32, true);
+        let metadata = HashMap::from([("key2".to_string(), "value2".to_string())]);
+        field.set_metadata(metadata);
+        let fixed_size_metadata = DataType::FixedSizeList(Arc::new(field), 4);
+        let fixed_size_metadata_string = fixed_size_metadata.to_string();
+        let expected_metadata_string = "FixedSizeList(4 x Int32, metadata: {\"key2\": \"value2\"})";
+        assert_eq!(fixed_size_metadata_string, expected_metadata_string);
+    }
+
+    #[test]
+    fn test_display_struct() {
+        let fields = vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, true),
+        ];
+        let struct_data_type = DataType::Struct(fields.into());
+        let struct_data_type_string = struct_data_type.to_string();
+        let expected_string = "Struct(\"a\": non-null Int32, \"b\": Utf8)";
+        assert_eq!(struct_data_type_string, expected_string);
+
+        // Test with metadata
+        let mut field_with_metadata = Field::new("b", DataType::Utf8, true);
+        let metadata = HashMap::from([
+            ("key".to_string(), "value".to_string()),
+            ("key2".to_string(), "value2".to_string()),
+        ]);
+        field_with_metadata.set_metadata(metadata);
+        let struct_fields_with_metadata =
+            vec![Field::new("a", DataType::Int32, false), field_with_metadata];
+        let struct_data_type_with_metadata = DataType::Struct(struct_fields_with_metadata.into());
+        let struct_data_type_with_metadata_string = struct_data_type_with_metadata.to_string();
+        let expected_string_with_metadata = "Struct(\"a\": non-null Int32, \"b\": Utf8, metadata: {\"key\": \"value\", \"key2\": \"value2\"})";
+        assert_eq!(
+            struct_data_type_with_metadata_string,
+            expected_string_with_metadata
+        );
+    }
+
+    #[test]
+    fn test_display_union() {
+        let fields = vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, true),
+        ];
+        let type_ids = vec![0, 1];
+        let union_fields = type_ids
+            .into_iter()
+            .zip(fields.into_iter().map(Arc::new))
+            .collect();
+
+        let union_data_type = DataType::Union(union_fields, crate::UnionMode::Sparse);
+        let union_data_type_string = union_data_type.to_string();
+        let expected_string = "Union(Sparse, 0: (\"a\": non-null Int32), 1: (\"b\": Utf8))";
+        assert_eq!(union_data_type_string, expected_string);
+
+        // Test with metadata
+        let mut field_with_metadata = Field::new("b", DataType::Utf8, true);
+        let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
+        field_with_metadata.set_metadata(metadata);
+        let union_fields_with_metadata = vec![
+            (0, Arc::new(Field::new("a", DataType::Int32, false))),
+            (1, Arc::new(field_with_metadata)),
+        ]
+        .into_iter()
+        .collect();
+        let union_data_type_with_metadata =
+            DataType::Union(union_fields_with_metadata, crate::UnionMode::Sparse);
+        let union_data_type_with_metadata_string = union_data_type_with_metadata.to_string();
+        let expected_string_with_metadata = "Union(Sparse, 0: (\"a\": non-null Int32), 1: (\"b\": Utf8, metadata: {\"key\": \"value\"}))";
+        assert_eq!(
+            union_data_type_with_metadata_string,
+            expected_string_with_metadata
+        );
+    }
+
+    #[test]
+    fn test_display_map() {
+        let entry_field = Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new("value", DataType::Int32, true),
+                ]
+                .into(),
+            ),
+            false,
+        );
+        let map_data_type = DataType::Map(Arc::new(entry_field), true);
+        let map_data_type_string = map_data_type.to_string();
+        let expected_string =
+            "Map(\"entries\": non-null Struct(\"key\": non-null Utf8, \"value\": Int32), sorted)";
+        assert_eq!(map_data_type_string, expected_string);
+
+        // Test with metadata
+        let mut entry_field_with_metadata = Field::new(
+            "entries",
+            DataType::Struct(
+                vec![
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new("value", DataType::Int32, true),
+                ]
+                .into(),
+            ),
+            false,
+        );
+        let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
+        entry_field_with_metadata.set_metadata(metadata);
+        let map_data_type_with_metadata = DataType::Map(Arc::new(entry_field_with_metadata), true);
+        let map_data_type_with_metadata_string = map_data_type_with_metadata.to_string();
+        let expected_string_with_metadata = "Map(\"entries\": non-null Struct(\"key\": non-null Utf8, \"value\": Int32), metadata: {\"key\": \"value\"}, sorted)";
+        assert_eq!(
+            map_data_type_with_metadata_string,
+            expected_string_with_metadata
+        );
+    }
+
+    #[test]
+    fn test_display_run_end_encoded() {
+        let run_ends_field = Arc::new(Field::new("run_ends", DataType::UInt32, false));
+        let values_field = Arc::new(Field::new("values", DataType::Int32, true));
+        let ree_data_type = DataType::RunEndEncoded(run_ends_field.clone(), values_field.clone());
+        let ree_data_type_string = ree_data_type.to_string();
+        let expected_string = "RunEndEncoded(\"run_ends\": non-null UInt32, \"values\": Int32)";
+        assert_eq!(ree_data_type_string, expected_string);
+
+        // Test with metadata
+        let mut run_ends_field_with_metadata = Field::new("run_ends", DataType::UInt32, false);
+        let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
+        run_ends_field_with_metadata.set_metadata(metadata);
+        let ree_data_type_with_metadata =
+            DataType::RunEndEncoded(Arc::new(run_ends_field_with_metadata), values_field.clone());
+        let ree_data_type_with_metadata_string = ree_data_type_with_metadata.to_string();
+        let expected_string_with_metadata = "RunEndEncoded(\"run_ends\": non-null UInt32, metadata: {\"key\": \"value\"}, \"values\": Int32)";
+        assert_eq!(
+            ree_data_type_with_metadata_string,
+            expected_string_with_metadata
+        );
+    }
+
+    #[test]
+    fn test_display_dictionary() {
+        let dict_data_type =
+            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
+        let dict_data_type_string = dict_data_type.to_string();
+        let expected_string = "Dictionary(Int8, Utf8)";
+        assert_eq!(dict_data_type_string, expected_string);
+
+        // Test with complex index and value types
+        let complex_dict_data_type = DataType::Dictionary(
+            Box::new(DataType::Int16),
+            Box::new(DataType::Struct(
+                vec![
+                    Field::new("a", DataType::Int32, false),
+                    Field::new("b", DataType::Utf8, true),
+                ]
+                .into(),
+            )),
+        );
+        let complex_dict_data_type_string = complex_dict_data_type.to_string();
+        let expected_complex_string =
+            "Dictionary(Int16, Struct(\"a\": non-null Int32, \"b\": Utf8))";
+        assert_eq!(complex_dict_data_type_string, expected_complex_string);
+    }
+
+    #[test]
+    fn test_display_interval() {
+        let interval_year_month = DataType::Interval(crate::IntervalUnit::YearMonth);
+        let interval_year_month_string = interval_year_month.to_string();
+        let expected_year_month_string = "Interval(YearMonth)";
+        assert_eq!(interval_year_month_string, expected_year_month_string);
+
+        let interval_day_time = DataType::Interval(crate::IntervalUnit::DayTime);
+        let interval_day_time_string = interval_day_time.to_string();
+        let expected_day_time_string = "Interval(DayTime)";
+        assert_eq!(interval_day_time_string, expected_day_time_string);
+
+        let interval_month_day_nano = DataType::Interval(crate::IntervalUnit::MonthDayNano);
+        let interval_month_day_nano_string = interval_month_day_nano.to_string();
+        let expected_month_day_nano_string = "Interval(MonthDayNano)";
+        assert_eq!(
+            interval_month_day_nano_string,
+            expected_month_day_nano_string
+        );
+    }
+
+    #[test]
+    fn test_display_timestamp() {
+        let timestamp_without_tz = DataType::Timestamp(crate::TimeUnit::Microsecond, None);
+        let timestamp_without_tz_string = timestamp_without_tz.to_string();
+        let expected_without_tz_string = "Timestamp(µs)";
+        assert_eq!(timestamp_without_tz_string, expected_without_tz_string);
+
+        let timestamp_with_tz =
+            DataType::Timestamp(crate::TimeUnit::Nanosecond, Some(Arc::from("UTC")));
+        let timestamp_with_tz_string = timestamp_with_tz.to_string();
+        let expected_with_tz_string = "Timestamp(ns, \"UTC\")";
+        assert_eq!(timestamp_with_tz_string, expected_with_tz_string);
+    }
+}
diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs
index 70e4b351ff50..9349635151aa 100644
--- a/arrow-schema/src/datatype_parse.rs
+++ b/arrow-schema/src/datatype_parse.rs
@@ -17,8 +17,11 @@
 
 use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
 
-use crate::{ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit};
+use crate::{ArrowError, DataType, Field, Fields, IntervalUnit, TimeUnit, UnionFields, UnionMode};
 
+/// Parses a DataType from a string representation
+///
+/// For example, the string "Int32" would be parsed into [`DataType::Int32`]
 pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
     Parser::new(val).parse()
 }
@@ -26,7 +29,9 @@ pub(crate) fn parse_data_type(val: &str) -> ArrowResult<DataType> {
 type ArrowResult<T> = Result<T, ArrowError>;
 
 fn make_error(val: &str, msg: &str) -> ArrowError {
-    let msg = format!("Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error {msg}" );
+    let msg = format!(
+        "Unsupported type '{val}'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error {msg}"
+    );
     ArrowError::ParseError(msg)
 }
 
@@ -34,18 +39,18 @@ fn make_error_expected(val: &str, expected: &Token, actual: &Token) -> ArrowErro
     make_error(val, &format!("Expected '{expected}', got '{actual}'"))
 }
 
-#[derive(Debug)]
 /// Implementation of `parse_data_type`, modeled after <https://github.com/sqlparser-rs/sqlparser-rs>
+#[derive(Debug)]
 struct Parser<'a> {
     val: &'a str,
-    tokenizer: Tokenizer<'a>,
+    tokenizer: Peekable<Tokenizer<'a>>,
 }
 
 impl<'a> Parser<'a> {
     fn new(val: &'a str) -> Self {
         Self {
             val,
-            tokenizer: Tokenizer::new(val),
+            tokenizer: Tokenizer::new(val).peekable(),
         }
     }
 
@@ -72,17 +77,20 @@ impl<'a> Parser<'a> {
             Token::Duration => self.parse_duration(),
             Token::Interval => self.parse_interval(),
             Token::FixedSizeBinary => self.parse_fixed_size_binary(),
+            Token::Decimal32 => self.parse_decimal_32(),
+            Token::Decimal64 => self.parse_decimal_64(),
             Token::Decimal128 => self.parse_decimal_128(),
             Token::Decimal256 => self.parse_decimal_256(),
             Token::Dictionary => self.parse_dictionary(),
             Token::List => self.parse_list(),
+            Token::ListView => self.parse_list_view(),
             Token::LargeList => self.parse_large_list(),
+            Token::LargeListView => self.parse_large_list_view(),
             Token::FixedSizeList => self.parse_fixed_size_list(),
             Token::Struct => self.parse_struct(),
-            Token::FieldName(word) => Err(make_error(
-                self.val,
-                &format!("unrecognized word: {}", word),
-            )),
+            Token::Union => self.parse_union(),
+            Token::Map => self.parse_map(),
+            Token::RunEndEncoded => self.parse_run_end_encoded(),
             tok => Err(make_error(
                 self.val,
                 &format!("finding next type, got unexpected '{tok}'"),
@@ -90,79 +98,146 @@ impl<'a> Parser<'a> {
         }
     }
 
-    /// Parses the List type
+    /// Parses a Field; this is the inverse of `format_field` in `datatype_display.rs`.
+    /// E.g: "a": non-null Int64
+    ///
+    /// TODO: support metadata: `"a": non-null Int64 metadata: {"foo": "value"}`
+    fn parse_field(&mut self) -> ArrowResult<Field> {
+        let name = self.parse_double_quoted_string("Field")?;
+        self.expect_token(Token::Colon)?;
+        let nullable = self.parse_opt_nullable();
+        let data_type = self.parse_next_type()?;
+        Ok(Field::new(name, data_type, nullable))
+    }
+
+    /// Parses field inside a list. Use `Field::LIST_FIELD_DEFAULT_NAME`
+    /// if no field name is specified.
+    /// E.g: `non-null Int64, field: 'foo'` or `non-null Int64`
+    ///
+    /// TODO: support metadata: `non-null Int64, metadata: {"foo2": "value"}`
+    fn parse_list_field(&mut self, context: &str) -> ArrowResult<Field> {
+        let nullable = self.parse_opt_nullable();
+        let data_type = self.parse_next_type()?;
+
+        // the field name (if exists) must be after a comma
+        let field_name = if self
+            .tokenizer
+            .next_if(|next| matches!(next, Ok(Token::Comma)))
+            .is_none()
+        {
+            Field::LIST_FIELD_DEFAULT_NAME.into()
+        } else {
+            // expects: `field: 'field_name'`.
+            self.expect_token(Token::Field)?;
+            self.expect_token(Token::Colon)?;
+            self.parse_single_quoted_string(context)?
+        };
+
+        Ok(Field::new(field_name, data_type, nullable))
+    }
+
+    /// Parses the List type (called after `List` has been consumed)
+    /// E.g: List(non-null Int64, field: 'foo')
     fn parse_list(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
-        let data_type = self.parse_next_type()?;
+        let field = self.parse_list_field("List")?;
+        self.expect_token(Token::RParen)?;
+        Ok(DataType::List(Arc::new(field)))
+    }
+
+    /// Parses the ListView type (called after `ListView` has been consumed)
+    /// E.g: ListView(non-null Int64, field: 'foo')
+    fn parse_list_view(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let field = self.parse_list_field("ListView")?;
         self.expect_token(Token::RParen)?;
-        Ok(DataType::List(Arc::new(Field::new_list_field(
-            data_type, true,
-        ))))
+        Ok(DataType::ListView(Arc::new(field)))
     }
 
-    /// Parses the LargeList type
+    /// Parses the LargeList type (called after `LargeList` has been consumed)
+    /// E.g: LargeList(non-null Int64, field: 'foo')
     fn parse_large_list(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
-        let data_type = self.parse_next_type()?;
+        let field = self.parse_list_field("LargeList")?;
         self.expect_token(Token::RParen)?;
-        Ok(DataType::LargeList(Arc::new(Field::new_list_field(
-            data_type, true,
-        ))))
+        Ok(DataType::LargeList(Arc::new(field)))
     }
 
-    /// Parses the FixedSizeList type
-    fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
+    /// Parses the LargeListView type (called after `LargeListView` has been consumed)
+    /// E.g: LargeListView(non-null Int64, field: 'foo')
+    fn parse_large_list_view(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
-        let length = self.parse_i32("FixedSizeList")?;
-        self.expect_token(Token::Comma)?;
-        let data_type = self.parse_next_type()?;
+        let field = self.parse_list_field("LargeListView")?;
         self.expect_token(Token::RParen)?;
-        Ok(DataType::FixedSizeList(
-            Arc::new(Field::new_list_field(data_type, true)),
-            length,
-        ))
+        Ok(DataType::LargeListView(Arc::new(field)))
     }
 
-    /// Parses the next timeunit
-    fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
+    /// Parses the FixedSizeList type (called after `FixedSizeList` has been consumed)
+    ///
+    /// Examples:
+    /// * `FixedSizeList(5 x non-null Int64, field: 'foo')`
+    /// * `FixedSizeList(4, Int64)`
+    ///
+    fn parse_fixed_size_list(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let length = self.parse_i32("FixedSizeList")?;
         match self.next_token()? {
-            Token::TimeUnit(time_unit) => Ok(time_unit),
+            // `FixedSizeList(5 x non-null Int64, field: 'foo')` format
+            Token::X => {
+                let field = self.parse_list_field("FixedSizeList")?;
+                self.expect_token(Token::RParen)?;
+                Ok(DataType::FixedSizeList(Arc::new(field), length))
+            }
+            // `FixedSizeList(4, Int64)` format
+            Token::Comma => {
+                let data_type = self.parse_next_type()?;
+                self.expect_token(Token::RParen)?;
+                Ok(DataType::FixedSizeList(
+                    Arc::new(Field::new_list_field(data_type, true)),
+                    length,
+                ))
+            }
             tok => Err(make_error(
                 self.val,
-                &format!("finding TimeUnit for {context}, got {tok}"),
+                &format!("Expected 'x' or ',' after length for FixedSizeList, got '{tok}'"),
             )),
         }
     }
 
-    /// Parses the next timezone
-    fn parse_timezone(&mut self, context: &str) -> ArrowResult<Option<String>> {
+    /// Parses the next timeunit
+    fn parse_time_unit(&mut self, context: &str) -> ArrowResult<TimeUnit> {
         match self.next_token()? {
-            Token::None => Ok(None),
-            Token::Some => {
-                self.expect_token(Token::LParen)?;
-                let timezone = self.parse_double_quoted_string("Timezone")?;
-                self.expect_token(Token::RParen)?;
-                Ok(Some(timezone))
-            }
+            Token::TimeUnit(time_unit) => Ok(time_unit),
             tok => Err(make_error(
                 self.val,
-                &format!("finding Timezone for {context}, got {tok}"),
+                &format!("finding TimeUnit for {context}, got {tok}"),
             )),
         }
     }
 
     /// Parses the next double quoted string
     fn parse_double_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
-        match self.next_token()? {
-            Token::DoubleQuotedString(s) => Ok(s),
-            Token::FieldName(word) => Err(make_error(
+        let token = self.next_token()?;
+        if let Token::DoubleQuotedString(string) = token {
+            Ok(string)
+        } else {
+            Err(make_error(
                 self.val,
-                &format!("unrecognized word: {}", word),
-            )),
-            tok => Err(make_error(
+                &format!("expected double quoted string for {context}, got '{token}'"),
+            ))
+        }
+    }
+
+    /// Parses the next single quoted string
+    fn parse_single_quoted_string(&mut self, context: &str) -> ArrowResult<String> {
+        let token = self.next_token()?;
+        if let Token::SingleQuotedString(string) = token {
+            Ok(string)
+        } else {
+            Err(make_error(
                 self.val,
-                &format!("finding double quoted string for {context}, got '{tok}'"),
-            )),
+                &format!("expected single quoted string for {context}, got '{token}'"),
+            ))
         }
     }
 
@@ -214,9 +289,45 @@ impl<'a> Parser<'a> {
     fn parse_timestamp(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
         let time_unit = self.parse_time_unit("Timestamp")?;
-        self.expect_token(Token::Comma)?;
-        let timezone = self.parse_timezone("Timestamp")?;
-        self.expect_token(Token::RParen)?;
+
+        let timezone;
+        match self.next_token()? {
+            Token::Comma => {
+                match self.next_token()? {
+                    // Support old style `Timestamp(Nanosecond, None)`
+                    Token::None => {
+                        timezone = None;
+                    }
+                    // Support old style `Timestamp(Nanosecond, Some("Timezone"))`
+                    Token::Some => {
+                        self.expect_token(Token::LParen)?;
+                        timezone = Some(self.parse_double_quoted_string("Timezone")?);
+                        self.expect_token(Token::RParen)?;
+                    }
+                    Token::DoubleQuotedString(tz) => {
+                        // Support new style `Timestamp(Nanosecond, "Timezone")`
+                        timezone = Some(tz);
+                    }
+                    tok => {
+                        return Err(make_error(
+                            self.val,
+                            &format!("Expected None, Some, or a timezone string, got {tok:?}"),
+                        ));
+                    }
+                };
+                self.expect_token(Token::RParen)?;
+            }
+            // No timezone (e.g `Timestamp(ns)`)
+            Token::RParen => {
+                timezone = None;
+            }
+            next_token => {
+                return Err(make_error(
+                    self.val,
+                    &format!("Expected comma followed by a timezone, or an ), got {next_token:?}"),
+                ));
+            }
+        }
         Ok(DataType::Timestamp(time_unit, timezone.map(Into::into)))
     }
 
@@ -253,7 +364,7 @@ impl<'a> Parser<'a> {
                 return Err(make_error(
                     self.val,
                     &format!("finding IntervalUnit for Interval, got {tok}"),
-                ))
+                ));
             }
         };
         self.expect_token(Token::RParen)?;
@@ -268,6 +379,26 @@ impl<'a> Parser<'a> {
         Ok(DataType::FixedSizeBinary(length))
     }
 
+    /// Parses the next Decimal32 (called after `Decimal32` has been consumed)
+    fn parse_decimal_32(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let precision = self.parse_u8("Decimal32")?;
+        self.expect_token(Token::Comma)?;
+        let scale = self.parse_i8("Decimal32")?;
+        self.expect_token(Token::RParen)?;
+        Ok(DataType::Decimal32(precision, scale))
+    }
+
+    /// Parses the next Decimal64 (called after `Decimal64` has been consumed)
+    fn parse_decimal_64(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let precision = self.parse_u8("Decimal64")?;
+        self.expect_token(Token::Comma)?;
+        let scale = self.parse_i8("Decimal64")?;
+        self.expect_token(Token::RParen)?;
+        Ok(DataType::Decimal64(precision, scale))
+    }
+
     /// Parses the next Decimal128 (called after `Decimal128` has been consumed)
     fn parse_decimal_128(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
@@ -300,47 +431,130 @@ impl<'a> Parser<'a> {
             Box::new(value_type),
         ))
     }
+
+    /// Parses the next Struct (called after `Struct` has been consumed)
     fn parse_struct(&mut self) -> ArrowResult<DataType> {
         self.expect_token(Token::LParen)?;
         let mut fields = Vec::new();
         loop {
-            let field_name = match self.next_token()? {
-                // It's valid to have a name that is a type name
-                Token::SimpleType(data_type) => data_type.to_string(),
-                Token::FieldName(name) => name,
-                Token::RParen => {
-                    if fields.is_empty() {
-                        break;
-                    } else {
-                        return Err(make_error(
-                            self.val,
-                            "Unexpected token while parsing Struct fields. Expected a word for the name of Struct, but got trailing comma",
-                        ));
-                    }
-                }
-                tok => {
-                    return Err(make_error(
-                        self.val,
-                        &format!("Expected a word for the name of Struct, but got {tok}"),
-                    ))
-                }
-            };
-            let field_type = self.parse_next_type()?;
-            fields.push(Arc::new(Field::new(field_name, field_type, true)));
+            if self
+                .tokenizer
+                .next_if(|next| matches!(next, Ok(Token::RParen)))
+                .is_some()
+            {
+                break;
+            }
+
+            let field = self.parse_field()?;
+            fields.push(Arc::new(field));
             match self.next_token()? {
                 Token::Comma => continue,
                 Token::RParen => break,
                 tok => {
                     return Err(make_error(
                         self.val,
-                        &format!("Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"),
-                    ))
+                        &format!(
+                            "Unexpected token while parsing Struct fields. Expected ',' or ')', but got '{tok}'"
+                        ),
+                    ));
                 }
             }
         }
         Ok(DataType::Struct(Fields::from(fields)))
     }
 
+    /// Parses the next Union (called after `Union` has been consumed)
+    /// E.g: Union(Sparse, 0: ("a": Int32), 1: ("b": non-null Utf8))
+    fn parse_union(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let union_mode = self.parse_union_mode()?;
+        let mut type_ids = vec![];
+        let mut fields = vec![];
+        loop {
+            if self
+                .tokenizer
+                .next_if(|next| matches!(next, Ok(Token::RParen)))
+                .is_some()
+            {
+                break;
+            }
+            self.expect_token(Token::Comma)?;
+            let (type_id, field) = self.parse_union_field()?;
+            type_ids.push(type_id);
+            fields.push(field);
+        }
+        Ok(DataType::Union(
+            UnionFields::try_new(type_ids, fields)?,
+            union_mode,
+        ))
+    }
+
+    /// Parses the next UnionMode
+    fn parse_union_mode(&mut self) -> ArrowResult<UnionMode> {
+        match self.next_token()? {
+            Token::UnionMode(union_mode) => Ok(union_mode),
+            tok => Err(make_error(
+                self.val,
+                &format!("finding UnionMode for Union, got {tok}"),
+            )),
+        }
+    }
+
+    /// Parses the next UnionField
+    /// 0: ("a": non-null Int32)
+    fn parse_union_field(&mut self) -> ArrowResult<(i8, Field)> {
+        let type_id = self.parse_i8("UnionField")?;
+        self.expect_token(Token::Colon)?;
+        self.expect_token(Token::LParen)?;
+        let field = self.parse_field()?;
+        self.expect_token(Token::RParen)?;
+        Ok((type_id, field))
+    }
+
+    /// Parses the next Map (called after `Map` has been consumed)
+    /// E.g: Map("entries": Struct("key": Utf8, "value": non-null Int32), sorted)
+    fn parse_map(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let field = self.parse_field()?;
+        self.expect_token(Token::Comma)?;
+        let sorted = self.parse_map_sorted()?;
+        self.expect_token(Token::RParen)?;
+        Ok(DataType::Map(Arc::new(field), sorted))
+    }
+
+    /// Parses map's sorted
+    fn parse_map_sorted(&mut self) -> ArrowResult<bool> {
+        match self.next_token()? {
+            Token::MapSorted(sorted) => Ok(sorted),
+            tok => Err(make_error(
+                self.val,
+                &format!("Expected sorted or unsorted for a map; got {tok:?}"),
+            )),
+        }
+    }
+
+    /// Parses the next RunEndEncoded (called after `RunEndEncoded` has been consumed)
+    /// E.g: RunEndEncoded("run_ends": UInt32, "values": non-null Int32)
+    fn parse_run_end_encoded(&mut self) -> ArrowResult<DataType> {
+        self.expect_token(Token::LParen)?;
+        let run_ends = self.parse_field()?;
+        self.expect_token(Token::Comma)?;
+        let values = self.parse_field()?;
+        self.expect_token(Token::RParen)?;
+        Ok(DataType::RunEndEncoded(
+            Arc::new(run_ends),
+            Arc::new(values),
+        ))
+    }
+
+    /// Consumes an optional `non-null` / `nullable` token; returns `false` only for `non-null`.
+    fn parse_opt_nullable(&mut self) -> bool {
+        let tok = self
+            .tokenizer
+            .next_if(|next| matches!(next, Ok(Token::NonNull | Token::Nullable)));
+        !matches!(tok, Some(Ok(Token::NonNull)))
+    }
+
     /// return the next token, or an error if there are none left
     fn next_token(&mut self) -> ArrowResult<Token> {
         match self.tokenizer.next() {
@@ -362,19 +576,22 @@ impl<'a> Parser<'a> {
 
 /// returns true if this character is a separator
 fn is_separator(c: char) -> bool {
-    c == '(' || c == ')' || c == ',' || c == ' '
+    c == '(' || c == ')' || c == ',' || c == ':' || c == ' '
+}
+
+enum QuoteType {
+    Double,
+    Single,
 }
 
 #[derive(Debug)]
-/// Splits a strings like Dictionary(Int32, Int64) into tokens sutable for parsing
+/// Splits a string like Dictionary(Int32, Int64) into tokens suitable for parsing
 ///
-/// For example the string "Timestamp(Nanosecond, None)" would be parsed into:
+/// For example the string "Timestamp(ns)" would be parsed into:
 ///
 /// * Token::Timestamp
 /// * Token::Lparen
 /// * Token::IntervalUnit(IntervalUnit::Nanosecond)
-/// * Token::Comma,
-/// * Token::None,
 /// * Token::Rparen,
 struct Tokenizer<'a> {
     val: &'a str,
@@ -426,50 +643,6 @@ impl<'a> Tokenizer<'a> {
                 })?;
                 return Ok(Token::Integer(val));
             }
-            // if it started with a double quote `"`, try parsing it as a double quoted string
-            else if c == '"' {
-                let len = self.word.chars().count();
-
-                // to verify it's double quoted
-                if let Some(last_c) = self.word.chars().last() {
-                    if last_c != '"' || len < 2 {
-                        return Err(make_error(
-                            self.val,
-                            &format!(
-                                "parsing {} as double quoted string: last char must be \"",
-                                self.word
-                            ),
-                        ));
-                    }
-                }
-
-                if len == 2 {
-                    return Err(make_error(
-                        self.val,
-                        &format!(
-                            "parsing {} as double quoted string: empty string isn't supported",
-                            self.word
-                        ),
-                    ));
-                }
-
-                let val: String = self.word.parse().map_err(|e| {
-                    make_error(
-                        self.val,
-                        &format!("parsing {} as double quoted string: {e}", self.word),
-                    )
-                })?;
-
-                let s = val[1..len - 1].to_string();
-                if s.contains('"') {
-                    return Err(make_error(
-                        self.val,
-                        &format!("parsing {} as double quoted string: escaped double quote isn't supported", self.word),
-                    ));
-                }
-
-                return Ok(Token::DoubleQuotedString(s));
-            }
         }
 
         // figure out what the word was
@@ -502,13 +675,15 @@ impl<'a> Tokenizer<'a> {
             "Date64" => Token::SimpleType(DataType::Date64),
 
             "List" => Token::List,
+            "ListView" => Token::ListView,
             "LargeList" => Token::LargeList,
+            "LargeListView" => Token::LargeListView,
             "FixedSizeList" => Token::FixedSizeList,
 
-            "Second" => Token::TimeUnit(TimeUnit::Second),
-            "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
-            "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
-            "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
+            "s" | "Second" => Token::TimeUnit(TimeUnit::Second),
+            "ms" | "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
+            "µs" | "us" | "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
+            "ns" | "Nanosecond" => Token::TimeUnit(TimeUnit::Nanosecond),
 
             "Timestamp" => Token::Timestamp,
             "Time32" => Token::Time32,
@@ -518,6 +693,9 @@ impl<'a> Tokenizer<'a> {
             "Dictionary" => Token::Dictionary,
 
             "FixedSizeBinary" => Token::FixedSizeBinary,
+
+            "Decimal32" => Token::Decimal32,
+            "Decimal64" => Token::Decimal64,
             "Decimal128" => Token::Decimal128,
             "Decimal256" => Token::Decimal256,
 
@@ -528,12 +706,88 @@ impl<'a> Tokenizer<'a> {
             "Some" => Token::Some,
             "None" => Token::None,
 
+            "non-null" => Token::NonNull,
+            "nullable" => Token::Nullable,
+            "field" => Token::Field,
+            "x" => Token::X,
+
             "Struct" => Token::Struct,
-            // If we don't recognize the word, treat it as a field name
-            word => Token::FieldName(word.to_string()),
+
+            "Union" => Token::Union,
+            "Sparse" => Token::UnionMode(UnionMode::Sparse),
+            "Dense" => Token::UnionMode(UnionMode::Dense),
+
+            "Map" => Token::Map,
+            "sorted" => Token::MapSorted(true),
+            "unsorted" => Token::MapSorted(false),
+
+            "RunEndEncoded" => Token::RunEndEncoded,
+
+            token => {
+                return Err(make_error(self.val, &format!("unknown token: {token}")));
+            }
         };
         Ok(token)
     }
+
+    /// Parses a quoted string token, e.g. `"foo bar"` or `'foo bar'`.
+    ///
+    /// The next character must be the opening quote selected by `quote_type`.
+    /// Characters are collected up to the first unescaped closing quote; a
+    /// backslash escapes the character after it, and both are kept verbatim.
+    ///
+    /// Returns an error if the opening quote is missing, the string is
+    /// unterminated, or the string is empty.
+    fn parse_quoted_string(&mut self, quote_type: QuoteType) -> ArrowResult<Token> {
+        let quote = match quote_type {
+            QuoteType::Double => '\"',
+            QuoteType::Single => '\'',
+        };
+
+        if self.next_char() != Some(quote) {
+            // name the quote that was actually expected, not always `"`
+            return Err(make_error(self.val, &format!("Expected {quote}")));
+        }
+
+        // reset temp space
+        self.word.clear();
+
+        // true while the previous character was an unescaped backslash
+        let mut is_escaped = false;
+
+        loop {
+            match self.next_char() {
+                None => {
+                    return Err(ArrowError::ParseError(format!(
+                        "Unterminated string at: \"{}",
+                        self.word
+                    )));
+                }
+                // an unescaped quote closes the string
+                Some(c) if c == quote && !is_escaped => break,
+                Some(c) => {
+                    // An escape covers exactly one character, so clear it after any
+                    // character; an escaped backslash (`\\`) must not escape the next.
+                    is_escaped = c == '\\' && !is_escaped;
+                    self.word.push(c);
+                }
+            }
+        }
+
+        let val: String = self.word.parse().map_err(|err| {
+            ArrowError::ParseError(format!("Failed to parse string: \"{}\": {err}", self.word))
+        })?;
+
+        if val.is_empty() {
+            // Using empty strings as field names is just asking for trouble
+            return Err(make_error(self.val, "empty strings aren't allowed"));
+        }
+
+        match quote_type {
+            QuoteType::Double => Ok(Token::DoubleQuotedString(val)),
+            QuoteType::Single => Ok(Token::SingleQuotedString(val)),
+        }
+    }
 }
 
 impl Iterator for Tokenizer<'_> {
@@ -547,6 +801,12 @@ impl Iterator for Tokenizer<'_> {
                     self.next_char();
                     continue;
                 }
+                '"' => {
+                    return Some(self.parse_quoted_string(QuoteType::Double));
+                }
+                '\'' => {
+                    return Some(self.parse_quoted_string(QuoteType::Single));
+                }
                 '(' => {
                     self.next_char();
                     return Some(Ok(Token::LParen));
@@ -559,6 +819,10 @@ impl Iterator for Tokenizer<'_> {
                     self.next_char();
                     return Some(Ok(Token::Comma));
                 }
+                ':' => {
+                    self.next_char();
+                    return Some(Ok(Token::Colon));
+                }
                 _ => return Some(self.parse_word()),
             }
         }
@@ -577,6 +841,8 @@ enum Token {
     Duration,
     Interval,
     FixedSizeBinary,
+    Decimal32,
+    Decimal64,
     Decimal128,
     Decimal256,
     Dictionary,
@@ -585,15 +851,27 @@ enum Token {
     LParen,
     RParen,
     Comma,
+    Colon,
     Some,
     None,
     Integer(i64),
     DoubleQuotedString(String),
+    SingleQuotedString(String),
     List,
+    ListView,
     LargeList,
+    LargeListView,
     FixedSizeList,
     Struct,
-    FieldName(String),
+    Union,
+    UnionMode(UnionMode),
+    Map,
+    MapSorted(bool),
+    RunEndEncoded,
+    NonNull,
+    Nullable,
+    Field,
+    X,
 }
 
 impl Display for Token {
@@ -601,7 +879,9 @@ impl Display for Token {
         match self {
             Token::SimpleType(t) => write!(f, "{t}"),
             Token::List => write!(f, "List"),
+            Token::ListView => write!(f, "ListView"),
             Token::LargeList => write!(f, "LargeList"),
+            Token::LargeListView => write!(f, "LargeListView"),
             Token::FixedSizeList => write!(f, "FixedSizeList"),
             Token::Timestamp => write!(f, "Timestamp"),
             Token::Time32 => write!(f, "Time32"),
@@ -613,16 +893,30 @@ impl Display for Token {
             Token::LParen => write!(f, "("),
             Token::RParen => write!(f, ")"),
             Token::Comma => write!(f, ","),
+            Token::Colon => write!(f, ":"),
             Token::Some => write!(f, "Some"),
             Token::None => write!(f, "None"),
             Token::FixedSizeBinary => write!(f, "FixedSizeBinary"),
+            Token::Decimal32 => write!(f, "Decimal32"),
+            Token::Decimal64 => write!(f, "Decimal64"),
             Token::Decimal128 => write!(f, "Decimal128"),
             Token::Decimal256 => write!(f, "Decimal256"),
             Token::Dictionary => write!(f, "Dictionary"),
             Token::Integer(v) => write!(f, "Integer({v})"),
             Token::DoubleQuotedString(s) => write!(f, "DoubleQuotedString({s})"),
+            Token::SingleQuotedString(s) => write!(f, "SingleQuotedString({s})"),
             Token::Struct => write!(f, "Struct"),
-            Token::FieldName(s) => write!(f, "FieldName({s})"),
+            Token::Union => write!(f, "Union"),
+            Token::UnionMode(m) => write!(f, "{m:?}"),
+            Token::Map => write!(f, "Map"),
+            Token::MapSorted(sorted) => {
+                write!(f, "{}", if *sorted { "sorted" } else { "unsorted" })
+            }
+            Token::RunEndEncoded => write!(f, "RunEndEncoded"),
+            Token::NonNull => write!(f, "non-null"),
+            Token::Nullable => write!(f, "nullable"),
+            Token::Field => write!(f, "field"),
+            Token::X => write!(f, "x"),
         }
     }
 }
@@ -639,7 +933,7 @@ mod test {
         }
     }
 
-    /// convert data_type to a string, and then parse it as a type
+    /// Convert data_type to a string, and then parse it as a type,
     /// verifying it is the same
     fn round_trip(data_type: DataType) {
         let data_type_string = data_type.to_string();
@@ -708,6 +1002,8 @@ mod test {
             DataType::Utf8,
             DataType::Utf8View,
             DataType::LargeUtf8,
+            DataType::Decimal32(7, 8),
+            DataType::Decimal64(6, 9),
             DataType::Decimal128(7, 12),
             DataType::Decimal256(6, 13),
             // ---------
@@ -759,8 +1055,124 @@ mod test {
                     true,
                 ),
             ])),
+            DataType::Struct(Fields::from(vec![Field::new("f1", DataType::Int64, true)])),
             DataType::Struct(Fields::empty()),
-            // TODO support more structured types (List, LargeList, Union, Map, RunEndEncoded, etc)
+            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, true))),
+            DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
+            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
+            DataType::List(Arc::new(Field::new("Int64", DataType::Int64, false))),
+            DataType::List(Arc::new(Field::new(
+                "nested_list",
+                DataType::List(Arc::new(Field::new("Int64", DataType::Int64, true))),
+                true,
+            ))),
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
+            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
+            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
+            DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
+            DataType::ListView(Arc::new(Field::new(
+                "nested_list_view",
+                DataType::ListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
+                true,
+            ))),
+            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, true))),
+            DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int64, false))),
+            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
+            DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, false))),
+            DataType::LargeList(Arc::new(Field::new(
+                "nested_large_list",
+                DataType::LargeList(Arc::new(Field::new("Int64", DataType::Int64, true))),
+                true,
+            ))),
+            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, true))),
+            DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int64, false))),
+            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
+            DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, false))),
+            DataType::LargeListView(Arc::new(Field::new(
+                "nested_large_list_view",
+                DataType::LargeListView(Arc::new(Field::new("Int64", DataType::Int64, true))),
+                true,
+            ))),
+            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, true)), 2),
+            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int64, false)), 2),
+            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, true)), 2),
+            DataType::FixedSizeList(Arc::new(Field::new("Int64", DataType::Int64, false)), 2),
+            DataType::FixedSizeList(
+                Arc::new(Field::new(
+                    "nested_fixed_size_list",
+                    DataType::FixedSizeList(
+                        Arc::new(Field::new("Int64", DataType::Int64, true)),
+                        2,
+                    ),
+                    true,
+                )),
+                2,
+            ),
+            DataType::Union(
+                UnionFields::from_fields(vec![
+                    Field::new("Int32", DataType::Int32, false),
+                    Field::new("Utf8", DataType::Utf8, true),
+                ]),
+                UnionMode::Sparse,
+            ),
+            DataType::Union(
+                UnionFields::from_fields(vec![
+                    Field::new("Int32", DataType::Int32, false),
+                    Field::new("Utf8", DataType::Utf8, true),
+                ]),
+                UnionMode::Dense,
+            ),
+            DataType::Union(
+                UnionFields::from_fields(vec![
+                    Field::new_union(
+                        "nested_union",
+                        vec![0, 1],
+                        vec![
+                            Field::new("Int32", DataType::Int32, false),
+                            Field::new("Utf8", DataType::Utf8, true),
+                        ],
+                        UnionMode::Dense,
+                    ),
+                    Field::new("Utf8", DataType::Utf8, true),
+                ]),
+                UnionMode::Sparse,
+            ),
+            DataType::Union(
+                UnionFields::from_fields(vec![Field::new("Int32", DataType::Int32, false)]),
+                UnionMode::Dense,
+            ),
+            DataType::Union(
+                UnionFields::try_new(Vec::<i8>::new(), Vec::<Field>::new()).unwrap(),
+                UnionMode::Sparse,
+            ),
+            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), true),
+            DataType::Map(Arc::new(Field::new("Int64", DataType::Int64, true)), false),
+            DataType::Map(
+                Arc::new(Field::new_map(
+                    "nested_map",
+                    "entries",
+                    Field::new("key", DataType::Utf8, false),
+                    Field::new("value", DataType::Int32, true),
+                    false,
+                    true,
+                )),
+                true,
+            ),
+            DataType::RunEndEncoded(
+                Arc::new(Field::new("run_ends", DataType::UInt32, false)),
+                Arc::new(Field::new("values", DataType::Int32, true)),
+            ),
+            DataType::RunEndEncoded(
+                Arc::new(Field::new(
+                    "nested_run_end_encoded",
+                    DataType::RunEndEncoded(
+                        Arc::new(Field::new("run_ends", DataType::UInt32, false)),
+                        Arc::new(Field::new("values", DataType::Int32, true)),
+                    ),
+                    true,
+                )),
+                Arc::new(Field::new("values", DataType::Int32, true)),
+            ),
         ]
     }
 
@@ -770,27 +1182,220 @@ mod test {
         let cases = [
             ("Int8", DataType::Int8),
             (
-                "Timestamp        (Nanosecond,      None)",
+                "Timestamp        (ns)",
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
             ),
             (
-                "Timestamp        (Nanosecond,      None)  ",
+                "Timestamp        (ns)  ",
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
             ),
             (
-                "          Timestamp        (Nanosecond,      None               )",
+                "          Timestamp        (ns               )",
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
             ),
             (
-                "Timestamp        (Nanosecond,      None               )  ",
+                "Timestamp        (ns               )  ",
                 DataType::Timestamp(TimeUnit::Nanosecond, None),
             ),
         ];
 
         for (data_type_string, expected_data_type) in cases {
-            println!("Parsing '{data_type_string}', expecting '{expected_data_type:?}'");
             let parsed_data_type = parse_data_type(data_type_string).unwrap();
-            assert_eq!(parsed_data_type, expected_data_type);
+            assert_eq!(
+                parsed_data_type, expected_data_type,
+                "Parsing '{data_type_string}', expecting '{expected_data_type}'"
+            );
+        }
+    }
+
+    /// Ensure that old style types can still be parsed
+    #[test]
+    fn test_parse_data_type_backwards_compatibility() {
+        use DataType::*;
+        use IntervalUnit::*;
+        use TimeUnit::*;
+        // List below created with:
+        for t in list_datatypes() {
+            println!(r#"("{t}", {t:?}),"#);
+        }
+        // (string to parse, expected DataType)
+        let cases = [
+            ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
+            ("Timestamp(Microsecond, None)", Timestamp(Microsecond, None)),
+            ("Timestamp(Millisecond, None)", Timestamp(Millisecond, None)),
+            ("Timestamp(Second, None)", Timestamp(Second, None)),
+            ("Timestamp(Nanosecond, None)", Timestamp(Nanosecond, None)),
+            // Timezones
+            (
+                r#"Timestamp(Nanosecond, Some("+00:00"))"#,
+                Timestamp(Nanosecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(Microsecond, Some("+00:00"))"#,
+                Timestamp(Microsecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(Millisecond, Some("+00:00"))"#,
+                Timestamp(Millisecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(Second, Some("+00:00"))"#,
+                Timestamp(Second, Some("+00:00".into())),
+            ),
+            ("Null", Null),
+            ("Boolean", Boolean),
+            ("Int8", Int8),
+            ("Int16", Int16),
+            ("Int32", Int32),
+            ("Int64", Int64),
+            ("UInt8", UInt8),
+            ("UInt16", UInt16),
+            ("UInt32", UInt32),
+            ("UInt64", UInt64),
+            ("Float16", Float16),
+            ("Float32", Float32),
+            ("Float64", Float64),
+            ("Timestamp(s)", Timestamp(Second, None)),
+            ("Timestamp(ms)", Timestamp(Millisecond, None)),
+            ("Timestamp(µs)", Timestamp(Microsecond, None)),
+            ("Timestamp(ns)", Timestamp(Nanosecond, None)),
+            (
+                r#"Timestamp(ns, "+00:00")"#,
+                Timestamp(Nanosecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(µs, "+00:00")"#,
+                Timestamp(Microsecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(ms, "+00:00")"#,
+                Timestamp(Millisecond, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(s, "+00:00")"#,
+                Timestamp(Second, Some("+00:00".into())),
+            ),
+            (
+                r#"Timestamp(ns, "+08:00")"#,
+                Timestamp(Nanosecond, Some("+08:00".into())),
+            ),
+            (
+                r#"Timestamp(µs, "+08:00")"#,
+                Timestamp(Microsecond, Some("+08:00".into())),
+            ),
+            (
+                r#"Timestamp(ms, "+08:00")"#,
+                Timestamp(Millisecond, Some("+08:00".into())),
+            ),
+            (
+                r#"Timestamp(s, "+08:00")"#,
+                Timestamp(Second, Some("+08:00".into())),
+            ),
+            ("Date32", Date32),
+            ("Date64", Date64),
+            ("Time32(s)", Time32(Second)),
+            ("Time32(ms)", Time32(Millisecond)),
+            ("Time32(µs)", Time32(Microsecond)),
+            ("Time32(ns)", Time32(Nanosecond)),
+            ("Time64(s)", Time64(Second)),
+            ("Time64(ms)", Time64(Millisecond)),
+            ("Time64(µs)", Time64(Microsecond)),
+            ("Time64(ns)", Time64(Nanosecond)),
+            ("Duration(s)", Duration(Second)),
+            ("Duration(ms)", Duration(Millisecond)),
+            ("Duration(µs)", Duration(Microsecond)),
+            ("Duration(ns)", Duration(Nanosecond)),
+            ("Interval(YearMonth)", Interval(YearMonth)),
+            ("Interval(DayTime)", Interval(DayTime)),
+            ("Interval(MonthDayNano)", Interval(MonthDayNano)),
+            ("Binary", Binary),
+            ("BinaryView", BinaryView),
+            ("FixedSizeBinary(0)", FixedSizeBinary(0)),
+            ("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
+            ("FixedSizeBinary(-432)", FixedSizeBinary(-432)),
+            ("LargeBinary", LargeBinary),
+            ("Utf8", Utf8),
+            ("Utf8View", Utf8View),
+            ("LargeUtf8", LargeUtf8),
+            ("Decimal32(7, 8)", Decimal32(7, 8)),
+            ("Decimal64(6, 9)", Decimal64(6, 9)),
+            ("Decimal128(7, 12)", Decimal128(7, 12)),
+            ("Decimal256(6, 13)", Decimal256(6, 13)),
+            (
+                "Dictionary(Int32, Utf8)",
+                Dictionary(Box::new(Int32), Box::new(Utf8)),
+            ),
+            (
+                "Dictionary(Int8, Utf8)",
+                Dictionary(Box::new(Int8), Box::new(Utf8)),
+            ),
+            (
+                "Dictionary(Int8, Timestamp(ns))",
+                Dictionary(Box::new(Int8), Box::new(Timestamp(Nanosecond, None))),
+            ),
+            (
+                "Dictionary(Int8, FixedSizeBinary(23))",
+                Dictionary(Box::new(Int8), Box::new(FixedSizeBinary(23))),
+            ),
+            (
+                "Dictionary(Int8, Dictionary(Int8, Utf8))",
+                Dictionary(
+                    Box::new(Int8),
+                    Box::new(Dictionary(Box::new(Int8), Box::new(Utf8))),
+                ),
+            ),
+            (
+                r#"Struct("f1": nullable Int64, "f2": nullable Float64, "f3": nullable Timestamp(s, "+08:00"), "f4": nullable Dictionary(Int8, FixedSizeBinary(23)))"#,
+                Struct(Fields::from(vec![
+                    Field::new("f1", Int64, true),
+                    Field::new("f2", Float64, true),
+                    Field::new("f3", Timestamp(Second, Some("+08:00".into())), true),
+                    Field::new(
+                        "f4",
+                        Dictionary(Box::new(Int8), Box::new(FixedSizeBinary(23))),
+                        true,
+                    ),
+                ])),
+            ),
+            (
+                r#"Struct("Int64": nullable Int64, "Float64": nullable Float64)"#,
+                Struct(Fields::from(vec![
+                    Field::new("Int64", Int64, true),
+                    Field::new("Float64", Float64, true),
+                ])),
+            ),
+            (
+                r#"Struct("f1": nullable Int64, "nested_struct": nullable Struct("n1": nullable Int64))"#,
+                Struct(Fields::from(vec![
+                    Field::new("f1", Int64, true),
+                    Field::new(
+                        "nested_struct",
+                        Struct(Fields::from(vec![Field::new("n1", Int64, true)])),
+                        true,
+                    ),
+                ])),
+            ),
+            (r#"Struct()"#, Struct(Fields::empty())),
+            (
+                "FixedSizeList(4, Int64)",
+                FixedSizeList(Arc::new(Field::new_list_field(Int64, true)), 4),
+            ),
+            (
+                "List(Int64)",
+                List(Arc::new(Field::new_list_field(Int64, true))),
+            ),
+            (
+                "LargeList(Int64)",
+                LargeList(Arc::new(Field::new_list_field(Int64, true))),
+            ),
+        ];
+
+        for (data_type_string, expected_data_type) in cases {
+            let parsed_data_type = parse_data_type(data_type_string).unwrap();
+            assert_eq!(
+                parsed_data_type, expected_data_type,
+                "Parsing '{data_type_string}', expecting '{expected_data_type}'"
+            );
         }
     }
 
@@ -802,41 +1407,75 @@ mod test {
             ("", "Error finding next token"),
             ("null", "Unsupported type 'null'"),
             ("Nu", "Unsupported type 'Nu'"),
+            (r#"Timestamp(ns, +00:00)"#, "Error unknown token: +00"),
             (
-                r#"Timestamp(Nanosecond, Some(+00:00))"#,
-                "Error unrecognized word: +00:00",
-            ),
-            (
-                r#"Timestamp(Nanosecond, Some("+00:00))"#,
-                r#"parsing "+00:00 as double quoted string: last char must be ""#,
-            ),
-            (
-                r#"Timestamp(Nanosecond, Some(""))"#,
-                r#"parsing "" as double quoted string: empty string isn't supported"#,
+                r#"Timestamp(ns, "+00:00)"#,
+                r#"Unterminated string at: "+00:00)"#,
             ),
+            (r#"Timestamp(ns, "")"#, r#"empty strings aren't allowed"#),
             (
-                r#"Timestamp(Nanosecond, Some("+00:00""))"#,
-                r#"parsing "+00:00"" as double quoted string: escaped double quote isn't supported"#,
+                r#"Timestamp(ns, "+00:00"")"#,
+                r#"Parser error: Unterminated string at: ")"#,
             ),
-            ("Timestamp(Nanosecond, ", "Error finding next token"),
+            ("Timestamp(ns, ", "Error finding next token"),
             (
                 "Float32 Float32",
                 "trailing content after parsing 'Float32'",
             ),
             ("Int32, ", "trailing content after parsing 'Int32'"),
             ("Int32(3), ", "trailing content after parsing 'Int32'"),
-            ("FixedSizeBinary(Int32), ", "Error finding i64 for FixedSizeBinary, got 'Int32'"),
-            ("FixedSizeBinary(3.0), ", "Error parsing 3.0 as integer: invalid digit found in string"),
+            (
+                "FixedSizeBinary(Int32), ",
+                "Error finding i64 for FixedSizeBinary, got 'Int32'",
+            ),
+            (
+                "FixedSizeBinary(3.0), ",
+                "Error parsing 3.0 as integer: invalid digit found in string",
+            ),
             // too large for i32
-            ("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"),
+            (
+                "FixedSizeBinary(4000000000), ",
+                "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
+            ),
             // can't have negative precision
-            ("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"),
-            ("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"),
-            ("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"),
-            ("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"),
-            ("Struct(f1, Int64)", "Error finding next type, got unexpected ','"),
-            ("Struct(f1 Int64,)", "Expected a word for the name of Struct, but got trailing comma"),
-            ("Struct(f1)", "Error finding next type, got unexpected ')'"),
+            (
+                "Decimal32(-3, 5)",
+                "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal64(-3, 5)",
+                "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal128(-3, 5)",
+                "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal256(-3, 5)",
+                "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal32(3, 500)",
+                "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal64(3, 500)",
+                "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal128(3, 500)",
+                "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted",
+            ),
+            (
+                "Decimal256(3, 500)",
+                "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted",
+            ),
+            ("Struct(f1 Int64)", "Error unknown token: f1"),
+            ("Struct(\"f1\" Int64)", "Expected ':'"),
+            (
+                "Struct(\"f1\": )",
+                "Error finding next type, got unexpected ')'",
+            ),
         ];
 
         for (data_type_string, expected_message) in cases {
@@ -847,10 +1486,13 @@ mod test {
                     let message = e.to_string();
                     assert!(
                         message.contains(expected_message),
-                        "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual:{message}\n"
+                        "\n\ndid not find expected in actual.\n\nexpected: {expected_message}\nactual: {message}\n"
                     );
-                    // errors should also contain  a help message
-                    assert!(message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'"));
+
+                    if !message.contains("Unterminated string") {
+                        // errors should also contain a help message
+                        assert!(message.contains("Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'"), "message: {message}");
+                    }
                 }
             }
         }
@@ -860,6 +1502,9 @@ mod test {
     fn parse_error_type() {
         let err = parse_data_type("foobar").unwrap_err();
         assert!(matches!(err, ArrowError::ParseError(_)));
-        assert_eq!(err.to_string(), "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(Nanosecond, None)'. Error unrecognized word: foobar");
+        assert_eq!(
+            err.to_string(),
+            "Parser error: Unsupported type 'foobar'. Must be a supported arrow type name such as 'Int32' or 'Timestamp(ns)'. Error unknown token: foobar"
+        );
     }
 }
diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs
index 982dd026a04d..8c113cba8607 100644
--- a/arrow-schema/src/error.rs
+++ b/arrow-schema/src/error.rs
@@ -46,6 +46,8 @@ pub enum ArrowError {
     CsvError(String),
     /// Error during JSON-related operations.
     JsonError(String),
+    /// Error during Avro-related operations.
+    AvroError(String),
     /// Error during IO operations.
     IoError(String, std::io::Error),
     /// Error during IPC operations in `arrow-ipc` or `arrow-flight`.
@@ -60,6 +62,8 @@ pub enum ArrowError {
     DictionaryKeyOverflowError,
     /// Error when the run end index in a REE array is bigger than the array length
     RunEndIndexOverflowError,
+    /// Error when the offset overflows.
+    OffsetOverflowError(usize),
 }
 
 impl ArrowError {
@@ -107,6 +111,7 @@ impl Display for ArrowError {
             ArrowError::ComputeError(desc) => write!(f, "Compute error: {desc}"),
             ArrowError::ArithmeticOverflow(desc) => write!(f, "Arithmetic overflow: {desc}"),
             ArrowError::DivideByZero => write!(f, "Divide by zero error"),
+            ArrowError::AvroError(desc) => write!(f, "Avro error: {desc}"),
             ArrowError::CsvError(desc) => write!(f, "Csv error: {desc}"),
             ArrowError::JsonError(desc) => write!(f, "Json error: {desc}"),
             ArrowError::IoError(desc, _) => write!(f, "Io error: {desc}"),
@@ -126,6 +131,9 @@ impl Display for ArrowError {
             ArrowError::RunEndIndexOverflowError => {
                 write!(f, "Run end encoded array index overflow error")
             }
+            ArrowError::OffsetOverflowError(offset) => {
+                write!(f, "Offset overflow error: {offset}")
+            }
         }
     }
 }
diff --git a/arrow-schema/src/extension/canonical/bool8.rs b/arrow-schema/src/extension/canonical/bool8.rs
index fdd25677ed0e..362a2cc018c7 100644
--- a/arrow-schema/src/extension/canonical/bool8.rs
+++ b/arrow-schema/src/extension/canonical/bool8.rs
@@ -19,7 +19,7 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#bit-boolean>
 
-use crate::{extension::ExtensionType, ArrowError, DataType};
+use crate::{ArrowError, DataType, extension::ExtensionType};
 
 /// The extension type for `8-bit Boolean`.
 ///
@@ -75,8 +75,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
diff --git a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
index 6fe94fba78aa..b6bd1c1223f4 100644
--- a/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
+++ b/arrow-schema/src/extension/canonical/fixed_shape_tensor.rs
@@ -19,9 +19,12 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#fixed-shape-tensor>
 
-use serde::{Deserialize, Serialize};
+use serde_core::de::{self, MapAccess, Visitor};
+use serde_core::ser::SerializeStruct;
+use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
+use std::fmt;
 
-use crate::{extension::ExtensionType, ArrowError, DataType};
+use crate::{ArrowError, DataType, extension::ExtensionType};
 
 /// The extension type for fixed shape tensor.
 ///
@@ -129,7 +132,7 @@ impl FixedShapeTensor {
 }
 
 /// Extension type metadata for [`FixedShapeTensor`].
-#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct FixedShapeTensorMetadata {
     /// The physical shape of the contained tensors.
     shape: Vec<usize>,
@@ -141,6 +144,143 @@ pub struct FixedShapeTensorMetadata {
     permutations: Option<Vec<usize>>,
 }
 
+impl Serialize for FixedShapeTensorMetadata { // hand-written impl (the struct does not derive `Serialize`)
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut state = serializer.serialize_struct("FixedShapeTensorMetadata", 3)?; // 3 = number of `serialize_field` calls below
+        state.serialize_field("shape", &self.shape)?;
+        state.serialize_field("dim_names", &self.dim_names)?; // `Option` field: passed to the serializer even when `None`
+        state.serialize_field("permutations", &self.permutations)?; // `Option` field: passed to the serializer even when `None`
+        state.end()
+    }
+}
+
+#[derive(Debug)]
+enum MetadataField { // one key of the serialized metadata, as matched in `MetadataFieldVisitor::visit_str`
+    Shape, // "shape"
+    DimNames, // "dim_names"
+    Permutations, // "permutations"
+}
+
+struct MetadataFieldVisitor; // visitor that turns a key string into a `MetadataField`
+
+impl<'de> Visitor<'de> for MetadataFieldVisitor {
+    type Value = MetadataField;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("`shape`, `dim_names`, or `permutations`")
+    }
+
+    fn visit_str<E>(self, value: &str) -> Result<MetadataField, E>
+    where
+        E: de::Error,
+    {
+        match value { // exact string comparison against the three known keys
+            "shape" => Ok(MetadataField::Shape),
+            "dim_names" => Ok(MetadataField::DimNames),
+            "permutations" => Ok(MetadataField::Permutations),
+            _ => Err(de::Error::unknown_field( // any other key is an error rather than being ignored
+                value,
+                &["shape", "dim_names", "permutations"],
+            )),
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for MetadataField { // reads one metadata key via `MetadataFieldVisitor`
+    fn deserialize<D>(deserializer: D) -> Result<MetadataField, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_identifier(MetadataFieldVisitor) // the visitor overrides only `visit_str`
+    }
+}
+
+struct FixedShapeTensorMetadataVisitor; // builds `FixedShapeTensorMetadata` from either a sequence or a map
+
+impl<'de> Visitor<'de> for FixedShapeTensorMetadataVisitor {
+    type Value = FixedShapeTensorMetadata;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("struct FixedShapeTensorMetadata")
+    }
+
+    fn visit_seq<V>(self, mut seq: V) -> Result<FixedShapeTensorMetadata, V::Error> // positional form: shape, dim_names, permutations
+    where
+        V: de::SeqAccess<'de>,
+    {
+        let shape = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(0, &self))?; // sequence ended before element 0
+        let dim_names = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(1, &self))?; // sequence ended before element 1
+        let permutations = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(2, &self))?; // sequence ended before element 2
+        Ok(FixedShapeTensorMetadata {
+            shape,
+            dim_names,
+            permutations,
+        })
+    }
+
+    fn visit_map<V>(self, mut map: V) -> Result<FixedShapeTensorMetadata, V::Error> // keyed form: any order, each key at most once
+    where
+        V: MapAccess<'de>,
+    {
+        let mut shape = None;
+        let mut dim_names = None; // two-level Option: outer = key was seen, inner = the (possibly null) value
+        let mut permutations = None; // two-level Option, same meaning as for `dim_names`
+
+        while let Some(key) = map.next_key()? {
+            match key {
+                MetadataField::Shape => {
+                    if shape.is_some() {
+                        return Err(de::Error::duplicate_field("shape"));
+                    }
+                    shape = Some(map.next_value()?);
+                }
+                MetadataField::DimNames => {
+                    if dim_names.is_some() {
+                        return Err(de::Error::duplicate_field("dim_names"));
+                    }
+                    dim_names = Some(map.next_value()?);
+                }
+                MetadataField::Permutations => {
+                    if permutations.is_some() {
+                        return Err(de::Error::duplicate_field("permutations"));
+                    }
+                    permutations = Some(map.next_value()?);
+                }
+            }
+        }
+
+        let shape = shape.ok_or_else(|| de::Error::missing_field("shape"))?; // `shape` is the only mandatory key
+
+        Ok(FixedShapeTensorMetadata {
+            shape,
+            dim_names: dim_names.unwrap_or_default(), // absent key and explicit null both yield `None`, matching `visit_seq`
+            permutations: permutations.unwrap_or_default(), // absent key and explicit null both yield `None`, matching `visit_seq`
+        })
+    }
+}
+
+impl<'de> Deserialize<'de> for FixedShapeTensorMetadata { // hand-written impl; parsing is done by `FixedShapeTensorMetadataVisitor`
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_struct(
+            "FixedShapeTensorMetadata",
+            &["shape", "dim_names", "permutations"], // same names, in the same order, as the `Serialize` impl writes them
+            FixedShapeTensorMetadataVisitor,
+        )
+    }
+}
+
 impl FixedShapeTensorMetadata {
     /// Returns metadata for a fixed shape tensor extension type.
     ///
@@ -297,8 +437,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
@@ -377,9 +517,8 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(
-        expected = "FixedShapeTensor metadata deserialization failed: missing field `shape`"
-    )]
+    #[should_panic(expected = "FixedShapeTensor metadata deserialization failed: \
+        unknown field `not-shape`, expected one of `shape`, `dim_names`, `permutations`")]
     fn invalid_metadata() {
         let fixed_shape_tensor =
             FixedShapeTensor::try_new(DataType::Float32, [100, 200, 500], None, None).unwrap();
diff --git a/arrow-schema/src/extension/canonical/json.rs b/arrow-schema/src/extension/canonical/json.rs
index 0a8a1ae7e020..297a2d99aa04 100644
--- a/arrow-schema/src/extension/canonical/json.rs
+++ b/arrow-schema/src/extension/canonical/json.rs
@@ -19,9 +19,12 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
 
-use serde::{Deserialize, Serialize};
+use serde_core::de::{self, MapAccess, Visitor};
+use serde_core::ser::SerializeStruct;
+use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
+use std::fmt;
 
-use crate::{extension::ExtensionType, ArrowError, DataType};
+use crate::{ArrowError, DataType, extension::ExtensionType};
 
 /// The extension type for `JSON`.
 ///
@@ -42,10 +45,78 @@ use crate::{extension::ExtensionType, ArrowError, DataType};
 pub struct Json(JsonMetadata);
 
 /// Empty object
-#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Serialize)]
-#[serde(deny_unknown_fields)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 struct Empty {}
 
+impl Serialize for Empty {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let state = serializer.serialize_struct("Empty", 0)?;
+        state.end()
+    }
+}
+
+struct EmptyVisitor;
+
+impl<'de> Visitor<'de> for EmptyVisitor {
+    type Value = Empty;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("struct Empty")
+    }
+
+    fn visit_seq<A>(self, mut _seq: A) -> Result<Self::Value, A::Error>
+    where
+        A: de::SeqAccess<'de>,
+    {
+        Ok(Empty {})
+    }
+
+    fn visit_map<V>(self, mut map: V) -> Result<Empty, V::Error>
+    where
+        V: MapAccess<'de>,
+    {
+        if let Some(key) = map.next_key::<String>()? {
+            return Err(de::Error::unknown_field(&key, EMPTY_FIELDS));
+        }
+        Ok(Empty {})
+    }
+
+    fn visit_u64<E>(self, _v: u64) -> Result<Self::Value, E>
+    where
+        E: de::Error,
+    {
+        Err(de::Error::unknown_field("", EMPTY_FIELDS))
+    }
+
+    fn visit_str<E>(self, _v: &str) -> Result<Self::Value, E>
+    where
+        E: de::Error,
+    {
+        Err(de::Error::unknown_field("", EMPTY_FIELDS))
+    }
+
+    fn visit_bytes<E>(self, _v: &[u8]) -> Result<Self::Value, E>
+    where
+        E: de::Error,
+    {
+        Err(de::Error::unknown_field("", EMPTY_FIELDS))
+    }
+}
+
+static EMPTY_FIELDS: &[&str] = &[];
+
+impl<'de> Deserialize<'de> for Empty {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_struct("Empty", EMPTY_FIELDS, EmptyVisitor)
+    }
+}
+
 /// Extension type metadata for [`Json`].
 #[derive(Debug, Default, Clone, PartialEq)]
 pub struct JsonMetadata(Option<Empty>);
@@ -109,8 +180,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
diff --git a/arrow-schema/src/extension/canonical/mod.rs b/arrow-schema/src/extension/canonical/mod.rs
index 3d66299ca885..ab775d4bd251 100644
--- a/arrow-schema/src/extension/canonical/mod.rs
+++ b/arrow-schema/src/extension/canonical/mod.rs
@@ -87,20 +87,28 @@ impl TryFrom<&Field> for CanonicalExtensionType {
         match value.extension_type_name() {
             // An extension type name with an `arrow.` prefix
             Some(name) if name.starts_with("arrow.") => match name {
-                FixedShapeTensor::NAME => value.try_extension_type::<FixedShapeTensor>().map(Into::into),
-                VariableShapeTensor::NAME => value.try_extension_type::<VariableShapeTensor>().map(Into::into),
+                FixedShapeTensor::NAME => value
+                    .try_extension_type::<FixedShapeTensor>()
+                    .map(Into::into),
+                VariableShapeTensor::NAME => value
+                    .try_extension_type::<VariableShapeTensor>()
+                    .map(Into::into),
                 Json::NAME => value.try_extension_type::<Json>().map(Into::into),
                 Uuid::NAME => value.try_extension_type::<Uuid>().map(Into::into),
                 Opaque::NAME => value.try_extension_type::<Opaque>().map(Into::into),
                 Bool8::NAME => value.try_extension_type::<Bool8>().map(Into::into),
-                _ => Err(ArrowError::InvalidArgumentError(format!("Unsupported canonical extension type: {name}"))),
+                _ => Err(ArrowError::InvalidArgumentError(format!(
+                    "Unsupported canonical extension type: {name}"
+                ))),
             },
             // Name missing the expected prefix
             Some(name) => Err(ArrowError::InvalidArgumentError(format!(
                 "Field extension type name mismatch, expected a name with an `arrow.` prefix, found {name}"
             ))),
             // Name missing
-            None => Err(ArrowError::InvalidArgumentError("Field extension type name missing".to_owned())),
+            None => Err(ArrowError::InvalidArgumentError(
+                "Field extension type name missing".to_owned(),
+            )),
         }
     }
 }
diff --git a/arrow-schema/src/extension/canonical/opaque.rs b/arrow-schema/src/extension/canonical/opaque.rs
index 1db7265cfde7..fceae8d3711d 100644
--- a/arrow-schema/src/extension/canonical/opaque.rs
+++ b/arrow-schema/src/extension/canonical/opaque.rs
@@ -19,9 +19,13 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#opaque>
 
-use serde::{Deserialize, Serialize};
+use serde_core::ser::SerializeStruct;
+use serde_core::{
+    Deserialize, Deserializer, Serialize, Serializer,
+    de::{MapAccess, Visitor},
+};
 
-use crate::{extension::ExtensionType, ArrowError, DataType};
+use crate::{ArrowError, DataType, extension::ExtensionType};
 
 /// The extension type for `Opaque`.
 ///
@@ -61,7 +65,7 @@ impl From<OpaqueMetadata> for Opaque {
 }
 
 /// Extension type metadata for [`Opaque`].
-#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct OpaqueMetadata {
     /// Name of the unknown type in the external system.
     type_name: String,
@@ -70,6 +74,131 @@ pub struct OpaqueMetadata {
     vendor_name: String,
 }
 
+impl Serialize for OpaqueMetadata {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut state = serializer.serialize_struct("OpaqueMetadata", 2)?;
+        state.serialize_field("type_name", &self.type_name)?;
+        state.serialize_field("vendor_name", &self.vendor_name)?;
+        state.end()
+    }
+}
+
+#[derive(Debug)]
+enum MetadataField {
+    TypeName,
+    VendorName,
+}
+
+struct MetadataFieldVisitor;
+
+impl<'de> Visitor<'de> for MetadataFieldVisitor {
+    type Value = MetadataField;
+
+    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+        formatter.write_str("`type_name` or `vendor_name`")
+    }
+
+    fn visit_str<E>(self, value: &str) -> Result<MetadataField, E>
+    where
+        E: serde_core::de::Error,
+    {
+        match value {
+            "type_name" => Ok(MetadataField::TypeName),
+            "vendor_name" => Ok(MetadataField::VendorName),
+            _ => Err(serde_core::de::Error::unknown_field(
+                value,
+                &["type_name", "vendor_name"],
+            )),
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for MetadataField {
+    fn deserialize<D>(deserializer: D) -> Result<MetadataField, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_identifier(MetadataFieldVisitor)
+    }
+}
+
+struct OpaqueMetadataVisitor;
+
+impl<'de> Visitor<'de> for OpaqueMetadataVisitor {
+    type Value = OpaqueMetadata;
+
+    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+        formatter.write_str("struct OpaqueMetadata")
+    }
+
+    fn visit_seq<V>(self, mut seq: V) -> Result<OpaqueMetadata, V::Error>
+    where
+        V: serde_core::de::SeqAccess<'de>,
+    {
+        let type_name = seq
+            .next_element()?
+            .ok_or_else(|| serde_core::de::Error::invalid_length(0, &self))?;
+        let vendor_name = seq
+            .next_element()?
+            .ok_or_else(|| serde_core::de::Error::invalid_length(1, &self))?;
+        Ok(OpaqueMetadata {
+            type_name,
+            vendor_name,
+        })
+    }
+
+    fn visit_map<V>(self, mut map: V) -> Result<OpaqueMetadata, V::Error>
+    where
+        V: MapAccess<'de>,
+    {
+        let mut type_name = None;
+        let mut vendor_name = None;
+
+        while let Some(key) = map.next_key()? {
+            match key {
+                MetadataField::TypeName => {
+                    if type_name.is_some() {
+                        return Err(serde_core::de::Error::duplicate_field("type_name"));
+                    }
+                    type_name = Some(map.next_value()?);
+                }
+                MetadataField::VendorName => {
+                    if vendor_name.is_some() {
+                        return Err(serde_core::de::Error::duplicate_field("vendor_name"));
+                    }
+                    vendor_name = Some(map.next_value()?);
+                }
+            }
+        }
+
+        let type_name =
+            type_name.ok_or_else(|| serde_core::de::Error::missing_field("type_name"))?;
+        let vendor_name =
+            vendor_name.ok_or_else(|| serde_core::de::Error::missing_field("vendor_name"))?;
+
+        Ok(OpaqueMetadata {
+            type_name,
+            vendor_name,
+        })
+    }
+}
+
+impl<'de> Deserialize<'de> for OpaqueMetadata {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_struct(
+            "OpaqueMetadata",
+            &["type_name", "vendor_name"],
+            OpaqueMetadataVisitor,
+        )
+    }
+}
+
 impl OpaqueMetadata {
     /// Returns a new `OpaqueMetadata`.
     pub fn new(type_name: impl Into<String>, vendor_name: impl Into<String>) -> Self {
@@ -135,8 +264,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
diff --git a/arrow-schema/src/extension/canonical/uuid.rs b/arrow-schema/src/extension/canonical/uuid.rs
index 8b2e71b7b5aa..09533564ed44 100644
--- a/arrow-schema/src/extension/canonical/uuid.rs
+++ b/arrow-schema/src/extension/canonical/uuid.rs
@@ -19,7 +19,7 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid>
 
-use crate::{extension::ExtensionType, ArrowError, DataType};
+use crate::{ArrowError, DataType, extension::ExtensionType};
 
 /// The extension type for `UUID`.
 ///
@@ -80,8 +80,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
diff --git a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
index 804591776b2f..b5403dcf684f 100644
--- a/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
+++ b/arrow-schema/src/extension/canonical/variable_shape_tensor.rs
@@ -19,9 +19,11 @@
 //!
 //! <https://arrow.apache.org/docs/format/CanonicalExtensions.html#variable-shape-tensor>
 
-use serde::{Deserialize, Serialize};
+use serde_core::de::{self, MapAccess, Visitor};
+use serde_core::{Deserialize, Deserializer, Serialize, Serializer};
+use std::fmt;
 
-use crate::{extension::ExtensionType, ArrowError, DataType, Field};
+use crate::{ArrowError, DataType, Field, extension::ExtensionType};
 
 /// The extension type for `VariableShapeTensor`.
 ///
@@ -140,7 +142,7 @@ impl VariableShapeTensor {
 }
 
 /// Extension type metadata for [`VariableShapeTensor`].
-#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct VariableShapeTensorMetadata {
     /// Explicit names to tensor dimensions.
     dim_names: Option<Vec<String>>,
@@ -148,11 +150,147 @@ pub struct VariableShapeTensorMetadata {
     /// Indices of the desired ordering of the original dimensions.
     permutations: Option<Vec<usize>>,
 
-    /// Sizes of individual tensor’s dimensions which are guaranteed to stay
+    /// Sizes of individual tensor's dimensions which are guaranteed to stay
     /// constant in uniform dimensions and can vary in non-uniform dimensions.
     uniform_shape: Option<Vec<Option<i32>>>,
 }
 
+impl Serialize for VariableShapeTensorMetadata {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        use serde_core::ser::SerializeStruct;
+        let mut state = serializer.serialize_struct("VariableShapeTensorMetadata", 3)?;
+        state.serialize_field("dim_names", &self.dim_names)?;
+        state.serialize_field("permutations", &self.permutations)?;
+        state.serialize_field("uniform_shape", &self.uniform_shape)?;
+        state.end()
+    }
+}
+
+#[derive(Debug)]
+enum MetadataField {
+    DimNames,
+    Permutations,
+    UniformShape,
+}
+
+struct MetadataFieldVisitor;
+
+impl<'de> Visitor<'de> for MetadataFieldVisitor {
+    type Value = MetadataField;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("`dim_names`, `permutations`, or `uniform_shape`")
+    }
+
+    fn visit_str<E>(self, value: &str) -> Result<MetadataField, E>
+    where
+        E: de::Error,
+    {
+        match value {
+            "dim_names" => Ok(MetadataField::DimNames),
+            "permutations" => Ok(MetadataField::Permutations),
+            "uniform_shape" => Ok(MetadataField::UniformShape),
+            _ => Err(de::Error::unknown_field(
+                value,
+                &["dim_names", "permutations", "uniform_shape"],
+            )),
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for MetadataField {
+    fn deserialize<D>(deserializer: D) -> Result<MetadataField, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_identifier(MetadataFieldVisitor)
+    }
+}
+
+struct VariableShapeTensorMetadataVisitor;
+
+impl<'de> Visitor<'de> for VariableShapeTensorMetadataVisitor {
+    type Value = VariableShapeTensorMetadata;
+
+    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        formatter.write_str("struct VariableShapeTensorMetadata")
+    }
+
+    fn visit_seq<V>(self, mut seq: V) -> Result<VariableShapeTensorMetadata, V::Error>
+    where
+        V: de::SeqAccess<'de>,
+    {
+        let dim_names = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(0, &self))?;
+        let permutations = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(1, &self))?;
+        let uniform_shape = seq
+            .next_element()?
+            .ok_or_else(|| de::Error::invalid_length(2, &self))?;
+        Ok(VariableShapeTensorMetadata {
+            dim_names,
+            permutations,
+            uniform_shape,
+        })
+    }
+
+    fn visit_map<V>(self, mut map: V) -> Result<VariableShapeTensorMetadata, V::Error>
+    where
+        V: MapAccess<'de>,
+    {
+        let mut dim_names = None;
+        let mut permutations = None;
+        let mut uniform_shape = None;
+
+        while let Some(key) = map.next_key()? {
+            match key {
+                MetadataField::DimNames => {
+                    if dim_names.is_some() {
+                        return Err(de::Error::duplicate_field("dim_names"));
+                    }
+                    dim_names = Some(map.next_value()?);
+                }
+                MetadataField::Permutations => {
+                    if permutations.is_some() {
+                        return Err(de::Error::duplicate_field("permutations"));
+                    }
+                    permutations = Some(map.next_value()?);
+                }
+                MetadataField::UniformShape => {
+                    if uniform_shape.is_some() {
+                        return Err(de::Error::duplicate_field("uniform_shape"));
+                    }
+                    uniform_shape = Some(map.next_value()?);
+                }
+            }
+        }
+
+        Ok(VariableShapeTensorMetadata {
+            dim_names,
+            permutations,
+            uniform_shape,
+        })
+    }
+}
+
+impl<'de> Deserialize<'de> for VariableShapeTensorMetadata {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_struct(
+            "VariableShapeTensorMetadata",
+            &["dim_names", "permutations", "uniform_shape"],
+            VariableShapeTensorMetadataVisitor,
+        )
+    }
+}
+
 impl VariableShapeTensorMetadata {
     /// Returns metadata for a variable shape tensor extension type.
     ///
@@ -310,16 +448,19 @@ impl ExtensionType for VariableShapeTensor {
                     DataType::FixedSizeList(_, list_size) => {
                         let dimensions = usize::try_from(*list_size).expect("conversion failed");
                         // Make sure the metadata is valid.
-                        let metadata = VariableShapeTensorMetadata::try_new(dimensions, metadata.dim_names, metadata.permutations, metadata.uniform_shape)?;
+                        let metadata = VariableShapeTensorMetadata::try_new(
+                            dimensions,
+                            metadata.dim_names,
+                            metadata.permutations,
+                            metadata.uniform_shape,
+                        )?;
                         let data_field = &fields[0];
                         match data_field.data_type() {
-                            DataType::List(field) => {
-                                Ok(Self {
-                                    value_type: field.data_type().clone(),
-                                    dimensions,
-                                    metadata
-                                })
-                            }
+                            DataType::List(field) => Ok(Self {
+                                value_type: field.data_type().clone(),
+                                dimensions,
+                                metadata,
+                            }),
                             data_type => Err(ArrowError::InvalidArgumentError(format!(
                                 "VariableShapeTensor data type mismatch, expected List for data field, found {data_type}"
                             ))),
@@ -342,8 +483,8 @@ mod tests {
     #[cfg(feature = "canonical_extension_types")]
     use crate::extension::CanonicalExtensionType;
     use crate::{
-        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
         Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     };
 
     use super::*;
diff --git a/arrow-schema/src/extension/mod.rs b/arrow-schema/src/extension/mod.rs
index 92b0024bfd2d..cd17272e15ab 100644
--- a/arrow-schema/src/extension/mod.rs
+++ b/arrow-schema/src/extension/mod.rs
@@ -16,8 +16,6 @@
 // under the License.
 
 //! Extension types.
-//!
-//! <div class="warning">This module is experimental. There might be breaking changes between minor releases.</div>
 
 #[cfg(feature = "canonical_extension_types")]
 mod canonical;
diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs
index d86fb66190b4..46c622a6d3e8 100644
--- a/arrow-schema/src/ffi.rs
+++ b/arrow-schema/src/ffi.rs
@@ -42,7 +42,7 @@ use std::borrow::Cow;
 use std::sync::Arc;
 use std::{
     collections::HashMap,
-    ffi::{c_char, c_void, CStr, CString},
+    ffi::{CStr, CString, c_char, c_void},
 };
 
 bitflags! {
@@ -98,20 +98,20 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) {
     if schema.is_null() {
         return;
     }
-    let schema = &mut *schema;
+    let schema = unsafe { &mut *schema };
 
     // take ownership back to release it.
-    drop(CString::from_raw(schema.format as *mut c_char));
+    drop(unsafe { CString::from_raw(schema.format as *mut c_char) });
     if !schema.name.is_null() {
-        drop(CString::from_raw(schema.name as *mut c_char));
+        drop(unsafe { CString::from_raw(schema.name as *mut c_char) });
     }
     if !schema.private_data.is_null() {
-        let private_data = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
+        let private_data = unsafe { Box::from_raw(schema.private_data as *mut SchemaPrivateData) };
         for child in private_data.children.iter() {
-            drop(Box::from_raw(*child))
+            drop(unsafe { Box::from_raw(*child) })
         }
         if !private_data.dictionary.is_null() {
-            drop(Box::from_raw(private_data.dictionary));
+            drop(unsafe { Box::from_raw(private_data.dictionary) });
         }
 
         drop(private_data);
@@ -242,7 +242,7 @@ impl FFI_ArrowSchema {
     /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
     /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
     pub unsafe fn from_raw(schema: *mut FFI_ArrowSchema) -> Self {
-        std::ptr::replace(schema, Self::empty())
+        unsafe { std::ptr::replace(schema, Self::empty()) }
     }
 
     /// Create an empty [`FFI_ArrowSchema`]
@@ -456,6 +456,14 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
                 let c_child = c_schema.child(0);
                 DataType::LargeList(Arc::new(Field::try_from(c_child)?))
             }
+            "+vl" => {
+                let c_child = c_schema.child(0);
+                DataType::ListView(Arc::new(Field::try_from(c_child)?))
+            }
+            "+vL" => {
+                let c_child = c_schema.child(0);
+                DataType::LargeListView(Arc::new(Field::try_from(c_child)?))
+            }
             "+s" => {
                 let fields = c_schema.children().map(Field::try_from);
                 DataType::Struct(fields.collect::<Result<_, ArrowError>>()?)
@@ -483,7 +491,7 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
                                 "FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
                         })?;
                         DataType::FixedSizeBinary(parsed_num_bytes)
-                    },
+                    }
                     // FixedSizeList type in format "+w:num_elems"
                     ["+w", num_elems] => {
                         let c_child = c_schema.child(0);
@@ -491,55 +499,63 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
                             ArrowError::CDataInterface(
                                 "The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
                         })?;
-                        DataType::FixedSizeList(Arc::new(Field::try_from(c_child)?), parsed_num_elems)
-                    },
+                        DataType::FixedSizeList(
+                            Arc::new(Field::try_from(c_child)?),
+                            parsed_num_elems,
+                        )
+                    }
                     // Decimal types in format "d:precision,scale" or "d:precision,scale,bitWidth"
-                    ["d", extra] => {
-                        match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
-                            [precision, scale] => {
-                                let parsed_precision = precision.parse::<u8>().map_err(|_| {
-                                    ArrowError::CDataInterface(
-                                        "The decimal type requires an integer precision".to_string(),
-                                    )
-                                })?;
-                                let parsed_scale = scale.parse::<i8>().map_err(|_| {
-                                    ArrowError::CDataInterface(
-                                        "The decimal type requires an integer scale".to_string(),
-                                    )
-                                })?;
-                                DataType::Decimal128(parsed_precision, parsed_scale)
-                            },
-                            [precision, scale, bits] => {
-                                let parsed_precision = precision.parse::<u8>().map_err(|_| {
-                                    ArrowError::CDataInterface(
-                                        "The decimal type requires an integer precision".to_string(),
-                                    )
-                                })?;
-                                let parsed_scale = scale.parse::<i8>().map_err(|_| {
-                                    ArrowError::CDataInterface(
-                                        "The decimal type requires an integer scale".to_string(),
-                                    )
-                                })?;
-                                match *bits {
+                    ["d", extra] => match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
+                        [precision, scale] => {
+                            let parsed_precision = precision.parse::<u8>().map_err(|_| {
+                                ArrowError::CDataInterface(
+                                    "The decimal type requires an integer precision".to_string(),
+                                )
+                            })?;
+                            let parsed_scale = scale.parse::<i8>().map_err(|_| {
+                                ArrowError::CDataInterface(
+                                    "The decimal type requires an integer scale".to_string(),
+                                )
+                            })?;
+                            DataType::Decimal128(parsed_precision, parsed_scale)
+                        }
+                        [precision, scale, bits] => {
+                            let parsed_precision = precision.parse::<u8>().map_err(|_| {
+                                ArrowError::CDataInterface(
+                                    "The decimal type requires an integer precision".to_string(),
+                                )
+                            })?;
+                            let parsed_scale = scale.parse::<i8>().map_err(|_| {
+                                ArrowError::CDataInterface(
+                                    "The decimal type requires an integer scale".to_string(),
+                                )
+                            })?;
+                            match *bits {
+                                    "32" => DataType::Decimal32(parsed_precision, parsed_scale),
+                                    "64" => DataType::Decimal64(parsed_precision, parsed_scale),
                                     "128" => DataType::Decimal128(parsed_precision, parsed_scale),
                                     "256" => DataType::Decimal256(parsed_precision, parsed_scale),
-                                    _ => return Err(ArrowError::CDataInterface("Only 128- and 256- bit wide decimals are supported in the Rust implementation".to_string())),
+                                    _ => return Err(ArrowError::CDataInterface("Only 32/64/128/256 bit wide decimals are supported in the Rust implementation".to_string())),
                                 }
-                            }
-                            _ => {
-                                return Err(ArrowError::CDataInterface(format!(
-                                    "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
-                                )))
-                            }
                         }
-                    }
+                        _ => {
+                            return Err(ArrowError::CDataInterface(format!(
+                                "The decimal pattern \"d:{extra:?}\" is not supported in the Rust implementation"
+                            )));
+                        }
+                    },
                     // DenseUnion
                     ["+ud", extra] => {
-                        let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
-                            ArrowError::CDataInterface(
-                                "The Union type requires an integer type id".to_string(),
-                            )
-                        })).collect::<Result<Vec<_>, ArrowError>>()?;
+                        let type_ids = extra
+                            .split(',')
+                            .map(|t| {
+                                t.parse::<i8>().map_err(|_| {
+                                    ArrowError::CDataInterface(
+                                        "The Union type requires an integer type id".to_string(),
+                                    )
+                                })
+                            })
+                            .collect::<Result<Vec<_>, ArrowError>>()?;
                         let mut fields = Vec::with_capacity(type_ids.len());
                         for idx in 0..c_schema.n_children {
                             let c_child = c_schema.child(idx as usize);
@@ -549,19 +565,25 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
 
                         if fields.len() != type_ids.len() {
                             return Err(ArrowError::CDataInterface(
-                                "The Union type requires same number of fields and type ids".to_string(),
+                                "The Union type requires same number of fields and type ids"
+                                    .to_string(),
                             ));
                         }
 
-                        DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense)
+                        DataType::Union(UnionFields::try_new(type_ids, fields)?, UnionMode::Dense)
                     }
                     // SparseUnion
                     ["+us", extra] => {
-                        let type_ids = extra.split(',').map(|t| t.parse::<i8>().map_err(|_| {
-                            ArrowError::CDataInterface(
-                                "The Union type requires an integer type id".to_string(),
-                            )
-                        })).collect::<Result<Vec<_>, ArrowError>>()?;
+                        let type_ids = extra
+                            .split(',')
+                            .map(|t| {
+                                t.parse::<i8>().map_err(|_| {
+                                    ArrowError::CDataInterface(
+                                        "The Union type requires an integer type id".to_string(),
+                                    )
+                                })
+                            })
+                            .collect::<Result<Vec<_>, ArrowError>>()?;
                         let mut fields = Vec::with_capacity(type_ids.len());
                         for idx in 0..c_schema.n_children {
                             let c_child = c_schema.child(idx as usize);
@@ -571,11 +593,12 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
 
                         if fields.len() != type_ids.len() {
                             return Err(ArrowError::CDataInterface(
-                                "The Union type requires same number of fields and type ids".to_string(),
+                                "The Union type requires same number of fields and type ids"
+                                    .to_string(),
                             ));
                         }
 
-                        DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Sparse)
+                        DataType::Union(UnionFields::try_new(type_ids, fields)?, UnionMode::Sparse)
                     }
 
                     // Timestamps in format "tts:" and "tts:America/New_York" for no timezones and timezones resp.
@@ -583,22 +606,14 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
                     ["tsm", ""] => DataType::Timestamp(TimeUnit::Millisecond, None),
                     ["tsu", ""] => DataType::Timestamp(TimeUnit::Microsecond, None),
                     ["tsn", ""] => DataType::Timestamp(TimeUnit::Nanosecond, None),
-                    ["tss", tz] => {
-                        DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz)))
-                    }
-                    ["tsm", tz] => {
-                        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz)))
-                    }
-                    ["tsu", tz] => {
-                        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz)))
-                    }
-                    ["tsn", tz] => {
-                        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz)))
-                    }
+                    ["tss", tz] => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(*tz))),
+                    ["tsm", tz] => DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(*tz))),
+                    ["tsu", tz] => DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(*tz))),
+                    ["tsn", tz] => DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(*tz))),
                     _ => {
                         return Err(ArrowError::CDataInterface(format!(
                             "The datatype \"{other:?}\" is still not supported in Rust implementation"
-                        )))
+                        )));
                     }
                 }
             }
@@ -650,6 +665,8 @@ impl TryFrom<&DataType> for FFI_ArrowSchema {
         let children = match dtype {
             DataType::List(child)
             | DataType::LargeList(child)
+            | DataType::ListView(child)
+            | DataType::LargeListView(child)
             | DataType::FixedSizeList(child, _)
             | DataType::Map(child, _) => {
                 vec![FFI_ArrowSchema::try_from(child.as_ref())?]
@@ -706,6 +723,12 @@ fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError>
         DataType::LargeUtf8 => Ok("U".into()),
         DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))),
         DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))),
+        DataType::Decimal32(precision, scale) => {
+            Ok(Cow::Owned(format!("d:{precision},{scale},32")))
+        }
+        DataType::Decimal64(precision, scale) => {
+            Ok(Cow::Owned(format!("d:{precision},{scale},64")))
+        }
         DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))),
         DataType::Decimal256(precision, scale) => {
             Ok(Cow::Owned(format!("d:{precision},{scale},256")))
@@ -733,6 +756,8 @@ fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError>
         DataType::Interval(IntervalUnit::MonthDayNano) => Ok("tin".into()),
         DataType::List(_) => Ok("+l".into()),
         DataType::LargeList(_) => Ok("+L".into()),
+        DataType::ListView(_) => Ok("+vl".into()),
+        DataType::LargeListView(_) => Ok("+vL".into()),
         DataType::Struct(_) => Ok("+s".into()),
         DataType::Map(_, _) => Ok("+m".into()),
         DataType::RunEndEncoded(_, _) => Ok("+r".into()),
@@ -861,6 +886,16 @@ mod tests {
             DataType::Int16,
             false,
         ))));
+        round_trip_type(DataType::ListView(Arc::new(Field::new(
+            "a",
+            DataType::Int16,
+            false,
+        ))));
+        round_trip_type(DataType::LargeListView(Arc::new(Field::new(
+            "a",
+            DataType::Int16,
+            false,
+        ))));
         round_trip_type(DataType::Struct(Fields::from(vec![Field::new(
             "a",
             DataType::Utf8,
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 16573d8cdce0..c4566e41bfa8 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -26,8 +26,8 @@ use crate::datatype::DataType;
 use crate::extension::CanonicalExtensionType;
 use crate::schema::SchemaBuilder;
 use crate::{
-    extension::{ExtensionType, EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
     Fields, UnionFields, UnionMode,
+    extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
 };
 
 /// A reference counted [`Field`]
@@ -44,7 +44,7 @@ pub type FieldRef = Arc<Field>;
 ///
 /// Arrow Extension types, are encoded in `Field`s metadata. See
 /// [`Self::try_extension_type`] to retrieve the [`ExtensionType`], if any.
-#[derive(Debug, Clone)]
+#[derive(Clone)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct Field {
     name: String,
@@ -60,6 +60,46 @@ pub struct Field {
     metadata: HashMap<String, String>,
 }
 
+impl std::fmt::Debug for Field {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #![expect(deprecated)] // Must still print dict_id, if set
+        let Self {
+            name,
+            data_type,
+            nullable,
+            dict_id,
+            dict_is_ordered,
+            metadata,
+        } = self;
+
+        let mut s = f.debug_struct("Field");
+
+        if name != "item" {
+            // Keep it short when debug-formatting `DataType::List`
+            s.field("name", name);
+        }
+
+        s.field("data_type", data_type);
+
+        if *nullable {
+            s.field("nullable", nullable);
+        }
+
+        if *dict_id != 0 {
+            s.field("dict_id", dict_id);
+        }
+
+        if *dict_is_ordered {
+            s.field("dict_is_ordered", dict_is_ordered);
+        }
+
+        if !metadata.is_empty() {
+            s.field("metadata", metadata);
+        }
+        s.finish()
+    }
+}
+
 // Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
 // into comparison. However, these properties are only used in IPC context
 // for matching dictionary encoded data. They are not necessary to be same
@@ -132,6 +172,12 @@ impl Hash for Field {
     }
 }
 
+impl AsRef<Field> for Field {
+    fn as_ref(&self) -> &Field {
+        self
+    }
+}
+
 impl Field {
     /// Default list member field name
     pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
@@ -296,6 +342,13 @@ impl Field {
     /// - `type_ids`: the union type ids
     /// - `fields`: the union fields
     /// - `mode`: the union mode
+    ///
+    /// # Panics
+    ///
+    /// Panics if:
+    /// - any type ID is negative
+    /// - type IDs contain duplicates
+    /// - the number of type IDs does not equal the number of fields
     pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
     where
         S: Into<String>,
@@ -305,7 +358,10 @@ impl Field {
     {
         Self::new(
             name,
-            DataType::Union(UnionFields::new(type_ids, fields), mode),
+            DataType::Union(
+                UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
+                mode,
+            ),
             false, // Unions cannot be nullable
         )
     }
@@ -451,7 +507,12 @@ impl Field {
     /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
     /// if set in the [`Field::metadata`].
     ///
-    /// # Error
+    /// Note that using `try_extension_type` with an extension type that does
+    /// not match the name in the metadata will return an `ArrowError` which can
+    /// be slow due to string allocations. If you only want to check if a
+    /// [`Field`] has a specific [`ExtensionType`], see the example below.
+    ///
+    /// # Errors
     ///
     /// Returns an error if
     /// - this field does not have the name of this extension type
@@ -462,6 +523,57 @@ impl Field {
     /// - the construction of the extension type ([`ExtensionType::try_new`])
     ///   fail (for example when the [`Field::data_type`] is not supported by
     ///   the extension type ([`ExtensionType::supports_data_type`]))
+    ///
+    /// # Example: Check and retrieve an extension type
+    /// You can use this to check if a [`Field`] has a specific
+    /// [`ExtensionType`] and retrieve it:
+    /// ```
+    /// # use arrow_schema::{DataType, Field, ArrowError};
+    /// # use arrow_schema::extension::ExtensionType;
+    /// # struct MyExtensionType;
+    /// # impl ExtensionType for MyExtensionType {
+    /// # const NAME: &'static str = "my_extension";
+    /// # type Metadata = String;
+    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
+    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
+    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
+    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
+    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
+    /// # }
+    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
+    /// let field = get_field();
+    /// if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
+    ///   // do something with extension_type
+    /// }
+    /// ```
+    ///
+    /// # Example: Checking if a field has a specific extension type first
+    ///
+    /// Since `try_extension_type` returns an error (which allocates a string)
+    /// when the extension name does not match, it is more efficient to first
+    /// check if the name matches before calling `try_extension_type`:
+    /// ```
+    /// # use arrow_schema::{DataType, Field, ArrowError};
+    /// # use arrow_schema::extension::ExtensionType;
+    /// # struct MyExtensionType;
+    /// # impl ExtensionType for MyExtensionType {
+    /// # const NAME: &'static str = "my_extension";
+    /// # type Metadata = String;
+    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
+    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
+    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
+    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
+    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
+    /// # }
+    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
+    /// let field = get_field();
+    /// // First check if the name matches before calling the potentially expensive `try_extension_type`
+    /// if field.extension_type_name() == Some(MyExtensionType::NAME) {
+    ///   if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
+    ///     // do something with extension_type
+    ///   }
+    /// }
+    /// ```
     pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
         // Check the extension name in the metadata
         match self.extension_type_name() {
@@ -547,7 +659,7 @@ impl Field {
     /// # Error
     ///
     /// Returns an error if
-    /// - this field does have a canonical extension type (mismatch or missing)
+    /// - this field does not have a canonical extension type (mismatch or missing)
     /// - the canonical extension is not supported
     /// - the construction of the extension type fails
     #[cfg(feature = "canonical_extension_types")]
@@ -606,6 +718,8 @@ impl Field {
             DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
             DataType::List(field)
             | DataType::LargeList(field)
+            | DataType::ListView(field)
+            | DataType::LargeListView(field)
             | DataType::FixedSizeList(field, _)
             | DataType::Map(field, _) => field.fields(),
             DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
@@ -695,13 +809,6 @@ impl Field {
     /// assert!(field.is_nullable());
     /// ```
     pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
-        #[allow(deprecated)]
-        if from.dict_id != self.dict_id {
-            return Err(ArrowError::SchemaError(format!(
-                "Fail to merge schema field '{}' because from dict_id = {} does not match {}",
-                self.name, from.dict_id, self.dict_id
-            )));
-        }
         if from.dict_is_ordered != self.dict_is_ordered {
             return Err(ArrowError::SchemaError(format!(
                 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
@@ -735,24 +842,28 @@ impl Field {
             DataType::Struct(nested_fields) => match &from.data_type {
                 DataType::Struct(from_nested_fields) => {
                     let mut builder = SchemaBuilder::new();
-                    nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?;
+                    nested_fields
+                        .iter()
+                        .chain(from_nested_fields)
+                        .try_for_each(|f| builder.try_merge(f))?;
                     *nested_fields = builder.finish().fields;
                 }
                 _ => {
-                    return Err(ArrowError::SchemaError(
-                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
-                            self.name, from.data_type)
-                ))}
+                    return Err(ArrowError::SchemaError(format!(
+                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
+                        self.name, from.data_type
+                    )));
+                }
             },
             DataType::Union(nested_fields, _) => match &from.data_type {
                 DataType::Union(from_nested_fields, _) => {
                     nested_fields.try_merge(from_nested_fields)?
                 }
                 _ => {
-                    return Err(ArrowError::SchemaError(
-                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
-                            self.name, from.data_type)
-                    ));
+                    return Err(ArrowError::SchemaError(format!(
+                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
+                        self.name, from.data_type
+                    )));
                 }
             },
             DataType::List(field) => match &from.data_type {
@@ -760,30 +871,32 @@ impl Field {
                     let mut f = (**field).clone();
                     f.try_merge(from_field)?;
                     (*field) = Arc::new(f);
-                },
+                }
                 _ => {
-                    return Err(ArrowError::SchemaError(
-                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
-                            self.name, from.data_type)
-                ))}
+                    return Err(ArrowError::SchemaError(format!(
+                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
+                        self.name, from.data_type
+                    )));
+                }
             },
             DataType::LargeList(field) => match &from.data_type {
                 DataType::LargeList(from_field) => {
                     let mut f = (**field).clone();
                     f.try_merge(from_field)?;
                     (*field) = Arc::new(f);
-                },
+                }
                 _ => {
-                    return Err(ArrowError::SchemaError(
-                        format!("Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
-                            self.name, from.data_type)
-                ))}
+                    return Err(ArrowError::SchemaError(format!(
+                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
+                        self.name, from.data_type
+                    )));
+                }
             },
             DataType::Null => {
                 self.nullable = true;
                 self.data_type = from.data_type.clone();
             }
-            | DataType::Boolean
+            DataType::Boolean
             | DataType::Int8
             | DataType::Int16
             | DataType::Int32
@@ -815,15 +928,17 @@ impl Field {
             | DataType::Utf8
             | DataType::LargeUtf8
             | DataType::Utf8View
+            | DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
             | DataType::Decimal128(_, _)
             | DataType::Decimal256(_, _) => {
                 if from.data_type == DataType::Null {
                     self.nullable = true;
                 } else if self.data_type != from.data_type {
-                    return Err(ArrowError::SchemaError(
-                        format!("Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
-                            self.name, from.data_type, self.data_type)
-                    ));
+                    return Err(ArrowError::SchemaError(format!(
+                        "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
+                        self.name, from.data_type, self.data_type
+                    )));
                 }
             }
         }
@@ -838,11 +953,8 @@ impl Field {
     /// * self.metadata is a superset of other.metadata
     /// * all other fields are equal
     pub fn contains(&self, other: &Field) -> bool {
-        #[allow(deprecated)]
-        let matching_dict_id = self.dict_id == other.dict_id;
         self.name == other.name
         && self.data_type.contains(&other.data_type)
-        && matching_dict_id
         && self.dict_is_ordered == other.dict_is_ordered
         // self need to be nullable or both of them are not nullable
         && (self.nullable || !other.nullable)
@@ -868,10 +980,37 @@ impl Field {
     }
 }
 
-// TODO: improve display with crate https://crates.io/crates/derive_more ?
 impl std::fmt::Display for Field {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{self:?}")
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #![expect(deprecated)] // Must still print dict_id, if set
+        let Self {
+            name,
+            data_type,
+            nullable,
+            dict_id,
+            dict_is_ordered,
+            metadata,
+        } = self;
+        let maybe_nullable = if *nullable { "nullable " } else { "" };
+        let metadata_str = if metadata.is_empty() {
+            String::new()
+        } else {
+            format!(", metadata: {metadata:?}")
+        };
+        let dict_id_str = if dict_id == &0 {
+            String::new()
+        } else {
+            format!(", dict_id: {dict_id}")
+        };
+        let dict_is_ordered_str = if *dict_is_ordered {
+            ", dict_is_ordered"
+        } else {
+            ""
+        };
+        write!(
+            f,
+            "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
+        )
     }
 }
 
@@ -895,6 +1034,24 @@ mod test {
         Field::new_dict(s, DataType::Int64, false, 4, false);
     }
 
+    #[test]
+    #[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
+    fn test_debug_format_field() {
+        // Make sure the `Debug` formatting of `Field` is readable and not too long
+        insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
+        Field {
+            data_type: UInt8,
+        }
+        ");
+        insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
+        Field {
+            name: "column",
+            data_type: LargeUtf8,
+            nullable: true,
+        }
+        "#);
+    }
+
     #[test]
     fn test_merge_incompatible_types() {
         let mut field = Field::new("c1", DataType::Int64, false);
@@ -902,7 +1059,10 @@ mod test {
             .try_merge(&Field::new("c1", DataType::Float32, true))
             .expect_err("should fail")
             .to_string();
-        assert_eq!("Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64", result);
+        assert_eq!(
+            "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
+            result
+        );
     }
 
     #[test]
@@ -1126,6 +1286,36 @@ mod test {
         assert!(f1.cmp(&f3).is_lt());
     }
 
+    #[test]
+    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
+    fn test_field_as_ref() {
+        let field = || Field::new("x", DataType::Binary, false);
+
+        // AsRef can be used in a function accepting a field.
+        // However, this case actually works a bit better when function takes `&Field`
+        fn accept_ref(_: impl AsRef<Field>) {}
+
+        accept_ref(field());
+        accept_ref(&field());
+        accept_ref(&&field());
+        accept_ref(Arc::new(field()));
+        accept_ref(&Arc::new(field()));
+        accept_ref(&&Arc::new(field()));
+
+        // AsRef can be used in a function accepting a collection of fields in any form,
+        // such as &[Field], or &[Arc<Field>]
+        fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}
+
+        accept_refs(vec![field()]);
+        accept_refs(vec![&field()]);
+        accept_refs(vec![Arc::new(field())]);
+        accept_refs(vec![&Arc::new(field())]);
+        accept_refs(&vec![field()]);
+        accept_refs(&vec![&field()]);
+        accept_refs(&vec![Arc::new(field())]);
+        accept_refs(&vec![&Arc::new(field())]);
+    }
+
     #[test]
     fn test_contains_reflexivity() {
         let mut field = Field::new("field1", DataType::Float16, false);
@@ -1195,13 +1385,14 @@ mod test {
         let field1 = Field::new(
             "field1",
             DataType::Union(
-                UnionFields::new(
+                UnionFields::try_new(
                     vec![1, 2],
                     vec![
                         Field::new("field1", DataType::UInt8, true),
                         Field::new("field3", DataType::Utf8, false),
                     ],
-                ),
+                )
+                .unwrap(),
                 UnionMode::Dense,
             ),
             true,
@@ -1209,13 +1400,14 @@ mod test {
         let field2 = Field::new(
             "field1",
             DataType::Union(
-                UnionFields::new(
+                UnionFields::try_new(
                     vec![1, 3],
                     vec![
                         Field::new("field1", DataType::UInt8, false),
                         Field::new("field3", DataType::Utf8, false),
                     ],
-                ),
+                )
+                .unwrap(),
                 UnionMode::Dense,
             ),
             true,
@@ -1226,13 +1418,14 @@ mod test {
         let field1 = Field::new(
             "field1",
             DataType::Union(
-                UnionFields::new(
+                UnionFields::try_new(
                     vec![1, 2],
                     vec![
                         Field::new("field1", DataType::UInt8, true),
                         Field::new("field3", DataType::Utf8, false),
                     ],
-                ),
+                )
+                .unwrap(),
                 UnionMode::Dense,
             ),
             true,
@@ -1240,13 +1433,14 @@ mod test {
         let field2 = Field::new(
             "field1",
             DataType::Union(
-                UnionFields::new(
+                UnionFields::try_new(
                     vec![1, 2],
                     vec![
                         Field::new("field1", DataType::UInt8, false),
                         Field::new("field3", DataType::Utf8, false),
                     ],
-                ),
+                )
+                .unwrap(),
                 UnionMode::Dense,
             ),
             true,
@@ -1256,8 +1450,8 @@ mod test {
 
     #[cfg(feature = "serde")]
     fn assert_binary_serde_round_trip(field: Field) {
-        let serialized = bincode::serialize(&field).unwrap();
-        let deserialized: Field = bincode::deserialize(&serialized).unwrap();
+        let serialized = postcard::to_stdvec(&field).unwrap();
+        let deserialized: Field = postcard::from_bytes(&serialized).unwrap();
         assert_eq!(field, deserialized)
     }
 
diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs
index 904b933cd299..93638181d9ae 100644
--- a/arrow-schema/src/fields.rs
+++ b/arrow-schema/src/fields.rs
@@ -329,6 +329,22 @@ impl std::fmt::Debug for UnionFields {
     }
 }
 
+/// Allows direct indexing into [`UnionFields`] to access fields by position.
+///
+/// # Panics
+///
+/// Panics if the index is out of bounds. Note that [`UnionFields`] supports
+/// a maximum of 128 fields, as type IDs are represented as `i8` values.
+///
+/// For a non-panicking alternative, use [`UnionFields::get`].
+impl std::ops::Index<usize> for UnionFields {
+    type Output = (i8, FieldRef);
+
+    fn index(&self, index: usize) -> &Self::Output {
+        &self.0[index]
+    }
+}
+
 impl UnionFields {
     /// Create a new [`UnionFields`] with no fields
     pub fn empty() -> Self {
@@ -339,19 +355,212 @@ impl UnionFields {
     ///
     /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
     ///
+    /// # Errors
+    ///
+    /// This function returns an error if:
+    /// - Any type_id is negative or appears more than once (duplicate type ids)
+    /// - The number of type_ids does not equal the number of fields
+    ///
+    /// # Examples
+    ///
     /// ```
     /// use arrow_schema::{DataType, Field, UnionFields};
     /// // Create a new UnionFields with type id mapping
     /// // 1 -> DataType::UInt8
     /// // 3 -> DataType::Utf8
-    /// UnionFields::new(
+    /// let result = UnionFields::try_new(
     ///     vec![1, 3],
     ///     vec![
     ///         Field::new("field1", DataType::UInt8, false),
     ///         Field::new("field3", DataType::Utf8, false),
     ///     ],
     /// );
+    /// assert!(result.is_ok());
+    ///
+    /// // This will fail due to duplicate type ids
+    /// let result = UnionFields::try_new(
+    ///     vec![1, 1],
+    ///     vec![
+    ///         Field::new("field1", DataType::UInt8, false),
+    ///         Field::new("field2", DataType::Utf8, false),
+    ///     ],
+    /// );
+    /// assert!(result.is_err());
+    /// ```
+    pub fn try_new<F, T>(type_ids: T, fields: F) -> Result<Self, ArrowError>
+    where
+        F: IntoIterator,
+        F::Item: Into<FieldRef>,
+        T: IntoIterator<Item = i8>,
+    {
+        let mut type_ids_iter = type_ids.into_iter();
+        let mut fields_iter = fields.into_iter().map(Into::into);
+
+        let mut seen_type_ids = 0u128;
+
+        let mut out = Vec::new();
+
+        loop {
+            match (type_ids_iter.next(), fields_iter.next()) {
+                (None, None) => return Ok(Self(out.into())),
+                (Some(type_id), Some(field)) => {
+                    // check type id is non-negative
+                    if type_id < 0 {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "type ids must be non-negative: {type_id}"
+                        )));
+                    }
+
+                    // check type id uniqueness
+                    let mask = 1_u128 << type_id;
+                    if (seen_type_ids & mask) != 0 {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "duplicate type id: {type_id}"
+                        )));
+                    }
+
+                    seen_type_ids |= mask;
+
+                    out.push((type_id, field));
+                }
+                (None, Some(_)) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "fields iterator has more elements than type_ids iterator".to_string(),
+                    ));
+                }
+                (Some(_), None) => {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "type_ids iterator has more elements than fields iterator".to_string(),
+                    ));
+                }
+            }
+        }
+    }
+
+    /// Create a new [`UnionFields`] from a collection of fields with automatically
+    /// assigned type IDs starting from 0.
+    ///
+    /// The type IDs are assigned in increasing order: 0, 1, 2, 3, etc.
+    ///
+    /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
+    ///
+    /// # Panics
+    ///
+    /// Panics if the number of fields exceeds 128 (type IDs are `i8` values from 0 to 127).
+    ///
+    /// If you want to avoid panics, use [`UnionFields::try_from_fields`] instead, which
+    /// returns a `Result`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, UnionFields};
+    /// // Create a new UnionFields with automatic type id assignment
+    /// // 0 -> DataType::UInt8
+    /// // 1 -> DataType::Utf8
+    /// let union_fields = UnionFields::from_fields(vec![
+    ///     Field::new("field1", DataType::UInt8, false),
+    ///     Field::new("field2", DataType::Utf8, false),
+    /// ]);
+    /// assert_eq!(union_fields.len(), 2);
+    /// ```
+    pub fn from_fields<F>(fields: F) -> Self
+    where
+        F: IntoIterator,
+        F::Item: Into<FieldRef>,
+    {
+        fields
+            .into_iter()
+            .enumerate()
+            .map(|(i, field)| {
+                let id = i8::try_from(i).expect("UnionFields cannot contain more than 128 fields");
+
+                (id, field.into())
+            })
+            .collect()
+    }
+
+    /// Create a new [`UnionFields`] from a collection of fields with automatically
+    /// assigned type IDs starting from 0.
+    ///
+    /// The type IDs are assigned in increasing order: 0, 1, 2, 3, etc.
+    ///
+    /// This is the non-panicking version of [`UnionFields::from_fields`].
+    ///
+    /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the number of fields exceeds 128 (type IDs are `i8` values from 0 to 127).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, UnionFields};
+    /// // Create a new UnionFields with automatic type id assignment
+    /// // 0 -> DataType::UInt8
+    /// // 1 -> DataType::Utf8
+    /// let result = UnionFields::try_from_fields(vec![
+    ///     Field::new("field1", DataType::UInt8, false),
+    ///     Field::new("field2", DataType::Utf8, false),
+    /// ]);
+    /// assert!(result.is_ok());
+    /// assert_eq!(result.unwrap().len(), 2);
+    ///
+    /// // This will fail with too many fields
+    /// let many_fields: Vec<_> = (0..200)
+    ///     .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
+    ///     .collect();
+    /// let result = UnionFields::try_from_fields(many_fields);
+    /// assert!(result.is_err());
     /// ```
+    pub fn try_from_fields<F>(fields: F) -> Result<Self, ArrowError>
+    where
+        F: IntoIterator,
+        F::Item: Into<FieldRef>,
+    {
+        let mut out = Vec::with_capacity(i8::MAX as usize + 1);
+
+        for (i, field) in fields.into_iter().enumerate() {
+            let id = i8::try_from(i).map_err(|_| {
+                ArrowError::InvalidArgumentError(
+                    "UnionFields cannot contain more than 128 fields".into(),
+                )
+            })?;
+
+            out.push((id, field.into()));
+        }
+
+        Ok(Self(out.into()))
+    }
+
+    /// Create a new [`UnionFields`] from a [`Fields`] and array of type_ids
+    ///
+    /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
+    ///
+    /// # Deprecated
+    ///
+    /// Use [`UnionFields::try_new`] instead. This method panics on invalid input,
+    /// while `try_new` returns a `Result`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any type_id appears more than once (duplicate type ids).
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, UnionFields};
+    /// // Create a new UnionFields with type id mapping
+    /// // 1 -> DataType::UInt8
+    /// // 3 -> DataType::Utf8
+    /// UnionFields::try_new(
+    ///     vec![1, 3],
+    ///     vec![
+    ///         Field::new("field1", DataType::UInt8, false),
+    ///         Field::new("field3", DataType::Utf8, false),
+    ///     ],
+    /// );
+    /// ```
+    #[deprecated(since = "57.0.0", note = "Use `try_new` instead")]
     pub fn new<F, T>(type_ids: T, fields: F) -> Self
     where
         F: IntoIterator,
@@ -365,7 +574,7 @@ impl UnionFields {
             .inspect(|&idx| {
                 let mask = 1_u128 << idx;
                 if (set & mask) != 0 {
-                    panic!("duplicate type id: {}", idx);
+                    panic!("duplicate type id: {idx}");
                 } else {
                     set |= mask;
                 }
@@ -396,6 +605,43 @@ impl UnionFields {
         self.0.iter().map(|(id, f)| (*id, f))
     }
 
+    /// Returns a reference to the field at the given index, or `None` if out of bounds.
+    ///
+    /// This is a safe alternative to direct indexing via `[]`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use arrow_schema::{DataType, Field, UnionFields};
+    ///
+    /// let fields = UnionFields::try_new(
+    ///     vec![1, 3],
+    ///     vec![
+    ///         Field::new("field1", DataType::UInt8, false),
+    ///         Field::new("field3", DataType::Utf8, false),
+    ///     ],
+    /// ).unwrap();
+    ///
+    /// assert!(fields.get(0).is_some());
+    /// assert!(fields.get(1).is_some());
+    /// assert!(fields.get(2).is_none());
+    /// ```
+    pub fn get(&self, index: usize) -> Option<&(i8, FieldRef)> {
+        self.0.get(index)
+    }
+
+    /// Searches for a field by its type id, returning the type id and field reference if found.
+    /// Returns `None` if no field with the given type id exists.
+    pub fn find_by_type_id(&self, type_id: i8) -> Option<(i8, &FieldRef)> {
+        self.iter().find(|&(i, _)| i == type_id)
+    }
+
+    /// Searches for a field by value equality, returning its type id and reference if found.
+    /// Returns `None` if no matching field exists in this [`UnionFields`].
+    pub fn find_by_field(&self, field: &Field) -> Option<(i8, &FieldRef)> {
+        self.iter().find(|&(_, f)| f.as_ref() == field)
+    }
+
     /// Merge this field into self if it is compatible.
     ///
     /// See [`Field::try_merge`]
@@ -409,10 +655,12 @@ impl UnionFields {
                     // If the nested fields in two unions are the same, they must have same
                     // type id.
                     if *self_type_id != field_type_id {
-                        return Err(ArrowError::SchemaError(
-                            format!("Fail to merge schema field '{}' because the self_type_id = {} does not equal field_type_id = {}",
-                                    self_field.name(), self_type_id, field_type_id)
-                        ));
+                        return Err(ArrowError::SchemaError(format!(
+                            "Fail to merge schema field '{}' because the self_type_id = {} does not equal field_type_id = {}",
+                            self_field.name(),
+                            self_type_id,
+                            field_type_id
+                        )));
                     }
 
                     is_new_field = false;
@@ -431,7 +679,6 @@ impl UnionFields {
 
 impl FromIterator<(i8, FieldRef)> for UnionFields {
     fn from_iter<T: IntoIterator<Item = (i8, FieldRef)>>(iter: T) -> Self {
-        // TODO: Should this validate type IDs are unique (#3982)
         Self(iter.into_iter().collect())
     }
 }
@@ -486,13 +733,14 @@ mod tests {
             Field::new(
                 "h",
                 DataType::Union(
-                    UnionFields::new(
+                    UnionFields::try_new(
                         vec![1, 3],
                         vec![
                             Field::new("field1", DataType::UInt8, false),
                             Field::new("field3", DataType::Utf8, false),
                         ],
-                    ),
+                    )
+                    .unwrap(),
                     UnionMode::Dense,
                 ),
                 true,
@@ -550,7 +798,8 @@ mod tests {
         assert_eq!(r[0], fields[7]);
 
         let union = DataType::Union(
-            UnionFields::new(vec![1], vec![Field::new("field1", DataType::UInt8, false)]),
+            UnionFields::try_new(vec![1], vec![Field::new("field1", DataType::UInt8, false)])
+                .unwrap(),
             UnionMode::Dense,
         );
 
@@ -566,4 +815,228 @@ mod tests {
         let r = fields.try_filter_leaves(|_, _| Err(ArrowError::SchemaError("error".to_string())));
         assert!(r.is_err());
     }
+
+    #[test]
+    fn test_union_fields_try_new_valid() {
+        let res = UnionFields::try_new(
+            vec![1, 6, 7],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f6", DataType::Utf8, false),
+                Field::new("f7", DataType::Int32, true),
+            ],
+        );
+        assert!(res.is_ok());
+        let union_fields = res.unwrap();
+        assert_eq!(union_fields.len(), 3);
+        assert_eq!(
+            union_fields.iter().map(|(id, _)| id).collect::<Vec<_>>(),
+            vec![1, 6, 7]
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_empty() {
+        let res = UnionFields::try_new(Vec::<i8>::new(), Vec::<Field>::new());
+        assert!(res.is_ok());
+        assert!(res.unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_union_fields_try_new_duplicate_type_id() {
+        let res = UnionFields::try_new(
+            vec![1, 1],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("duplicate type id: 1")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_duplicate_field() {
+        let field = Field::new("field", DataType::UInt8, false);
+        let res = UnionFields::try_new(vec![1, 2], vec![field.clone(), field]);
+        assert!(res.is_ok());
+    }
+
+    #[test]
+    fn test_union_fields_try_new_more_type_ids() {
+        let res = UnionFields::try_new(
+            vec![1, 2, 3],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("type_ids iterator has more elements")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_more_fields() {
+        let res = UnionFields::try_new(
+            vec![1, 2],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+                Field::new("f3", DataType::Int32, true),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("fields iterator has more elements")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_negative_type_ids() {
+        let res = UnionFields::try_new(
+            vec![-128, -1, 0, 127],
+            vec![
+                Field::new("field_min", DataType::UInt8, false),
+                Field::new("field_neg", DataType::Utf8, false),
+                Field::new("field_zero", DataType::Int32, true),
+                Field::new("field_max", DataType::Boolean, false),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("type ids must be non-negative")
+        )
+    }
+
+    #[test]
+    fn test_union_fields_try_new_complex_types() {
+        let res = UnionFields::try_new(
+            vec![0, 1, 2],
+            vec![
+                Field::new(
+                    "struct_field",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("a", DataType::Int32, false),
+                        Field::new("b", DataType::Utf8, true),
+                    ])),
+                    false,
+                ),
+                Field::new_list(
+                    "list_field",
+                    Field::new("item", DataType::Float64, true),
+                    true,
+                ),
+                Field::new(
+                    "dict_field",
+                    DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                    false,
+                ),
+            ],
+        );
+        assert!(res.is_ok());
+        assert_eq!(res.unwrap().len(), 3);
+    }
+
+    #[test]
+    fn test_union_fields_try_new_single_field() {
+        let res = UnionFields::try_new(
+            vec![42],
+            vec![Field::new("only_field", DataType::Int64, false)],
+        );
+        assert!(res.is_ok());
+        let union_fields = res.unwrap();
+        assert_eq!(union_fields.len(), 1);
+        assert_eq!(union_fields.iter().next().unwrap().0, 42);
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_empty() {
+        let res = UnionFields::try_from_fields(Vec::<Field>::new());
+        assert!(res.is_ok());
+        assert!(res.unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_single() {
+        let res = UnionFields::try_from_fields(vec![Field::new("only", DataType::Int64, false)]);
+        assert!(res.is_ok());
+        let union_fields = res.unwrap();
+        assert_eq!(union_fields.len(), 1);
+        assert_eq!(union_fields.iter().next().unwrap().0, 0);
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_too_many() {
+        let many_fields: Vec<_> = (0..200)
+            .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
+            .collect();
+        let res = UnionFields::try_from_fields(many_fields);
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("UnionFields cannot contain more than 128 fields")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_max_valid() {
+        let fields: Vec<_> = (0..=i8::MAX)
+            .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
+            .collect();
+        let res = UnionFields::try_from_fields(fields);
+        assert!(res.is_ok());
+        let union_fields = res.unwrap();
+        assert_eq!(union_fields.len(), 128);
+        assert_eq!(union_fields.iter().map(|(id, _)| id).min().unwrap(), 0);
+        assert_eq!(union_fields.iter().map(|(id, _)| id).max().unwrap(), 127);
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_over_max() {
+        // 129 fields should fail
+        let fields: Vec<_> = (0..129)
+            .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
+            .collect();
+        let res = UnionFields::try_from_fields(fields);
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn test_union_fields_try_from_fields_complex_types() {
+        let res = UnionFields::try_from_fields(vec![
+            Field::new(
+                "struct_field",
+                DataType::Struct(Fields::from(vec![
+                    Field::new("a", DataType::Int32, false),
+                    Field::new("b", DataType::Utf8, true),
+                ])),
+                false,
+            ),
+            Field::new_list(
+                "list_field",
+                Field::new("item", DataType::Float64, true),
+                true,
+            ),
+            Field::new(
+                "dict_field",
+                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+                false,
+            ),
+        ]);
+        assert!(res.is_ok());
+        assert_eq!(res.unwrap().len(), 3);
+    }
 }
diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs
index d1befbd04ff8..1eeeb4d106fb 100644
--- a/arrow-schema/src/lib.rs
+++ b/arrow-schema/src/lib.rs
@@ -21,13 +21,14 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 
 mod datatype;
 
 pub use datatype::*;
 use std::fmt::Display;
+mod datatype_display;
 mod datatype_parse;
 mod error;
 pub use error::*;
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 9affd4162995..0c7db39dfbb5 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -187,7 +187,7 @@ pub type SchemaRef = Arc<Schema>;
 pub struct Schema {
     /// A sequence of fields that describe the schema.
     pub fields: Fields,
-    /// A map of key-value pairs containing additional meta data.
+    /// A map of key-value pairs containing additional metadata.
     pub metadata: HashMap<String, String>,
 }
 
@@ -365,13 +365,6 @@ impl Schema {
         self.fields.iter().flat_map(|f| f.fields()).collect()
     }
 
-    /// Returns a vector with references to all fields (including nested fields)
-    #[deprecated(since = "52.2.0", note = "Use `flattened_fields` instead")]
-    #[inline]
-    pub fn all_fields(&self) -> Vec<&Field> {
-        self.flattened_fields()
-    }
-
     /// Returns an immutable reference of a specific [`Field`] instance selected using an
     /// offset within the internal `fields` vector.
     ///
@@ -553,6 +546,12 @@ impl Hash for Schema {
     }
 }
 
+impl AsRef<Schema> for Schema {
+    fn as_ref(&self) -> &Schema {
+        self
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::datatype::DataType;
@@ -560,6 +559,25 @@ mod tests {
 
     use super::*;
 
+    #[test]
+    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
+    fn test_schema_as_ref() {
+        fn accept_ref(_: impl AsRef<Schema>) {}
+
+        let schema = Schema::new(vec![
+            Field::new("name", DataType::Utf8, false),
+            Field::new("address", DataType::Utf8, false),
+            Field::new("priority", DataType::UInt8, false),
+        ]);
+
+        accept_ref(schema.clone());
+        accept_ref(&schema.clone());
+        accept_ref(&&schema.clone());
+        accept_ref(Arc::new(schema.clone()));
+        accept_ref(&Arc::new(schema.clone()));
+        accept_ref(&&Arc::new(schema.clone()));
+    }
+
     #[test]
     #[cfg(feature = "serde")]
     fn test_ser_de_metadata() {
@@ -704,14 +722,13 @@ mod tests {
     #[test]
     fn create_schema_string() {
         let schema = person_schema();
-        assert_eq!(schema.to_string(),
-                   "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
-        Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
-        Field { name: \"address\", data_type: Struct([\
-            Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
-            Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
-        ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
-        Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
+        assert_eq!(
+            schema.to_string(),
+            "Field { \"first_name\": Utf8, metadata: {\"k\": \"v\"} }, \
+             Field { \"last_name\": Utf8 }, \
+             Field { \"address\": Struct(\"street\": non-null Utf8, \"zip\": non-null UInt16) }, \
+             Field { \"interests\": nullable Dictionary(Int32, Utf8), dict_id: 123, dict_is_ordered }"
+        )
     }
 
     #[test]
@@ -1405,14 +1422,16 @@ mod tests {
         );
 
         // incompatible field should throw error
-        assert!(Schema::try_merge(vec![
-            Schema::new(vec![
-                Field::new("first_name", DataType::Utf8, false),
-                Field::new("last_name", DataType::Utf8, false),
-            ]),
-            Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
-        ])
-        .is_err());
+        assert!(
+            Schema::try_merge(vec![
+                Schema::new(vec![
+                    Field::new("first_name", DataType::Utf8, false),
+                    Field::new("last_name", DataType::Utf8, false),
+                ]),
+                Schema::new(vec![Field::new("last_name", DataType::Int64, false),])
+            ])
+            .is_err()
+        );
 
         // incompatible metadata should throw error
         let res = Schema::try_merge(vec![
diff --git a/arrow-select/Cargo.toml b/arrow-select/Cargo.toml
index 238e1a8f58cc..443094e6c986 100644
--- a/arrow-select/Cargo.toml
+++ b/arrow-select/Cargo.toml
@@ -40,7 +40,7 @@ arrow-buffer = { workspace = true }
 arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-array = { workspace = true }
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 ahash = { version = "0.8", default-features = false}
 
 [dev-dependencies]
diff --git a/arrow-select/src/coalesce.rs b/arrow-select/src/coalesce.rs
index 9b310c645d07..8cf1f024c3ff 100644
--- a/arrow-select/src/coalesce.rs
+++ b/arrow-select/src/coalesce.rs
@@ -21,8 +21,9 @@
 //! [`filter`]: crate::filter::filter
 //! [`take`]: crate::take::take
 use crate::filter::filter_record_batch;
+use crate::take::take_record_batch;
 use arrow_array::types::{BinaryViewType, StringViewType};
-use arrow_array::{Array, ArrayRef, BooleanArray, RecordBatch};
+use arrow_array::{Array, ArrayRef, BooleanArray, RecordBatch, downcast_primitive};
 use arrow_schema::{ArrowError, DataType, SchemaRef};
 use std::collections::VecDeque;
 use std::sync::Arc;
@@ -31,9 +32,11 @@ use std::sync::Arc;
 
 mod byte_view;
 mod generic;
+mod primitive;
 
 use byte_view::InProgressByteViewArray;
 use generic::GenericInProgressArray;
+use primitive::InProgressPrimitiveArray;
 
 /// Concatenate multiple [`RecordBatch`]es
 ///
@@ -45,10 +48,18 @@ use generic::GenericInProgressArray;
 /// smaller batches, and we want to coalesce them into larger batches for
 /// further processing.
 ///
+/// # Motivation
+///
+/// If we use [`concat_batches`] to implement the same functionality, there are 2 potential issues:
+/// 1. At least 2x peak memory (holding the input and output of concat)
+/// 2. 2 copies of the data (to create the output of filter and then create the output of concat)
+///
+/// See: <https://github.com/apache/arrow-rs/issues/6692> for more discussions
+/// about the motivation.
+///
 /// [`filter`]: crate::filter::filter
 /// [`take`]: crate::take::take
-///
-/// See: <https://github.com/apache/arrow-rs/issues/6692>
+/// [`concat_batches`]: crate::concat::concat_batches
 ///
 /// # Example
 /// ```
@@ -122,14 +133,18 @@ use generic::GenericInProgressArray;
 pub struct BatchCoalescer {
     /// The input schema
     schema: SchemaRef,
-    /// output batch size
-    batch_size: usize,
+    /// The target batch size (and thus size for views allocation). This is a
+    /// hard limit: the output batch will be exactly `target_batch_size`,
+    /// rather than possibly being slightly above.
+    target_batch_size: usize,
     /// In-progress arrays
     in_progress_arrays: Vec<Box<dyn InProgressArray>>,
     /// Buffered row count. Always less than `batch_size`
     buffered_rows: usize,
     /// Completed batches
     completed: VecDeque<RecordBatch>,
+    /// Biggest coalesce batch size. See [`Self::with_biggest_coalesce_batch_size`]
+    biggest_coalesce_batch_size: Option<usize>,
 }
 
 impl BatchCoalescer {
@@ -137,26 +152,58 @@ impl BatchCoalescer {
     ///
     /// # Arguments
     /// - `schema` - the schema of the output batches
-    /// - `batch_size` - the number of rows in each output batch.
+    /// - `target_batch_size` - the number of rows in each output batch.
     ///   Typical values are `4096` or `8192` rows.
     ///
-    pub fn new(schema: SchemaRef, batch_size: usize) -> Self {
+    pub fn new(schema: SchemaRef, target_batch_size: usize) -> Self {
         let in_progress_arrays = schema
             .fields()
             .iter()
-            .map(|field| create_in_progress_array(field.data_type(), batch_size))
+            .map(|field| create_in_progress_array(field.data_type(), target_batch_size))
             .collect::<Vec<_>>();
 
         Self {
             schema,
-            batch_size,
+            target_batch_size,
             in_progress_arrays,
             // We will for sure store at least one completed batch
             completed: VecDeque::with_capacity(1),
             buffered_rows: 0,
+            biggest_coalesce_batch_size: None,
         }
     }
 
+    /// Set the coalesce batch size limit (default `None`)
+    ///
+    /// This limit determines when batches should bypass coalescing. Intuitively,
+    /// batches that are already large are costly to coalesce and are efficient
+    /// enough to process directly without coalescing.
+    ///
+    /// If `Some(limit)`, batches larger than this limit will bypass coalescing
+    /// when there is no buffered data, or when the previously buffered data
+    /// already exceeds this limit.
+    ///
+    /// If `None`, all batches will be coalesced according to the
+    /// target_batch_size.
+    pub fn with_biggest_coalesce_batch_size(mut self, limit: Option<usize>) -> Self {
+        self.biggest_coalesce_batch_size = limit;
+        self
+    }
+
+    /// Get the current biggest coalesce batch size limit
+    ///
+    /// See [`Self::with_biggest_coalesce_batch_size`] for details
+    pub fn biggest_coalesce_batch_size(&self) -> Option<usize> {
+        self.biggest_coalesce_batch_size
+    }
+
+    /// Set the biggest coalesce batch size limit
+    ///
+    /// See [`Self::with_biggest_coalesce_batch_size`] for details
+    pub fn set_biggest_coalesce_batch_size(&mut self, limit: Option<usize>) {
+        self.biggest_coalesce_batch_size = limit;
+    }
+
     /// Return the schema of the output batches
     pub fn schema(&self) -> SchemaRef {
         Arc::clone(&self.schema)
@@ -197,9 +244,48 @@ impl BatchCoalescer {
         self.push_batch(filtered_batch)
     }
 
+    /// Push a batch into the Coalescer after applying a set of indices.
+    /// This is semantically equivalent to calling [`Self::push_batch`]
+    /// with the results from [`take_record_batch`].
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_array::{record_batch, UInt64Array};
+    /// # use arrow_select::coalesce::BatchCoalescer;
+    /// let batch1 = record_batch!(("a", Int32, [0, 0, 0])).unwrap();
+    /// let batch2 = record_batch!(("a", Int32, [1, 1, 4, 5, 1, 4])).unwrap();
+    /// // Sorted indices to create a sorted output, this can be obtained with
+    /// // `arrow-ord`'s sort_to_indices operation
+    /// let indices = UInt64Array::from(vec![0, 1, 4, 2, 5, 3]);
+    /// // create a new Coalescer that targets creating 1000 row batches
+    /// let mut coalescer = BatchCoalescer::new(batch1.schema(), 1000);
+    /// coalescer.push_batch(batch1);
+    /// coalescer.push_batch_with_indices(batch2, &indices);
+    /// // finish and retrieve the created batch
+    /// coalescer.finish_buffered_batch().unwrap();
+    /// let completed_batch = coalescer.next_completed_batch().unwrap();
+    /// let expected_batch = record_batch!(("a", Int32, [0, 0, 0, 1, 1, 1, 4, 4, 5])).unwrap();
+    /// assert_eq!(completed_batch, expected_batch);
+    /// ```
+    pub fn push_batch_with_indices(
+        &mut self,
+        batch: RecordBatch,
+        indices: &dyn Array,
+    ) -> Result<(), ArrowError> {
+        // todo: optimize this to avoid materializing (copying the results of take indices to a new batch)
+        let taken_batch = take_record_batch(&batch, indices)?;
+        self.push_batch(taken_batch)
+    }
+
     /// Push all the rows from `batch` into the Coalescer
     ///
-    /// See [`Self::next_completed_batch()`] to retrieve any completed batches.
+    /// When buffered data plus incoming rows reach `target_batch_size`,
+    /// completed batches are generated eagerly and can be retrieved via
+    /// [`Self::next_completed_batch()`].
+    /// Output batches contain exactly `target_batch_size` rows, so the tail of
+    /// the input batch may remain buffered.
+    /// Remaining partial data either waits for future input batches or can be
+    /// materialized immediately by calling [`Self::finish_buffered_batch()`].
     ///
     /// # Example
     /// ```
@@ -218,11 +304,160 @@ impl BatchCoalescer {
     /// assert_eq!(completed_batch, expected_batch);
     /// ```
     pub fn push_batch(&mut self, batch: RecordBatch) -> Result<(), ArrowError> {
-        let (_schema, arrays, mut num_rows) = batch.into_parts();
-        if num_rows == 0 {
+        // Large batch bypass optimization:
+        // When biggest_coalesce_batch_size is configured and a batch exceeds this limit,
+        // we can avoid expensive split-and-merge operations by passing it through directly.
+        //
+        // IMPORTANT: This optimization is OPTIONAL and only active when biggest_coalesce_batch_size
+        // is explicitly set via with_biggest_coalesce_batch_size(Some(limit)).
+        // If not set (None), ALL batches follow normal coalescing behavior regardless of size.
+
+        // =============================================================================
+        // CASE 1: No buffer + large batch → Direct bypass
+        // =============================================================================
+        // Example scenario (target_batch_size=1000, biggest_coalesce_batch_size=Some(500)):
+        // Input sequence: [600, 1200, 300]
+        //
+        // With biggest_coalesce_batch_size=Some(500) (optimization enabled):
+        //   600 → large batch detected! buffered_rows=0 → Case 1: direct bypass
+        //        → output: [600] (bypass, preserves large batch)
+        //   1200 → large batch detected! buffered_rows=0 → Case 1: direct bypass
+        //         → output: [1200] (bypass, preserves large batch)
+        //   300 → normal batch, buffer: [300]
+        //   Result: [600], [1200], [300] - large batches preserved, mixed sizes
+
+        // =============================================================================
+        // CASE 2: Buffer too large + large batch → Flush first, then bypass
+        // =============================================================================
+        // This case prevents creating extremely large merged batches that would
+        // significantly exceed both target_batch_size and biggest_coalesce_batch_size.
+        //
+        // Example 1: Buffer exceeds limit before large batch arrives
+        // target_batch_size=1000, biggest_coalesce_batch_size=Some(400)
+        // Input: [350, 200, 800]
+        //
+        // Step 1: push_batch([350])
+        //   → batch_size=350 <= 400, normal path
+        //   → buffer: [350], buffered_rows=350
+        //
+        // Step 2: push_batch([200])
+        //   → batch_size=200 <= 400, normal path
+        //   → buffer: [350, 200], buffered_rows=550
+        //
+        // Step 3: push_batch([800])
+        //   → batch_size=800 > 400, large batch path
+        //   → buffered_rows=550 > 400 → Case 2: flush first
+        //   → flush: output [550] (combined [350, 200])
+        //   → then bypass: output [800]
+        //   Result: [550], [800] - buffer flushed to prevent oversized merge
+        //
+        // Example 2: Multiple small batches accumulate before large batch
+        // target_batch_size=1000, biggest_coalesce_batch_size=Some(300)
+        // Input: [150, 100, 80, 900]
+        //
+        // Step 1-3: Accumulate small batches
+        //   150 → buffer: [150], buffered_rows=150
+        //   100 → buffer: [150, 100], buffered_rows=250
+        //   80  → buffer: [150, 100, 80], buffered_rows=330
+        //
+        // Step 4: push_batch([900])
+        //   → batch_size=900 > 300, large batch path
+        //   → buffered_rows=330 > 300 → Case 2: flush first
+        //   → flush: output [330] (combined [150, 100, 80])
+        //   → then bypass: output [900]
+        //   Result: [330], [900] - prevents merge into [1230] which would be too large
+
+        // =============================================================================
+        // CASE 3: Small buffer + large batch → Normal coalescing (no bypass)
+        // =============================================================================
+        // When buffer is small enough, we still merge to maintain efficiency
+        // Example: target_batch_size=1000, biggest_coalesce_batch_size=Some(500)
+        // Input: [300, 1200]
+        //
+        // Step 1: push_batch([300])
+        //   → batch_size=300 <= 500, normal path
+        //   → buffer: [300], buffered_rows=300
+        //
+        // Step 2: push_batch([1200])
+        //   → batch_size=1200 > 500, large batch path
+        //   → buffered_rows=300 <= 500 → Case 3: normal merge
+        //   → buffer: [300, 1200] (1500 total)
+        //   → 1500 > target_batch_size → split: output [1000], buffer [500]
+        //   Result: [1000], [500] - normal split/merge behavior maintained
+
+        // =============================================================================
+        // Comparison: Default vs Optimized Behavior
+        // =============================================================================
+        // target_batch_size=1000, biggest_coalesce_batch_size=Some(500)
+        // Input: [600, 1200, 300]
+        //
+        // DEFAULT BEHAVIOR (biggest_coalesce_batch_size=None):
+        //   600 → buffer: [600]
+        //   1200 → buffer: [600, 1200] (1800 rows total)
+        //         → split: output [1000 rows], buffer [800 rows remaining]
+        //   300 → buffer: [800, 300] (1100 rows total)
+        //        → split: output [1000 rows], buffer [100 rows remaining]
+        //   Result: [1000], [1000], [100] - all outputs respect target_batch_size
+        //
+        // OPTIMIZED BEHAVIOR (biggest_coalesce_batch_size=Some(500)):
+        //   600 → Case 1: direct bypass → output: [600]
+        //   1200 → Case 1: direct bypass → output: [1200]
+        //   300 → normal path → buffer: [300]
+        //   Result: [600], [1200], [300] - large batches preserved
+
+        // =============================================================================
+        // Benefits and Trade-offs
+        // =============================================================================
+        // Benefits of the optimization:
+        // - Large batches stay intact (better for downstream vectorized processing)
+        // - Fewer split/merge operations (better CPU performance)
+        // - More predictable memory usage patterns
+        // - Maintains streaming efficiency while preserving batch boundaries
+        //
+        // Trade-offs:
+        // - Output batch sizes become variable (not always target_batch_size)
+        // - May produce smaller partial batches when flushing before large batches
+        // - Requires tuning biggest_coalesce_batch_size parameter for optimal performance
+
+        // TODO: for unsorted batches, we may be able to filter all large batches, and coalesce all
+        // small batches together?
+
+        let batch_size = batch.num_rows();
+
+        // Fast path: skip empty batches
+        if batch_size == 0 {
             return Ok(());
         }
 
+        // Large batch optimization: bypass coalescing for oversized batches
+        if let Some(limit) = self.biggest_coalesce_batch_size {
+            if batch_size > limit {
+                // Case 1: No buffered data - emit large batch directly
+                // Example: [] + [1200] → output [1200], buffer []
+                if self.buffered_rows == 0 {
+                    self.completed.push_back(batch);
+                    return Ok(());
+                }
+
+                // Case 2: Buffer too large - flush then emit to avoid oversized merge
+                // Example: [850] + [1200] → output [850], then output [1200]
+                // This prevents creating batches much larger than both target_batch_size
+                // and biggest_coalesce_batch_size, which could cause memory issues
+                if self.buffered_rows > limit {
+                    self.finish_buffered_batch()?;
+                    self.completed.push_back(batch);
+                    return Ok(());
+                }
+
+                // Case 3: Small buffer - proceed with normal coalescing
+                // Example: [300] + [1200] → split and merge normally
+                // This ensures small batches still get properly coalesced
+                // while allowing some controlled growth beyond the limit
+            }
+        }
+
+        let (_schema, arrays, mut num_rows) = batch.into_parts();
+
         // setup input rows
         assert_eq!(arrays.len(), self.in_progress_arrays.len());
         self.in_progress_arrays
@@ -235,8 +470,8 @@ impl BatchCoalescer {
         // If pushing this batch would exceed the target batch size,
         // finish the current batch and start a new one
         let mut offset = 0;
-        while num_rows > (self.batch_size - self.buffered_rows) {
-            let remaining_rows = self.batch_size - self.buffered_rows;
+        while num_rows > (self.target_batch_size - self.buffered_rows) {
+            let remaining_rows = self.target_batch_size - self.buffered_rows;
             debug_assert!(remaining_rows > 0);
 
             // Copy remaining_rows from each array
@@ -260,7 +495,7 @@ impl BatchCoalescer {
         }
 
         // If we have reached the target batch size, finalize the buffered batch
-        if self.buffered_rows >= self.batch_size {
+        if self.buffered_rows >= self.target_batch_size {
             self.finish_buffered_batch()?;
         }
 
@@ -272,6 +507,11 @@ impl BatchCoalescer {
         Ok(())
     }
 
+    /// Returns the number of buffered rows
+    pub fn get_buffered_rows(&self) -> usize {
+        self.buffered_rows
+    }
+
     /// Concatenates any buffered batches into a single `RecordBatch` and
     /// clears any output buffers
     ///
@@ -314,7 +554,7 @@ impl BatchCoalescer {
         !self.completed.is_empty()
     }
 
-    /// Returns the next completed batch, if any
+    /// Removes and returns the next completed batch, if any.
     pub fn next_completed_batch(&mut self) -> Option<RecordBatch> {
         self.completed.pop_front()
     }
@@ -322,7 +562,18 @@ impl BatchCoalescer {
 
 /// Return a new `InProgressArray` for the given data type
 fn create_in_progress_array(data_type: &DataType, batch_size: usize) -> Box<dyn InProgressArray> {
-    match data_type {
+    macro_rules! instantiate_primitive {
+        ($t:ty) => {
+            Box::new(InProgressPrimitiveArray::<$t>::new(
+                batch_size,
+                data_type.clone(),
+            ))
+        };
+    }
+
+    downcast_primitive! {
+        // Instantiate InProgressPrimitiveArray for each primitive type
+        data_type => (instantiate_primitive),
         DataType::Utf8View => Box::new(InProgressByteViewArray::<StringViewType>::new(batch_size)),
         DataType::BinaryView => {
             Box::new(InProgressByteViewArray::<BinaryViewType>::new(batch_size))
@@ -364,8 +615,13 @@ mod tests {
     use crate::concat::concat_batches;
     use arrow_array::builder::StringViewBuilder;
     use arrow_array::cast::AsArray;
-    use arrow_array::{BinaryViewArray, RecordBatchOptions, StringViewArray, UInt32Array};
+    use arrow_array::types::Int32Type;
+    use arrow_array::{
+        BinaryViewArray, Int32Array, Int64Array, RecordBatchOptions, StringArray, StringViewArray,
+        TimestampNanosecondArray, UInt32Array, UInt64Array,
+    };
     use arrow_schema::{DataType, Field, Schema};
+    use rand::{Rng, SeedableRng};
     use std::ops::Range;
 
     #[test]
@@ -456,6 +712,119 @@ mod tests {
             .run();
     }
 
+    /// Coalesce multiple batches, 80k rows, with a 0.1% selectivity filter
+    #[test]
+    fn test_coalesce_filtered_001() {
+        let mut filter_builder = RandomFilterBuilder {
+            num_rows: 8000,
+            selectivity: 0.001,
+            seed: 0,
+        };
+
+        // add 10 batches of 8000 rows each
+        // 80k rows, selecting 0.1% means 80 rows
+        // not exactly 80 as the rows are random;
+        let mut test = Test::new();
+        for _ in 0..10 {
+            test = test
+                .with_batch(multi_column_batch(0..8000))
+                .with_filter(filter_builder.next_filter())
+        }
+        test.with_batch_size(15)
+            .with_expected_output_sizes(vec![15, 15, 15, 13])
+            .run();
+    }
+
+    /// Coalesce multiple batches, 80k rows, with a 1% selectivity filter
+    #[test]
+    fn test_coalesce_filtered_01() {
+        let mut filter_builder = RandomFilterBuilder {
+            num_rows: 8000,
+            selectivity: 0.01,
+            seed: 0,
+        };
+
+        // add 10 batches of 8000 rows each
+        // 80k rows, selecting 1% means 800 rows
+        // not exactly 800 as the rows are random;
+        let mut test = Test::new();
+        for _ in 0..10 {
+            test = test
+                .with_batch(multi_column_batch(0..8000))
+                .with_filter(filter_builder.next_filter())
+        }
+        test.with_batch_size(128)
+            .with_expected_output_sizes(vec![128, 128, 128, 128, 128, 128, 15])
+            .run();
+    }
+
+    /// Coalesce multiple batches, 80k rows, with a 10% selectivity filter
+    #[test]
+    fn test_coalesce_filtered_1() {
+        let mut filter_builder = RandomFilterBuilder {
+            num_rows: 8000,
+            selectivity: 0.1,
+            seed: 0,
+        };
+
+        // add 10 batches of 8000 rows each
+        // 80k rows, selecting 10% means 8000 rows
+        // not exactly 8000 as the rows are random (the sizes below sum to 8008);
+        let mut test = Test::new();
+        for _ in 0..10 {
+            test = test
+                .with_batch(multi_column_batch(0..8000))
+                .with_filter(filter_builder.next_filter())
+        }
+        test.with_batch_size(1024)
+            .with_expected_output_sizes(vec![1024, 1024, 1024, 1024, 1024, 1024, 1024, 840])
+            .run();
+    }
+
+    /// Coalesce multiple batches, 8k rows, with a 90% selectivity filter
+    #[test]
+    fn test_coalesce_filtered_90() {
+        let mut filter_builder = RandomFilterBuilder {
+            num_rows: 800,
+            selectivity: 0.90,
+            seed: 0,
+        };
+
+        // add 10 batches of 800 rows each
+        // 8k rows, selecting 90% means 7200 rows
+        // not exactly 7200 as the rows are random (the sizes below sum to 7181);
+        let mut test = Test::new();
+        for _ in 0..10 {
+            test = test
+                .with_batch(multi_column_batch(0..800))
+                .with_filter(filter_builder.next_filter())
+        }
+        test.with_batch_size(1024)
+            .with_expected_output_sizes(vec![1024, 1024, 1024, 1024, 1024, 1024, 1024, 13])
+            .run();
+    }
+
+    #[test]
+    fn test_coalesce_non_null() {
+        Test::new()
+            // 4040 rows of non-null uint32 in total, split into batches of 1024
+            .with_batch(uint32_batch_non_null(0..3000))
+            .with_batch(uint32_batch_non_null(0..1040))
+            .with_batch_size(1024)
+            .with_expected_output_sizes(vec![1024, 1024, 1024, 968])
+            .run();
+    }
+    #[test]
+    fn test_utf8_split() {
+        Test::new()
+            // 4040 rows of utf8 strings in total, split into batches of 1024
+            .with_batch(utf8_batch(0..3000))
+            .with_batch(utf8_batch(0..1040))
+            .with_batch_size(1024)
+            .with_expected_output_sizes(vec![1024, 1024, 1024, 968])
+            .run();
+    }
+
     #[test]
     fn test_string_view_no_views() {
         let output_batches = Test::new()
@@ -639,21 +1008,27 @@ mod tests {
 
     #[test]
     fn test_string_view_many_small_compact() {
-        // The strings are 28 long, so each batch has 400 * 28 = 5600 bytes
+        // 200 rows alternating long (28) and short (≤12) strings.
+        // Only the 100 long strings go into data buffers: 100 × 28 = 2800.
         let batch = stringview_batch_repeated(
-            400,
+            200,
             [Some("This string is 28 bytes long"), Some("small string")],
         );
         let output_batches = Test::new()
             // First allocated buffer is 8kb.
-            // Appending five batches of 5600 bytes will use 5600 * 5 = 28kb (8kb, an 16kb and 32kbkb)
+            // Appending 10 batches of 2800 bytes will use 2800 * 10 = 28kb (8kb, a 16kb and a 32kb buffer)
+            .with_batch(batch.clone())
+            .with_batch(batch.clone())
+            .with_batch(batch.clone())
+            .with_batch(batch.clone())
+            .with_batch(batch.clone())
             .with_batch(batch.clone())
             .with_batch(batch.clone())
             .with_batch(batch.clone())
             .with_batch(batch.clone())
             .with_batch(batch.clone())
             .with_batch_size(8000)
-            .with_expected_output_sizes(vec![2000]) // only 2000 rows total
+            .with_expected_output_sizes(vec![2000]) // only 2000 rows total
             .run();
 
         // expect a nice even distribution of buffers
@@ -681,7 +1056,7 @@ mod tests {
         // The strings are designed to exactly fit into buffers that are powers of 2 long
         let batch = stringview_batch_repeated(100, [Some("This string is a power of two=32")]);
         let output_batches = Test::new()
-            .with_batches(std::iter::repeat(batch).take(20))
+            .with_batches(std::iter::repeat_n(batch, 20))
             .with_batch_size(900)
             .with_expected_output_sizes(vec![900, 900, 200])
             .run();
@@ -708,14 +1083,14 @@ mod tests {
 
     #[test]
     fn test_string_view_large_small() {
-        // The strings are 37 bytes long, so each batch has 200 * 28 = 5600 bytes
+        // The long strings are 28 bytes long, so each batch has 100 * 28 = 2800 bytes
         let mixed_batch = stringview_batch_repeated(
-            400,
+            200,
             [Some("This string is 28 bytes long"), Some("small string")],
         );
         // These strings aren't copied, this array has an 8k buffer
         let all_large = stringview_batch_repeated(
-            100,
+            50,
             [Some(
                 "This buffer has only large strings in it so there are no buffer copies",
             )],
@@ -723,7 +1098,12 @@ mod tests {
 
         let output_batches = Test::new()
             // First allocated buffer is 8kb.
-            // Appending five batches of 5600 bytes will use 5600 * 5 = 28kb (8kb, an 16kb and 32kbkb)
+            // Appending the mixed batches (2800 bytes each) grows the buffers from 8kb to 16kb to 32kb
+            .with_batch(mixed_batch.clone())
+            .with_batch(mixed_batch.clone())
+            .with_batch(all_large.clone())
+            .with_batch(mixed_batch.clone())
+            .with_batch(all_large.clone())
             .with_batch(mixed_batch.clone())
             .with_batch(mixed_batch.clone())
             .with_batch(all_large.clone())
@@ -737,26 +1117,17 @@ mod tests {
             col_as_string_view("c0", output_batches.first().unwrap()),
             vec![
                 ExpectedLayout {
-                    len: 8176,
+                    len: 8190,
                     capacity: 8192,
                 },
-                // this buffer was allocated but not used when the all_large batch was pushed
                 ExpectedLayout {
-                    len: 3024,
+                    len: 16366,
                     capacity: 16384,
                 },
                 ExpectedLayout {
-                    len: 7000,
-                    capacity: 8192,
-                },
-                ExpectedLayout {
-                    len: 5600,
+                    len: 6244,
                     capacity: 32768,
                 },
-                ExpectedLayout {
-                    len: 7000,
-                    capacity: 8192,
-                },
             ],
         );
     }
@@ -813,6 +1184,11 @@ mod tests {
     struct Test {
         /// Batches to feed to the coalescer.
         input_batches: Vec<RecordBatch>,
+        /// Filters to apply to the corresponding input batches.
+        ///
+        /// If there are no filters for the input batches, the batch will be
+        /// pushed as is.
+        filters: Vec<BooleanArray>,
         /// The schema. If not provided, the first batch's schema is used.
         schema: Option<SchemaRef>,
         /// Expected output sizes of the resulting batches
@@ -825,6 +1201,7 @@ mod tests {
         fn default() -> Self {
             Self {
                 input_batches: vec![],
+                filters: vec![],
                 schema: None,
                 expected_output_sizes: vec![],
                 target_batch_size: 1024,
@@ -849,6 +1226,12 @@ mod tests {
             self
         }
 
+        /// Extend the filters with `filter`
+        fn with_filter(mut self, filter: BooleanArray) -> Self {
+            self.filters.push(filter);
+            self
+        }
+
         /// Extends the input batches with `batches`
         fn with_batches(mut self, batches: impl IntoIterator<Item = RecordBatch>) -> Self {
             self.input_batches.extend(batches);
@@ -871,23 +1254,29 @@ mod tests {
         ///
         /// Returns the resulting output batches
         fn run(self) -> Vec<RecordBatch> {
+            let expected_output = self.expected_output();
+            let schema = self.schema();
+
             let Self {
                 input_batches,
-                schema,
+                filters,
+                schema: _,
                 target_batch_size,
                 expected_output_sizes,
             } = self;
 
-            let schema = schema.unwrap_or_else(|| input_batches[0].schema());
-
-            // create a single large input batch for output comparison
-            let single_input_batch = concat_batches(&schema, &input_batches).unwrap();
+            let had_input = input_batches.iter().any(|b| b.num_rows() > 0);
 
             let mut coalescer = BatchCoalescer::new(Arc::clone(&schema), target_batch_size);
 
-            let had_input = input_batches.iter().any(|b| b.num_rows() > 0);
+            // feed input batches and filters to the coalescer
+            let mut filters = filters.into_iter();
             for batch in input_batches {
-                coalescer.push_batch(batch).unwrap();
+                if let Some(filter) = filters.next() {
+                    coalescer.push_batch_with_filter(batch, &filter).unwrap();
+                } else {
+                    coalescer.push_batch(batch).unwrap();
+                }
             }
             assert_eq!(schema, coalescer.schema());
 
@@ -927,7 +1316,7 @@ mod tests {
             for (i, (expected_size, batch)) in iter {
                 // compare the contents of the batch after normalization (using
                 // `==` compares the underlying memory layout too)
-                let expected_batch = single_input_batch.slice(starting_idx, *expected_size);
+                let expected_batch = expected_output.slice(starting_idx, *expected_size);
                 let expected_batch = normalize_batch(expected_batch);
                 let batch = normalize_batch(batch.clone());
                 assert_eq!(
@@ -939,17 +1328,77 @@ mod tests {
             }
             output_batches
         }
+
+        /// Return the expected output schema. If not overridden by `with_schema`, it
+        /// returns the schema of the first input batch.
+        fn schema(&self) -> SchemaRef {
+            self.schema
+                .clone()
+                .unwrap_or_else(|| Arc::clone(&self.input_batches[0].schema()))
+        }
+
+        /// Returns the expected output as a single `RecordBatch`
+        fn expected_output(&self) -> RecordBatch {
+            let schema = self.schema();
+            if self.filters.is_empty() {
+                return concat_batches(&schema, &self.input_batches).unwrap();
+            }
+
+            let mut filters = self.filters.iter();
+            let filtered_batches = self
+                .input_batches
+                .iter()
+                .map(|batch| {
+                    if let Some(filter) = filters.next() {
+                        filter_record_batch(batch, filter).unwrap()
+                    } else {
+                        batch.clone()
+                    }
+                })
+                .collect::<Vec<_>>();
+            concat_batches(&schema, &filtered_batches).unwrap()
+        }
+    }
+
+    /// Return a RecordBatch with a UInt32Array with the specified range and
+    /// every third value is null.
+    fn uint32_batch<T: std::iter::Iterator<Item = u32>>(range: T) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, true)]));
+
+        let array = UInt32Array::from_iter(range.map(|i| if i % 3 == 0 { None } else { Some(i) }));
+        RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
     }
 
-    /// Return a RecordBatch with a UInt32Array with the specified range
-    fn uint32_batch(range: Range<u32>) -> RecordBatch {
+    /// Return a RecordBatch with a UInt32Array with no nulls over the specified range
+    fn uint32_batch_non_null<T: std::iter::Iterator<Item = u32>>(range: T) -> RecordBatch {
         let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)]));
 
-        RecordBatch::try_new(
-            Arc::clone(&schema),
-            vec![Arc::new(UInt32Array::from_iter_values(range))],
-        )
-        .unwrap()
+        let array = UInt32Array::from_iter_values(range);
+        RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
+    }
+
+    /// Return a RecordBatch with a UInt64Array with no nulls over the specified range
+    fn uint64_batch_non_null<T: std::iter::Iterator<Item = u64>>(range: T) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt64, false)]));
+
+        let array = UInt64Array::from_iter_values(range);
+        RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
+    }
+
+    /// Return a RecordBatch with a StringArray with values `value1`, `value2`, ...
+    /// where every value whose index is a multiple of 3 is `None`.
+    fn utf8_batch(range: Range<u32>) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::Utf8, true)]));
+
+        let array = StringArray::from_iter(range.map(|i| {
+            if i % 3 == 0 {
+                None
+            } else {
+                Some(format!("value{i}"))
+            }
+        }));
+
+        RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
     }
 
     /// Return a RecordBatch with a StringViewArray with (only) the specified values
@@ -960,14 +1409,11 @@ mod tests {
             false,
         )]));
 
-        RecordBatch::try_new(
-            Arc::clone(&schema),
-            vec![Arc::new(StringViewArray::from_iter(values))],
-        )
-        .unwrap()
+        let array = StringViewArray::from_iter(values);
+        RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
     }
 
-    /// Return a RecordBatch with a StringViewArray with num_rows by repating
+    /// Return a RecordBatch with a StringViewArray with num_rows by repeating
     /// values over and over.
     fn stringview_batch_repeated<'a>(
         num_rows: usize,
@@ -995,6 +1441,75 @@ mod tests {
         RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(array)]).unwrap()
     }
 
+    /// Return a four-column RecordBatch with one row per value in `range`
+    fn multi_column_batch(range: Range<i32>) -> RecordBatch {
+        let int64_array = Int64Array::from_iter(
+            range
+                .clone()
+                .map(|v| if v % 5 == 0 { None } else { Some(v as i64) }),
+        );
+        let string_view_array = StringViewArray::from_iter(range.clone().map(|v| {
+            if v % 5 == 0 {
+                None
+            } else if v % 7 == 0 {
+                Some(format!("This is a string longer than 12 bytes{v}"))
+            } else {
+                Some(format!("Short {v}"))
+            }
+        }));
+        let string_array = StringArray::from_iter(range.clone().map(|v| {
+            if v % 11 == 0 {
+                None
+            } else {
+                Some(format!("Value {v}"))
+            }
+        }));
+        let timestamp_array = TimestampNanosecondArray::from_iter(range.map(|v| {
+            if v % 3 == 0 {
+                None
+            } else {
+                Some(v as i64 * 1000) // simulated timestamp; stored in a nanosecond-typed array
+            }
+        }))
+        .with_timezone("America/New_York");
+
+        RecordBatch::try_from_iter(vec![
+            ("int64", Arc::new(int64_array) as ArrayRef),
+            ("stringview", Arc::new(string_view_array) as ArrayRef),
+            ("string", Arc::new(string_array) as ArrayRef),
+            ("timestamp", Arc::new(timestamp_array) as ArrayRef),
+        ])
+        .unwrap()
+    }
+
+    /// Builds boolean filter arrays of `num_rows` entries in which each
+    /// row is randomly selected with probability `selectivity`.
+    ///
+    /// For example a `selectivity` of 0.1 will filter out
+    /// roughly 90% of the rows.
+    #[derive(Debug)]
+    struct RandomFilterBuilder {
+        num_rows: usize,
+        selectivity: f64,
+        /// seed for random number generator, increases by one each time
+        /// `next_filter` is called
+        seed: u64,
+    }
+    impl RandomFilterBuilder {
+        /// Build the next filter with the current seed and increment the seed
+        /// by one.
+        fn next_filter(&mut self) -> BooleanArray {
+            assert!(self.selectivity >= 0.0 && self.selectivity <= 1.0);
+            let mut rng = rand::rngs::StdRng::seed_from_u64(self.seed);
+            self.seed += 1;
+            BooleanArray::from_iter(
+                (0..self.num_rows)
+                    .map(|_| rng.random_bool(self.selectivity))
+                    .map(Some),
+            )
+        }
+    }
+
     /// Returns the named column as a StringViewArray
     fn col_as_string_view<'b>(name: &str, batch: &'b RecordBatch) -> &'b StringViewArray {
         batch
@@ -1028,4 +1543,470 @@ mod tests {
         let options = RecordBatchOptions::new().with_row_count(Some(row_count));
         RecordBatch::try_new_with_options(schema, columns, &options).unwrap()
     }
+
+    /// Helper function to create a test batch with specified number of rows
+    fn create_test_batch(num_rows: usize) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)]));
+        let array = Int32Array::from_iter_values(0..num_rows as i32);
+        RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
+    }
+    #[test]
+    fn test_biggest_coalesce_batch_size_none_default() {
+        // Test that default behavior (None) coalesces all batches
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+
+        // Push a large batch (1000 rows) - should be coalesced normally
+        let large_batch = create_test_batch(1000);
+        coalescer.push_batch(large_batch).unwrap();
+
+        // Should produce multiple batches of target size (100)
+        let mut output_batches = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            output_batches.push(batch);
+        }
+
+        coalescer.finish_buffered_batch().unwrap();
+        while let Some(batch) = coalescer.next_completed_batch() {
+            output_batches.push(batch);
+        }
+
+        // Should have 10 batches of 100 rows each
+        assert_eq!(output_batches.len(), 10);
+        for batch in output_batches {
+            assert_eq!(batch.num_rows(), 100);
+        }
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_bypass_large_batch() {
+        // Test that batches larger than biggest_coalesce_batch_size bypass coalescing
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(500));
+
+        // Push a large batch (1000 rows) - should bypass coalescing
+        let large_batch = create_test_batch(1000);
+        coalescer.push_batch(large_batch.clone()).unwrap();
+
+        // Should have one completed batch immediately (the original large batch)
+        assert!(coalescer.has_completed_batch());
+        let output_batch = coalescer.next_completed_batch().unwrap();
+        assert_eq!(output_batch.num_rows(), 1000);
+
+        // Should be no more completed batches
+        assert!(!coalescer.has_completed_batch());
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_coalesce_small_batch() {
+        // Test that batches smaller than biggest_coalesce_batch_size are coalesced normally
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(500));
+
+        // Push small batches that should be coalesced
+        let small_batch = create_test_batch(50);
+        coalescer.push_batch(small_batch.clone()).unwrap();
+
+        // Should not have completed batch yet (only 50 rows, target is 100)
+        assert!(!coalescer.has_completed_batch());
+        assert_eq!(coalescer.get_buffered_rows(), 50);
+
+        // Push another small batch
+        coalescer.push_batch(small_batch).unwrap();
+
+        // Now should have a completed batch (100 rows total)
+        assert!(coalescer.has_completed_batch());
+        let output_batch = coalescer.next_completed_batch().unwrap();
+        let size = output_batch
+            .column(0)
+            .as_primitive::<Int32Type>()
+            .get_buffer_memory_size();
+        assert_eq!(size, 400); // 100 rows * 4 bytes each
+        assert_eq!(output_batch.num_rows(), 100);
+
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_equal_boundary() {
+        // Test behavior when batch size equals biggest_coalesce_batch_size
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(500));
+
+        // Push a batch exactly equal to the limit
+        let boundary_batch = create_test_batch(500);
+        coalescer.push_batch(boundary_batch).unwrap();
+
+        // Should be coalesced (not bypass) since it's equal, not greater
+        let mut output_count = 0;
+        while coalescer.next_completed_batch().is_some() {
+            output_count += 1;
+        }
+
+        coalescer.finish_buffered_batch().unwrap();
+        while coalescer.next_completed_batch().is_some() {
+            output_count += 1;
+        }
+
+        // Should have 5 batches of 100 rows each
+        assert_eq!(output_count, 5);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_first_large_then_consecutive_bypass() {
+        // Test the new consecutive large batch bypass behavior
+        // Pattern: small batches -> first large batch (coalesced) -> consecutive large batches (bypass)
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(200));
+
+        let small_batch = create_test_batch(50);
+
+        // Push small batch first to create buffered data
+        coalescer.push_batch(small_batch).unwrap();
+        assert_eq!(coalescer.get_buffered_rows(), 50);
+        assert!(!coalescer.has_completed_batch());
+
+        // Push first large batch - should go through normal coalescing due to buffered data
+        let large_batch1 = create_test_batch(250);
+        coalescer.push_batch(large_batch1).unwrap();
+
+        // 50 + 250 = 300 -> 3 complete batches of 100, 0 rows buffered
+        let mut completed_batches = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            completed_batches.push(batch);
+        }
+        assert_eq!(completed_batches.len(), 3);
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+
+        // Now push consecutive large batches - they should bypass
+        let large_batch2 = create_test_batch(300);
+        let large_batch3 = create_test_batch(400);
+
+        // Push second large batch - should bypass since it's consecutive and buffer is empty
+        coalescer.push_batch(large_batch2).unwrap();
+        assert!(coalescer.has_completed_batch());
+        let output = coalescer.next_completed_batch().unwrap();
+        assert_eq!(output.num_rows(), 300); // bypassed with original size
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+
+        // Push third large batch - should also bypass
+        coalescer.push_batch(large_batch3).unwrap();
+        assert!(coalescer.has_completed_batch());
+        let output = coalescer.next_completed_batch().unwrap();
+        assert_eq!(output.num_rows(), 400); // bypassed with original size
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_empty_batch() {
+        // Test that empty batches don't trigger the bypass logic
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(50));
+
+        let empty_batch = create_test_batch(0);
+        coalescer.push_batch(empty_batch).unwrap();
+
+        // Empty batch should be handled normally (no effect)
+        assert!(!coalescer.has_completed_batch());
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_with_buffered_data_no_bypass() {
+        // Test that when there is buffered data, large batches do NOT bypass (unless consecutive)
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(200));
+
+        // Add some buffered data first
+        let small_batch = create_test_batch(30);
+        coalescer.push_batch(small_batch.clone()).unwrap();
+        coalescer.push_batch(small_batch).unwrap();
+        assert_eq!(coalescer.get_buffered_rows(), 60);
+
+        // Push large batch that would normally bypass, but shouldn't because buffered_rows > 0
+        let large_batch = create_test_batch(250);
+        coalescer.push_batch(large_batch).unwrap();
+
+        // The large batch should be processed through normal coalescing logic
+        // Total: 60 (buffered) + 250 (new) = 310 rows
+        // Output: 3 complete batches of 100 rows each, 10 rows remain buffered
+
+        let mut completed_batches = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            completed_batches.push(batch);
+        }
+
+        assert_eq!(completed_batches.len(), 3);
+        for batch in &completed_batches {
+            assert_eq!(batch.num_rows(), 100);
+        }
+        assert_eq!(coalescer.get_buffered_rows(), 10);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_zero_limit() {
+        // Test edge case where limit is 0 (all batches bypass when no buffered data)
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(0));
+
+        // Even a 1-row batch should bypass when there's no buffered data
+        let tiny_batch = create_test_batch(1);
+        coalescer.push_batch(tiny_batch).unwrap();
+
+        assert!(coalescer.has_completed_batch());
+        let output = coalescer.next_completed_batch().unwrap();
+        assert_eq!(output.num_rows(), 1);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_bypass_only_when_no_buffer() {
+        // A large batch may only skip coalescing while nothing is buffered
+        // (bypass requires buffered_rows == 0)
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)]));
+        let mut coalescer = BatchCoalescer::new(schema, 100);
+        coalescer.set_biggest_coalesce_batch_size(Some(200));
+
+        // Phase 1: empty buffer and 300 rows (limit is 200) => passed through as-is
+        let large_batch = create_test_batch(300);
+        coalescer.push_batch(large_batch.clone()).unwrap();
+
+        assert!(coalescer.has_completed_batch());
+        let bypassed = coalescer.next_completed_batch().unwrap();
+        assert_eq!(bypassed.num_rows(), 300); // bypassed
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+
+        // Phase 2: put 50 rows into the buffer
+        coalescer.push_batch(create_test_batch(50)).unwrap();
+        assert_eq!(coalescer.get_buffered_rows(), 50);
+
+        // Phase 3: the very same large batch must now be coalesced, because
+        // rows are already buffered
+        coalescer.push_batch(large_batch).unwrap();
+
+        // 50 + 300 = 350 rows in total, which is emitted as three full
+        // batches of 100 rows while the last 50 rows stay in the buffer
+        let mut row_counts = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            row_counts.push(batch.num_rows());
+        }
+
+        // Every emitted batch must have exactly the target size of 100 rows
+        assert_eq!(row_counts.len(), 3);
+        for rows in row_counts {
+            assert_eq!(rows, 100);
+        }
+        assert_eq!(coalescer.get_buffered_rows(), 50);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_consecutive_large_batches_scenario() {
+        // Scenario with batch sizes 20, 20, 30, 700, 600, 700, 900, 700, 600
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            1000,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(500));
+
+        // Push small batches first
+        coalescer.push_batch(create_test_batch(20)).unwrap();
+        coalescer.push_batch(create_test_batch(20)).unwrap();
+        coalescer.push_batch(create_test_batch(30)).unwrap();
+
+        assert_eq!(coalescer.get_buffered_rows(), 70);
+        assert!(!coalescer.has_completed_batch());
+
+        // Push first large batch (700) - should coalesce due to buffered data
+        coalescer.push_batch(create_test_batch(700)).unwrap();
+
+        // 70 + 700 = 770 rows, not enough for 1000, so all stay buffered
+        assert_eq!(coalescer.get_buffered_rows(), 770);
+        assert!(!coalescer.has_completed_batch());
+
+        // Push second large batch (600) - should bypass since previous was large
+        coalescer.push_batch(create_test_batch(600)).unwrap();
+
+        // Should flush buffer (770 rows) and bypass the 600
+        let mut outputs = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            outputs.push(batch);
+        }
+        assert_eq!(outputs.len(), 2); // one flushed buffer batch (770) + one bypassed (600)
+        assert_eq!(outputs[0].num_rows(), 770);
+        assert_eq!(outputs[1].num_rows(), 600);
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+
+        // Push remaining large batches - should all bypass
+        let remaining_batches = [700, 900, 700, 600];
+        for &size in &remaining_batches {
+            coalescer.push_batch(create_test_batch(size)).unwrap();
+
+            assert!(coalescer.has_completed_batch());
+            let output = coalescer.next_completed_batch().unwrap();
+            assert_eq!(output.num_rows(), size);
+            assert_eq!(coalescer.get_buffered_rows(), 0);
+        }
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_truly_consecutive_large_bypass() {
+        // Test truly consecutive large batches (all above the limit of 200) that should all bypass
+        // This test ensures the buffer is completely empty between large batches
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)])),
+            100,
+        );
+        coalescer.set_biggest_coalesce_batch_size(Some(200));
+
+        // Push consecutive large batches with no prior buffered data
+        let large_batches = vec![
+            create_test_batch(300),
+            create_test_batch(400),
+            create_test_batch(350),
+            create_test_batch(500),
+        ];
+
+        let mut all_outputs = vec![];
+
+        for (i, large_batch) in large_batches.into_iter().enumerate() {
+            let expected_size = large_batch.num_rows();
+
+            // Buffer should be empty before each large batch
+            assert_eq!(
+                coalescer.get_buffered_rows(),
+                0,
+                "Buffer should be empty before batch {}",
+                i
+            );
+
+            coalescer.push_batch(large_batch).unwrap();
+
+            // Each large batch should bypass and produce exactly one output batch
+            assert!(
+                coalescer.has_completed_batch(),
+                "Should have completed batch after pushing batch {}",
+                i
+            );
+
+            let output = coalescer.next_completed_batch().unwrap();
+            assert_eq!(
+                output.num_rows(),
+                expected_size,
+                "Batch {} should have bypassed with original size",
+                i
+            );
+
+            // Should be no more batches and buffer should be empty
+            assert!(
+                !coalescer.has_completed_batch(),
+                "Should have no more completed batches after batch {}",
+                i
+            );
+            assert_eq!(
+                coalescer.get_buffered_rows(),
+                0,
+                "Buffer should be empty after batch {}",
+                i
+            );
+
+            all_outputs.push(output);
+        }
+
+        // Verify we got exactly 4 output batches, in push order, with their original sizes
+        assert_eq!(all_outputs.len(), 4);
+        assert_eq!(all_outputs[0].num_rows(), 300);
+        assert_eq!(all_outputs[1].num_rows(), 400);
+        assert_eq!(all_outputs[2].num_rows(), 350);
+        assert_eq!(all_outputs[3].num_rows(), 500);
+    }
+
+    #[test]
+    fn test_biggest_coalesce_batch_size_reset_consecutive_on_small_batch() {
+        // A small batch in between must end a run of bypassed large batches:
+        // small batches reset the consecutive large batch tracking
+        let schema = Arc::new(Schema::new(vec![Field::new("c0", DataType::Int32, false)]));
+        let mut coalescer = BatchCoalescer::new(schema, 100);
+        coalescer.set_biggest_coalesce_batch_size(Some(200));
+
+        // Two large batches back to back: the first finds no buffered data and
+        // the second is consecutive, so both are passed through untouched
+        for rows in [300, 400] {
+            coalescer.push_batch(create_test_batch(rows)).unwrap();
+            let bypassed = coalescer.next_completed_batch().unwrap();
+            assert_eq!(bypassed.num_rows(), rows);
+        }
+
+        // A small batch gets buffered and resets the consecutive tracking
+        coalescer.push_batch(create_test_batch(50)).unwrap();
+        assert_eq!(coalescer.get_buffered_rows(), 50);
+
+        // With 50 rows buffered the next large batch has to be coalesced
+        coalescer.push_batch(create_test_batch(350)).unwrap();
+
+        // 50 + 350 = 400 rows, i.e. exactly four full batches of 100 rows
+        // and no remainder
+        let mut row_counts = vec![];
+        while let Some(batch) = coalescer.next_completed_batch() {
+            row_counts.push(batch.num_rows());
+        }
+
+        // Four outputs of 100 rows each, nothing left behind
+        assert_eq!(row_counts.len(), 4);
+        for rows in row_counts {
+            assert_eq!(rows, 100);
+        }
+        assert_eq!(coalescer.get_buffered_rows(), 0);
+    }
+
+    #[test]
+    fn test_coalasce_push_batch_with_indices() {
+        const MID_POINT: u32 = 2333;
+        const TOTAL_ROWS: u32 = 23333;
+        let batch1 = uint32_batch_non_null(0..MID_POINT);
+        let batch2 = uint32_batch_non_null((MID_POINT..TOTAL_ROWS).rev());
+        // batch1 is built from 0..MID_POINT, batch2 from the reversed range MID_POINT..TOTAL_ROWS
+        let mut coalescer = BatchCoalescer::new(
+            Arc::new(Schema::new(vec![Field::new("c0", DataType::UInt32, false)])),
+            TOTAL_ROWS as usize,
+        );
+        coalescer.push_batch(batch1).unwrap();
+        // Descending indices over batch2; the expected output below is ascending again
+        let rev_indices = (0..((TOTAL_ROWS - MID_POINT) as u64)).rev();
+        let reversed_indices_batch = uint64_batch_non_null(rev_indices);
+
+        let reverse_indices = UInt64Array::from(reversed_indices_batch.column(0).to_data());
+        coalescer
+            .push_batch_with_indices(batch2, &reverse_indices)
+            .unwrap();
+        // Finish the buffered rows, then fetch the resulting completed batch
+        coalescer.finish_buffered_batch().unwrap();
+        let actual = coalescer.next_completed_batch().unwrap();
+
+        let expected = uint32_batch_non_null(0..TOTAL_ROWS);
+
+        assert_eq!(expected, actual);
+    }
 }
diff --git a/arrow-select/src/coalesce/byte_view.rs b/arrow-select/src/coalesce/byte_view.rs
index 9f87d14a8e4f..bca811fff1c6 100644
--- a/arrow-select/src/coalesce/byte_view.rs
+++ b/arrow-select/src/coalesce/byte_view.rs
@@ -20,7 +20,7 @@ use arrow_array::cast::AsArray;
 use arrow_array::types::ByteViewType;
 use arrow_array::{Array, ArrayRef, GenericByteViewArray};
 use arrow_buffer::{Buffer, NullBufferBuilder};
-use arrow_data::ByteView;
+use arrow_data::{ByteView, MAX_INLINE_VIEW_LEN};
 use arrow_schema::ArrowError;
 use std::marker::PhantomData;
 use std::sync::Arc;
@@ -98,7 +98,10 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
     /// This is done on write (when we know it is necessary) rather than
     /// eagerly to avoid allocations that are not used.
     fn ensure_capacity(&mut self) {
-        self.views.reserve(self.batch_size);
+        if self.views.capacity() == 0 {
+            self.views.reserve(self.batch_size);
+        }
+        debug_assert!(self.views.capacity() >= self.batch_size); // `reserve` may over-allocate
     }
 
     /// Finishes in progress buffer, if any
@@ -125,7 +128,7 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
             // If there are buffers, we need to update the buffer index
             let updated_views = views.iter().map(|v| {
                 let mut byte_view = ByteView::from(*v);
-                if byte_view.length > 12 {
+                if byte_view.length > MAX_INLINE_VIEW_LEN {
                     // Small views (<=12 bytes) are inlined, so only need to update large views
                     byte_view.buffer_index += starting_buffer;
                 };
@@ -182,7 +185,7 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
             if remaining_capacity < str_len as usize {
                 break;
             }
-            if str_len > 12 {
+            if str_len > MAX_INLINE_VIEW_LEN {
                 remaining_capacity -= str_len as usize;
             }
             num_view_to_current += 1;
@@ -233,7 +236,7 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
                 .iter()
                 .filter_map(|v| {
                     let b = ByteView::from(*v);
-                    if b.length > 12 {
+                    if b.length > MAX_INLINE_VIEW_LEN {
                         Some(b.length as usize)
                     } else {
                         None
@@ -251,7 +254,7 @@ impl<B: ByteViewType> InProgressByteViewArray<B> {
         // Copy the views, updating the buffer index and copying the data as needed
         let new_views = views.iter().map(|v| {
             let mut b: ByteView = ByteView::from(*v);
-            if b.length > 12 {
+            if b.length > MAX_INLINE_VIEW_LEN {
                 let buffer_index = b.buffer_index as usize;
                 let buffer_offset = b.offset as usize;
                 let str_len = b.length as usize;
@@ -284,7 +287,10 @@ impl<B: ByteViewType> InProgressArray for InProgressByteViewArray<B> {
                 (false, 0)
             } else {
                 let ideal_buffer_size = s.total_buffer_bytes_used();
-                let actual_buffer_size = s.get_buffer_memory_size();
+                // We don't use get_buffer_memory_size here, because gc is for the contents of the
+                // data buffers, not views and nulls.
+                let actual_buffer_size =
+                    s.data_buffers().iter().map(|b| b.capacity()).sum::<usize>();
                 // copying strings is expensive, so only do it if the array is
                 // sparse (uses at least 2x the memory it needs)
                 let need_gc =
diff --git a/arrow-select/src/coalesce/primitive.rs b/arrow-select/src/coalesce/primitive.rs
new file mode 100644
index 000000000000..69dad221bd52
--- /dev/null
+++ b/arrow-select/src/coalesce/primitive.rs
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::coalesce::InProgressArray;
+use arrow_array::cast::AsArray;
+use arrow_array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray};
+use arrow_buffer::{NullBufferBuilder, ScalarBuffer};
+use arrow_schema::{ArrowError, DataType};
+use std::fmt::Debug;
+use std::sync::Arc;
+
+/// InProgressArray for [`PrimitiveArray`]
+#[derive(Debug)]
+pub(crate) struct InProgressPrimitiveArray<T: ArrowPrimitiveType> {
+    /// Data type of the array
+    data_type: DataType,
+    /// The current source, if any
+    source: Option<ArrayRef>,
+    /// the target batch size (and thus the number of values to reserve space for)
+    batch_size: usize,
+    /// In progress nulls
+    nulls: NullBufferBuilder,
+    /// The values of the currently in progress array
+    current: Vec<T::Native>,
+}
+
+impl<T: ArrowPrimitiveType> InProgressPrimitiveArray<T> {
+    /// Create a new `InProgressPrimitiveArray` with no source and no values allocated yet
+    pub(crate) fn new(batch_size: usize, data_type: DataType) -> Self {
+        Self {
+            data_type,
+            batch_size,
+            source: None,
+            nulls: NullBufferBuilder::new(batch_size),
+            current: vec![],
+        }
+    }
+
+    /// Allocate space for `batch_size` output values if nothing is allocated yet.
+    ///
+    /// Done lazily on write rather than eagerly, to avoid unused allocations.
+    fn ensure_capacity(&mut self) {
+        if self.current.capacity() == 0 {
+            self.current.reserve(self.batch_size);
+        }
+        // `Vec::reserve` may allocate more than requested, so only check the lower bound
+        debug_assert!(self.current.capacity() >= self.batch_size);
+    }
+}
+
+impl<T: ArrowPrimitiveType + Debug> InProgressArray for InProgressPrimitiveArray<T> {
+    /// Replace the array that subsequent `copy_rows` calls read from
+    fn set_source(&mut self, source: Option<ArrayRef>) {
+        self.source = source;
+    }
+
+    /// Append `len` rows of the current source, starting at `offset`, to the
+    /// in-progress values and validity. Fails if no source has been set.
+    fn copy_rows(&mut self, offset: usize, len: usize) -> Result<(), ArrowError> {
+        self.ensure_capacity();
+
+        let Some(source) = self.source.as_ref() else {
+            return Err(ArrowError::InvalidArgumentError(
+                "Internal Error: InProgressPrimitiveArray: source not set".to_string(),
+            ));
+        };
+        let source = source.as_primitive::<T>();
+
+        // Validity: copy the matching bits, or mark every row valid when the
+        // source carries no null buffer
+        match source.nulls() {
+            Some(validity) => self.nulls.append_buffer(&validity.slice(offset, len)),
+            None => self.nulls.append_n_non_nulls(len),
+        }
+
+        // Values: plain slice copy of the requested range
+        let range = offset..offset + len;
+        self.current.extend_from_slice(&source.values()[range]);
+
+        Ok(())
+    }
+
+    /// Build the output array from the accumulated rows and reset the state
+    fn finish(&mut self) -> Result<ArrayRef, ArrowError> {
+        // Swap in an empty vec / fresh null builder for the next batch
+        let values = ScalarBuffer::from(std::mem::take(&mut self.current));
+        let nulls = self.nulls.finish();
+        self.nulls = NullBufferBuilder::new(self.batch_size);
+
+        // `with_data_type` preserves timezone / precision+scale if applicable
+        let array = PrimitiveArray::<T>::try_new(values, nulls)?;
+        let array = array.with_data_type(self.data_type.clone());
+        Ok(Arc::new(array))
+    }
+}
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 69451be7035d..a6e3083a6ee7 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -37,9 +37,11 @@ use arrow_array::builder::{
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
-use arrow_data::transform::{Capacities, MutableArrayData};
+use arrow_buffer::{
+    ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
+};
 use arrow_data::ArrayDataBuilder;
+use arrow_data::transform::{Capacities, MutableArrayData};
 use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
 use std::{collections::HashSet, ops::Add, sync::Arc};
 
@@ -105,7 +107,7 @@ fn concat_dictionaries<K: ArrowDictionaryKeyType>(
         .inspect(|d| output_len += d.len())
         .collect();
 
-    if !should_merge_dictionary_values::<K>(&dictionaries, output_len) {
+    if !should_merge_dictionary_values::<K>(&dictionaries, output_len).0 {
         return concat_fallback(arrays, Capacities::Array(output_len));
     }
 
@@ -134,7 +136,7 @@ fn concat_dictionaries<K: ArrowDictionaryKeyType>(
         NullBuffer::new(nulls.finish())
     });
 
-    let keys = PrimitiveArray::<K>::new(key_values.into(), nulls);
+    let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
     // Sanity check
     assert_eq!(keys.len(), output_len);
 
@@ -206,6 +208,63 @@ fn concat_lists<OffsetSize: OffsetSizeTrait>(
     Ok(Arc::new(array))
 }
 
+/// Concatenates list view arrays: the values children are concatenated in full
+/// and each input's offsets are shifted past the values children before it.
+fn concat_list_view<OffsetSize: OffsetSizeTrait>(
+    arrays: &[&dyn Array],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let mut output_len = 0;
+    let mut list_has_nulls = false;
+
+    let lists = arrays
+        .iter()
+        .map(|x| x.as_list_view::<OffsetSize>())
+        .inspect(|l| {
+            output_len += l.len();
+            list_has_nulls |= l.null_count() != 0;
+        })
+        .collect::<Vec<_>>();
+
+    // Only materialize a validity buffer when at least one input has nulls
+    let lists_nulls = list_has_nulls.then(|| {
+        let mut nulls = BooleanBufferBuilder::new(output_len);
+        for l in &lists {
+            match l.nulls() {
+                Some(n) => nulls.append_buffer(n.inner()),
+                None => nulls.append_n(l.len(), true),
+            }
+        }
+        NullBuffer::new(nulls.finish())
+    });
+
+    let values: Vec<&dyn Array> = lists.iter().map(|l| l.values().as_ref()).collect();
+    let concatenated_values = concat(values.as_slice())?;
+
+    let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| x.sizes()).copied().collect();
+
+    // `MutableBuffer::with_capacity` takes a size in bytes, not in elements
+    let mut offsets = MutableBuffer::with_capacity(output_len * std::mem::size_of::<OffsetSize>());
+    let mut global_offset = OffsetSize::zero();
+    for l in lists.iter() {
+        for &offset in l.offsets() {
+            offsets.push(offset + global_offset);
+        }
+        // advance past this input's values child for the inputs that follow
+        global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
+    }
+    let offsets = ScalarBuffer::from(offsets);
+
+    let array = GenericListViewArray::try_new(
+        field.clone(),
+        offsets,
+        sizes,
+        concatenated_values,
+        lists_nulls,
+    )?;
+    Ok(Arc::new(array))
+}
+
 fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
     let mut builder = PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
         .with_data_type(arrays[0].data_type().clone());
@@ -236,7 +295,7 @@ fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, Arr
     let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
 
     for array in arrays {
-        builder.append_array(array.as_bytes::<T>());
+        builder.append_array(array.as_bytes::<T>())?;
     }
 
     Ok(Arc::new(builder.finish()))
@@ -275,10 +334,11 @@ fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, Ar
         })
         .collect::<Result<Vec<_>, ArrowError>>()?;
 
-    Ok(Arc::new(StructArray::try_new(
+    Ok(Arc::new(StructArray::try_new_with_length(
         fields.clone(),
         column_concat_result,
         nulls,
+        len,
     )?))
 }
 
@@ -304,7 +364,7 @@ where
             run_arrays
                 .iter()
                 .scan(R::default_value(), |acc, run_array| {
-                    *acc = *acc + *run_array.run_ends().values().last().unwrap();
+                    *acc = *acc + R::Native::from_usize(run_array.len()).unwrap();
                     Some(*acc)
                 }),
         )
@@ -319,18 +379,17 @@ where
                 let adjustment = needed_run_end_adjustments[i];
                 run_array
                     .run_ends()
-                    .values()
-                    .iter()
-                    .map(move |run_end| *run_end + adjustment)
+                    .sliced_values()
+                    .map(move |run_end| run_end + adjustment)
             },
         ));
 
-    let all_values = concat(
-        &run_arrays
-            .iter()
-            .map(|x| x.values().as_ref())
-            .collect::<Vec<_>>(),
-    )?;
+    let values_slices: Vec<ArrayRef> = run_arrays
+        .iter()
+        .map(|run_array| run_array.values_slice())
+        .collect();
+
+    let all_values = concat(&values_slices.iter().map(|x| x.as_ref()).collect::<Vec<_>>())?;
 
     let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
         .len(total_len)
@@ -421,6 +480,8 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
         }
         DataType::List(field) => concat_lists::<i32>(arrays, field),
         DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
+        DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
+        DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
         DataType::Struct(fields) => concat_structs(arrays, fields),
         DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
         DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
@@ -499,7 +560,9 @@ pub fn concat_batches<'a>(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::builder::{GenericListBuilder, StringDictionaryBuilder};
+    use arrow_array::builder::{
+        GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
+    };
     use arrow_schema::{Field, Schema};
     use std::fmt::Debug;
 
@@ -548,7 +611,10 @@ mod tests {
             &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
         ]);
 
-        assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32).");
+        assert_eq!(
+            re.unwrap_err().to_string(),
+            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
+        );
     }
 
     #[test]
@@ -571,7 +637,10 @@ mod tests {
             &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
         ]);
 
-        assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32).");
+        assert_eq!(
+            re.unwrap_err().to_string(),
+            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
+        );
     }
 
     #[test]
@@ -595,7 +664,10 @@ mod tests {
             &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
         ]);
 
-        assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
+        assert_eq!(
+            re.unwrap_err().to_string(),
+            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
+        );
     }
 
     #[test]
@@ -621,7 +693,10 @@ mod tests {
             &BooleanArray::from(vec![Some(true), Some(false), None]),
         ]);
 
-        assert_eq!(re.unwrap_err().to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...).");
+        assert_eq!(
+            re.unwrap_err().to_string(),
+            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
+        );
     }
 
     #[test]
@@ -755,7 +830,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]),
             Some(vec![]),
             None,
@@ -763,14 +838,14 @@ mod tests {
         ];
         let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
         ];
         let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
 
-        let list3 = vec![Some(vec![Some(1000), Some(1001)])];
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
         let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
 
         let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
@@ -783,7 +858,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays_slices() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]),
             Some(vec![]), // In slice
             None,         // In slice
@@ -793,7 +868,7 @@ mod tests {
         let list1_array = list1_array.slice(1, 2);
         let list1_values = list1.into_iter().skip(1).take(2);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
@@ -812,7 +887,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_list_arrays_sliced_lengths() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
             Some(vec![]),                                        // In slice
             None,                                                // In slice
@@ -822,7 +897,7 @@ mod tests {
         let list1_array = list1_array.slice(0, 3); // no offset, but not all values
         let list1_values = list1.into_iter().take(3);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None, Some(101)]),
             Some(vec![Some(102)]),
@@ -843,7 +918,7 @@ mod tests {
 
     #[test]
     fn test_concat_primitive_fixed_size_list_arrays() {
-        let list1 = vec![
+        let list1 = [
             Some(vec![Some(-1), None]),
             None,
             Some(vec![Some(10), Some(20)]),
@@ -851,7 +926,7 @@ mod tests {
         let list1_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);
 
-        let list2 = vec![
+        let list2 = [
             None,
             Some(vec![Some(100), None]),
             Some(vec![Some(102), Some(103)]),
@@ -859,7 +934,7 @@ mod tests {
         let list2_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);
 
-        let list3 = vec![Some(vec![Some(1000), Some(1001)])];
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
         let list3_array =
             FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);
 
@@ -872,6 +947,105 @@ mod tests {
         assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
     }
 
+    #[test]
+    fn test_concat_list_view_arrays() {
+        // Helper that builds an Int64 ListViewArray from optional rows; shared by
+        // the inputs and the expected output so that they are built identically
+        let build = |rows: &[Option<Vec<Option<i64>>>]| {
+            let mut builder = ListViewBuilder::new(Int64Builder::new());
+            for row in rows {
+                builder.append_option(row.clone());
+            }
+            builder.finish()
+        };
+
+        // First input: contains a null row in the middle
+        let list1 = [
+            Some(vec![Some(-1), None]),
+            None,
+            Some(vec![Some(10), Some(20)]),
+        ];
+        let list1_array = build(&list1);
+
+        // Second input: starts with a null row
+        let list2 = [
+            None,
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+        ];
+        let list2_array = build(&list2);
+
+        // Third input: a single non-null row
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
+        let list3_array = build(&list3);
+
+        // Concatenate the three unsliced inputs
+        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
+
+        // The expected array holds every row of the three inputs, in input order
+        let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect();
+        let array_expected = build(&expected);
+
+        // Compare through `dyn Array`, as `concat` returns a type-erased array
+        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
+    }
+
+    #[test]
+    fn test_concat_sliced_list_view_arrays() {
+        let list1 = [
+            Some(vec![Some(-1), None]),
+            None,
+            Some(vec![Some(10), Some(20)]),
+        ];
+        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list1.iter() {
+            list1_array.append_option(v.clone());
+        }
+        let list1_array = list1_array.finish();
+
+        let list2 = [
+            None,
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+        ];
+        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list2.iter() {
+            list2_array.append_option(v.clone());
+        }
+        let list2_array = list2_array.finish();
+
+        let list3 = [Some(vec![Some(1000), Some(1001)])];
+        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
+        for v in list3.iter() {
+            list3_array.append_option(v.clone());
+        }
+        let list3_array = list3_array.finish();
+
+        // Concat sliced arrays (the first row of list1 and of list2 is dropped).
+        // ListView slicing will slice the offsets/sizes but preserve the original values child.
+        let array_result = concat(&[
+            &list1_array.slice(1, 2),
+            &list2_array.slice(1, 2),
+            &list3_array.slice(0, 1),
+        ])
+        .unwrap();
+        // Expected: the last two rows of list1, the last two rows of list2, then all of list3
+        let expected: Vec<_> = vec![
+            None,
+            Some(vec![Some(10), Some(20)]),
+            Some(vec![Some(100), None]),
+            Some(vec![Some(102), Some(103)]),
+            Some(vec![Some(1000), Some(1001)]),
+        ];
+        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
+        for v in expected.iter() {
+            array_expected.append_option(v.clone());
+        }
+        let array_expected = array_expected.finish();
+
+        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
+    }
+
     #[test]
     fn test_concat_struct_arrays() {
         let field = Arc::new(Field::new("field", DataType::Int64, true));
@@ -992,6 +1166,23 @@ mod tests {
         assert_eq!(arr.null_count(), 0);
     }
 
+    #[test]
+    fn test_concat_struct_no_fields() {
+        let input_1 = StructArray::new_empty_fields(10, None);
+        let input_2 = StructArray::new_empty_fields(10, None);
+        let arr = concat(&[&input_1, &input_2]).unwrap();
+
+        assert_eq!(arr.len(), 20);
+        assert_eq!(arr.null_count(), 0);
+
+        let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
+        let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
+        let arr = concat(&[&input1_valid, &input2_null]).unwrap();
+
+        assert_eq!(arr.len(), 20);
+        assert_eq!(arr.null_count(), 10);
+    }
+
     #[test]
     fn test_string_array_slices() {
         let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
@@ -1136,12 +1327,14 @@ mod tests {
         // Verify pointer equality check succeeds, and therefore the
         // dictionaries are not merged. A single values buffer should be reused
         // in this case.
-        assert!(dict.values().to_data().ptr_eq(
-            &result_same_dictionary
-                .as_dictionary::<Int8Type>()
-                .values()
-                .to_data()
-        ));
+        assert!(
+            dict.values().to_data().ptr_eq(
+                &result_same_dictionary
+                    .as_dictionary::<Int8Type>()
+                    .values()
+                    .to_data()
+            )
+        );
         assert_eq!(
             result_same_dictionary
                 .as_dictionary::<Int8Type>()
@@ -1171,11 +1364,9 @@ mod tests {
         // 3 * 3   = 9
         // ------------+
         // 909
-        // closest 64 byte aligned cap = 960
 
         let arr = concat(&[&a, &b, &c]).unwrap();
-        // this would have been 1280 if we did not precompute the value lengths.
-        assert_eq!(arr.to_data().buffers()[1].capacity(), 960);
+        assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
     }
 
     #[test]
@@ -1206,10 +1397,12 @@ mod tests {
         );
 
         // Should have reused the dictionary
-        assert!(array
-            .values()
-            .to_data()
-            .ptr_eq(&combined.values().to_data()));
+        assert!(
+            array
+                .values()
+                .to_data()
+                .ptr_eq(&combined.values().to_data())
+        );
         assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
 
         let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
@@ -1300,7 +1493,10 @@ mod tests {
         .unwrap();
 
         let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
-        assert_eq!(error.to_string(), "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8).");
+        assert_eq!(
+            error.to_string(),
+            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
+        );
     }
 
     #[test]
@@ -1310,58 +1506,58 @@ mod tests {
         let a = concat(&[&a, &b]).unwrap();
         let data = a.to_data();
         assert_eq!(data.buffers()[0].len(), 440);
-        assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 440);
 
         let a = concat(&[&a.slice(10, 20), &b]).unwrap();
         let data = a.to_data();
         assert_eq!(data.buffers()[0].len(), 120);
-        assert_eq!(data.buffers()[0].capacity(), 128); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 120);
 
-        let a = StringArray::from_iter_values(std::iter::repeat("foo").take(100));
+        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
         let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
 
         let a = concat(&[&a, &b]).unwrap();
         let data = a.to_data();
         // (100 + 4 + 1) * size_of<i32>()
         assert_eq!(data.buffers()[0].len(), 420);
-        assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 420);
 
         // len("foo") * 100 + len("bingo") + len("bongo") + len("lorem")
         assert_eq!(data.buffers()[1].len(), 315);
-        assert_eq!(data.buffers()[1].capacity(), 320); // Nearest multiple of 64
+        assert_eq!(data.buffers()[1].capacity(), 315);
 
         let a = concat(&[&a.slice(10, 40), &b]).unwrap();
         let data = a.to_data();
         // (40 + 4 + 5) * size_of<i32>()
         assert_eq!(data.buffers()[0].len(), 180);
-        assert_eq!(data.buffers()[0].capacity(), 192); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 180);
 
         // len("foo") * 40 + len("bingo") + len("bongo") + len("lorem")
         assert_eq!(data.buffers()[1].len(), 135);
-        assert_eq!(data.buffers()[1].capacity(), 192); // Nearest multiple of 64
+        assert_eq!(data.buffers()[1].capacity(), 135);
 
-        let a = LargeBinaryArray::from_iter_values(std::iter::repeat(b"foo").take(100));
-        let b = LargeBinaryArray::from_iter_values(std::iter::repeat(b"cupcakes").take(10));
+        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
+        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
 
         let a = concat(&[&a, &b]).unwrap();
         let data = a.to_data();
         // (100 + 10 + 1) * size_of<i64>()
         assert_eq!(data.buffers()[0].len(), 888);
-        assert_eq!(data.buffers()[0].capacity(), 896); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 888);
 
         // len("foo") * 100 + len("cupcakes") * 10
         assert_eq!(data.buffers()[1].len(), 380);
-        assert_eq!(data.buffers()[1].capacity(), 384); // Nearest multiple of 64
+        assert_eq!(data.buffers()[1].capacity(), 380);
 
         let a = concat(&[&a.slice(10, 40), &b]).unwrap();
         let data = a.to_data();
         // (40 + 10 + 1) * size_of<i64>()
         assert_eq!(data.buffers()[0].len(), 408);
-        assert_eq!(data.buffers()[0].capacity(), 448); // Nearest multiple of 64
+        assert_eq!(data.buffers()[0].capacity(), 408);
 
         // len("foo") * 40 + len("cupcakes") * 10
         assert_eq!(data.buffers()[1].len(), 200);
-        assert_eq!(data.buffers()[1].capacity(), 256); // Nearest multiple of 64
+        assert_eq!(data.buffers()[1].capacity(), 200);
     }
 
     #[test]
@@ -1379,16 +1575,13 @@ mod tests {
 
     #[test]
     fn concat_dictionary_list_array_simple() {
-        let scalars = vec![
+        let scalars = [
             create_single_row_list_of_dict(vec![Some("a")]),
             create_single_row_list_of_dict(vec![Some("a")]),
             create_single_row_list_of_dict(vec![Some("b")]),
         ];
 
-        let arrays = scalars
-            .iter()
-            .map(|a| a as &(dyn Array))
-            .collect::<Vec<_>>();
+        let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
         let concat_res = concat(arrays.as_slice()).unwrap();
 
         let expected_list = create_list_of_dict(vec![
@@ -1421,10 +1614,7 @@ mod tests {
             })
             .collect::<Vec<_>>();
 
-        let arrays = scalars
-            .iter()
-            .map(|a| a as &(dyn Array))
-            .collect::<Vec<_>>();
+        let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
         let concat_res = concat(arrays.as_slice()).unwrap();
 
         let expected_list = create_list_of_dict(
@@ -1471,7 +1661,6 @@ mod tests {
         K: ArrowDictionaryKeyType,
         V: Sync + Send + 'static,
         &'a V: ArrayAccessor + IntoIterator,
-
         <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
         <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
     {
@@ -1525,6 +1714,28 @@ mod tests {
         assert_eq!(&[10, 20, 30, 40], values.values());
     }
 
+    #[test]
+    fn test_concat_sliced_run_array() {
+        // Slicing away first run in both arrays
+        let run_ends1 = Int32Array::from(vec![2, 4]);
+        let values1 = Int32Array::from(vec![10, 20]);
+        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); // [10, 10, 20, 20]
+        let array1 = array1.slice(2, 2); // [20, 20]
+
+        let run_ends2 = Int32Array::from(vec![1, 4]);
+        let values2 = Int32Array::from(vec![30, 40]);
+        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); // [30, 40, 40, 40]
+        let array2 = array2.slice(1, 3); // [40, 40, 40]
+
+        let result = concat(&[&array1, &array2]).unwrap();
+        let result = result.as_run::<Int32Type>();
+        let result = result.downcast::<Int32Array>().unwrap();
+
+        let expected = vec![20, 20, 40, 40, 40];
+        let actual = result.into_iter().flatten().collect::<Vec<_>>();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn test_concat_run_array_matching_first_last_value() {
         // Create a run array with run ends [2, 4, 7] and values [10, 20, 30]
@@ -1664,4 +1875,29 @@ mod tests {
         assert_eq!(values.len(), 6);
         assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
     }
+
+    #[test]
+    fn test_concat_run_array_with_truncated_run() {
+        // Create a run array with run ends [2, 5] and values [10, 20]
+        // Logical: [10, 10, 20, 20, 20]
+        let run_ends1 = Int32Array::from(vec![2, 5]);
+        let values1 = Int32Array::from(vec![10, 20]);
+        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
+        let array1_sliced = array1.slice(0, 3);
+
+        let run_ends2 = Int32Array::from(vec![2]);
+        let values2 = Int32Array::from(vec![30]);
+        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
+
+        let result = concat(&[&array1_sliced, &array2]).unwrap();
+        let result_run_array = result.as_run::<Int32Type>();
+
+        // Result should be [10, 10, 20, 30, 30]
+        // Run ends should be [2, 3, 5]
+        assert_eq!(result_run_array.len(), 5);
+        let run_ends = result_run_array.run_ends().values();
+        let values = result_run_array.values().as_primitive::<Int32Type>();
+        assert_eq!(values.values(), &[10, 20, 30]);
+        assert_eq!(&[2, 3, 5], run_ends);
+    }
 }
diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs
index c5773b16a486..5b32f4e761f8 100644
--- a/arrow-select/src/dictionary.rs
+++ b/arrow-select/src/dictionary.rs
@@ -15,6 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Dictionary utilities for Arrow arrays
+
+use std::sync::Arc;
+
+use crate::filter::filter;
 use crate::interleave::interleave;
 use ahash::RandomState;
 use arrow_array::builder::BooleanBufferBuilder;
@@ -22,11 +27,70 @@ use arrow_array::types::{
     ArrowDictionaryKeyType, ArrowPrimitiveType, BinaryType, ByteArrayType, LargeBinaryType,
     LargeUtf8Type, Utf8Type,
 };
+use arrow_array::{
+    AnyDictionaryArray, Array, ArrayRef, ArrowNativeTypeOp, BooleanArray, DictionaryArray,
+    GenericByteArray, PrimitiveArray, downcast_dictionary_array,
+};
 use arrow_array::{cast::AsArray, downcast_primitive};
-use arrow_array::{Array, ArrayRef, DictionaryArray, GenericByteArray, PrimitiveArray};
 use arrow_buffer::{ArrowNativeType, BooleanBuffer, ScalarBuffer, ToByteSlice};
 use arrow_schema::{ArrowError, DataType};
 
+/// Garbage collects a [DictionaryArray] by removing unreferenced values.
+///
+/// Returns a new [DictionaryArray] in which every remaining value is
+/// referenced by at least one key. Duplicate values are not removed, and a
+/// clone of the input is returned if every value is already referenced.
+///
+/// See also [`garbage_collect_any_dictionary`] if you need to handle multiple dictionary types
+pub fn garbage_collect_dictionary<K: ArrowDictionaryKeyType>(
+    dictionary: &DictionaryArray<K>,
+) -> Result<DictionaryArray<K>, ArrowError> {
+    let keys = dictionary.keys();
+    let values = dictionary.values();
+
+    let mask = dictionary.occupancy();
+
+    // If no work to do, return the original dictionary
+    if mask.count_set_bits() == values.len() {
+        return Ok(dictionary.clone());
+    }
+
+    // Create a mapping from the old keys to the new keys, use a Vec for easy indexing
+    let mut key_remap = vec![K::Native::ZERO; values.len()];
+    for (new_idx, old_idx) in mask.set_indices().enumerate() {
+        key_remap[old_idx] = K::Native::from_usize(new_idx)
+            .expect("new index should fit in K::Native, as old index was in range");
+    }
+
+    // ... and then build the new keys array
+    let new_keys = keys.unary(|key| {
+        key_remap
+            .get(key.as_usize())
+            .copied()
+            // null key slots may hold arbitrary (even out-of-range) values; their remapped
+            // value is irrelevant, so it is safe to fall back to zero
+            .unwrap_or(K::Native::ZERO)
+    });
+
+    // Create a new values array by filtering using the mask
+    let values = filter(dictionary.values(), &BooleanArray::new(mask, None))?;
+
+    DictionaryArray::try_new(new_keys, values)
+}
+
+/// Equivalent to [`garbage_collect_dictionary`] but without requiring casting to a specific key type.
+pub fn garbage_collect_any_dictionary(
+    dictionary: &dyn AnyDictionaryArray,
+) -> Result<ArrayRef, ArrowError> {
+    // FIXME: this is a workaround for MSRV Rust versions below 1.86 where trait upcasting is not stable.
+    // From 1.86 onward, `&dyn AnyDictionaryArray` can be directly passed to `downcast_dictionary_array!`.
+    let dictionary = &*dictionary.slice(0, dictionary.len());
+    downcast_dictionary_array!(
+        dictionary => garbage_collect_dictionary(dictionary).map(|dict| Arc::new(dict) as ArrayRef),
+        _ => unreachable!("have a dictionary array")
+    )
+}
+
 /// A best effort interner that maintains a fixed number of buckets
 /// and interns keys based on their hash value
 ///
@@ -78,7 +142,7 @@ impl<'a, V> Interner<'a, V> {
     }
 }
 
-pub struct MergedDictionaries<K: ArrowDictionaryKeyType> {
+pub(crate) struct MergedDictionaries<K: ArrowDictionaryKeyType> {
     /// Provides `key_mappings[`array_idx`][`old_key`] -> new_key`
     pub key_mappings: Vec<Vec<K::Native>>,
     /// The new values
@@ -110,10 +174,14 @@ type PtrEq = fn(&dyn Array, &dyn Array) -> bool;
 /// some return over the naive approach used by MutableArrayData
 ///
 /// `len` is the total length of the merged output
-pub fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
+///
+/// Returns `(should_merge, has_overflow)` where:
+/// - `should_merge`: whether dictionary values should be merged
+/// - `has_overflow`: whether the combined dictionary values would overflow the key type
+pub(crate) fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
     dictionaries: &[&DictionaryArray<K>],
     len: usize,
-) -> bool {
+) -> (bool, bool) {
     use DataType::*;
     let first_values = dictionaries[0].values().as_ref();
     let ptr_eq: PtrEq = match first_values.data_type() {
@@ -123,7 +191,11 @@ pub fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
         LargeBinary => bytes_ptr_eq::<LargeBinaryType>,
         dt => {
             if !dt.is_primitive() {
-                return false;
+                return (
+                    false,
+                    K::Native::from_usize(dictionaries.iter().map(|d| d.values().len()).sum())
+                        .is_none(),
+                );
             }
             |a, b| a.to_data().ptr_eq(&b.to_data())
         }
@@ -142,7 +214,10 @@ pub fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
     let overflow = K::Native::from_usize(total_values).is_none();
     let values_exceed_length = total_values >= len;
 
-    !single_dictionary && (overflow || values_exceed_length)
+    (
+        !single_dictionary && (overflow || values_exceed_length),
+        overflow,
+    )
 }
 
 /// Given an array of dictionaries and an optional key mask compute a values array
@@ -153,7 +228,7 @@ pub fn should_merge_dictionary_values<K: ArrowDictionaryKeyType>(
 /// This method is meant to be very fast and the output dictionary values
 /// may not be unique, unlike `GenericByteDictionaryBuilder` which is slower
 /// but produces unique values
-pub fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
+pub(crate) fn merge_dictionary_values<K: ArrowDictionaryKeyType>(
     dictionaries: &[&DictionaryArray<K>],
     masks: Option<&[BooleanBuffer]>,
 ) -> Result<MergedDictionaries<K>, ArrowError> {
@@ -298,13 +373,88 @@ fn masked_bytes<'a, T: ByteArrayType>(
 
 #[cfg(test)]
 mod tests {
-    use crate::dictionary::merge_dictionary_values;
+    use super::*;
+
     use arrow_array::cast::as_string_array;
+    use arrow_array::types::Int8Type;
     use arrow_array::types::Int32Type;
-    use arrow_array::{DictionaryArray, Int32Array, StringArray};
+    use arrow_array::{DictionaryArray, Int8Array, Int32Array, StringArray};
     use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, OffsetBuffer};
     use std::sync::Arc;
 
+    #[test]
+    fn test_garbage_collect_i32_dictionary() {
+        let values = StringArray::from_iter_values(["a", "b", "c", "d"]);
+        let keys = Int32Array::from_iter_values([0, 1, 1, 3, 0, 0, 1]);
+        let dict = DictionaryArray::<Int32Type>::new(keys, Arc::new(values));
+
+        // Only "a", "b", "d" are referenced, "c" is not
+        let gc = garbage_collect_dictionary(&dict).unwrap();
+
+        let expected_values = StringArray::from_iter_values(["a", "b", "d"]);
+        let expected_keys = Int32Array::from_iter_values([0, 1, 1, 2, 0, 0, 1]);
+        let expected = DictionaryArray::<Int32Type>::new(expected_keys, Arc::new(expected_values));
+
+        assert_eq!(gc, expected);
+    }
+
+    #[test]
+    fn test_garbage_collect_any_dictionary() {
+        let values = StringArray::from_iter_values(["a", "b", "c", "d"]);
+        let keys = Int32Array::from_iter_values([0, 1, 1, 3, 0, 0, 1]);
+        let dict = DictionaryArray::<Int32Type>::new(keys, Arc::new(values));
+
+        let gc = garbage_collect_any_dictionary(&dict).unwrap();
+
+        let expected_values = StringArray::from_iter_values(["a", "b", "d"]);
+        let expected_keys = Int32Array::from_iter_values([0, 1, 1, 2, 0, 0, 1]);
+        let expected = DictionaryArray::<Int32Type>::new(expected_keys, Arc::new(expected_values));
+
+        assert_eq!(gc.as_ref(), &expected);
+    }
+
+    #[test]
+    fn test_garbage_collect_with_nulls() {
+        let values = StringArray::from_iter_values(["a", "b", "c"]);
+        let keys = Int8Array::from(vec![Some(2), None, Some(0)]);
+        let dict = DictionaryArray::<Int8Type>::new(keys, Arc::new(values));
+
+        let gc = garbage_collect_dictionary(&dict).unwrap();
+
+        let expected_values = StringArray::from_iter_values(["a", "c"]);
+        let expected_keys = Int8Array::from(vec![Some(1), None, Some(0)]);
+        let expected = DictionaryArray::<Int8Type>::new(expected_keys, Arc::new(expected_values));
+
+        assert_eq!(gc, expected);
+    }
+
+    #[test]
+    fn test_garbage_collect_empty_dictionary() {
+        let values = StringArray::from_iter_values::<&str, _>([]);
+        let keys = Int32Array::from_iter_values([]);
+        let dict = DictionaryArray::<Int32Type>::new(keys, Arc::new(values));
+
+        let gc = garbage_collect_dictionary(&dict).unwrap();
+
+        assert_eq!(gc, dict);
+    }
+
+    #[test]
+    fn test_garbage_collect_dictionary_all_unreferenced() {
+        let values = StringArray::from_iter_values(["a", "b", "c"]);
+        let keys = Int32Array::from(vec![None, None, None]);
+        let dict = DictionaryArray::<Int32Type>::new(keys, Arc::new(values));
+
+        let gc = garbage_collect_dictionary(&dict).unwrap();
+
+        // All keys are null, so dictionary values can be empty
+        let expected_values = StringArray::from_iter_values::<&str, _>([]);
+        let expected_keys = Int32Array::from(vec![None, None, None]);
+        let expected = DictionaryArray::<Int32Type>::new(expected_keys, Arc::new(expected_values));
+
+        assert_eq!(gc, expected);
+    }
+
     #[test]
     fn test_merge_strings() {
         let a = DictionaryArray::<Int32Type>::from_iter(["a", "b", "a", "b", "d", "c", "e"]);
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index fa91c0690b4c..07ce16de9646 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -26,11 +26,11 @@ use arrow_array::types::{
     ArrowDictionaryKeyType, ArrowPrimitiveType, ByteArrayType, ByteViewType, RunEndIndexType,
 };
 use arrow_array::*;
-use arrow_buffer::{bit_util, ArrowNativeType, BooleanBuffer, NullBuffer, RunEndBuffer};
+use arrow_buffer::{ArrowNativeType, BooleanBuffer, NullBuffer, RunEndBuffer, bit_util};
 use arrow_buffer::{Buffer, MutableBuffer};
+use arrow_data::ArrayDataBuilder;
 use arrow_data::bit_iterator::{BitIndexIterator, BitSliceIterator};
 use arrow_data::transform::MutableArrayData;
-use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::*;
 
 /// If the filter selects more than this fraction of rows, use
@@ -58,7 +58,13 @@ pub struct SlicesIterator<'a>(BitSliceIterator<'a>);
 impl<'a> SlicesIterator<'a> {
     /// Creates a new iterator from a [BooleanArray]
     pub fn new(filter: &'a BooleanArray) -> Self {
-        Self(filter.values().set_slices())
+        filter.values().into()
+    }
+}
+
+impl<'a> From<&'a BooleanBuffer> for SlicesIterator<'a> {
+    fn from(filter: &'a BooleanBuffer) -> Self {
+        Self(filter.set_slices())
     }
 }
 
@@ -112,44 +118,33 @@ fn filter_count(filter: &BooleanArray) -> usize {
     filter.values().count_set_bits()
 }
 
-/// Function that can filter arbitrary arrays
+/// Convert all null values in `BooleanArray` to `false`
 ///
-/// Deprecated: Use [`FilterPredicate`] instead
-#[deprecated]
-pub type Filter<'a> = Box<dyn Fn(&ArrayData) -> ArrayData + 'a>;
-
-/// Returns a prepared function optimized to filter multiple arrays.
+/// This is useful for filter-like operations which select only `true`
+/// values, but not `false` or `NULL` values
 ///
-/// Creating this function requires time, but using it is faster than [filter] when the
-/// same filter needs to be applied to multiple arrays (e.g. a multi-column `RecordBatch`).
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
+/// Internally this is a bitwise `AND` of the validity (null) bits and the
+/// boolean bits. Panics if `filter` has no null buffer (`nulls()` is `None`).
 ///
-/// Deprecated: Use [`FilterBuilder`] instead
-#[deprecated]
-#[allow(deprecated)]
-pub fn build_filter(filter: &BooleanArray) -> Result<Filter, ArrowError> {
-    let iter = SlicesIterator::new(filter);
-    let filter_count = filter_count(filter);
-    let chunks = iter.collect::<Vec<_>>();
-
-    Ok(Box::new(move |array: &ArrayData| {
-        match filter_count {
-            // return all
-            len if len == array.len() => array.clone(),
-            0 => ArrayData::new_empty(array.data_type()),
-            _ => {
-                let mut mutable = MutableArrayData::new(vec![array], false, filter_count);
-                chunks
-                    .iter()
-                    .for_each(|(start, end)| mutable.extend(0, *start, *end));
-                mutable.freeze()
-            }
-        }
-    }))
-}
-
-/// Remove null values by do a bitmask AND operation with null bits and the boolean bits.
+/// # Example
+/// ```
+/// # use arrow_array::{Array, BooleanArray};
+/// # use arrow_select::filter::prep_null_mask_filter;
+/// let filter = BooleanArray::from(vec![
+///   Some(true),
+///   Some(false),
+///   None
+/// ]);
+/// // convert Boolean array to a filter mask
+/// let null_mask = prep_null_mask_filter(&filter);
+/// // there are no nulls in the output mask
+/// assert!(null_mask.nulls().is_none());
+/// assert_eq!(null_mask, BooleanArray::from(vec![
+///  true,
+///  false,
+///  false, // Null is converted to false
+/// ]));
+/// ```
 pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
     let nulls = filter.nulls().unwrap();
     let mask = filter.values() & nulls.inner();
@@ -159,6 +154,13 @@ pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
 /// Returns a filtered `values` [`Array`] where the corresponding elements of
 /// `predicate` are `true`.
 ///
+/// If multiple arrays (or record batches) need to be filtered using the same predicate array,
+/// consider using [FilterBuilder] to create a single [FilterPredicate] and then
+/// calling [FilterPredicate::filter_record_batch].
+///
+/// In contrast to this function, it is then the responsibility of the caller
+/// to use [FilterBuilder::optimize] if appropriate.
+///
 /// # See also
 /// * [`FilterBuilder`] for more control over the filtering process.
 /// * [`filter_record_batch`] to filter a [`RecordBatch`]
@@ -180,7 +182,7 @@ pub fn prep_null_mask_filter(filter: &BooleanArray) -> BooleanArray {
 pub fn filter(values: &dyn Array, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
     let mut filter_builder = FilterBuilder::new(predicate);
 
-    if multiple_arrays(values.data_type()) {
+    if FilterBuilder::is_optimize_beneficial(values.data_type()) {
         // Only optimize if filtering more than one array
         // Otherwise, the overhead of optimization can be more than the benefit
         filter_builder = filter_builder.optimize();
@@ -191,39 +193,35 @@ pub fn filter(values: &dyn Array, predicate: &BooleanArray) -> Result<ArrayRef,
     filter_array(values, &predicate)
 }
 
-fn multiple_arrays(data_type: &DataType) -> bool {
-    match data_type {
-        DataType::Struct(fields) => {
-            fields.len() > 1 || fields.len() == 1 && multiple_arrays(fields[0].data_type())
-        }
-        DataType::Union(fields, UnionMode::Sparse) => !fields.is_empty(),
-        _ => false,
-    }
-}
-
 /// Returns a filtered [RecordBatch] where the corresponding elements of
 /// `predicate` are true.
 ///
 /// This is the equivalent of calling [filter] on each column of the [RecordBatch].
+///
+/// If multiple record batches (or arrays) need to be filtered using the same predicate array,
+/// consider using [FilterBuilder] to create a single [FilterPredicate] and then
+/// calling [FilterPredicate::filter_record_batch].
+/// In contrast to this function, it is then the responsibility of the caller
+/// to use [FilterBuilder::optimize] if appropriate.
 pub fn filter_record_batch(
     record_batch: &RecordBatch,
     predicate: &BooleanArray,
 ) -> Result<RecordBatch, ArrowError> {
     let mut filter_builder = FilterBuilder::new(predicate);
-    if record_batch.num_columns() > 1 {
-        // Only optimize if filtering more than one column
+    let num_cols = record_batch.num_columns();
+    if num_cols > 1
+        || (num_cols > 0
+            && FilterBuilder::is_optimize_beneficial(
+                record_batch.schema_ref().field(0).data_type(),
+            ))
+    {
+        // Only optimize if filtering more than one column or if the column contains multiple internal arrays
         // Otherwise, the overhead of optimization can be more than the benefit
         filter_builder = filter_builder.optimize();
     }
     let filter = filter_builder.build();
 
-    let filtered_arrays = record_batch
-        .columns()
-        .iter()
-        .map(|a| filter_array(a, &filter))
-        .collect::<Result<Vec<_>, _>>()?;
-    let options = RecordBatchOptions::default().with_row_count(Some(filter.count()));
-    RecordBatch::try_new_with_options(record_batch.schema(), filtered_arrays, &options)
+    filter.filter_record_batch(record_batch)
 }
 
 /// A builder to construct [`FilterPredicate`]
@@ -252,11 +250,16 @@ impl FilterBuilder {
         }
     }
 
-    /// Compute an optimised representation of the provided `filter` mask that can be
+    /// Compute an optimized representation of the provided `filter` mask that can be
     /// applied to an array more quickly.
     ///
-    /// Note: There is limited benefit to calling this to then filter a single array
-    /// Note: This will likely have a larger memory footprint than the original mask
+    /// When filtering multiple arrays (e.g. a [`RecordBatch`] or a
+    /// [`StructArray`] with multiple fields), optimizing the filter can provide
+    /// significant performance benefits.
+    ///
+    /// However, optimization takes time and the optimized form can have a larger
+    /// memory footprint than the original mask, so when filtering a single array
+    /// it is often faster to skip optimization.
     pub fn optimize(mut self) -> Self {
         match self.strategy {
             IterationStrategy::SlicesIterator => {
@@ -272,6 +275,22 @@ impl FilterBuilder {
         self
     }
 
+    /// Determines if calling [FilterBuilder::optimize] is beneficial for the
+    /// given type even when filtering just a single array.
+    ///
+    /// See [`FilterBuilder::optimize`] for more details.
+    pub fn is_optimize_beneficial(data_type: &DataType) -> bool {
+        match data_type {
+            DataType::Struct(fields) => {
+                fields.len() > 1
+                    || fields.len() == 1
+                        && FilterBuilder::is_optimize_beneficial(fields[0].data_type())
+            }
+            DataType::Union(fields, UnionMode::Sparse) => !fields.is_empty(),
+            _ => false,
+        }
+    }
+
     /// Construct the final `FilterPredicate`
     pub fn build(self) -> FilterPredicate {
         FilterPredicate {
@@ -337,6 +356,31 @@ impl FilterPredicate {
         filter_array(values, self)
     }
 
+    /// Returns a filtered [`RecordBatch`] containing only the rows that are selected by this
+    /// [`FilterPredicate`].
+    ///
+    /// This is the equivalent of calling [filter] on each column of the [`RecordBatch`].
+    pub fn filter_record_batch(
+        &self,
+        record_batch: &RecordBatch,
+    ) -> Result<RecordBatch, ArrowError> {
+        let filtered_arrays = record_batch
+            .columns()
+            .iter()
+            .map(|a| filter_array(a, self))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // SAFETY: we know that the set of filtered arrays will match the schema of the original
+        // record batch
+        unsafe {
+            Ok(RecordBatch::new_unchecked(
+                record_batch.schema(),
+                filtered_arrays,
+                self.count,
+            ))
+        }
+    }
+
     /// Number of rows being selected based on this [`FilterPredicate`]
     pub fn count(&self) -> usize {
         self.count
@@ -383,6 +427,12 @@ fn filter_array(values: &dyn Array, predicate: &FilterPredicate) -> Result<Array
             DataType::FixedSizeBinary(_) => {
                 Ok(Arc::new(filter_fixed_size_binary(values.as_fixed_size_binary(), predicate)))
             }
+            DataType::ListView(_) => {
+                Ok(Arc::new(filter_list_view::<i32>(values.as_list_view(), predicate)))
+            }
+            DataType::LargeListView(_) => {
+                Ok(Arc::new(filter_list_view::<i64>(values.as_list_view(), predicate)))
+            }
             DataType::RunEndEncoded(_, _) => {
                 downcast_run_array!{
                     values => Ok(Arc::new(filter_run_end_array(values, predicate)?)),
@@ -437,7 +487,12 @@ where
     R::Native: AddAssign,
 {
     let run_ends: &RunEndBuffer<R::Native> = array.run_ends();
-    let mut new_run_ends = vec![R::default_value(); run_ends.len()];
+    let start_physical = run_ends.get_start_physical_index();
+    let end_physical = run_ends.get_end_physical_index();
+    let physical_len = end_physical - start_physical + 1;
+
+    let mut new_run_ends = vec![R::default_value(); physical_len];
+    let offset = run_ends.offset() as u64;
 
     let mut start = 0u64;
     let mut j = 0;
@@ -445,9 +500,9 @@ where
     let filter_values = predicate.filter.values();
     let run_ends = run_ends.inner();
 
-    let pred: BooleanArray = BooleanBuffer::collect_bool(run_ends.len(), |i| {
+    let pred: BooleanArray = BooleanBuffer::collect_bool(physical_len, |i| {
         let mut keep = false;
-        let mut end = run_ends[i].into() as u64;
+        let mut end = (run_ends[i + start_physical].into() as u64).saturating_sub(offset);
         let difference = end.saturating_sub(filter_values.len() as u64);
         end -= difference;
 
@@ -467,10 +522,10 @@ where
 
     new_run_ends.truncate(j);
 
-    let values = array.values();
-    let values = filter(&values, &pred)?;
+    let values = array.values_slice();
+    let values = filter(values.as_ref(), &pred)?;
 
-    let run_ends = PrimitiveArray::<R>::new(new_run_ends.into(), None);
+    let run_ends = PrimitiveArray::<R>::try_new(new_run_ends.into(), None)?;
     RunArray::try_new(&run_ends, &values)
 }
 
@@ -505,20 +560,24 @@ fn filter_null_mask(
 fn filter_bits(buffer: &BooleanBuffer, predicate: &FilterPredicate) -> Buffer {
     let src = buffer.values();
     let offset = buffer.offset();
+    assert!(buffer.len() >= predicate.filter.len());
 
     match &predicate.strategy {
         IterationStrategy::IndexIterator => {
-            let bits = IndexIterator::new(&predicate.filter, predicate.count)
-                .map(|src_idx| bit_util::get_bit(src, src_idx + offset));
+            let bits =
+                // SAFETY: IndexIterator uses the filter predicate to derive indices
+                IndexIterator::new(&predicate.filter, predicate.count).map(|src_idx| unsafe {
+                    bit_util::get_bit_raw(buffer.values().as_ptr(), src_idx + offset)
+                });
 
             // SAFETY: `IndexIterator` reports its size correctly
             unsafe { MutableBuffer::from_trusted_len_iter_bool(bits).into() }
         }
         IterationStrategy::Indices(indices) => {
-            let bits = indices
-                .iter()
-                .map(|src_idx| bit_util::get_bit(src, *src_idx + offset));
-
+            // SAFETY: indices were derived from the filter predicate
+            let bits = indices.iter().map(|src_idx| unsafe {
+                bit_util::get_bit_raw(buffer.values().as_ptr(), *src_idx + offset)
+            });
             // SAFETY: `Vec::iter()` reports its size correctly
             unsafe { MutableBuffer::from_trusted_len_iter_bool(bits).into() }
         }
@@ -564,25 +623,30 @@ fn filter_native<T: ArrowNativeType>(values: &[T], predicate: &FilterPredicate)
         IterationStrategy::SlicesIterator => {
             let mut buffer = Vec::with_capacity(predicate.count);
             for (start, end) in SlicesIterator::new(&predicate.filter) {
-                buffer.extend_from_slice(&values[start..end]);
+                // SAFETY: indices were derived from the filter predicate
+                buffer.extend_from_slice(unsafe { values.get_unchecked(start..end) });
             }
             buffer.into()
         }
         IterationStrategy::Slices(slices) => {
             let mut buffer = Vec::with_capacity(predicate.count);
             for (start, end) in slices {
-                buffer.extend_from_slice(&values[*start..*end]);
+                // SAFETY: indices were derived from the filter predicate
+                buffer.extend_from_slice(unsafe { values.get_unchecked(*start..*end) });
             }
             buffer.into()
         }
         IterationStrategy::IndexIterator => {
-            let iter = IndexIterator::new(&predicate.filter, predicate.count).map(|x| values[x]);
+            // SAFETY: indices were derived from the filter predicate
+            let iter = IndexIterator::new(&predicate.filter, predicate.count)
+                .map(|x| unsafe { *values.get_unchecked(x) });
 
             // SAFETY: IndexIterator is trusted length
             unsafe { MutableBuffer::from_trusted_len_iter(iter) }.into()
         }
         IterationStrategy::Indices(indices) => {
-            let iter = indices.iter().map(|x| values[*x]);
+            // SAFETY: indices were derived from the filter predicate
+            let iter = indices.iter().map(|x| unsafe { *values.get_unchecked(*x) });
             iter.collect::<Vec<_>>().into()
         }
         IterationStrategy::All | IterationStrategy::None => unreachable!(),
@@ -831,7 +895,7 @@ fn filter_fixed_size_binary(
 fn filter_dict<T>(array: &DictionaryArray<T>, predicate: &FilterPredicate) -> DictionaryArray<T>
 where
     T: ArrowDictionaryKeyType,
-    T::Native: num::Num,
+    T::Native: num_traits::Num,
 {
     let builder = filter_primitive::<T>(array.keys(), predicate)
         .into_data()
@@ -882,7 +946,10 @@ fn filter_sparse_union(
         unreachable!()
     };
 
-    let type_ids = filter_primitive(&Int8Array::new(array.type_ids().clone(), None), predicate);
+    let type_ids = filter_primitive(
+        &Int8Array::try_new(array.type_ids().clone(), None)?,
+        predicate,
+    );
 
     let children = fields
         .iter()
@@ -894,18 +961,46 @@ fn filter_sparse_union(
     })
 }
 
+/// `filter` implementation for list views: selects the offset, size and validity of kept rows
+fn filter_list_view<OffsetType: OffsetSizeTrait>(
+    array: &GenericListViewArray<OffsetType>,
+    predicate: &FilterPredicate,
+) -> GenericListViewArray<OffsetType> {
+    let filtered_offsets = filter_native::<OffsetType>(array.offsets(), predicate);
+    let filtered_sizes = filter_native::<OffsetType>(array.sizes(), predicate);
+
+    // Filter the nulls; `None` from `filter_null_mask` leaves the result without a null buffer
+    let nulls = if let Some((null_count, nulls)) = filter_null_mask(array.nulls(), predicate) {
+        let buffer = BooleanBuffer::new(nulls, 0, predicate.count);
+        // NOTE(review): assumes `null_count` is the exact null count of `buffer` — confirm
+        Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
+    } else {
+        None
+    };
+
+    let list_data = ArrayDataBuilder::new(array.data_type().clone())
+        .nulls(nulls)
+        .buffers(vec![filtered_offsets, filtered_sizes])
+        .child_data(vec![array.values().to_data()]) // child values are reused, not filtered
+        .len(predicate.count);
+    // NOTE(review): unchecked build; assumes the kept offsets/sizes stay valid for the child
+    let list_data = unsafe { list_data.build_unchecked() };
+
+    GenericListViewArray::from(list_data)
+}
+
 #[cfg(test)]
 mod tests {
+    use super::*;
     use arrow_array::builder::*;
     use arrow_array::cast::as_run_array;
     use arrow_array::types::*;
+    use arrow_data::ArrayData;
     use rand::distr::uniform::{UniformSampler, UniformUsize};
     use rand::distr::{Alphanumeric, StandardUniform};
     use rand::prelude::*;
     use rand::rng;
 
-    use super::*;
-
     macro_rules! def_temporal_test {
         ($test:ident, $array_type: ident, $data: expr) => {
             #[test]
@@ -1264,6 +1359,23 @@ mod tests {
         assert_eq!(actual.values(), expected.values())
     }
 
+    #[test]
+    fn test_filter_run_end_encoding_array_sliced() {
+        let run_ends = Int64Array::from(vec![2, 3, 8]);
+        let values = Int64Array::from(vec![7, -2, 9]);
+        let a = RunArray::try_new(&run_ends, &values).unwrap(); // [7, 7, -2, 9, 9, 9, 9, 9]
+        let a = a.slice(2, 3); // [-2, 9, 9]
+        let b = BooleanArray::from(vec![true, false, true]);
+        let result = filter(&a, &b).unwrap();
+
+        let result = result.as_run::<Int64Type>();
+        let result = result.downcast::<Int64Array>().unwrap();
+
+        let expected = vec![-2, 9];
+        let actual = result.into_iter().flatten().collect::<Vec<_>>();
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn test_filter_run_end_encoding_array_remove_value() {
         let run_ends = Int32Array::from(vec![2, 3, 8, 10]);
@@ -1404,6 +1516,69 @@ mod tests {
         assert_eq!(&make_array(expected), &result);
     }
 
+    fn test_case_filter_list_view<T: OffsetSizeTrait>() {
+        // [[1, 2], null, [], [3,4]]
+        let mut list_array = GenericListViewBuilder::<T, _>::new(Int32Builder::new());
+        list_array.append_value([Some(1), Some(2)]);
+        list_array.append_null();
+        list_array.append_value([]);
+        list_array.append_value([Some(3), Some(4)]);
+
+        let list_array = list_array.finish();
+        let predicate = BooleanArray::from_iter([true, false, true, false]);
+
+        // Filter result: [[1, 2], []]
+        let filtered = filter(&list_array, &predicate)
+            .unwrap()
+            .as_list_view::<T>()
+            .clone();
+
+        let mut expected =
+            GenericListViewBuilder::<T, _>::with_capacity(Int32Builder::with_capacity(5), 3);
+        expected.append_value([Some(1), Some(2)]);
+        expected.append_value([]);
+        let expected = expected.finish();
+
+        assert_eq!(&filtered, &expected);
+    }
+
+    fn test_case_filter_sliced_list_view<T: OffsetSizeTrait>() {
+        // [[1, 2], null, [], [3, 4]]
+        let mut list_array =
+            GenericListViewBuilder::<T, _>::with_capacity(Int32Builder::with_capacity(6), 4);
+        list_array.append_value([Some(1), Some(2)]);
+        list_array.append_null();
+        list_array.append_value([]);
+        list_array.append_value([Some(3), Some(4)]);
+
+        let list_array = list_array.finish();
+
+        // Sliced: [null, [], [3, 4]]
+        let sliced = list_array.slice(1, 3);
+        let predicate = BooleanArray::from_iter([false, false, true]);
+
+        // Filter result: [[3, 4]] (only the last row of the slice is kept)
+        let filtered = filter(&sliced, &predicate)
+            .unwrap()
+            .as_list_view::<T>()
+            .clone();
+
+        let mut expected = GenericListViewBuilder::<T, _>::new(Int32Builder::new());
+        expected.append_value([Some(3), Some(4)]);
+        let expected = expected.finish();
+
+        assert_eq!(&filtered, &expected);
+    }
+
+    #[test]
+    fn test_filter_list_view_array() {
+        test_case_filter_list_view::<i32>();
+        test_case_filter_list_view::<i64>();
+
+        test_case_filter_sliced_list_view::<i32>();
+        test_case_filter_sliced_list_view::<i64>();
+    }
+
     #[test]
     fn test_slice_iterator_bits() {
         let filter_values = (0..64).map(|i| i == 1).collect::<Vec<bool>>();
@@ -1486,12 +1661,11 @@ mod tests {
     #[test]
     fn test_slices() {
         // takes up 2 u64s
-        let bools = std::iter::repeat(true)
-            .take(10)
-            .chain(std::iter::repeat(false).take(30))
-            .chain(std::iter::repeat(true).take(20))
-            .chain(std::iter::repeat(false).take(17))
-            .chain(std::iter::repeat(true).take(4));
+        let bools = std::iter::repeat_n(true, 10)
+            .chain(std::iter::repeat_n(false, 30))
+            .chain(std::iter::repeat_n(true, 20))
+            .chain(std::iter::repeat_n(false, 17))
+            .chain(std::iter::repeat_n(true, 4));
 
         let bool_array: BooleanArray = bools.map(Some).collect();
 
@@ -2135,4 +2309,22 @@ mod tests {
         // The filtered batch should have 2 rows (the 1st and 3rd)
         assert_eq!(filtered_batch.num_rows(), 2);
     }
+
+    #[test]
+    #[should_panic(expected = "assertion failed")]
+    fn test_filter_bits_too_large() {
+        // 9-row predicate vs 8-bit buffer: `expected` pins the panic to the length assertion
+        let buffer = BooleanBuffer::from(vec![false; 8]);
+        let predicate = BooleanArray::from(vec![true; 9]);
+        filter_bits(&buffer, &FilterBuilder::new(&predicate).build());
+    }
+
+    #[test]
+    #[should_panic] // NOTE(review): may be satisfied by the `unreachable!()` strategy arm — confirm
+    fn test_filter_native_too_large() {
+        let values = vec![1; 8];
+        let predicate = BooleanArray::from(vec![false; 9]); // one more row than `values` holds
+        let filter = FilterBuilder::new(&predicate).build();
+        filter_native(&values, &filter);
+    }
 }
diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs
index 3fcf8f1f4c40..d4303e8d85eb 100644
--- a/arrow-select/src/interleave.rs
+++ b/arrow-select/src/interleave.rs
@@ -17,15 +17,16 @@
 
 //! Interleave elements from multiple arrays
 
+use crate::concat::concat;
 use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
 use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder};
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer};
-use arrow_data::transform::MutableArrayData;
 use arrow_data::ByteView;
-use arrow_schema::{ArrowError, DataType};
+use arrow_data::transform::MutableArrayData;
+use arrow_schema::{ArrowError, DataType, FieldRef, Fields};
 use std::sync::Arc;
 
 macro_rules! primitive_helper {
@@ -104,6 +105,9 @@ pub fn interleave(
             k.as_ref() => (dict_helper, values, indices),
             _ => unreachable!("illegal dictionary key type {k}")
         },
+        DataType::Struct(fields) => interleave_struct(fields, values, indices),
+        DataType::List(field) => interleave_list::<i32>(values, indices, field),
+        DataType::LargeList(field) => interleave_list::<i64>(values, indices, field),
         _ => interleave_fallback(values, indices)
     }
 }
@@ -156,7 +160,7 @@ fn interleave_primitive<T: ArrowPrimitiveType>(
         .map(|(a, b)| interleaved.arrays[*a].value(*b))
         .collect::<Vec<_>>();
 
-    let array = PrimitiveArray::<T>::new(values.into(), interleaved.nulls);
+    let array = PrimitiveArray::<T>::try_new(values.into(), interleaved.nulls)?;
     Ok(Arc::new(array.with_data_type(data_type.clone())))
 }
 
@@ -194,8 +198,14 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
     indices: &[(usize, usize)],
 ) -> Result<ArrayRef, ArrowError> {
     let dictionaries: Vec<_> = arrays.iter().map(|x| x.as_dictionary::<K>()).collect();
-    if !should_merge_dictionary_values::<K>(&dictionaries, indices.len()) {
-        return interleave_fallback(arrays, indices);
+    let (should_merge, has_overflow) =
+        should_merge_dictionary_values::<K>(&dictionaries, indices.len());
+    if !should_merge {
+        return if has_overflow {
+            interleave_fallback(arrays, indices)
+        } else {
+            interleave_fallback_dictionary::<K>(&dictionaries, indices)
+        };
     }
 
     let masks: Vec<_> = dictionaries
@@ -278,6 +288,83 @@ fn interleave_views<T: ByteViewType>(
     Ok(Arc::new(array))
 }
 
+fn interleave_struct(
+    fields: &Fields,
+    values: &[&dyn Array],
+    indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+    let interleaved = Interleave::<'_, StructArray>::new(values, indices);
+    // No child columns to interleave: build the struct directly with `indices.len()` rows
+    if fields.is_empty() {
+        let array = StructArray::try_new_with_length(
+            fields.clone(),
+            vec![],
+            interleaved.nulls,
+            indices.len(),
+        )?;
+        return Ok(Arc::new(array));
+    }
+    // Interleave the i-th child column of every input with the same `indices`
+    let struct_fields_array: Result<Vec<_>, _> = (0..fields.len())
+        .map(|i| {
+            let field_values: Vec<&dyn Array> = interleaved
+                .arrays
+                .iter()
+                .map(|x| x.column(i).as_ref())
+                .collect();
+            interleave(&field_values, indices)
+        })
+        .collect();
+    // Reassemble the struct from the interleaved children and `interleaved.nulls`
+    let struct_array =
+        StructArray::try_new(fields.clone(), struct_fields_array?, interleaved.nulls)?;
+    Ok(Arc::new(struct_array))
+}
+
+fn interleave_list<O: OffsetSizeTrait>(
+    values: &[&dyn Array],
+    indices: &[(usize, usize)],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let interleaved = Interleave::<'_, GenericListArray<O>>::new(values, indices);
+    // Output offsets are the running total of the selected lists' lengths (panics on overflow)
+    let mut capacity = 0usize;
+    let mut offsets = Vec::with_capacity(indices.len() + 1);
+    offsets.push(O::from_usize(0).unwrap());
+    offsets.extend(indices.iter().map(|(array, row)| {
+        let o = interleaved.arrays[*array].value_offsets();
+        let element_len = o[*row + 1].as_usize() - o[*row].as_usize();
+        capacity += element_len;
+        O::from_usize(capacity).expect("offset overflow")
+    }));
+    // Expand every selected list into one `(array, child index)` pair per element
+    let mut child_indices = Vec::with_capacity(capacity);
+    for (array, row) in indices {
+        let list = interleaved.arrays[*array];
+        let start = list.value_offsets()[*row].as_usize();
+        let end = list.value_offsets()[*row + 1].as_usize();
+        child_indices.extend((start..end).map(|i| (*array, i)));
+    }
+
+    let child_arrays: Vec<&dyn Array> = interleaved
+        .arrays
+        .iter()
+        .map(|list| list.values().as_ref())
+        .collect();
+    // Interleave the child arrays with those pairs to gather the output elements
+    let interleaved_values = interleave(&child_arrays, &child_indices)?;
+
+    let offsets = OffsetBuffer::new(offsets.into());
+    let list_array = GenericListArray::<O>::new(
+        field.clone(),
+        offsets,
+        interleaved_values,
+        interleaved.nulls,
+    );
+
+    Ok(Arc::new(list_array))
+}
+
 /// Fallback implementation of interleave using [`MutableArrayData`]
 fn interleave_fallback(
     values: &[&dyn Array],
@@ -312,6 +399,76 @@ fn interleave_fallback(
     Ok(make_array(array_data.freeze()))
 }
 
+/// Fallback implementation for interleaving dictionaries when it was determined
+/// that the dictionary values should not be merged. This implementation concatenates
+/// the value slices and recomputes the resulting dictionary keys.
+///
+/// # Panics
+///
+/// Panics if a recomputed key does not fit in the key type (i.e. the combined dictionary
+/// values overflow it). Callers must rule this out using [`should_merge_dictionary_values`]
+/// before calling this function.
+fn interleave_fallback_dictionary<K: ArrowDictionaryKeyType>(
+    dictionaries: &[&DictionaryArray<K>],
+    indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+    let relative_offsets: Vec<usize> = dictionaries
+        .iter()
+        .scan(0usize, |offset, dict| {
+            let current = *offset;
+            *offset += dict.values().len();
+            Some(current)
+        })
+        .collect();
+    let all_values: Vec<&dyn Array> = dictionaries.iter().map(|d| d.values().as_ref()).collect();
+    let concatenated_values = concat(&all_values)?;
+    // `relative_offsets[i]` is where dictionary `i`'s values start in `concatenated_values`
+    let any_nulls = dictionaries.iter().any(|d| d.keys().nulls().is_some());
+    let (new_keys, nulls) = if any_nulls {
+        let mut has_nulls = false;
+        let new_keys: Vec<K::Native> = indices
+            .iter()
+            .map(|(array, row)| {
+                let old_keys = dictionaries[*array].keys();
+                if old_keys.is_valid(*row) {
+                    let old_key = old_keys.values()[*row].as_usize();
+                    K::Native::from_usize(relative_offsets[*array] + old_key)
+                        .expect("key overflow should be checked by caller")
+                } else {
+                    has_nulls = true;
+                    K::Native::ZERO // placeholder key for a null row
+                }
+            })
+            .collect();
+        // Build the validity buffer only if a selected row actually had a null key
+        let nulls = if has_nulls {
+            let null_buffer = BooleanBuffer::collect_bool(indices.len(), |i| {
+                let (array, row) = indices[i];
+                dictionaries[array].keys().is_valid(row)
+            });
+            Some(NullBuffer::new(null_buffer))
+        } else {
+            None
+        };
+        (new_keys, nulls)
+    } else {
+        let new_keys: Vec<K::Native> = indices
+            .iter()
+            .map(|(array, row)| {
+                let old_key = dictionaries[*array].keys().values()[*row].as_usize();
+                K::Native::from_usize(relative_offsets[*array] + old_key)
+                    .expect("key overflow should be checked by caller")
+            })
+            .collect();
+        (new_keys, None)
+    };
+
+    let keys_array = PrimitiveArray::<K>::new(new_keys.into(), nulls);
+    // SAFETY: keys_array is constructed from a valid set of keys.
+    let array = unsafe { DictionaryArray::new_unchecked(keys_array, concatenated_values) };
+    Ok(Arc::new(array))
+}
+
 /// Interleave rows by index from multiple [`RecordBatch`] instances and return a new [`RecordBatch`].
 ///
 /// This function will call [`interleave`] on each array of the [`RecordBatch`] instances and assemble a new [`RecordBatch`].
@@ -376,8 +533,10 @@ pub fn interleave_record_batch(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::builder::{Int32Builder, ListBuilder, PrimitiveRunBuilder};
     use arrow_array::Int32RunArray;
+    use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder};
+    use arrow_array::types::Int8Type;
+    use arrow_schema::Field;
 
     #[test]
     fn test_primitive() {
@@ -475,9 +634,43 @@ mod tests {
     }
 
     #[test]
-    fn test_lists() {
+    fn test_interleave_dictionary_overflow_same_values() {
+        let values: ArrayRef = Arc::new(StringArray::from_iter_values(
+            (0..50).map(|i| format!("v{i}")),
+        ));
+
+        // With 3 dictionaries of 50 values each, relative_offsets = [0, 50, 100]
+        // Accessing key 49 from dict3 gives 100 + 49 = 149 which overflows Int8
+        // (max 127).
+        // This test case falls back to interleave_fallback because the
+        // dictionaries share the same underlying values slice.
+        let dict1 = DictionaryArray::<Int8Type>::new(
+            Int8Array::from_iter_values([0, 1, 2]),
+            values.clone(),
+        );
+        let dict2 = DictionaryArray::<Int8Type>::new(
+            Int8Array::from_iter_values([0, 1, 2]),
+            values.clone(),
+        );
+        let dict3 =
+            DictionaryArray::<Int8Type>::new(Int8Array::from_iter_values([49]), values.clone());
+
+        let indices = &[(0, 0), (1, 0), (2, 0)];
+        let result = interleave(&[&dict1, &dict2, &dict3], indices).unwrap();
+
+        let dict_result = result.as_dictionary::<Int8Type>();
+        let string_result: Vec<_> = dict_result
+            .downcast_dict::<StringArray>()
+            .unwrap()
+            .into_iter()
+            .map(|x| x.unwrap())
+            .collect();
+        assert_eq!(string_result, vec!["v0", "v0", "v49"]);
+    }
+
+    fn test_interleave_lists<O: OffsetSizeTrait>() {
         // [[1, 2], null, [3]]
-        let mut a = ListBuilder::new(Int32Builder::new());
+        let mut a = GenericListBuilder::<O, _>::new(Int32Builder::new());
         a.values().append_value(1);
         a.values().append_value(2);
         a.append(true);
@@ -487,7 +680,7 @@ mod tests {
         let a = a.finish();
 
         // [[4], null, [5, 6, null]]
-        let mut b = ListBuilder::new(Int32Builder::new());
+        let mut b = GenericListBuilder::<O, _>::new(Int32Builder::new());
         b.values().append_value(4);
         b.append(true);
         b.append(false);
@@ -498,10 +691,13 @@ mod tests {
         let b = b.finish();
 
         let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap();
-        let v = values.as_any().downcast_ref::<ListArray>().unwrap();
+        let v = values
+            .as_any()
+            .downcast_ref::<GenericListArray<O>>()
+            .unwrap();
 
         // [[3], null, [4], [5, 6, null], null]
-        let mut expected = ListBuilder::new(Int32Builder::new());
+        let mut expected = GenericListBuilder::<O, _>::new(Int32Builder::new());
         expected.values().append_value(3);
         expected.append(true);
         expected.append(false);
@@ -517,6 +713,209 @@ mod tests {
         assert_eq!(v, &expected);
     }
 
+    #[test]
+    fn test_lists() {
+        test_interleave_lists::<i32>();
+    }
+
+    #[test]
+    fn test_large_lists() {
+        test_interleave_lists::<i64>();
+    }
+
+    #[test]
+    fn test_struct_without_nulls() {
+        let fields = Fields::from(vec![
+            Field::new("number_col", DataType::Int32, false),
+            Field::new("string_col", DataType::Utf8, false),
+        ]);
+        let a = {
+            let number_col = Int32Array::from_iter_values([1, 2, 3, 4]);
+            let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let b = {
+            let number_col = Int32Array::from_iter_values([5, 6, 7]);
+            let string_col = StringArray::from_iter_values(["hello", "world", "foo"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let c = {
+            let number_col = Int32Array::from_iter_values([8, 9, 10]);
+            let string_col = StringArray::from_iter_values(["x", "y", "z"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let values = interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (2, 0), (1, 1)]).unwrap();
+        let values_struct = values.as_struct();
+        assert_eq!(values_struct.data_type(), &DataType::Struct(fields));
+        assert_eq!(values_struct.null_count(), 0);
+
+        let values_number = values_struct.column(0).as_primitive::<Int32Type>();
+        assert_eq!(values_number.values(), &[4, 4, 10, 8, 6]);
+        let values_string = values_struct.column(1).as_string::<i32>();
+        let values_string: Vec<_> = values_string.into_iter().collect();
+        assert_eq!(
+            &values_string,
+            &[Some("d"), Some("d"), Some("z"), Some("x"), Some("world")]
+        );
+    }
+
+    #[test]
+    fn test_struct_with_nulls_in_values() {
+        let fields = Fields::from(vec![
+            Field::new("number_col", DataType::Int32, true),
+            Field::new("string_col", DataType::Utf8, true),
+        ]);
+        let a = {
+            let number_col = Int32Array::from_iter_values([1, 2, 3, 4]);
+            let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let b = {
+            let number_col = Int32Array::from_iter([Some(1), Some(4), None]);
+            let string_col = StringArray::from(vec![Some("hello"), None, Some("foo")]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let values = interleave(&[&a, &b], &[(0, 1), (1, 2), (1, 2), (0, 3), (1, 1)]).unwrap();
+        let values_struct = values.as_struct();
+        assert_eq!(values_struct.data_type(), &DataType::Struct(fields));
+
+        // The struct itself has no nulls, but the values do
+        assert_eq!(values_struct.null_count(), 0);
+
+        let values_number: Vec<_> = values_struct
+            .column(0)
+            .as_primitive::<Int32Type>()
+            .into_iter()
+            .collect();
+        assert_eq!(values_number, &[Some(2), None, None, Some(4), Some(4)]);
+
+        let values_string = values_struct.column(1).as_string::<i32>();
+        let values_string: Vec<_> = values_string.into_iter().collect();
+        assert_eq!(
+            &values_string,
+            &[Some("b"), Some("foo"), Some("foo"), Some("d"), None]
+        );
+    }
+
+    #[test]
+    fn test_struct_with_nulls() {
+        let fields = Fields::from(vec![
+            Field::new("number_col", DataType::Int32, false),
+            Field::new("string_col", DataType::Utf8, false),
+        ]);
+        let a = {
+            let number_col = Int32Array::from_iter_values([1, 2, 3, 4]);
+            let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let b = {
+            let number_col = Int32Array::from_iter_values([5, 6, 7]);
+            let string_col = StringArray::from_iter_values(["hello", "world", "foo"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                Some(NullBuffer::from(&[true, false, true])),
+            )
+            .unwrap()
+        };
+
+        let c = {
+            let number_col = Int32Array::from_iter_values([8, 9, 10]);
+            let string_col = StringArray::from_iter_values(["x", "y", "z"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+
+        let values = interleave(&[&a, &b, &c], &[(0, 3), (0, 3), (2, 2), (1, 1), (2, 0)]).unwrap();
+        let values_struct = values.as_struct();
+        assert_eq!(values_struct.data_type(), &DataType::Struct(fields));
+
+        let validity: Vec<bool> = {
+            let null_buffer = values_struct.nulls().expect("should_have_nulls");
+
+            null_buffer.iter().collect()
+        };
+        assert_eq!(validity, &[true, true, true, false, true]);
+        let values_number = values_struct.column(0).as_primitive::<Int32Type>();
+        assert_eq!(values_number.values(), &[4, 4, 10, 6, 8]);
+        let values_string = values_struct.column(1).as_string::<i32>();
+        let values_string: Vec<_> = values_string.into_iter().collect();
+        assert_eq!(
+            &values_string,
+            &[Some("d"), Some("d"), Some("z"), Some("world"), Some("x"),]
+        );
+    }
+
+    #[test]
+    fn test_struct_empty() {
+        let fields = Fields::from(vec![
+            Field::new("number_col", DataType::Int32, false),
+            Field::new("string_col", DataType::Utf8, false),
+        ]);
+        let a = {
+            let number_col = Int32Array::from_iter_values([1, 2, 3, 4]);
+            let string_col = StringArray::from_iter_values(["a", "b", "c", "d"]);
+
+            StructArray::try_new(
+                fields.clone(),
+                vec![Arc::new(number_col), Arc::new(string_col)],
+                None,
+            )
+            .unwrap()
+        };
+        let v = interleave(&[&a], &[]).unwrap();
+        assert!(v.is_empty());
+        assert_eq!(v.data_type(), &DataType::Struct(fields));
+    }
+
     #[test]
     fn interleave_sparse_nulls() {
         let values = StringArray::from_iter_values((0..100).map(|x| x.to_string()));
@@ -772,6 +1171,29 @@ mod tests {
         assert_eq!(actual, expected);
     }
 
+    #[test]
+    fn test_interleave_run_end_encoded_sliced() {
+        let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
+        builder.extend([1, 1, 2, 2, 2, 3].into_iter().map(Some));
+        let a = builder.finish();
+        let a = a.slice(2, 3); // [2, 2, 2]
+
+        let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
+        builder.extend([4, 5, 5, 6, 6, 6].into_iter().map(Some));
+        let b = builder.finish();
+        let b = b.slice(1, 3); // [5, 5, 6]
+
+        let indices = &[(0, 1), (1, 0), (0, 2), (1, 1), (1, 2)];
+        let result = interleave(&[&a, &b], indices).unwrap();
+
+        let result = result.as_run::<Int32Type>();
+        let result = result.downcast::<Int32Array>().unwrap();
+
+        let expected = vec![2, 5, 2, 5, 6];
+        let actual = result.into_iter().flatten().collect::<Vec<_>>();
+        assert_eq!(actual, expected);
+    }
+
     #[test]
     fn test_interleave_run_end_encoded_string() {
         let a: Int32RunArray = vec!["hello", "hello", "world", "world", "foo"]
@@ -945,4 +1367,51 @@ mod tests {
         }
         assert_eq!(actual, expected);
     }
+
+    #[test]
+    fn test_struct_no_fields() {
+        let fields = Fields::empty();
+        let a = StructArray::try_new_with_length(fields.clone(), vec![], None, 10).unwrap();
+        let v = interleave(&[&a], &[(0, 0)]).unwrap();
+        assert_eq!(v.len(), 1);
+        assert_eq!(v.data_type(), &DataType::Struct(fields));
+    }
+
+    #[test]
+    fn test_interleave_fallback_dictionary_with_nulls() {
+        let input_1_keys = Int32Array::from_iter([Some(0), None, Some(1)]);
+        let input_1_values = StringArray::from_iter_values(["foo", "bar"]);
+        let dict_a = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));
+
+        let input_2_keys = Int32Array::from_iter([Some(0), Some(1), None]);
+        let input_2_values = StringArray::from_iter_values(["baz", "qux"]);
+        let dict_b = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));
+
+        let indices = vec![
+            (0, 0), // "foo"
+            (0, 1), // null
+            (1, 0), // "baz"
+            (1, 2), // null
+            (0, 2), // "bar"
+            (1, 1), // "qux"
+        ];
+
+        let result =
+            interleave_fallback_dictionary::<Int32Type>(&[&dict_a, &dict_b], &indices).unwrap();
+        let dict_result = result.as_dictionary::<Int32Type>();
+
+        let string_result = dict_result.downcast_dict::<StringArray>().unwrap();
+        let collected: Vec<_> = string_result.into_iter().collect();
+        assert_eq!(
+            collected,
+            vec![
+                Some("foo"),
+                None,
+                Some("baz"),
+                None,
+                Some("bar"),
+                Some("qux")
+            ]
+        );
+    }
 }
diff --git a/arrow-select/src/lib.rs b/arrow-select/src/lib.rs
index a2ddff351c9a..33c1ee8ddb0a 100644
--- a/arrow-select/src/lib.rs
+++ b/arrow-select/src/lib.rs
@@ -21,14 +21,15 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 
 pub mod coalesce;
 pub mod concat;
-mod dictionary;
+pub mod dictionary;
 pub mod filter;
 pub mod interleave;
+pub mod merge;
 pub mod nullif;
 pub mod take;
 pub mod union_extract;
diff --git a/arrow-select/src/merge.rs b/arrow-select/src/merge.rs
new file mode 100644
index 000000000000..eff3db50ee7c
--- /dev/null
+++ b/arrow-select/src/merge.rs
@@ -0,0 +1,616 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`merge`] and [`merge_n`]: Combine values from two or more arrays
+
+use crate::filter::{SlicesIterator, prep_null_mask_filter};
+use crate::zip::zip;
+use arrow_array::{Array, ArrayRef, BooleanArray, Datum, make_array, new_empty_array};
+use arrow_data::ArrayData;
+use arrow_data::transform::MutableArrayData;
+use arrow_schema::ArrowError;
+
+/// An index for the [merge_n] function.
+///
+/// This trait allows the indices argument for [merge_n] to be stored using a more
+/// compact representation than `usize` when the input arrays are small.
+/// If the number of input arrays is less than 256 for instance, the indices can be stored as `u8`.
+///
+/// Implementations must ensure that all values which return `None` from [MergeIndex::index] are
+/// considered equal by the [PartialEq] and [Eq] implementations.
+pub trait MergeIndex: PartialEq + Eq + Copy {
+    /// Returns the index value as an `Option<usize>`.
+    ///
+    /// `None` values returned by this function indicate holes in the index array and will result
+    /// in null values in the array created by [merge_n].
+    fn index(&self) -> Option<usize>;
+}
+
+impl MergeIndex for usize {
+    fn index(&self) -> Option<usize> {
+        Some(*self)
+    }
+}
+
+impl MergeIndex for Option<usize> {
+    fn index(&self) -> Option<usize> {
+        *self
+    }
+}
+
+/// Merges elements by index from a list of [`Array`], creating a new [`Array`] from
+/// those values.
+///
+/// Each element in `indices` is the index of an array in `values`. The `indices` array is processed
+/// sequentially. The first occurrence of index value `n` will be mapped to the first
+/// value of the array at index `n`. The second occurrence to the second value, and so on.
+/// An index value where `MergeIndex::index` returns `None` is interpreted as a null value.
+///
+/// # Implementation notes
+///
+/// This algorithm is similar in nature to both [zip] and
+/// [interleave](crate::interleave::interleave), but there are some important differences.
+///
+/// In contrast to [zip], this function supports multiple input arrays. Instead of
+/// a boolean selection vector, an index array is used to take values from the input arrays, and a
+/// special marker value can be used to indicate null values.
+///
+/// In contrast to [interleave](crate::interleave::interleave), this function does not use pairs of
+/// indices. The values in `indices` serve the same purpose as the first value in the pairs passed
+/// to `interleave`.
+/// The index in the array is implicit and is derived from the number of times a particular array
+/// index occurs.
+/// The more constrained indexing mechanism used by this algorithm makes it easier to copy values
+/// in contiguous slices. In the example below, the two consecutive elements from array `2` can be
+/// copied in a single operation from the source array instead of copying them one by one.
+/// Long spans of null values are also especially cheap because they do not need to be represented
+/// in an input array.
+///
+/// # Panics
+///
+/// This function does not check that the number of occurrences of any particular array index matches
+/// the length of the corresponding input array. If an array contains more values than required, the
+/// spurious values will be ignored. If an array contains fewer values than necessary, this function
+/// will panic.
+///
+/// # Example
+///
+/// ```text
+/// ┌───────────┐  ┌─────────┐                             ┌─────────┐
+/// │┌─────────┐│  │   None  │                             │   NULL  │
+/// ││    A    ││  ├─────────┤                             ├─────────┤
+/// │└─────────┘│  │    1    │                             │    B    │
+/// │┌─────────┐│  ├─────────┤                             ├─────────┤
+/// ││    B    ││  │    0    │   merge_n(values, indices)  │    A    │
+/// │└─────────┘│  ├─────────┤  ─────────────────────────▶ ├─────────┤
+/// │┌─────────┐│  │   None  │                             │   NULL  │
+/// ││    C    ││  ├─────────┤                             ├─────────┤
+/// │├─────────┤│  │    2    │                             │    C    │
+/// ││    D    ││  ├─────────┤                             ├─────────┤
+/// │└─────────┘│  │    2    │                             │    D    │
+/// └───────────┘  └─────────┘                             └─────────┘
+///    values        indices                                  result
+///
+/// ```
+pub fn merge_n(values: &[&dyn Array], indices: &[impl MergeIndex]) -> Result<ArrayRef, ArrowError> {
+    if values.is_empty() {
+        return Err(ArrowError::InvalidArgumentError(
+            "merge_n requires at least one value array".to_string(),
+        ));
+    }
+
+    let data_type = values[0].data_type();
+
+    for array in values.iter().skip(1) {
+        if array.data_type() != data_type {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "It is not possible to merge arrays of different data types ({} and {})",
+                data_type,
+                array.data_type()
+            )));
+        }
+    }
+
+    if indices.is_empty() {
+        return Ok(new_empty_array(data_type));
+    }
+
+    #[cfg(debug_assertions)]
+    for ix in indices {
+        if let Some(index) = ix.index() {
+            assert!(
+                index < values.len(),
+                "Index out of bounds: {} >= {}",
+                index,
+                values.len()
+            );
+        }
+    }
+
+    let data: Vec<ArrayData> = values.iter().map(|a| a.to_data()).collect();
+    let data_refs = data.iter().collect();
+
+    let mut mutable = MutableArrayData::new(data_refs, true, indices.len());
+
+    // This loop extends the mutable array by taking contiguous slices from the input arrays.
+    //
+    // take_offsets keeps track of how many values have been taken from each array.
+    let mut take_offsets = vec![0; values.len() + 1];
+    let mut start_row_ix = 0;
+    loop {
+        let array_ix = indices[start_row_ix];
+
+        // Determine the length of the slice to take.
+        let mut end_row_ix = start_row_ix + 1;
+        while end_row_ix < indices.len() && indices[end_row_ix] == array_ix {
+            end_row_ix += 1;
+        }
+        let slice_length = end_row_ix - start_row_ix;
+
+        // Extend mutable with either nulls or with values from the array.
+        match array_ix.index() {
+            None => mutable.extend_nulls(slice_length),
+            Some(index) => {
+                let start_offset = take_offsets[index];
+                let end_offset = start_offset + slice_length;
+                mutable.extend(index, start_offset, end_offset);
+                take_offsets[index] = end_offset;
+            }
+        }
+
+        if end_row_ix == indices.len() {
+            break;
+        } else {
+            // Set the start_row_ix for the next slice.
+            start_row_ix = end_row_ix;
+        }
+    }
+
+    Ok(make_array(mutable.freeze()))
+}
+
+/// Merges two arrays in the order specified by a boolean mask.
+///
+/// This algorithm is a variant of [zip] that does not require the truthy and
+/// falsy arrays to have the same length.
+///
+/// When truthy or falsy are [Scalar](arrow_array::Scalar), the single
+/// scalar value is repeated whenever the mask array contains true or false respectively.
+///
+/// # Example
+///
+/// ```text
+///  truthy
+/// ┌─────────┐  mask
+/// │    A    │  ┌─────────┐                             ┌─────────┐
+/// ├─────────┤  │  true   │                             │    A    │
+/// │    C    │  ├─────────┤                             ├─────────┤
+/// ├─────────┤  │  true   │                             │    C    │
+/// │   NULL  │  ├─────────┤                             ├─────────┤
+/// ├─────────┤  │  false  │  merge(mask, truthy, falsy) │    B    │
+/// │    D    │  ├─────────┤  ─────────────────────────▶ ├─────────┤
+/// └─────────┘  │  true   │                             │   NULL  │
+///  falsy       ├─────────┤                             ├─────────┤
+/// ┌─────────┐  │  false  │                             │    E    │
+/// │    B    │  ├─────────┤                             ├─────────┤
+/// ├─────────┤  │  true   │                             │    D    │
+/// │    E    │  └─────────┘                             └─────────┘
+/// └─────────┘
+/// ```
+pub fn merge(
+    mask: &BooleanArray,
+    truthy: &dyn Datum,
+    falsy: &dyn Datum,
+) -> Result<ArrayRef, ArrowError> {
+    let (truthy_array, truthy_is_scalar) = truthy.get();
+    let (falsy_array, falsy_is_scalar) = falsy.get();
+
+    if truthy_is_scalar && falsy_is_scalar {
+        // When both truthy and falsy are scalars, we can use `zip` since the result is the same
+        // and zip has optimized code for scalars.
+        return zip(mask, truthy, falsy);
+    }
+
+    if truthy_array.data_type() != falsy_array.data_type() {
+        return Err(ArrowError::InvalidArgumentError(
+            "arguments need to have the same data type".into(),
+        ));
+    }
+
+    if truthy_is_scalar && truthy_array.len() != 1 {
+        return Err(ArrowError::InvalidArgumentError(
+            "scalar arrays must have 1 element".into(),
+        ));
+    }
+    if falsy_is_scalar && falsy_array.len() != 1 {
+        return Err(ArrowError::InvalidArgumentError(
+            "scalar arrays must have 1 element".into(),
+        ));
+    }
+
+    let falsy = falsy_array.to_data();
+    let truthy = truthy_array.to_data();
+
+    let mut mutable = MutableArrayData::new(vec![&truthy, &falsy], false, mask.len());
+
+    // SlicesIterator only yields the ranges where the mask is true, so the gaps between those
+    // ranges need to be filled with falsy values.
+
+    // keep track of how much is filled
+    let mut filled = 0;
+    let mut falsy_offset = 0;
+    let mut truthy_offset = 0;
+
+    // Ensure nulls are treated as false
+    let mask_buffer = match mask.null_count() {
+        0 => mask.values().clone(),
+        _ => prep_null_mask_filter(mask).into_parts().0,
+    };
+
+    SlicesIterator::from(&mask_buffer).for_each(|(start, end)| {
+        // the gap needs to be filled with falsy values
+        if start > filled {
+            if falsy_is_scalar {
+                for _ in filled..start {
+                    // Copy the first item from the 'falsy' array into the output buffer.
+                    mutable.extend(1, 0, 1);
+                }
+            } else {
+                let falsy_length = start - filled;
+                let falsy_end = falsy_offset + falsy_length;
+                mutable.extend(1, falsy_offset, falsy_end);
+                falsy_offset = falsy_end;
+            }
+        }
+        // fill with truthy values
+        if truthy_is_scalar {
+            for _ in start..end {
+                // Copy the first item from the 'truthy' array into the output buffer.
+                mutable.extend(0, 0, 1);
+            }
+        } else {
+            let truthy_length = end - start;
+            let truthy_end = truthy_offset + truthy_length;
+            mutable.extend(0, truthy_offset, truthy_end);
+            truthy_offset = truthy_end;
+        }
+        filled = end;
+    });
+    // the remaining part is falsy
+    if filled < mask.len() {
+        if falsy_is_scalar {
+            for _ in filled..mask.len() {
+                // Copy the first item from the 'falsy' array into the output buffer.
+                mutable.extend(1, 0, 1);
+            }
+        } else {
+            let falsy_length = mask.len() - filled;
+            let falsy_end = falsy_offset + falsy_length;
+            mutable.extend(1, falsy_offset, falsy_end);
+        }
+    }
+
+    let data = mutable.freeze();
+    Ok(make_array(data))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::merge::{MergeIndex, merge, merge_n};
+    use arrow_array::cast::AsArray;
+    use arrow_array::{Array, BooleanArray, Datum, Int32Array, Scalar, StringArray, UInt64Array};
+    use arrow_schema::ArrowError::InvalidArgumentError;
+
+    #[derive(PartialEq, Eq, Copy, Clone)]
+    struct CompactMergeIndex {
+        index: u8,
+    }
+
+    impl MergeIndex for CompactMergeIndex {
+        fn index(&self) -> Option<usize> {
+            if self.index == u8::MAX {
+                None
+            } else {
+                Some(self.index as usize)
+            }
+        }
+    }
+
+    #[test]
+    fn test_merge() {
+        let a1 = StringArray::from(vec![Some("A"), Some("B"), Some("E"), None]);
+        let a2 = StringArray::from(vec![Some("C"), Some("D")]);
+
+        let indices = BooleanArray::from(vec![true, false, true, false, true, true]);
+
+        let merged = merge(&indices, &a1, &a2).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), indices.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "A");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "C");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "B");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "D");
+        assert!(merged.is_valid(4));
+        assert_eq!(merged.value(4), "E");
+        assert!(!merged.is_valid(5));
+    }
+
+    #[test]
+    fn test_merge_null_is_false() {
+        let a1 = StringArray::from(vec![Some("A"), Some("B"), Some("E"), None]);
+        let a2 = StringArray::from(vec![Some("C"), Some("D")]);
+
+        let indices = BooleanArray::from(vec![
+            Some(true),
+            None,
+            Some(true),
+            None,
+            Some(true),
+            Some(true),
+        ]);
+
+        let merged = merge(&indices, &a1, &a2).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), indices.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "A");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "C");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "B");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "D");
+        assert!(merged.is_valid(4));
+        assert_eq!(merged.value(4), "E");
+        assert!(!merged.is_valid(5));
+    }
+
+    #[test]
+    fn test_merge_false_tail() {
+        let a1 = StringArray::from(vec![Some("A"), Some("B"), Some("E"), None]);
+        let a2 = StringArray::from(vec![Some("C"), Some("D"), None, Some("F")]);
+
+        let indices = BooleanArray::from(vec![true, false, true, false, true, true, false, false]);
+
+        let merged = merge(&indices, &a1, &a2).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), indices.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "A");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "C");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "B");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "D");
+        assert!(merged.is_valid(4));
+        assert_eq!(merged.value(4), "E");
+        assert!(!merged.is_valid(5));
+        assert!(!merged.is_valid(6));
+        assert!(merged.is_valid(7));
+        assert_eq!(merged.value(7), "F");
+    }
+
+    #[test]
+    fn test_merge_scalars() {
+        let truthy = Scalar::new(StringArray::from(vec![Some("A")]));
+        let falsy = Scalar::new(StringArray::from(vec![Some("B")]));
+
+        let mask = BooleanArray::from(vec![true, false, false, true]);
+
+        let merged = merge(&mask, &truthy, &falsy).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), mask.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "A");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "B");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "B");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "A");
+    }
+
+    #[test]
+    fn test_merge_scalar_and_array() {
+        let truthy = Scalar::new(StringArray::from(vec![Some("A")]));
+        let falsy = StringArray::from(vec![Some("B"), Some("C")]);
+
+        let mask = BooleanArray::from(vec![true, false, false, true]);
+
+        let merged = merge(&mask, &truthy, &falsy).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), mask.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "A");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "B");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "C");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "A");
+    }
+
+    #[test]
+    fn test_merge_array_and_scalar() {
+        let truthy = StringArray::from(vec![Some("B"), Some("C")]);
+        let falsy = Scalar::new(StringArray::from(vec![Some("A")]));
+
+        let mask = BooleanArray::from(vec![true, false, false, true, false, false]);
+
+        let merged = merge(&mask, &truthy, &falsy).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), mask.len());
+        assert!(merged.is_valid(0));
+        assert_eq!(merged.value(0), "B");
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "A");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "A");
+        assert!(merged.is_valid(3));
+        assert_eq!(merged.value(3), "C");
+        assert!(merged.is_valid(4));
+        assert_eq!(merged.value(4), "A");
+        assert!(merged.is_valid(5));
+        assert_eq!(merged.value(5), "A");
+    }
+
+    #[test]
+    fn test_merge_empty_mask() {
+        let a1 = StringArray::from(vec![Some("A")]);
+        let a2 = StringArray::from(vec![Some("B")]);
+        let mask: Vec<bool> = vec![];
+        let mask = BooleanArray::from(mask);
+        let result = merge(&mask, &a1, &a2).unwrap();
+        assert_eq!(result.len(), 0);
+    }
+
+    #[derive(Debug, Copy, Clone)]
+    pub struct UnsafeScalar<T: Array>(T);
+
+    impl<T: Array> Datum for UnsafeScalar<T> {
+        fn get(&self) -> (&dyn Array, bool) {
+            (&self.0, true)
+        }
+    }
+
+    #[test]
+    fn test_merge_invalid_truthy_scalar() {
+        let truthy = UnsafeScalar(StringArray::from(vec![Some("A"), Some("C")]));
+        let falsy = StringArray::from(vec![Some("B"), Some("D")]);
+        let mask = BooleanArray::from(vec![true, false, true, false]);
+        let merged = merge(&mask, &truthy, &falsy);
+        assert!(matches!(merged, Err(InvalidArgumentError { .. })));
+    }
+
+    #[test]
+    fn test_merge_invalid_falsy_scalar() {
+        let truthy = StringArray::from(vec![Some("A"), Some("C")]);
+        let falsy = UnsafeScalar(StringArray::from(vec![Some("B"), Some("D")]));
+        let mask = vec![true, false, true, false];
+        let mask = BooleanArray::from(mask);
+        let merged = merge(&mask, &truthy, &falsy);
+        assert!(matches!(merged, Err(InvalidArgumentError { .. })));
+    }
+
+    #[test]
+    fn test_merge_incompatible_arrays() {
+        let truthy = StringArray::from(vec![Some("A"), Some("B")]);
+        let falsy = Int32Array::from(vec![1, 2]);
+        let mask = BooleanArray::from(vec![true, false, true, false]);
+        let merged = merge(&mask, &truthy, &falsy);
+        assert!(matches!(merged, Err(InvalidArgumentError { .. })));
+    }
+
+    #[test]
+    fn test_merge_n() {
+        let a1 = StringArray::from(vec![Some("A")]);
+        let a2 = StringArray::from(vec![Some("B"), None, None]);
+        let a3 = StringArray::from(vec![Some("C"), Some("D")]);
+
+        let indices = vec![
+            CompactMergeIndex { index: u8::MAX },
+            CompactMergeIndex { index: 1 },
+            CompactMergeIndex { index: 0 },
+            CompactMergeIndex { index: u8::MAX },
+            CompactMergeIndex { index: 2 },
+            CompactMergeIndex { index: 2 },
+            CompactMergeIndex { index: 1 },
+            CompactMergeIndex { index: 1 },
+        ];
+
+        let arrays = [a1, a2, a3];
+        let array_refs = arrays.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
+        let merged = merge_n(&array_refs, &indices).unwrap();
+        let merged = merged.as_string::<i32>();
+
+        assert_eq!(merged.len(), indices.len());
+        assert!(!merged.is_valid(0));
+        assert!(merged.is_valid(1));
+        assert_eq!(merged.value(1), "B");
+        assert!(merged.is_valid(2));
+        assert_eq!(merged.value(2), "A");
+        assert!(!merged.is_valid(3));
+        assert!(merged.is_valid(4));
+        assert_eq!(merged.value(4), "C");
+        assert!(merged.is_valid(5));
+        assert_eq!(merged.value(5), "D");
+        assert!(!merged.is_valid(6));
+        assert!(!merged.is_valid(7));
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_merge_n_invalid_indices() {
+        let a1 = StringArray::from(vec![Some("A")]);
+
+        let indices = vec![CompactMergeIndex { index: 99 }];
+
+        let arrays = [a1];
+        let array_refs = arrays.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
+        let _ = merge_n(&array_refs, &indices);
+    }
+
+    #[test]
+    fn test_merge_n_empty_indices() {
+        let a1 = StringArray::from(vec![Some("A")]);
+        let a2 = StringArray::from(vec![Some("B"), None, None]);
+        let a3 = StringArray::from(vec![Some("C"), Some("D")]);
+
+        let indices: Vec<CompactMergeIndex> = vec![];
+
+        let arrays = [a1, a2, a3];
+        let array_refs = arrays.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
+        let merged = merge_n(&array_refs, &indices).unwrap();
+
+        assert_eq!(merged.len(), indices.len());
+    }
+
+    #[test]
+    fn test_merge_n_empty_values() {
+        let indices: Vec<CompactMergeIndex> = vec![];
+
+        let arrays: Vec<&dyn Array> = vec![];
+        let merged = merge_n(&arrays, &indices);
+
+        assert!(matches!(merged, Err(InvalidArgumentError { .. })));
+    }
+
+    #[test]
+    fn test_merge_n_incompatible_arrays() {
+        let a1: Box<dyn Array> = Box::new(StringArray::from(vec![Some("A")]));
+        let a2: Box<dyn Array> = Box::new(Int32Array::from(vec![1, 2, 3]));
+        let a3: Box<dyn Array> = Box::new(UInt64Array::from(vec![42, 314]));
+
+        let indices: Vec<CompactMergeIndex> = vec![];
+
+        let arrays = [a1.as_ref(), a2.as_ref(), a3.as_ref()];
+        let merged = merge_n(&arrays, &indices);
+
+        assert!(matches!(merged, Err(InvalidArgumentError { .. })));
+    }
+}
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index dc729da7e6c3..fa875c20e302 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -17,13 +17,13 @@
 
 //! Implements the `nullif` function for Arrow arrays.
 
-use arrow_array::{make_array, Array, ArrayRef, BooleanArray};
-use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper};
-use arrow_buffer::{BooleanBuffer, NullBuffer};
+use arrow_array::{Array, ArrayRef, BooleanArray, make_array};
+use arrow_buffer::buffer::bitwise_bin_op_helper;
+use arrow_buffer::{BooleanBuffer, NullBuffer, bitwise_unary_op_helper};
 use arrow_schema::{ArrowError, DataType};
 
 /// Returns a new array with the same values and the validity bit to false where
-/// the corresponding element of`right` is true.
+/// the corresponding element of `right` is true.
 ///
 /// This can be used to implement SQL `NULLIF`
 ///
@@ -120,7 +120,8 @@ mod tests {
     use arrow_array::{Int32Array, NullArray, StringArray, StructArray};
     use arrow_data::ArrayData;
     use arrow_schema::{Field, Fields};
-    use rand::{rng, Rng};
+    use rand::prelude::StdRng;
+    use rand::{Rng, SeedableRng};
 
     #[test]
     fn test_nullif_int_array() {
@@ -492,23 +493,60 @@ mod tests {
         let r_data = r.to_data();
         r_data.validate().unwrap();
 
-        assert_eq!(r.as_ref(), &expected);
+        assert_eq!(
+            r.as_ref(),
+            &expected,
+            "expected nulls: {:#?}\n\n\
+        result nulls:   {:#?}\n\n\
+        expected values: {:#?}\n\n\
+        result values:   {:#?}",
+            expected.nulls(),
+            r.nulls(),
+            expected.values(),
+            r.as_primitive::<Int32Type>().values()
+        );
+        validate_nulls(expected.nulls());
+        validate_nulls(r.nulls());
+    }
+
+    /// Ensures that the null count matches the actual number of nulls.
+    fn validate_nulls(nulls: Option<&NullBuffer>) {
+        let Some(nulls) = nulls else {
+            return;
+        };
+        let mut actual_null_count = 0;
+        for i in 0..nulls.len() {
+            if nulls.is_null(i) {
+                actual_null_count += 1;
+            }
+        }
+        assert_eq!(actual_null_count, nulls.null_count());
     }
 
     #[test]
     fn nullif_fuzz() {
-        let mut rng = rng();
+        let mut rng = StdRng::seed_from_u64(7337);
 
         let arrays = [
-            Int32Array::from(vec![0; 128]),
-            (0..128)
-                .map(|_| rng.random_bool(0.5).then_some(0))
+            Int32Array::from(vec![0; 1024]), // no nulls
+            (0..1024) // 50% nulls
+                .map(|_| rng.random_bool(0.5).then_some(1))
                 .collect(),
         ];
 
         for a in arrays {
-            let a_slices = [(0, 128), (64, 64), (0, 64), (32, 32), (0, 0), (32, 0)];
-
+            let a_slices = [
+                (0, 128),
+                (0, 129),
+                (64, 64),
+                (0, 64),
+                (32, 32),
+                (0, 0),
+                (32, 0),
+                (5, 800),
+                (33, 53),
+                (77, 101),
+            ];
             for (a_offset, a_length) in a_slices {
                 let a = a.slice(a_offset, a_length);
 
@@ -516,14 +554,54 @@ mod tests {
                     let b_start_offset = rng.random_range(0..i);
                     let b_end_offset = rng.random_range(0..i);
 
+                    // b with 50% nulls
                     let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset)
                         .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5)))
                         .collect();
-                    let b = b.slice(b_start_offset, a_length);
-
-                    test_nullif(&a, &b);
+                    let b_sliced = b.slice(b_start_offset, a_length);
+                    test_nullif(&a, &b_sliced);
+
+                    // b with no nulls (and no null buffer)
+                    let b = remove_null_buffer(&b);
+                    let b_sliced = b.slice(b_start_offset, a_length);
+                    test_nullif(&a, &b_sliced);
+
+                    // b with no nulls (but with a null buffer)
+                    let b = remove_null_values(&b);
+                    let b_sliced = b.slice(b_start_offset, a_length);
+                    test_nullif(&a, &b_sliced);
                 }
             }
         }
     }
+
+    /// Returns a new BooleanArray with no null buffer
+    fn remove_null_buffer(array: &BooleanArray) -> BooleanArray {
+        make_array(
+            array
+                .into_data()
+                .into_builder()
+                .nulls(None)
+                .build()
+                .unwrap(),
+        )
+        .as_boolean()
+        .clone()
+    }
+
+    /// Returns a new BooleanArray with a null buffer where all values are valid
+    fn remove_null_values(array: &BooleanArray) -> BooleanArray {
+        let len = array.len();
+        let new_nulls = NullBuffer::from_iter(std::iter::repeat_n(true, len));
+        make_array(
+            array
+                .into_data()
+                .into_builder()
+                .nulls(Some(new_nulls))
+                .build()
+                .unwrap(),
+        )
+        .as_boolean()
+        .clone()
+    }
 }
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index ef287eb24427..1961a604d928 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -17,6 +17,7 @@
 
 //! Defines take kernel for [Array]
 
+use std::fmt::Display;
 use std::sync::Arc;
 
 use arrow_array::builder::{BufferBuilder, UInt32Builder};
@@ -24,13 +25,13 @@ use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::{
-    bit_util, ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer,
-    ScalarBuffer,
+    ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
+    bit_util,
 };
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::{ArrowError, DataType, FieldRef, UnionMode};
 
-use num::{One, Zero};
+use num_traits::{One, Zero};
 
 /// Take elements by index from [Array], creating a new [Array] from those indexes.
 ///
@@ -164,31 +165,47 @@ pub fn take_arrays(
 fn check_bounds<T: ArrowPrimitiveType>(
     len: usize,
     indices: &PrimitiveArray<T>,
-) -> Result<(), ArrowError> {
+) -> Result<(), ArrowError>
+where
+    T::Native: Display,
+{
+    let len = match T::Native::from_usize(len) {
+        Some(len) => len,
+        None => {
+            if T::DATA_TYPE.is_integer() {
+                // T::Native::MAX < len (e.g. u8::MAX < 512): no index can be >= len. NOTE(review): negative indices of signed types are not rejected on this path
+                return Ok(());
+            } else {
+                return Err(ArrowError::ComputeError("Cast to usize failed".to_string()));
+            }
+        }
+    };
+    // From here on an index is in bounds iff 0 <= index < len; null index slots are never inspected
     if indices.null_count() > 0 {
         indices.iter().flatten().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string()))?;
-            if ix >= len {
+            if index < T::Native::ZERO || index >= len {
                 return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} from {len} entries"
+                    "Array index out of bounds, cannot get item at index {index} from {len} entries"
                 )));
             }
             Ok(())
         })
     } else {
-        indices.values().iter().try_for_each(|index| {
-            let ix = index
-                .to_usize()
-                .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string()))?;
-            if ix >= len {
-                return Err(ArrowError::ComputeError(format!(
-                    "Array index out of bounds, cannot get item at index {ix} from {len} entries"
-                )));
+        let in_bounds = indices.values().iter().fold(true, |in_bounds, &i| {
+            in_bounds & (i >= T::Native::ZERO) & (i < len)
+        });
+        // Only on failure: rescan to find the first offending index for the error message
+        if !in_bounds {
+            for &index in indices.values() {
+                if index < T::Native::ZERO || index >= len {
+                    return Err(ArrowError::ComputeError(format!(
+                        "Array index out of bounds, cannot get item at index {index} from {len} entries"
+                    )));
+                }
             }
-            Ok(())
-        })
+        }
+
+        Ok(())
     }
 }
 
@@ -218,6 +235,12 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::LargeList(_) => {
             Ok(Arc::new(take_list::<_, Int64Type>(values.as_list(), indices)?))
         }
+        DataType::ListView(_) => {
+            Ok(Arc::new(take_list_view::<_, Int32Type>(values.as_list_view(), indices)?))
+        }
+        DataType::LargeListView(_) => {
+            Ok(Arc::new(take_list_view::<_, Int64Type>(values.as_list_view(), indices)?))
+        }
         DataType::FixedSizeList(_, length) => {
             let values = values
                 .as_any()
@@ -314,8 +337,8 @@ fn take_impl<IndexType: ArrowPrimitiveType>(
         DataType::Union(fields, UnionMode::Dense) => {
             let values = values.as_any().downcast_ref::<UnionArray>().unwrap();
 
-            let type_ids = <PrimitiveArray<Int8Type>>::new(take_native(values.type_ids(), indices), None);
-            let offsets = <PrimitiveArray<Int32Type>>::new(take_native(values.offsets().unwrap(), indices), None);
+            let type_ids = <PrimitiveArray<Int8Type>>::try_new(take_native(values.type_ids(), indices), None)?;
+            let offsets = <PrimitiveArray<Int32Type>>::try_new(take_native(values.offsets().unwrap(), indices), None)?;
 
             let children = fields.iter()
                 .map(|(field_type_id, _)| {
@@ -361,13 +384,6 @@ pub struct TakeOptions {
     pub check_bounds: bool,
 }
 
-#[inline(always)]
-fn maybe_usize<I: ArrowNativeType>(index: I) -> Result<usize, ArrowError> {
-    index
-        .to_usize()
-        .ok_or_else(|| ArrowError::ComputeError("Cast to usize failed".to_string()))
-}
-
 /// `take` implementation for all primitive arrays
 ///
 /// This checks if an `indices` slot is populated, and gets the value from `values`
@@ -387,7 +403,7 @@ where
 {
     let values_buf = take_native(values.values(), indices);
     let nulls = take_nulls(values.nulls(), indices);
-    Ok(PrimitiveArray::new(values_buf, nulls).with_data_type(values.data_type().clone()))
+    Ok(PrimitiveArray::try_new(values_buf, nulls)?.with_data_type(values.data_type().clone()))
 }
 
 #[inline(never)]
@@ -416,9 +432,10 @@ fn take_native<T: ArrowNativeType, I: ArrowPrimitiveType>(
             .enumerate()
             .map(|(idx, index)| match values.get(index.as_usize()) {
                 Some(v) => *v,
-                None => match n.is_null(idx) {
-                    true => T::default(),
-                    false => panic!("Out-of-bounds index {index:?}"),
+                // SAFETY: idx<indices.len()
+                None => match unsafe { n.inner().value_unchecked(idx) } {
+                    false => T::default(),
+                    true => panic!("Out-of-bounds index {index:?}"),
                 },
             })
             .collect(),
@@ -442,8 +459,10 @@ fn take_bits<I: ArrowPrimitiveType>(
             let mut output_buffer = MutableBuffer::new_null(len);
             let output_slice = output_buffer.as_slice_mut();
             nulls.valid_indices().for_each(|idx| {
-                if values.value(indices.value(idx).as_usize()) {
-                    bit_util::set_bit(output_slice, idx);
+                // SAFETY: idx is a valid index in indices.nulls() --> idx<indices.len()
+                if values.value(unsafe { indices.value_unchecked(idx).as_usize() }) {
+                    // SAFETY: MutableBuffer was created with space for indices.len() bit, and idx < indices.len()
+                    unsafe { bit_util::set_bit_raw(output_slice.as_mut_ptr(), idx) };
                 }
             });
             BooleanBuffer::new(output_buffer.into(), 0, len)
@@ -480,11 +499,15 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
     let nulls = take_nulls(array.nulls(), indices);
 
     let (offsets, values) = if array.null_count() == 0 && indices.null_count() == 0 {
-        offsets.extend(indices.values().iter().map(|index| {
+        offsets.reserve(indices.len());
+        for index in indices.values() {
             let index = index.as_usize();
             capacity += input_offsets[index + 1].as_usize() - input_offsets[index].as_usize();
-            T::Offset::from_usize(capacity).expect("overflow")
-        }));
+            offsets.push(
+                T::Offset::from_usize(capacity)
+                    .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
+            );
+        }
         let mut values = Vec::with_capacity(capacity);
 
         for index in indices.values() {
@@ -492,13 +515,17 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
         }
         (offsets, values)
     } else if indices.null_count() == 0 {
-        offsets.extend(indices.values().iter().map(|index| {
+        offsets.reserve(indices.len());
+        for index in indices.values() {
             let index = index.as_usize();
             if array.is_valid(index) {
                 capacity += input_offsets[index + 1].as_usize() - input_offsets[index].as_usize();
             }
-            T::Offset::from_usize(capacity).expect("overflow")
-        }));
+            offsets.push(
+                T::Offset::from_usize(capacity)
+                    .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
+            );
+        }
         let mut values = Vec::with_capacity(capacity);
 
         for index in indices.values() {
@@ -509,13 +536,17 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
         }
         (offsets, values)
     } else if array.null_count() == 0 {
-        offsets.extend(indices.values().iter().enumerate().map(|(i, index)| {
+        offsets.reserve(indices.len());
+        for (i, index) in indices.values().iter().enumerate() {
             let index = index.as_usize();
             if indices.is_valid(i) {
                 capacity += input_offsets[index + 1].as_usize() - input_offsets[index].as_usize();
             }
-            T::Offset::from_usize(capacity).expect("overflow")
-        }));
+            offsets.push(
+                T::Offset::from_usize(capacity)
+                    .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
+            );
+        }
         let mut values = Vec::with_capacity(capacity);
 
         for (i, index) in indices.values().iter().enumerate() {
@@ -526,13 +557,17 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
         (offsets, values)
     } else {
         let nulls = nulls.as_ref().unwrap();
-        offsets.extend(indices.values().iter().enumerate().map(|(i, index)| {
+        offsets.reserve(indices.len());
+        for (i, index) in indices.values().iter().enumerate() {
             let index = index.as_usize();
             if nulls.is_valid(i) {
                 capacity += input_offsets[index + 1].as_usize() - input_offsets[index].as_usize();
             }
-            T::Offset::from_usize(capacity).expect("overflow")
-        }));
+            offsets.push(
+                T::Offset::from_usize(capacity)
+                    .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
+            );
+        }
         let mut values = Vec::with_capacity(capacity);
 
         for (i, index) in indices.values().iter().enumerate() {
@@ -546,11 +581,8 @@ fn take_bytes<T: ByteArrayType, IndexType: ArrowPrimitiveType>(
         (offsets, values)
     };
 
-    T::Offset::from_usize(values.len()).ok_or(ArrowError::ComputeError(format!(
-        "Offset overflow for {}BinaryArray: {}",
-        T::Offset::PREFIX,
-        values.len()
-    )))?;
+    T::Offset::from_usize(values.len())
+        .ok_or_else(|| ArrowError::OffsetOverflowError(values.len()))?;
 
     let array = unsafe {
         let offsets = OffsetBuffer::new_unchecked(offsets.into());
@@ -608,6 +640,33 @@ where
     Ok(GenericListArray::<OffsetType::Native>::from(list_data))
 }
 
+/// `take` implementation for list view arrays: gathers the selected offsets and sizes
+/// and reuses the child values array unchanged.
+fn take_list_view<IndexType, OffsetType>(
+    values: &GenericListViewArray<OffsetType::Native>,
+    indices: &PrimitiveArray<IndexType>,
+) -> Result<GenericListViewArray<OffsetType::Native>, ArrowError>
+where
+    IndexType: ArrowPrimitiveType,
+    OffsetType: ArrowPrimitiveType,
+    OffsetType::Native: OffsetSizeTrait,
+{
+    let offsets: Buffer = take_native(values.offsets(), indices).into();
+    let sizes: Buffer = take_native(values.sizes(), indices).into();
+    let validity = take_nulls(values.nulls(), indices);
+    let child = values.values().to_data();
+
+    let builder = ArrayDataBuilder::new(values.data_type().clone())
+        .len(indices.len())
+        .nulls(validity)
+        .buffers(vec![offsets, sizes])
+        .child_data(vec![child]);
+
+    // SAFETY: all buffers and child nodes for ListView added in constructor
+    let data = unsafe { builder.build_unchecked() };
+    Ok(GenericListViewArray::<OffsetType::Native>::from(data))
+}
+
 /// `take` implementation for `FixedSizeListArray`
 ///
 /// Calculates the index and indexed offset for the inner array,
@@ -647,27 +706,60 @@ fn take_fixed_size_list<IndexType: ArrowPrimitiveType>(
     Ok(FixedSizeListArray::from(list_data))
 }
 
+/// `take` implementation for `FixedSizeBinaryArray`
+///
+/// Works in two steps:
+/// - copy the `size`-byte value of every index into a new values buffer (zeros for null indices)
+/// - combine the taken value nulls with the index nulls to form the output null buffer
 fn take_fixed_size_binary<IndexType: ArrowPrimitiveType>(
     values: &FixedSizeBinaryArray,
     indices: &PrimitiveArray<IndexType>,
     size: i32,
 ) -> Result<FixedSizeBinaryArray, ArrowError> {
-    let nulls = values.nulls();
-    let array_iter = indices
-        .values()
-        .iter()
-        .map(|idx| {
-            let idx = maybe_usize::<IndexType::Native>(*idx)?;
-            if nulls.map(|n| n.is_valid(idx)).unwrap_or(true) {
-                Ok(Some(values.value(idx)))
-            } else {
-                Ok(None)
+    let size_usize = usize::try_from(size).map_err(|_| {
+        ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", size))
+    })?;
+
+    let values_buffer = values.values().as_slice();
+    let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize);
+
+    if indices.null_count() == 0 {
+        let array_iter = indices.values().iter().map(|idx| {
+            let offset = idx.as_usize() * size_usize;
+            &values_buffer[offset..offset + size_usize]
+        });
+        for slice in array_iter {
+            values_buffer_builder.append_slice(slice);
+        }
+    } else {
+        // Null index slots are skipped rather than dereferenced: the value stored under a null
+        // index may be arbitrary, and slicing the values buffer with it could panic.
+        let array_iter = indices.iter().map(|idx| {
+            idx.map(|idx| {
+                let offset = idx.as_usize() * size_usize;
+                &values_buffer[offset..offset + size_usize]
+            })
+        });
+        for slice in array_iter {
+            match slice {
+                None => values_buffer_builder.append_n(size_usize, 0),
+                Some(slice) => values_buffer_builder.append_slice(slice),
             }
-        })
-        .collect::<Result<Vec<_>, ArrowError>>()?
-        .into_iter();
+        }
+    }
+
+    let values_buffer = values_buffer_builder.finish();
+    let value_nulls = take_nulls(values.nulls(), indices);
+    let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());
+
+    let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size))
+        .len(indices.len())
+        .nulls(final_nulls)
+        .offset(0)
+        .add_buffer(values_buffer)
+        .build()?;
 
-    FixedSizeBinaryArray::try_from_sparse_iter_with_size(array_iter, size)
+    Ok(FixedSizeBinaryArray::from(array_data))
 }
 
 /// `take` implementation for dictionary arrays
@@ -919,7 +1011,6 @@ to_indices_reinterpret!(Int64Type, UInt64Type);
 /// # use arrow_array::{StringArray, Int32Array, UInt32Array, RecordBatch};
 /// # use arrow_schema::{DataType, Field, Schema};
 /// # use arrow_select::take::take_record_batch;
-///
 /// let schema = Arc::new(Schema::new(vec![
 ///     Field::new("a", DataType::Int32, true),
 ///     Field::new("b", DataType::Utf8, true),
@@ -967,6 +1058,7 @@ mod tests {
     use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano};
     use arrow_data::ArrayData;
     use arrow_schema::{Field, Fields, TimeUnit, UnionFields};
+    use num_traits::ToPrimitive;
 
     fn test_take_decimal_arrays(
         data: Vec<Option<i128>>,
@@ -1808,6 +1900,55 @@ mod tests {
         }};
     }
 
+    fn test_take_list_view_generic<OffsetType: OffsetSizeTrait, ValuesType: ArrowPrimitiveType, F>(
+        values: Vec<Option<Vec<Option<ValuesType::Native>>>>,
+        take_indices: Vec<Option<usize>>,
+        expected: Vec<Option<Vec<Option<ValuesType::Native>>>>,
+        mapper: F,
+    ) where
+        F: Fn(GenericListViewArray<OffsetType>) -> GenericListViewArray<OffsetType>,
+    {
+        // Build the input array, then let `mapper` transform it (e.g. slice it).
+        let mut input_builder =
+            GenericListViewBuilder::<OffsetType, _>::new(PrimitiveBuilder::<ValuesType>::new());
+        for row in values {
+            input_builder.append_option(row);
+        }
+        let input = mapper(input_builder.finish());
+
+        // Indices are always passed as UInt64; `None` entries become null indices.
+        let mut index_builder = UInt64Builder::new();
+        for idx in take_indices {
+            index_builder.append_option(idx.map(|i| i.to_u64().unwrap()));
+        }
+        let indices = index_builder.finish();
+
+        let taken = take(&input, &indices, None)
+            .unwrap()
+            .as_list_view()
+            .clone();
+
+        let mut expected_builder =
+            GenericListViewBuilder::<OffsetType, _>::new(PrimitiveBuilder::<ValuesType>::new());
+        for row in expected {
+            expected_builder.append_option(row);
+        }
+        let expected_array = expected_builder.finish();
+
+        assert_eq!(taken, expected_array);
+    }
+
+    macro_rules! list_view_test_case {
+        (values: $values:expr, indices: $indices:expr, expected: $expected: expr) => {
+            // No transform given: forward to the general arm with the identity closure
+            list_view_test_case!(values: $values, transform: |x| x, indices: $indices, expected: $expected)
+        };
+        (values: $values:expr, transform: $fn:expr, indices: $indices:expr, expected: $expected: expr) => {{
+            test_take_list_view_generic::<i32, Int8Type, _>($values, $indices, $expected, $fn);
+            test_take_list_view_generic::<i64, Int8Type, _>($values, $indices, $expected, $fn);
+        }};
+    }
+
     fn do_take_fixed_size_list_test<T>(
         length: <Int32Type as ArrowPrimitiveType>::Native,
         input_data: Vec<Option<Vec<Option<T::Native>>>>,
@@ -1858,6 +1999,72 @@ mod tests {
         test_take_list_with_nulls!(i64, LargeList, LargeListArray);
     }
 
+    #[test]
+    fn test_test_take_list_view_reversed() {
+        // Indices in descending order: the rows (including the null row) come back reversed
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![Some(2), Some(1), Some(0)],
+            expected: vec![
+                Some(vec![Some(7), Some(8), None]),
+                None,
+                Some(vec![Some(1), None, Some(3)]),
+            ]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_null_indices() {
+        // A null index produces a null row; index 0 selects the first row unchanged
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![None, Some(0), None],
+            expected: vec![None, Some(vec![Some(1), None, Some(3)]), None]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_null_values() {
+        // Index 1 points at a null row, so those outputs are null, as are the null indices
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1), None, Some(3)]),
+                None,
+                Some(vec![Some(7), Some(8), None]),
+            ],
+            indices: vec![Some(1), Some(1), Some(1), None, None],
+            expected: vec![None; 5]
+        }
+    }
+
+    #[test]
+    fn test_take_list_view_sliced() {
+        // Indices address the sliced array (rows 2..6 of `values`); covers null rows and a null index
+        list_view_test_case! {
+            values: vec![
+                Some(vec![Some(1)]),
+                None,
+                None,
+                Some(vec![Some(2), Some(3)]),
+                Some(vec![Some(4), Some(5)]),
+                None,
+            ],
+            transform: |l| l.slice(2, 4),
+            indices: vec![Some(0), Some(3), None, Some(1), Some(2)],
+            expected: vec![
+                None, None, None, Some(vec![Some(2), Some(3)]), Some(vec![Some(4), Some(5)])
+            ]
+        }
+    }
+
     #[test]
     fn test_take_fixed_size_list() {
         do_take_fixed_size_list_test::<Int32Type>(
@@ -1914,6 +2121,32 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_take_fixed_size_binary_with_nulls_indices() {
+        let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+            [
+                Some(vec![0x01, 0x01, 0x01, 0x01]),
+                Some(vec![0x02, 0x02, 0x02, 0x02]),
+                Some(vec![0x03, 0x03, 0x03, 0x03]),
+                Some(vec![0x04, 0x04, 0x04, 0x04]),
+            ]
+            .into_iter(),
+            4,
+        )
+        .unwrap();
+
+        // The two middle indices are null, so output rows 1 and 2 must be null; rows 0 and 3 valid.
+        let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]);
+
+        let result = take_fixed_size_binary(&fsb, &indices, 4).unwrap();
+        assert_eq!(result.len(), 4);
+        assert_eq!(result.null_count(), 2);
+        assert_eq!(
+            result.nulls().unwrap().iter().collect::<Vec<_>>(),
+            vec![true, false, false, true]
+        );
+    }
+
     #[test]
     #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")]
     fn test_take_list_out_of_bounds() {
@@ -2204,6 +2437,27 @@ mod tests {
         assert_eq!(take_out_values.values(), &[2, 2, 2, 2, 1]);
     }
 
+    #[test]
+    fn test_take_runs_sliced() {
+        let logical_array: Vec<i32> = vec![1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6];
+
+        let mut builder = PrimitiveRunBuilder::<Int32Type, Int32Type>::new();
+        builder.extend(logical_array.into_iter().map(Some));
+        let run_array = builder.finish();
+
+        let run_array = run_array.slice(4, 6); // logical values 4..10: [3, 3, 3, 4, 4, 5]
+
+        let take_indices: PrimitiveArray<Int32Type> = vec![0, 5, 5, 1, 4].into_iter().collect();
+
+        let result = take_run(&run_array, &take_indices).unwrap();
+        let result = result.downcast::<Int32Array>().unwrap();
+
+        let expected = vec![3, 5, 5, 3, 4];
+        let actual = result.into_iter().flatten().collect::<Vec<_>>();
+
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn test_take_value_index_from_fixed_list() {
         let list = FixedSizeListArray::from_iter_primitive::<Int32Type, _, _>(
@@ -2402,7 +2656,7 @@ mod tests {
 
     #[test]
     fn test_take_union_dense_all_match_issue_6206() {
-        let fields = UnionFields::new(vec![0], vec![Field::new("a", DataType::Int64, false)]);
+        let fields = UnionFields::from_fields(vec![Field::new("a", DataType::Int64, false)]);
         let ints = Arc::new(Int64Array::from(vec![1, 2, 3, 4, 5]));
 
         let array = UnionArray::try_new(
@@ -2417,4 +2671,15 @@ mod tests {
         let array = take(&array, &indicies, None).unwrap();
         assert_eq!(array.len(), 3);
     }
+
+    #[test]
+    fn test_take_bytes_offset_overflow() {
+        // (i32::MAX >> 4) copies of a 26-byte string need more bytes than an i32 offset can hold
+        let copies = (i32::MAX >> 4) as usize;
+        let alphabet: String = ('a'..='z').collect();
+        let values = StringArray::from(vec![Some(alphabet)]);
+        let indices = Int32Array::from(vec![0; copies]);
+        let outcome = take(&values, &indices, None);
+        assert!(matches!(outcome, Err(ArrowError::OffsetOverflowError(_))));
+    }
 }
diff --git a/arrow-select/src/union_extract.rs b/arrow-select/src/union_extract.rs
index 62d660b80475..3accecc359fa 100644
--- a/arrow-select/src/union_extract.rs
+++ b/arrow-select/src/union_extract.rs
@@ -19,10 +19,10 @@
 
 use crate::take::take;
 use arrow_array::{
-    make_array, new_empty_array, new_null_array, Array, ArrayRef, BooleanArray, Int32Array, Scalar,
-    UnionArray,
+    Array, ArrayRef, BooleanArray, Int32Array, Scalar, UnionArray, make_array, new_empty_array,
+    new_null_array,
 };
-use arrow_buffer::{bit_util, BooleanBuffer, MutableBuffer, NullBuffer, ScalarBuffer};
+use arrow_buffer::{BooleanBuffer, MutableBuffer, NullBuffer, ScalarBuffer, bit_util};
 use arrow_data::layout;
 use arrow_schema::{ArrowError, DataType, UnionFields};
 use std::cmp::Ordering;
@@ -53,13 +53,13 @@ use std::sync::Arc;
 /// # use arrow_schema::{DataType, Field, UnionFields};
 /// # use arrow_array::{UnionArray, StringArray, Int32Array};
 /// # use arrow_select::union_extract::union_extract;
-/// let fields = UnionFields::new(
+/// let fields = UnionFields::try_new(
 ///     [1, 3],
 ///     [
 ///         Field::new("A", DataType::Int32, true),
 ///         Field::new("B", DataType::Utf8, true)
 ///     ]
-/// );
+/// ).unwrap();
 ///
 /// let union = UnionArray::try_new(
 ///     fields,
@@ -257,7 +257,7 @@ fn extract_dense(
                 //case 6: some type_ids matches our target, but not all. For selected values, take the value pointed by the offset. For unselected, use a valid null
                 Ok(take(
                     target,
-                    &Int32Array::new(offsets.clone(), Some(selected.into())),
+                    &Int32Array::try_new(offsets.clone(), Some(selected.into()))?,
                     None,
                 )?)
             }
@@ -399,8 +399,8 @@ fn is_sequential_generic<const N: usize>(offsets: &[i32]) -> bool {
 
 #[cfg(test)]
 mod tests {
-    use super::{eq_scalar_inner, is_sequential_generic, union_extract, BoolValue};
-    use arrow_array::{new_null_array, Array, Int32Array, NullArray, StringArray, UnionArray};
+    use super::{BoolValue, eq_scalar_inner, is_sequential_generic, union_extract};
+    use arrow_array::{Array, Int32Array, NullArray, StringArray, UnionArray, new_null_array};
     use arrow_buffer::{BooleanBuffer, ScalarBuffer};
     use arrow_schema::{ArrowError, DataType, Field, UnionFields, UnionMode};
     use std::sync::Arc;
@@ -543,17 +543,18 @@ mod tests {
     }
 
     fn str1() -> UnionFields {
-        UnionFields::new(vec![1], vec![Field::new("str", DataType::Utf8, true)])
+        UnionFields::try_new(vec![1], vec![Field::new("str", DataType::Utf8, true)]).unwrap()
     }
 
     fn str1_int3() -> UnionFields {
-        UnionFields::new(
+        UnionFields::try_new(
             vec![1, 3],
             vec![
                 Field::new("str", DataType::Utf8, true),
                 Field::new("int", DataType::Int32, true),
             ],
         )
+        .unwrap()
     }
 
     #[test]
@@ -599,13 +600,14 @@ mod tests {
     fn sparse_1_3a_null_target() {
         let union = UnionArray::try_new(
             // multiple fields
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![1, 3],
                 vec![
                     Field::new("str", DataType::Utf8, true),
                     Field::new("null", DataType::Null, true), // target type is Null
                 ],
-            ),
+            )
+            .unwrap(),
             ScalarBuffer::from(vec![1]), //not empty
             None,                        // sparse
             vec![
@@ -682,13 +684,14 @@ mod tests {
     }
 
     fn str1_union3(union3_datatype: DataType) -> UnionFields {
-        UnionFields::new(
+        UnionFields::try_new(
             vec![1, 3],
             vec![
                 Field::new("str", DataType::Utf8, true),
                 Field::new("union", union3_datatype, true),
             ],
         )
+        .unwrap()
     }
 
     #[test]
diff --git a/arrow-select/src/window.rs b/arrow-select/src/window.rs
index 2ad51561c69b..74f7f4a79191 100644
--- a/arrow-select/src/window.rs
+++ b/arrow-select/src/window.rs
@@ -18,9 +18,9 @@
 //! Defines windowing functions, like `shift`ing
 
 use crate::concat::concat;
-use arrow_array::{make_array, new_null_array, Array, ArrayRef};
+use arrow_array::{Array, ArrayRef, make_array, new_null_array};
 use arrow_schema::ArrowError;
-use num::abs;
+use num_traits::abs;
 
 /// Shifts array by defined number of items (to left or right)
 /// A positive value for `offset` shifts the array to the right
@@ -29,7 +29,6 @@ use num::abs;
 /// ```
 /// # use arrow_array::Int32Array;
 /// # use arrow_select::window::shift;
-///
 /// let a: Int32Array = vec![Some(1), None, Some(4)].into();
 ///
 /// // shift array 1 element to the right
diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs
index 2efd2e749921..8702b558d01f 100644
--- a/arrow-select/src/zip.rs
+++ b/arrow-select/src/zip.rs
@@ -17,10 +17,25 @@
 
 //! [`zip`]: Combine values from two arrays based on boolean mask
 
-use crate::filter::SlicesIterator;
+use crate::filter::{SlicesIterator, prep_null_mask_filter};
+use arrow_array::cast::AsArray;
+use arrow_array::types::{
+    BinaryType, BinaryViewType, ByteArrayType, ByteViewType, LargeBinaryType, LargeUtf8Type,
+    StringViewType, Utf8Type,
+};
 use arrow_array::*;
+use arrow_buffer::{
+    BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, OffsetBufferBuilder,
+    ScalarBuffer, ToByteSlice,
+};
 use arrow_data::transform::MutableArrayData;
-use arrow_schema::ArrowError;
+use arrow_data::{ArrayData, ByteView};
+use arrow_schema::{ArrowError, DataType};
+use std::fmt::{Debug, Formatter};
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::ops::Not;
+use std::sync::{Arc, OnceLock};
 
 /// Zip two arrays by some boolean mask.
 ///
@@ -86,8 +101,16 @@ pub fn zip(
     truthy: &dyn Datum,
     falsy: &dyn Datum,
 ) -> Result<ArrayRef, ArrowError> {
-    let (truthy, truthy_is_scalar) = truthy.get();
-    let (falsy, falsy_is_scalar) = falsy.get();
+    let (truthy_array, truthy_is_scalar) = truthy.get();
+    let (falsy_array, falsy_is_scalar) = falsy.get();
+
+    if falsy_is_scalar && truthy_is_scalar {
+        let zipper = ScalarZipper::try_new(truthy, falsy)?;
+        return zipper.zip_impl.create_output(mask);
+    }
+
+    let truthy = truthy_array;
+    let falsy = falsy_array;
 
     if truthy.data_type() != falsy.data_type() {
         return Err(ArrowError::InvalidArgumentError(
@@ -119,7 +142,17 @@ pub fn zip(
     let falsy = falsy.to_data();
     let truthy = truthy.to_data();
 
-    let mut mutable = MutableArrayData::new(vec![&truthy, &falsy], false, truthy.len());
+    zip_impl(mask, &truthy, truthy_is_scalar, &falsy, falsy_is_scalar)
+}
+
+fn zip_impl(
+    mask: &BooleanArray,
+    truthy: &ArrayData,
+    truthy_is_scalar: bool,
+    falsy: &ArrayData,
+    falsy_is_scalar: bool,
+) -> Result<ArrayRef, ArrowError> {
+    let mut mutable = MutableArrayData::new(vec![truthy, falsy], false, truthy.len());
 
     // the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
     // fill with falsy values
@@ -127,7 +160,8 @@ pub fn zip(
     // keep track of how much is filled
     let mut filled = 0;
 
-    SlicesIterator::new(mask).for_each(|(start, end)| {
+    let mask_buffer = maybe_prep_null_mask_filter(mask);
+    SlicesIterator::from(&mask_buffer).for_each(|(start, end)| {
         // the gap needs to be filled with falsy values
         if start > filled {
             if falsy_is_scalar {
@@ -166,9 +200,651 @@ pub fn zip(
     Ok(make_array(data))
 }
 
+/// Zipper that selects between two scalar values according to a boolean mask
+///
+/// Useful for evaluating `IF <expr> THEN <scalar> ELSE <scalar> END` expressions
+///
+/// # Example
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array, Scalar, cast::AsArray, types::Int32Type};
+/// # use arrow_select::zip::ScalarZipper;
+/// let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
+/// let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
+/// let zipper = ScalarZipper::try_new(&scalar_truthy, &scalar_falsy).unwrap();
+///
+/// // Later, once a boolean mask is available
+/// let mask = BooleanArray::from(vec![true, false, true, false, true]);
+/// let result = zipper.zip(&mask).unwrap();
+/// let actual = result.as_primitive::<Int32Type>();
+/// let expected = Int32Array::from(vec![Some(42), Some(123), Some(42), Some(123), Some(42)]);
+/// assert_eq!(actual, &expected);
+/// ```
+///
+#[derive(Debug, Clone)]
+pub struct ScalarZipper {
+    zip_impl: Arc<dyn ZipImpl>,
+}
+
+impl ScalarZipper {
+    /// Try to create a new `ScalarZipper` from two scalar `Datum`s,
+    /// selecting a type-specialized implementation based on their data type
+    ///
+    /// # Errors
+    /// Returns an error if:
+    /// - the two `Datum`s have different data types
+    /// - either `Datum` is not a scalar, or does not hold exactly 1 element
+    pub fn try_new(truthy: &dyn Datum, falsy: &dyn Datum) -> Result<Self, ArrowError> {
+        let (truthy, truthy_is_scalar) = truthy.get();
+        let (falsy, falsy_is_scalar) = falsy.get();
+
+        if truthy.data_type() != falsy.data_type() {
+            return Err(ArrowError::InvalidArgumentError(
+                "arguments need to have the same data type".into(),
+            ));
+        }
+
+        if !truthy_is_scalar {
+            return Err(ArrowError::InvalidArgumentError(
+                "only scalar arrays are supported".into(),
+            ));
+        }
+
+        if !falsy_is_scalar {
+            return Err(ArrowError::InvalidArgumentError(
+                "only scalar arrays are supported".into(),
+            ));
+        }
+
+        if truthy.len() != 1 {
+            return Err(ArrowError::InvalidArgumentError(
+                "scalar arrays must have 1 element".into(),
+            ));
+        }
+        if falsy.len() != 1 {
+            return Err(ArrowError::InvalidArgumentError(
+                "scalar arrays must have 1 element".into(),
+            ));
+        }
+
+        macro_rules! primitive_size_helper {
+            ($t:ty) => {
+                Arc::new(PrimitiveScalarImpl::<$t>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            };
+        }
+
+        let zip_impl = downcast_primitive! {
+            truthy.data_type() => (primitive_size_helper),
+            DataType::Utf8 => {
+                Arc::new(BytesScalarImpl::<Utf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::LargeUtf8 => {
+                Arc::new(BytesScalarImpl::<LargeUtf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::Binary => {
+                Arc::new(BytesScalarImpl::<BinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::LargeBinary => {
+                Arc::new(BytesScalarImpl::<LargeBinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::Utf8View => {
+                Arc::new(ByteViewScalarImpl::<StringViewType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::BinaryView => {
+                Arc::new(ByteViewScalarImpl::<BinaryViewType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+            _ => {
+                Arc::new(FallbackImpl::new(truthy, falsy)) as Arc<dyn ZipImpl>
+            },
+        };
+
+        Ok(Self { zip_impl })
+    }
+
+    /// Create the output array: the truthy scalar where `mask` is true, the falsy scalar otherwise
+    /// (null mask entries count as false). See the struct-level documentation for an example.
+    pub fn zip(&self, mask: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        self.zip_impl.create_output(mask)
+    }
+}
+
+/// Type-specific strategy that builds the zipped output array for a given mask
+trait ZipImpl: Debug + Send + Sync {
+    /// Create the output array from the boolean `input` mask
+    fn create_output(&self, input: &BooleanArray) -> Result<ArrayRef, ArrowError>;
+}
+
+/// Generic implementation for data types that have no specialized zipper
+#[derive(Debug, PartialEq)]
+struct FallbackImpl {
+    truthy: ArrayData,
+    falsy: ArrayData,
+}
+
+impl FallbackImpl {
+    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+        let (truthy, falsy) = (truthy.to_data(), falsy.to_data());
+        Self { truthy, falsy }
+    }
+}
+
+impl ZipImpl for FallbackImpl {
+    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        // Both stored values are scalars, hence the two `true` flags
+        zip_impl(predicate, &self.truthy, true, &self.falsy, true)
+    }
+}
+
+/// Zipper for primitive scalars: keeps only the two native values plus the data type
+struct PrimitiveScalarImpl<T: ArrowPrimitiveType> {
+    /// Data type taken from the truthy scalar
+    data_type: DataType,
+    /// Value of the truthy scalar, `None` when that scalar is null
+    truthy: Option<T::Native>,
+    /// Value of the falsy scalar, `None` when that scalar is null
+    falsy: Option<T::Native>,
+}
+
+impl<T: ArrowPrimitiveType> Debug for PrimitiveScalarImpl<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let mut out = f.debug_struct("PrimitiveScalarImpl");
+        out.field("data_type", &self.data_type);
+        out.field("truthy", &self.truthy);
+        out.field("falsy", &self.falsy);
+        out.finish()
+    }
+}
+
+impl<T: ArrowPrimitiveType> PrimitiveScalarImpl<T> {
+    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+        Self {
+            data_type: truthy.data_type().clone(),
+            truthy: Self::get_value_from_scalar(truthy),
+            falsy: Self::get_value_from_scalar(falsy),
+        }
+    }
+
+    /// Read the first value of `scalar`, or `None` if it is null
+    fn get_value_from_scalar(scalar: &dyn Array) -> Option<T::Native> {
+        if scalar.is_null(0) {
+            return None;
+        }
+        Some(scalar.as_primitive::<T>().value(0))
+    }
+
+    /// Build the values and the null buffer of an output array that has
+    /// `value` in all locations where `predicate` is true
+    /// and `null` everywhere else
+    fn get_scalar_and_null_buffer_for_single_non_nullable(
+        predicate: BooleanBuffer,
+        value: T::Native,
+    ) -> (Vec<T::Native>, Option<NullBuffer>) {
+        // Every slot stores `value`; the predicate itself becomes the validity bitmap
+        let scalars = vec![value; predicate.len()];
+        (scalars, Some(NullBuffer::new(predicate)))
+    }
+}
+
+impl<T: ArrowPrimitiveType> ZipImpl for PrimitiveScalarImpl<T> {
+    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        let result_len = predicate.len();
+        // Null mask entries are treated as false
+        let predicate = maybe_prep_null_mask_filter(predicate);
+
+        let (scalars, nulls): (Vec<T::Native>, Option<NullBuffer>) = match (self.truthy, self.falsy)
+        {
+            (Some(truthy_val), Some(falsy_val)) => {
+                let scalars: Vec<T::Native> = predicate
+                    .iter()
+                    .map(|b| if b { truthy_val } else { falsy_val })
+                    .collect();
+
+                (scalars, None)
+            }
+            (Some(truthy_val), None) => {
+                // Where the mask is true we need the TRUTHY value, so the validity bit is 1 (not null)
+                // Where the mask is false we need the (null) FALSY value, so the validity bit is 0 (null)
+
+                Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, truthy_val)
+            }
+            (None, Some(falsy_val)) => {
+                // Flip the boolean buffer, as this is the mirror image of the case above:
+                //
+                // where the mask is true we want null, and NOT turns that bit into 0 (meaning null)
+                // where the mask is false we want the FALSY value, and NOT turns that bit into 1 (meaning not null)
+                let predicate = predicate.not();
+
+                Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, falsy_val)
+            }
+            (None, None) => {
+                // Both scalars are null, so every output value is null
+                let nulls = NullBuffer::new_null(result_len);
+                let scalars = vec![T::default_value(); result_len];
+
+                (scalars, Some(nulls))
+            }
+        };
+
+        let scalars = ScalarBuffer::<T::Native>::from(scalars);
+        let output = PrimitiveArray::<T>::try_new(scalars, nulls)?;
+
+        // Re-apply the stored data type to keep decimal precision/scale or timestamp time zones
+        let output = output.with_data_type(self.data_type.clone());
+
+        Ok(Arc::new(output))
+    }
+}
+
+#[derive(PartialEq, Hash)]
+struct BytesScalarImpl<T: ByteArrayType> {
+    truthy: Option<Vec<u8>>,
+    falsy: Option<Vec<u8>>,
+    phantom: PhantomData<T>,
+}
+
+impl<T: ByteArrayType> Debug for BytesScalarImpl<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let mut out = f.debug_struct("BytesScalarImpl");
+        out.field("truthy", &self.truthy);
+        out.field("falsy", &self.falsy);
+        out.finish()
+    }
+}
+
+impl<T: ByteArrayType> BytesScalarImpl<T> {
+    fn new(truthy_value: &dyn Array, falsy_value: &dyn Array) -> Self {
+        Self {
+            truthy: Self::get_value_from_scalar(truthy_value),
+            falsy: Self::get_value_from_scalar(falsy_value),
+            phantom: PhantomData,
+        }
+    }
+
+    fn get_value_from_scalar(scalar: &dyn Array) -> Option<Vec<u8>> {
+        if scalar.is_null(0) {
+            None
+        } else {
+            let bytes: &[u8] = scalar.as_bytes::<T>().value(0).as_ref();
+
+            Some(bytes.to_vec())
+        }
+    }
+
+    /// Return the bytes, offsets and nulls of an output array that has
+    /// `value` in all locations where `predicate` is true
+    /// and `null` everywhere else
+    fn get_scalar_and_null_buffer_for_single_non_nullable(
+        predicate: BooleanBuffer,
+        value: &[u8],
+    ) -> (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) {
+        let value_length = value.len();
+
+        let number_of_true = predicate.count_set_bits();
+
+        // Fast path when the predicate selects nothing
+        if number_of_true == 0 {
+            // All values are null
+            let nulls = NullBuffer::new_null(predicate.len());
+
+            return (
+                // Empty bytes
+                Buffer::from(&[]),
+                // All values are null, so every length is 0
+                OffsetBuffer::<T::Offset>::new_zeroed(predicate.len()),
+                Some(nulls),
+            );
+        }
+
+        let offsets = OffsetBuffer::<T::Offset>::from_lengths(
+            predicate.iter().map(|b| if b { value_length } else { 0 }),
+        );
+
+        let mut bytes = MutableBuffer::with_capacity(0);
+        bytes.repeat_slice_n_times(value, number_of_true);
+
+        let bytes = Buffer::from(bytes);
+
+        // Where the predicate is true the output holds `value`, so the validity bit is 1 (not null)
+        // Where the predicate is false the output is null, so the validity bit is 0 (null)
+        let nulls = NullBuffer::new(predicate);
+
+        (bytes, offsets, Some(nulls))
+    }
+
+    /// Create a [`Buffer`] in which the `value` slice is repeated `number_of_values` times
+    /// and an [`OffsetBuffer`] with `number_of_values` lengths, all equal to the length of `value`
+    fn get_bytes_and_offset_for_all_same_value(
+        number_of_values: usize,
+        value: &[u8],
+    ) -> (Buffer, OffsetBuffer<T::Offset>) {
+        let value_length = value.len();
+
+        let offsets =
+            OffsetBuffer::<T::Offset>::from_repeated_length(value_length, number_of_values);
+
+        let mut bytes = MutableBuffer::with_capacity(0);
+        bytes.repeat_slice_n_times(value, number_of_values);
+        let bytes = Buffer::from(bytes);
+
+        (bytes, offsets)
+    }
+
+    fn create_output_on_non_nulls(
+        predicate: &BooleanBuffer,
+        truthy_val: &[u8],
+        falsy_val: &[u8],
+    ) -> (Buffer, OffsetBuffer<<T as ByteArrayType>::Offset>) {
+        let true_count = predicate.count_set_bits();
+
+        match true_count {
+            0 => {
+                // All values are falsy
+
+                let (bytes, offsets) =
+                    Self::get_bytes_and_offset_for_all_same_value(predicate.len(), falsy_val);
+
+                return (bytes, offsets);
+            }
+            n if n == predicate.len() => {
+                // All values are truthy
+                let (bytes, offsets) =
+                    Self::get_bytes_and_offset_for_all_same_value(predicate.len(), truthy_val);
+
+                return (bytes, offsets);
+            }
+
+            _ => {
+                // Mixed mask: handled by the general path below
+            }
+        }
+
+        let total_number_of_bytes =
+            true_count * truthy_val.len() + (predicate.len() - true_count) * falsy_val.len();
+        let mut mutable = MutableBuffer::with_capacity(total_number_of_bytes);
+        let mut offset_buffer_builder = OffsetBufferBuilder::<T::Offset>::new(predicate.len());
+
+        // keep track of how many output values have been written
+        let mut filled = 0;
+
+        let truthy_len = truthy_val.len();
+        let falsy_len = falsy_val.len();
+
+        SlicesIterator::from(predicate).for_each(|(start, end)| {
+            // the gap needs to be filled with falsy values
+            if start > filled {
+                let false_repeat_count = start - filled;
+                // Push the falsy value `false_repeat_count` times
+                mutable.repeat_slice_n_times(falsy_val, false_repeat_count);
+
+                for _ in 0..false_repeat_count {
+                    offset_buffer_builder.push_length(falsy_len)
+                }
+            }
+
+            let true_repeat_count = end - start;
+            // fill with truthy values
+            mutable.repeat_slice_n_times(truthy_val, true_repeat_count);
+
+            for _ in 0..true_repeat_count {
+                offset_buffer_builder.push_length(truthy_len)
+            }
+            filled = end;
+        });
+        // the remaining part is falsy
+        if filled < predicate.len() {
+            let false_repeat_count = predicate.len() - filled;
+            // Push the falsy value once for every remaining position
+            mutable.repeat_slice_n_times(falsy_val, false_repeat_count);
+
+            for _ in 0..false_repeat_count {
+                offset_buffer_builder.push_length(falsy_len)
+            }
+        }
+
+        (mutable.into(), offset_buffer_builder.finish())
+    }
+}
+
+impl<T: ByteArrayType> ZipImpl for BytesScalarImpl<T> {
+    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        let result_len = predicate.len();
+        // Null mask entries are treated as false
+        let predicate = maybe_prep_null_mask_filter(predicate);
+
+        let (bytes, offsets, nulls): (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) =
+            match (self.truthy.as_deref(), self.falsy.as_deref()) {
+                (Some(truthy_val), Some(falsy_val)) => {
+                    let (bytes, offsets) =
+                        Self::create_output_on_non_nulls(&predicate, truthy_val, falsy_val);
+
+                    (bytes, offsets, None)
+                }
+                (Some(truthy_val), None) => {
+                    Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, truthy_val)
+                }
+                (None, Some(falsy_val)) => {
+                    // Flip the boolean buffer, as this is the mirror image of the case above:
+                    //
+                    // where the mask is true we want null, and NOT turns that bit into 0 (meaning null)
+                    // where the mask is false we want the FALSY value, and NOT turns that bit into 1 (meaning not null)
+                    let predicate = predicate.not();
+                    Self::get_scalar_and_null_buffer_for_single_non_nullable(predicate, falsy_val)
+                }
+                (None, None) => {
+                    // Both scalars are null, so every output value is null
+                    let nulls = NullBuffer::new_null(result_len);
+
+                    (
+                        // Empty bytes
+                        Buffer::from(&[]),
+                        // All values are null, so every length is 0
+                        OffsetBuffer::<T::Offset>::new_zeroed(predicate.len()),
+                        Some(nulls),
+                    )
+                }
+            };
+
+        let output = unsafe {
+            // Safety: the values are based on valid inputs,
+            // and `try_new` is expensive for strings as it validates that the input is valid UTF-8
+            GenericByteArray::<T>::new_unchecked(offsets, bytes, nulls)
+        };
+
+        Ok(Arc::new(output))
+    }
+}
+
+/// Return the mask values of `predicate`, with nulls treated as false
+fn maybe_prep_null_mask_filter(predicate: &BooleanArray) -> BooleanBuffer {
+    if predicate.null_count() > 0 {
+        // Let `prep_null_mask_filter` deal with the nulls, then keep only its value bits
+        let (values, _nulls) = prep_null_mask_filter(predicate).into_parts();
+        return values;
+    }
+    // No nulls, so the value bits can be used as they are
+    predicate.values().clone()
+}
+
+/// Zipper for byte-view scalars, working directly on the raw 128-bit views
+struct ByteViewScalarImpl<T: ByteViewType> {
+    /// View of the truthy scalar (`None` when it is null) and the buffers of its array
+    truthy_view: Option<u128>,
+    truthy_buffers: Arc<[Buffer]>,
+    /// View of the falsy scalar (`None` when it is null) and the buffers of its array
+    falsy_view: Option<u128>,
+    falsy_buffers: Arc<[Buffer]>,
+    phantom: PhantomData<T>,
+}
+
+/// Shared empty buffer list, created on first use
+static EMPTY_ARC: OnceLock<Arc<[Buffer]>> = OnceLock::new();
+fn empty_arc_buffers() -> Arc<[Buffer]> {
+    Arc::clone(EMPTY_ARC.get_or_init(|| Arc::new([])))
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+        let (truthy_view, truthy_buffers) = Self::get_value_from_scalar(truthy);
+        let (falsy_view, falsy_buffers) = Self::get_value_from_scalar(falsy);
+        Self {
+            truthy_view,
+            truthy_buffers,
+            falsy_view,
+            falsy_buffers,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Return the first view of `scalar` with its data buffers, or `None` and no buffers if null
+    fn get_value_from_scalar(scalar: &dyn Array) -> (Option<u128>, Arc<[Buffer]>) {
+        if scalar.is_null(0) {
+            return (None, empty_arc_buffers());
+        }
+        let (views, buffers, _) = scalar.as_byte_view::<T>().clone().into_parts();
+        (views.first().copied(), buffers)
+    }
+
+    /// Build the output parts when only one of the scalars is non-null:
+    /// `value` where `predicate` is true, null everywhere else
+    fn get_views_for_single_non_nullable(
+        predicate: BooleanBuffer,
+        value: u128,
+        buffers: Arc<[Buffer]>,
+    ) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
+        let number_of_values = predicate.len();
+        // Fast path: every value is null, so no data buffers are needed
+        if predicate.count_set_bits() == 0 {
+            return (
+                vec![0; number_of_values].into(),
+                empty_arc_buffers(),
+                Some(NullBuffer::new_null(number_of_values)),
+            );
+        }
+        let views = vec![value; number_of_values];
+
+        // The predicate doubles as the validity bitmap: set bits hold `value`, the rest is null
+        let nulls = NullBuffer::new(predicate);
+        (views.into(), buffers, Some(nulls))
+    }
+
+    /// Build the output parts when both scalars are non-null:
+    /// `truthy_view` where `predicate` is true, `falsy_view` everywhere else
+    fn get_views_for_non_nullable(
+        predicate: BooleanBuffer,
+        result_len: usize,
+        truthy_view: u128,
+        truthy_buffers: Arc<[Buffer]>,
+        falsy_view: u128,
+        falsy_buffers: Arc<[Buffer]>,
+    ) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
+        let true_count = predicate.count_set_bits();
+        if true_count == 0 {
+            // all values are falsy
+            return (vec![falsy_view; result_len].into(), falsy_buffers, None);
+        }
+        if true_count == predicate.len() {
+            // all values are truthy
+            return (vec![truthy_view; result_len].into(), truthy_buffers, None);
+        }
+
+        let mut buffers: Vec<Buffer> = truthy_buffers.to_vec();
+        let byte_view_falsy = ByteView::from(falsy_view);
+
+        // Values of at most 12 bytes are stored inline in the view and reference no buffer.
+        // Such a view must be kept as it is even when the falsy array carries data buffers:
+        // the bytes that hold the buffer index of a longer value are part of the inlined
+        // data here, so rewriting them would corrupt the value
+        let falsy_is_inlined = byte_view_falsy.length <= 12;
+        let view_falsy = if falsy_is_inlined || falsy_buffers.is_empty() {
+            falsy_view
+        } else {
+            let new_index_falsy_buffers = buffers.len() as u32 + byte_view_falsy.buffer_index;
+            buffers.extend(falsy_buffers.iter().cloned());
+            byte_view_falsy
+                .with_buffer_index(new_index_falsy_buffers)
+                .as_u128()
+        };
+
+        // One 16-byte view per output value
+        let mut mutable = MutableBuffer::new(predicate.len() * 16);
+        let mut filled = 0;
+
+        SlicesIterator::from(&predicate).for_each(|(start, end)| {
+            if start > filled {
+                let false_repeat_count = start - filled;
+                mutable.repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
+            }
+            let true_repeat_count = end - start;
+            mutable.repeat_slice_n_times(truthy_view.to_byte_slice(), true_repeat_count);
+            filled = end;
+        });
+
+        // the remaining part is falsy
+        if filled < predicate.len() {
+            let false_repeat_count = predicate.len() - filled;
+            mutable.repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
+        }
+
+        (Buffer::from(mutable).into(), buffers.into(), None)
+    }
+}
+
+impl<T: ByteViewType> Debug for ByteViewScalarImpl<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let mut out = f.debug_struct("ByteViewScalarImpl");
+        out.field("truthy", &self.truthy_view);
+        out.field("falsy", &self.falsy_view);
+        out.finish()
+    }
+}
+
+impl<T: ByteViewType> ZipImpl for ByteViewScalarImpl<T> {
+    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        let result_len = predicate.len();
+        // Null mask entries are treated as false
+        let predicate = maybe_prep_null_mask_filter(predicate);
+
+        let (views, buffers, nulls) = match (self.truthy_view, self.falsy_view) {
+            (Some(truthy), Some(falsy)) => Self::get_views_for_non_nullable(
+                predicate,
+                result_len,
+                truthy,
+                Arc::clone(&self.truthy_buffers),
+                falsy,
+                Arc::clone(&self.falsy_buffers),
+            ),
+            (Some(truthy), None) => Self::get_views_for_single_non_nullable(
+                predicate,
+                truthy,
+                Arc::clone(&self.truthy_buffers),
+            ),
+            // Inverting the mask turns the falsy positions into the selected ones
+            (None, Some(falsy)) => Self::get_views_for_single_non_nullable(
+                predicate.not(),
+                falsy,
+                Arc::clone(&self.falsy_buffers),
+            ),
+            // Both scalars are null, so every output value is null
+            (None, None) => (
+                vec![0; result_len].into(),
+                empty_arc_buffers(),
+                Some(NullBuffer::new_null(result_len)),
+            ),
+        };
+
+        // SAFETY (assumed, not checked here): each view is either taken from an input scalar
+        // whose buffers are part of `buffers`, or is an all-zero view.
+        // NOTE(review): confirm this matches the safety contract of `new_unchecked`
+        let result = unsafe { GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls) };
+        Ok(Arc::new(result))
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
+    use arrow_array::types::Int32Type;
 
     #[test]
     fn test_zip_kernel_one() {
@@ -245,7 +921,7 @@ mod test {
     }
 
     #[test]
-    fn test_zip_kernel_scalar_both() {
+    fn test_zip_kernel_scalar_both_mask_ends_with_true() {
         let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
         let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
 
@@ -257,7 +933,26 @@ mod test {
     }
 
     #[test]
-    fn test_zip_kernel_scalar_none_1() {
+    fn test_zip_kernel_scalar_both_mask_ends_with_false() {
+        let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
+        let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
+
+        let mask = BooleanArray::from(vec![true, true, false, true, false, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
+        let expected = Int32Array::from(vec![
+            Some(42),
+            Some(42),
+            Some(123),
+            Some(42),
+            Some(123),
+            Some(123),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_primitive_scalar_none_1() {
         let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
         let scalar_falsy = Scalar::new(Int32Array::new_null(1));
 
@@ -269,7 +964,7 @@ mod test {
     }
 
     #[test]
-    fn test_zip_kernel_scalar_none_2() {
+    fn test_zip_kernel_primitive_scalar_none_2() {
         let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
         let scalar_falsy = Scalar::new(Int32Array::new_null(1));
 
@@ -279,4 +974,590 @@ mod test {
         let expected = Int32Array::from(vec![None, None, Some(42), Some(42), None]);
         assert_eq!(actual, &expected);
     }
+
+    #[test]
+    fn test_zip_kernel_primitive_scalar_both_null() {
+        // Both branches are null scalars, so every output slot must be null.
+        let null_when_true = Scalar::new(Int32Array::new_null(1));
+        let null_when_false = Scalar::new(Int32Array::new_null(1));
+        let selector = BooleanArray::from(vec![false, false, true, true, false]);
+        let zipped = zip(&selector, &null_when_true, &null_when_false).unwrap();
+        let got = zipped.as_primitive::<Int32Type>();
+        let want = Int32Array::from(vec![None; 5]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_primitive_array_with_nulls_is_mask_should_be_treated_as_false() {
+        let when_true = Int32Array::from_iter_values(vec![1, 2, 3, 4, 5, 6]);
+        let when_false = Int32Array::from_iter_values(vec![7, 8, 9, 10, 11, 12]);
+
+        // Slot 3 holds `true` but is marked invalid, so it must select the falsy side.
+        let bits = BooleanBuffer::from(vec![true, true, false, true, false, false]);
+        let validity = NullBuffer::from(vec![
+            true, true, true,
+            false, // invalid slot: counts as false although the bit underneath is true
+            true, true,
+        ]);
+        let selector = BooleanArray::new(bits, Some(validity));
+
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<Int32Array>().unwrap();
+        let want = Int32Array::from(vec![
+            Some(1),
+            Some(2),
+            Some(9),
+            Some(10), // mask bit is true, but the slot is null
+            Some(11),
+            Some(12),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_primitive_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
+     {
+        let when_true = Scalar::new(Int32Array::from_value(42, 1));
+        let when_false = Scalar::new(Int32Array::from_value(123, 1));
+
+        // Slot 3 holds `true` but is marked invalid, so it must select the falsy scalar.
+        let bits = BooleanBuffer::from(vec![true, true, false, true, false, false]);
+        let validity = NullBuffer::from(vec![
+            true, true, true,
+            false, // invalid slot: counts as false although the bit underneath is true
+            true, true,
+        ]);
+        let selector = BooleanArray::new(bits, Some(validity));
+
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<Int32Array>().unwrap();
+        let want = Int32Array::from(vec![
+            Some(42),
+            Some(42),
+            Some(123),
+            Some(123), // mask bit is true, but the slot is null
+            Some(123),
+            Some(123),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_string_array_with_nulls_is_mask_should_be_treated_as_false() {
+        let when_true = StringArray::from_iter_values(vec!["1", "2", "3", "4", "5", "6"]);
+        let when_false = StringArray::from_iter_values(vec!["7", "8", "9", "10", "11", "12"]);
+
+        // Slot 3 holds `true` but is marked invalid, so it must select the falsy side.
+        let bits = BooleanBuffer::from(vec![true, true, false, true, false, false]);
+        let validity = NullBuffer::from(vec![
+            true, true, true,
+            false, // invalid slot: counts as false although the bit underneath is true
+            true, true,
+        ]);
+        let selector = BooleanArray::new(bits, Some(validity));
+
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();
+        let want = StringArray::from_iter_values(vec![
+            "1", "2", "9", "10", // mask bit is true, but the slot is null
+            "11", "12",
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_large_string_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
+     {
+        let when_true = Scalar::new(LargeStringArray::from_iter_values(["test"]));
+        let when_false = Scalar::new(LargeStringArray::from_iter_values(["something else"]));
+
+        // Slot 3 holds `true` but is marked invalid, so it must select the falsy scalar.
+        let bits = BooleanBuffer::from(vec![true, true, false, true, false, false]);
+        let validity = NullBuffer::from(vec![
+            true, true, true,
+            false, // invalid slot: counts as false although the bit underneath is true
+            true, true,
+        ]);
+        let selector = BooleanArray::new(bits, Some(validity));
+
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<LargeStringArray>().unwrap();
+        let want = LargeStringArray::from_iter(vec![
+            Some("test"),
+            Some("test"),
+            Some("something else"),
+            Some("something else"), // mask bit is true, but the slot is null
+            Some("something else"),
+            Some("something else"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_bytes_scalar_none_1() {
+        // Null on the falsy side: rows where the mask is false come out null.
+        let when_true = Scalar::new(StringArray::from_iter_values(["hello"]));
+        let null_when_false = Scalar::new(StringArray::new_null(1));
+        let selector = BooleanArray::from(vec![true, true, false, false, true]);
+        let zipped = zip(&selector, &when_true, &null_when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();
+        let want = StringArray::from_iter(vec![
+            Some("hello"),
+            Some("hello"),
+            None,
+            None,
+            Some("hello"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_bytes_scalar_none_2() {
+        // Null on the truthy side: rows where the mask is true come out null.
+        let null_when_true = Scalar::new(StringArray::new_null(1));
+        let when_false = Scalar::new(StringArray::from_iter_values(["hello"]));
+        let selector = BooleanArray::from(vec![true, true, false, false, true]);
+        let zipped = zip(&selector, &null_when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();
+        let want = StringArray::from_iter(vec![None, None, Some("hello"), Some("hello"), None]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_bytes_scalar_both() {
+        let when_true = Scalar::new(StringArray::from_iter_values(["test"]));
+        let when_false = Scalar::new(StringArray::from_iter_values(["something else"]));
+        // the final run of the mask is `false`
+        let selector = BooleanArray::from(vec![true, true, false, true, false, false]);
+
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();
+        let want = StringArray::from_iter(vec![
+            Some("test"),
+            Some("test"),
+            Some("something else"),
+            Some("test"),
+            Some("something else"),
+            Some("something else"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_scalar_bytes_only_taking_one_side() {
+        let rows = 5;
+        let only_true = BooleanArray::from(vec![true; rows]);
+        let only_false = BooleanArray::from(vec![false; rows]);
+
+        let null_str = Scalar::new(StringArray::new_null(1));
+        let test_str = Scalar::new(StringArray::from_iter_values(["test"]));
+        let other_str = Scalar::new(StringArray::from_iter_values(["something else"]));
+
+        {
+            // Case 1: truthy is null, falsy is "test", mask is all true
+            //         => every row is null
+            let zipped = zip(&only_true, &null_str, &test_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(None::<&str>, rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 2: truthy is null, falsy is "test", mask is all false
+            //         => every row is "test"
+            let zipped = zip(&only_false, &null_str, &test_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(Some("test"), rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 3: truthy is "test", falsy is null, mask is all true
+            //         => every row is "test"
+            let zipped = zip(&only_true, &test_str, &null_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(Some("test"), rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 4: truthy is "test", falsy is null, mask is all false
+            //         => every row is null
+            let zipped = zip(&only_false, &test_str, &null_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(None::<&str>, rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 5: both sides non-null, mask is all true
+            //         => every row is the truthy value
+            let zipped = zip(&only_true, &test_str, &other_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(Some("test"), rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 6: both sides non-null, mask is all false
+            //         => every row is the falsy value
+            let zipped = zip(&only_false, &test_str, &other_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want =
+                StringArray::from_iter(std::iter::repeat_n(Some("something else"), rows));
+            assert_eq!(got, &want);
+        }
+
+        {
+            // Case 7: both sides null, mask mixes true and false
+            //         => every row is null
+            let mixed = BooleanArray::from(vec![true, false, true, false, true]);
+            let zipped = zip(&mixed, &null_str, &null_str).unwrap();
+            let got = zipped.as_string::<i32>();
+            let want = StringArray::from_iter(std::iter::repeat_n(None::<&str>, rows));
+            assert_eq!(got, &want);
+        }
+    }
+
+    #[test]
+    fn test_scalar_zipper() {
+        let when_true = Scalar::new(Int32Array::from_value(42, 1));
+        let when_false = Scalar::new(Int32Array::from_value(123, 1));
+        // Built once, then applied to masks of different lengths
+        let zipper = ScalarZipper::try_new(&when_true, &when_false).unwrap();
+
+        let five_rows = BooleanArray::from(vec![false, false, true, true, false]);
+        let zipped = zipper.zip(&five_rows).unwrap();
+        let got = zipped.as_primitive::<Int32Type>();
+        let want = Int32Array::from(vec![Some(123), Some(123), Some(42), Some(42), Some(123)]);
+        assert_eq!(got, &want);
+
+        // the same zipper must also handle a mask of another length
+        let three_rows = BooleanArray::from(vec![true, false, true]);
+        let zipped = zipper.zip(&three_rows).unwrap();
+        let got = zipped.as_primitive::<Int32Type>();
+        let want = Int32Array::from(vec![Some(42), Some(123), Some(42)]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings() {
+        // Two non-null Utf8 scalars selected by an alternating mask
+        let when_true = Scalar::new(StringArray::from(vec!["hello"]));
+        let when_false = Scalar::new(StringArray::from(vec!["world"]));
+        let selector = BooleanArray::from(vec![true, false, true, false, true]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string::<i32>();
+        let want = StringArray::from(vec![
+            Some("hello"),
+            Some("world"),
+            Some("hello"),
+            Some("world"),
+            Some("hello"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_binary() {
+        // The truthy value is deliberately not valid UTF-8
+        let true_bytes: &[u8] = b"\xFF\xFE\xFD";
+        let false_bytes: &[u8] = b"world";
+        let when_true = Scalar::new(BinaryArray::from_iter_values(vec![true_bytes]));
+        let when_false = Scalar::new(BinaryArray::from_iter_values(vec![false_bytes]));
+
+        let selector = BooleanArray::from(vec![true, false, true, false, true]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_binary::<i32>();
+
+        // Rows follow the mask: true, false, true, false, true
+        let want = BinaryArray::from(vec![
+            Some(true_bytes),
+            Some(false_bytes),
+            Some(true_bytes),
+            Some(false_bytes),
+            Some(true_bytes),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_large_binary() {
+        let true_bytes: &[u8] = b"hey";
+        let false_bytes: &[u8] = b"world";
+        let when_true = Scalar::new(LargeBinaryArray::from_iter_values(vec![true_bytes]));
+        let when_false = Scalar::new(LargeBinaryArray::from_iter_values(vec![false_bytes]));
+        // Rows follow the mask: true, false, true, false, true
+        let selector = BooleanArray::from(vec![true, false, true, false, true]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_binary::<i64>();
+        let want = LargeBinaryArray::from(vec![
+            Some(true_bytes),
+            Some(false_bytes),
+            Some(true_bytes),
+            Some(false_bytes),
+            Some(true_bytes),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    // Zipping Decimal128 data must not lose the non-default precision and scale
+    #[test]
+    fn test_zip_decimal_with_custom_precision_and_scale() {
+        let decimals = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
+            .with_precision_and_scale(20, 2)
+            .unwrap();
+        let source: ArrayRef = Arc::new(decimals);
+
+        // One-row slices act as scalars, two-row slices as full arrays
+        let first_scalar = Scalar::new(source.slice(0, 1));
+        let second_scalar = Scalar::new(source.slice(1, 1));
+        let all_null = Scalar::new(new_null_array(source.data_type(), 1));
+        let head: ArrayRef = source.slice(0, 2);
+        let tail: ArrayRef = source.slice(2, 2);
+
+        test_zip_output_data_types_for_input(first_scalar, second_scalar, all_null, head, tail);
+    }
+
+    // Zipping timestamp data must not lose the timezone attached to the type
+    #[test]
+    fn test_zip_timestamp_with_timezone() {
+        let stamps = TimestampSecondArray::from(vec![0, 1000, 2000, 4000])
+            .with_timezone("+01:00".to_string());
+        let source: ArrayRef = Arc::new(stamps);
+
+        // One-row slices act as scalars, two-row slices as full arrays
+        let first_scalar = Scalar::new(source.slice(0, 1));
+        let second_scalar = Scalar::new(source.slice(1, 1));
+        let all_null = Scalar::new(new_null_array(source.data_type(), 1));
+        let head: ArrayRef = source.slice(0, 2);
+        let tail: ArrayRef = source.slice(2, 2);
+
+        test_zip_output_data_types_for_input(first_scalar, second_scalar, all_null, head, tail);
+    }
+
+    fn test_zip_output_data_types_for_input(
+        scalar_1: Scalar<ArrayRef>,
+        scalar_2: Scalar<ArrayRef>,
+        null_scalar: Scalar<ArrayRef>,
+        array_1: ArrayRef,
+        array_2: ArrayRef,
+    ) {
+        // Array-backed cases size the mask to `array_1`; scalar-only cases use 10 rows.
+        let rows = array_1.len();
+
+        // scalar vs scalar, both non-null
+        test_zip_output_data_type(&scalar_1, &scalar_2, 10);
+
+        // scalar vs scalar with a null on either side
+        test_zip_output_data_type(&null_scalar, &scalar_1, 10);
+        test_zip_output_data_type(&scalar_1, &null_scalar, 10);
+
+        // array vs non-null scalar, in both orders
+        test_zip_output_data_type(&array_1.as_ref(), &scalar_1, rows);
+        test_zip_output_data_type(&scalar_1, &array_1.as_ref(), rows);
+        // array vs null scalar, in both orders
+        test_zip_output_data_type(&array_1.as_ref(), &null_scalar, rows);
+        test_zip_output_data_type(&null_scalar, &array_1.as_ref(), rows);
+        // array vs array
+        test_zip_output_data_type(&array_1.as_ref(), &array_2.as_ref(), rows);
+    }
+
+    fn test_zip_output_data_type(truthy: &dyn Datum, falsy: &dyn Datum, mask_length: usize) {
+        // Both inputs must agree on the type; the output is expected to carry it unchanged.
+        let want_type = truthy.get().0.data_type().clone();
+        assert_eq!(&want_type, falsy.get().0.data_type());
+
+        // Uniform and alternating masks, to test different paths
+        let every_true = BooleanArray::from(vec![true; mask_length]);
+        let every_false = BooleanArray::from(vec![false; mask_length]);
+        let alternating: Vec<bool> = (0..mask_length).map(|idx| idx % 2 == 0).collect();
+        let alternating = BooleanArray::from(alternating);
+        for selector in [&every_true, &every_false, &alternating] {
+            let zipped = zip(selector, truthy, falsy).unwrap();
+            assert_eq!(zipped.data_type(), &want_type);
+        }
+    }
+
+    #[test]
+    fn zip_scalar_fallback_impl() {
+        // Zip two single-row list scalars; each output row repeats one of them whole.
+        let true_row = Some(vec![Some(1), None, Some(3)]);
+        let false_row = Some(vec![None, Some(2), Some(4)]);
+        let when_true =
+            Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+                true_row.clone(),
+            ]));
+        let when_false =
+            Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+                false_row.clone(),
+            ]));
+        let selector = BooleanArray::from(vec![true, false, true, false, false, true, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_list::<i32>();
+        let want = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
+            true_row.clone(),
+            false_row.clone(),
+            true_row.clone(),
+            false_row.clone(),
+            false_row.clone(),
+            true_row.clone(),
+            false_row.clone(),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view() {
+        // Two short, non-null Utf8View scalars selected by an alternating mask
+        let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["world"]));
+        let selector = BooleanArray::from(vec![true, false, true, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![
+            Some("hello"),
+            Some("world"),
+            Some("hello"),
+            Some("world"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_binary_array_view() {
+        // Two short, non-null BinaryView scalars; one row is taken from each
+        let when_true = Scalar::new(BinaryViewArray::from_iter_values(vec![b"hello"]));
+        let when_false = Scalar::new(BinaryViewArray::from_iter_values(vec![b"world"]));
+        let selector = BooleanArray::from(vec![true, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_byte_view();
+        let want = BinaryViewArray::from_iter_values(vec![b"hello", b"world"]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_with_nulls() {
+        // Null on the falsy side: rows where the mask is false come out null.
+        let when_true = Scalar::new(StringViewArray::from_iter_values(["hello"]));
+        let null_when_false = Scalar::new(StringViewArray::new_null(1));
+        let selector = BooleanArray::from(vec![true, true, false, false, true]);
+        let zipped = zip(&selector, &when_true, &null_when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let want = StringViewArray::from_iter(vec![
+            Some("hello"),
+            Some("hello"),
+            None,
+            None,
+            Some("hello"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_all_true_null() {
+        let null_when_true = Scalar::new(StringViewArray::new_null(1));
+        let null_when_false = Scalar::new(StringViewArray::new_null(1));
+        let selector = BooleanArray::from(vec![true, true]);
+        let zipped = zip(&selector, &null_when_true, &null_when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let want = StringViewArray::from_iter(vec![None::<String>, None]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_all_false_null() {
+        let null_when_true = Scalar::new(StringViewArray::new_null(1));
+        let null_when_false = Scalar::new(StringViewArray::new_null(1));
+        let selector = BooleanArray::from(vec![false, false]);
+        let zipped = zip(&selector, &null_when_true, &null_when_false).unwrap();
+        let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let want = StringViewArray::from_iter(vec![None::<String>, None]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_string_array_view_all_true() {
+        // All-true mask: only the truthy scalar should appear in the output
+        let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["world"]));
+        let selector = BooleanArray::from(vec![true, true]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![Some("hello"), Some("hello")]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_string_array_view_all_false() {
+        // All-false mask: only the falsy scalar should appear in the output
+        let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["world"]));
+        let selector = BooleanArray::from(vec![false, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![Some("world"), Some("world")]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_large_strings() {
+        // Both Utf8View values are longer than 12 bytes
+        let when_true = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+        let selector = BooleanArray::from(vec![true, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![
+            Some("longer than 12 bytes"),
+            Some("another longer than 12 bytes"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_short_strings() {
+        // A short truthy value mixed with a falsy value longer than 12 bytes
+        let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let selector = BooleanArray::from(vec![true, false, true, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![
+            Some("hello"),
+            Some("longer than 12 bytes"),
+            Some("hello"),
+            Some("longer than 12 bytes"),
+        ]);
+        assert_eq!(got, &want);
+    }
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_all_true() {
+        // Values longer than 12 bytes, all-true mask: only the truthy value appears
+        let when_true = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+        let selector = BooleanArray::from(vec![true, true]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![
+            Some("longer than 12 bytes"),
+            Some("longer than 12 bytes"),
+        ]);
+        assert_eq!(got, &want);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_all_false() {
+        // Values longer than 12 bytes, all-false mask: only the falsy value appears
+        let when_true = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let when_false = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+        let selector = BooleanArray::from(vec![false, false]);
+        let zipped = zip(&selector, &when_true, &when_false).unwrap();
+        let got = zipped.as_string_view();
+        let want = StringViewArray::from(vec![
+            Some("another longer than 12 bytes"),
+            Some("another longer than 12 bytes"),
+        ]);
+        assert_eq!(got, &want);
+    }
 }
diff --git a/arrow-string/Cargo.toml b/arrow-string/Cargo.toml
index 95aa289178d9..3045c355e48a 100644
--- a/arrow-string/Cargo.toml
+++ b/arrow-string/Cargo.toml
@@ -43,5 +43,5 @@ arrow-array = { workspace = true }
 arrow-select = { workspace = true }
 regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] }
 regex-syntax = { version = "0.8.0", default-features = false, features = ["unicode"] }
-num = { version = "0.4", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 memchr = "2.7.4"
diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 49fc244e72cc..de9aa5367058 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -78,10 +78,10 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
         DataType::Utf8View => {
             let list = array.as_string_view();
             let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
-            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
                 v.into(),
                 list.nulls().cloned(),
-            )))
+            )?))
         }
         DataType::Binary => {
             let list = array.as_binary::<i32>();
@@ -92,15 +92,15 @@ pub fn length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
             Ok(length_impl::<Int64Type>(list.offsets(), list.nulls()))
         }
         DataType::FixedSizeBinary(len) | DataType::FixedSizeList(_, len) => Ok(Arc::new(
-            Int32Array::new(vec![*len; array.len()].into(), array.nulls().cloned()),
+            Int32Array::try_new(vec![*len; array.len()].into(), array.nulls().cloned())?,
         )),
         DataType::BinaryView => {
             let list = array.as_binary_view();
             let v = list.views().iter().map(|v| *v as i32).collect::<Vec<_>>();
-            Ok(Arc::new(PrimitiveArray::<Int32Type>::new(
+            Ok(Arc::new(PrimitiveArray::<Int32Type>::try_new(
                 v.into(),
                 list.nulls().cloned(),
-            )))
+            )?))
         }
         other => Err(ArrowError::ComputeError(format!(
             "length not supported for {other:?}"
@@ -144,7 +144,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
                 .iter()
                 .map(|view| (*view as i32).wrapping_mul(8))
                 .collect();
-            Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
+            Ok(Arc::new(Int32Array::try_new(
+                values,
+                array.nulls().cloned(),
+            )?))
         }
         DataType::Binary => {
             let list = array.as_binary::<i32>();
@@ -154,10 +157,10 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
             let list = array.as_binary::<i64>();
             Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
         }
-        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::new(
+        DataType::FixedSizeBinary(len) => Ok(Arc::new(Int32Array::try_new(
             vec![*len * 8; array.len()].into(),
             array.nulls().cloned(),
-        ))),
+        )?)),
         other => Err(ArrowError::ComputeError(format!(
             "bit_length not supported for {other:?}"
         ))),
@@ -624,11 +627,7 @@ mod tests {
         let data: Vec<Option<&str>> = (0..TOTAL)
             .map(|n| {
                 let i = n % 5;
-                if i == 3 {
-                    None
-                } else {
-                    Some(v[i as usize])
-                }
+                if i == 3 { None } else { Some(v[i as usize]) }
             })
             .collect();
 
@@ -671,11 +670,7 @@ mod tests {
         let data: Vec<Option<&str>> = (0..TOTAL)
             .map(|n| {
                 let i = n % 5;
-                if i == 3 {
-                    None
-                } else {
-                    Some(v[i as usize])
-                }
+                if i == 3 { None } else { Some(v[i as usize]) }
             })
             .collect();
 
diff --git a/arrow-string/src/lib.rs b/arrow-string/src/lib.rs
index 4c90d783ff4d..77c8e6050aa9 100644
--- a/arrow-string/src/lib.rs
+++ b/arrow-string/src/lib.rs
@@ -21,7 +21,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 
 mod binary_like;
diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs
index 91ba94fd6027..55fe292ecb76 100644
--- a/arrow-string/src/like.rs
+++ b/arrow-string/src/like.rs
@@ -727,7 +727,9 @@ mod tests {
             "arrow",
             "arrow"
         ],
-        vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
+        vec![
+            "arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"
+        ],
         like,
         vec![true, true, true, false, false, true, false, false]
     );
@@ -817,7 +819,9 @@ mod tests {
             "arrow",
             "arrow"
         ],
-        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
+        vec![
+            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
+        ],
         starts_with,
         vec![true, false, false, false, true, false, false, false]
     );
@@ -864,7 +868,9 @@ mod tests {
             "arrow",
             "arrow"
         ],
-        vec!["arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"],
+        vec![
+            "arrow", "ar%", "row", "foo", "arr", "arrow_", "arrow_", ".*"
+        ],
         ends_with,
         vec![true, false, true, false, false, false, false, false]
     );
@@ -1155,7 +1161,9 @@ mod tests {
         ],
         "FFkoSS%",
         ilike,
-        vec![false, true, true, false, false, false, false, true, true, false]
+        vec![
+            false, true, true, false, false, false, false, true, true, false
+        ]
     );
 
     test_utf8_scalar!(
@@ -1174,7 +1182,9 @@ mod tests {
         ],
         "%FFkoSS",
         ilike,
-        vec![false, true, true, false, false, false, false, true, true, true]
+        vec![
+            false, true, true, false, false, false, false, true, true, true
+        ]
     );
 
     test_utf8_scalar!(
@@ -1194,7 +1204,9 @@ mod tests {
         ],
         "%FFkoSS%",
         ilike,
-        vec![false, true, true, false, false, false, false, true, true, true, true]
+        vec![
+            false, true, true, false, false, false, false, true, true, true, true
+        ]
     );
 
     // Replicates `test_utf8_array_ilike_unicode_contains` and
@@ -1219,7 +1231,9 @@ mod tests {
         ],
         "FFkoSS",
         contains,
-        vec![false, true, true, false, false, false, false, true, true, true, false]
+        vec![
+            false, true, true, false, false, false, false, true, true, true, false
+        ]
     );
 
     test_utf8_scalar!(
@@ -1239,7 +1253,9 @@ mod tests {
         ],
         "%FF__SS%",
         ilike,
-        vec![false, true, true, false, false, false, false, true, true, true, true]
+        vec![
+            false, true, true, false, false, false, false, true, true, true, true
+        ]
     );
 
     // 😈 is four bytes long.
@@ -1260,7 +1276,9 @@ mod tests {
         ],
         "%Ssh😈klF",
         like,
-        vec![false, false, false, false, false, false, false, true, true, false, false]
+        vec![
+            false, false, false, false, false, false, false, true, true, false, false
+        ]
     );
 
     test_utf8_scalar!(
diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs
index cdc0b8897d8e..aa281be53bd0 100644
--- a/arrow-string/src/regexp.rs
+++ b/arrow-string/src/regexp.rs
@@ -33,22 +33,6 @@ use regex::Regex;
 use std::collections::HashMap;
 use std::sync::Arc;
 
-/// Perform SQL `array ~ regex_array` operation on [`StringArray`] / [`LargeStringArray`].
-/// If `regex_array` element has an empty value, the corresponding result value is always true.
-///
-/// `flags_array` are optional [`StringArray`] / [`LargeStringArray`] flag, which allow
-/// special search modes, such as case insensitive and multi-line mode.
-/// See the documentation [here](https://docs.rs/regex/1.5.4/regex/#grouping-and-flags)
-/// for more information.
-#[deprecated(since = "54.0.0", note = "please use `regexp_is_match` instead")]
-pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
-    array: &GenericStringArray<OffsetSize>,
-    regex_array: &GenericStringArray<OffsetSize>,
-    flags_array: Option<&GenericStringArray<OffsetSize>>,
-) -> Result<BooleanArray, ArrowError> {
-    regexp_is_match(array, regex_array, flags_array)
-}
-
 /// Return BooleanArray indicating which strings in an array match an array of
 /// regular expressions.
 ///
@@ -164,19 +148,6 @@ where
     Ok(BooleanArray::from(data))
 }
 
-/// Perform SQL `array ~ regex_array` operation on [`StringArray`] /
-/// [`LargeStringArray`] and a scalar.
-///
-/// See the documentation on [`regexp_is_match_utf8`] for more details.
-#[deprecated(since = "54.0.0", note = "please use `regexp_is_match_scalar` instead")]
-pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
-    array: &GenericStringArray<OffsetSize>,
-    regex: &str,
-    flag: Option<&str>,
-) -> Result<BooleanArray, ArrowError> {
-    regexp_is_match_scalar(array, regex, flag)
-}
-
 /// Return BooleanArray indicating which strings in an array match a single regular expression.
 ///
 /// This is equivalent to the SQL `array ~ regex_array`, supporting
diff --git a/arrow-string/src/substring.rs b/arrow-string/src/substring.rs
index fa6a47147521..dc0d88afad11 100644
--- a/arrow-string/src/substring.rs
+++ b/arrow-string/src/substring.rs
@@ -25,7 +25,7 @@ use arrow_array::*;
 use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
-use num::Zero;
+use num_traits::Zero;
 use std::cmp::Ordering;
 use std::sync::Arc;
 
@@ -915,11 +915,7 @@ mod tests {
         let data: Vec<Option<&str>> = (0..TOTAL)
             .map(|n| {
                 let i = n % 5;
-                if i == 3 {
-                    None
-                } else {
-                    Some(v[i as usize])
-                }
+                if i == 3 { None } else { Some(v[i as usize]) }
             })
             .collect();
 
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 0be22561a50c..0c5a925ae330 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -84,7 +84,7 @@ canonical_extension_types = ["arrow-schema/canonical_extension_types"]
 
 [dev-dependencies]
 chrono = { workspace = true }
-criterion = { version = "0.6", default-features = false }
+criterion = { workspace = true, default-features = false }
 half = { version = "2.1", default-features = false }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 serde = { version = "1.0", default-features = false, features = ["derive"] }
@@ -120,9 +120,15 @@ harness = false
 required-features = ["test_utils"]
 
 [[bench]]
-name = "array_from_vec"
+name = "array_from"
 harness = false
 
+[[bench]]
+name = "array_iter"
+harness = false
+required-features = ["test_utils"]
+
+
 [[bench]]
 name = "builder"
 harness = false
@@ -166,7 +172,6 @@ name = "coalesce_kernels"
 harness = false
 required-features = ["test_utils"]
 
-
 [[bench]]
 name = "take_kernels"
 harness = false
@@ -177,6 +182,16 @@ name = "interleave_kernels"
 harness = false
 required-features = ["test_utils"]
 
+[[bench]]
+name = "merge_kernels"
+harness = false
+required-features = ["test_utils"]
+
+[[bench]]
+name = "zip_kernels"
+harness = false
+required-features = ["test_utils"]
+
 [[bench]]
 name = "length_kernel"
 harness = false
@@ -295,6 +310,11 @@ name = "lexsort"
 harness = false
 required-features = ["test_utils"]
 
+[[bench]]
+name = "nullif_kernel"
+harness = false
+required-features = ["test_utils"]
+
 [[test]]
 name = "csv"
 required-features = ["csv", "chrono-tz"]
diff --git a/arrow/README.md b/arrow/README.md
index 64d9eb980e60..7397db8c8a0c 100644
--- a/arrow/README.md
+++ b/arrow/README.md
@@ -29,6 +29,14 @@ There are several [examples](https://github.com/apache/arrow-rs/tree/main/arrow/
 
 The API documentation for most recent, unreleased code is available [here](https://arrow.apache.org/rust/arrow/index.html).
 
+## Arrow Implementation Status
+
+Please see the [Implementation Status Page] on the Apache Arrow website for which
+Arrow features are supported by this crate.
+
+[Implementation Status Page]: https://arrow.apache.org/docs/status.html
+
+
 ## Rust Version Compatibility
 
 This crate is tested with the latest stable version of Rust. We do not currently test against other, older versions.
diff --git a/arrow/benches/aggregate_kernels.rs b/arrow/benches/aggregate_kernels.rs
index 25dbe3548496..baf90e22962d 100644
--- a/arrow/benches/aggregate_kernels.rs
+++ b/arrow/benches/aggregate_kernels.rs
@@ -25,7 +25,7 @@ extern crate arrow;
 use arrow::compute::kernels::aggregate::*;
 use arrow::util::bench_util::*;
 use arrow::{array::*, datatypes::Float32Type};
-use arrow_array::types::{Float64Type, Int16Type, Int32Type, Int64Type, Int8Type};
+use arrow_array::types::{Float64Type, Int8Type, Int16Type, Int32Type, Int64Type};
 
 const BATCH_SIZE: usize = 64 * 1024;
 
diff --git a/arrow/benches/array_data_validate.rs b/arrow/benches/array_data_validate.rs
index 531462f2d8b5..33d000d14bd8 100644
--- a/arrow/benches/array_data_validate.rs
+++ b/arrow/benches/array_data_validate.rs
@@ -53,7 +53,7 @@ fn validate_benchmark(c: &mut Criterion) {
         b.iter(|| validate_utf8_array(&str_arr))
     });
 
-    let byte_array = BinaryArray::from_iter_values(std::iter::repeat(b"test").take(20000));
+    let byte_array = BinaryArray::from_iter_values(std::iter::repeat_n(b"test", 20000));
     c.bench_function("byte_array_to_string_array 20000", |b| {
         b.iter(|| StringArray::from(BinaryArray::from(byte_array.to_data())))
     });
diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from.rs
similarity index 63%
rename from arrow/benches/array_from_vec.rs
rename to arrow/benches/array_from.rs
index 2850eae5d718..575a8280f652 100644
--- a/arrow/benches/array_from_vec.rs
+++ b/arrow/benches/array_from.rs
@@ -15,16 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
+extern crate arrow;
 #[macro_use]
 extern crate criterion;
 
 use criterion::Criterion;
 
-extern crate arrow;
-
 use arrow::array::*;
 use arrow_buffer::i256;
 use rand::Rng;
+use std::iter::repeat_n;
 use std::{hint, sync::Arc};
 
 fn array_from_vec(n: usize) {
@@ -73,6 +73,28 @@ fn struct_array_from_vec(
     hint::black_box(StructArray::try_from(vec![(field1, strings), (field2, ints)]).unwrap());
 }
 
+fn decimal32_array_from_vec(array: &[Option<i32>]) {
+    hint::black_box(
+        array
+            .iter()
+            .copied()
+            .collect::<Decimal32Array>()
+            .with_precision_and_scale(9, 2)
+            .unwrap(),
+    );
+}
+
+fn decimal64_array_from_vec(array: &[Option<i64>]) {
+    hint::black_box(
+        array
+            .iter()
+            .copied()
+            .collect::<Decimal64Array>()
+            .with_precision_and_scale(17, 2)
+            .unwrap(),
+    );
+}
+
 fn decimal128_array_from_vec(array: &[Option<i128>]) {
     hint::black_box(
         array
@@ -95,7 +117,31 @@ fn decimal256_array_from_vec(array: &[Option<i256>]) {
     );
 }
 
-fn decimal_benchmark(c: &mut Criterion) {
+fn array_from_vec_decimal_benchmark(c: &mut Criterion) {
+    // bench decimal32 array
+    // create option<i32> array
+    let size: usize = 1 << 15;
+    let mut rng = rand::rng();
+    let mut array = vec![];
+    for _ in 0..size {
+        array.push(Some(rng.random_range::<i32, _>(0..99999999)));
+    }
+    c.bench_function("decimal32_array_from_vec 32768", |b| {
+        b.iter(|| decimal32_array_from_vec(array.as_slice()))
+    });
+
+    // bench decimal64 array
+    // create option<i64> array
+    let size: usize = 1 << 15;
+    let mut rng = rand::rng();
+    let mut array = vec![];
+    for _ in 0..size {
+        array.push(Some(rng.random_range::<i64, _>(0..9999999999)));
+    }
+    c.bench_function("decimal64_array_from_vec 32768", |b| {
+        b.iter(|| decimal64_array_from_vec(array.as_slice()))
+    });
+
     // bench decimal128 array
     // create option<i128> array
     let size: usize = 1 << 15;
@@ -124,7 +170,7 @@ fn decimal_benchmark(c: &mut Criterion) {
     });
 }
 
-fn criterion_benchmark(c: &mut Criterion) {
+fn array_from_vec_benchmark(c: &mut Criterion) {
     c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
     c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
     c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
@@ -160,5 +206,48 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, criterion_benchmark, decimal_benchmark);
+fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
+    hint::black_box(
+        repeat_n(item, len)
+            .enumerate()
+            .map(|(idx, item)| if idx % 3 == 0 { None } else { Some(item) })
+            .collect(),
+    )
+}
+
+fn from_iter_benchmark(c: &mut Criterion) {
+    const ITER_LEN: usize = 16_384;
+
+    // All ArrowPrimitiveType use the same implementation
+    c.bench_function("Int64Array::from_iter", |b| {
+        let values = gen_option_vector(1, ITER_LEN);
+        b.iter(|| hint::black_box(Int64Array::from_iter(values.iter())));
+    });
+    c.bench_function("Int64Array::from_trusted_len_iter", |b| {
+        let values = gen_option_vector(1, ITER_LEN);
+        b.iter(|| unsafe {
+            // SAFETY: values.iter() is a TrustedLenIterator
+            hint::black_box(Int64Array::from_trusted_len_iter(values.iter()))
+        });
+    });
+
+    c.bench_function("BooleanArray::from_iter", |b| {
+        let values = gen_option_vector(true, ITER_LEN);
+        b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
+    });
+    c.bench_function("BooleanArray::from_trusted_len_iter", |b| {
+        let values = gen_option_vector(true, ITER_LEN);
+        b.iter(|| unsafe {
+            // SAFETY: values.iter() is a TrustedLenIterator
+            hint::black_box(BooleanArray::from_trusted_len_iter(values.iter()))
+        });
+    });
+}
+
+criterion_group!(
+    benches,
+    array_from_vec_benchmark,
+    array_from_vec_decimal_benchmark,
+    from_iter_benchmark
+);
 criterion_main!(benches);
diff --git a/arrow/benches/array_iter.rs b/arrow/benches/array_iter.rs
new file mode 100644
index 000000000000..14738196bf40
--- /dev/null
+++ b/arrow/benches/array_iter.rs
@@ -0,0 +1,305 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate arrow;
+#[macro_use]
+extern crate criterion;
+
+use criterion::{Criterion, Throughput};
+use std::hint;
+
+use arrow::array::*;
+use arrow::util::bench_util::*;
+use arrow_array::types::{Int8Type, Int16Type, Int32Type, Int64Type};
+
+const BATCH_SIZE: usize = 64 * 1024;
+
+/// Run [`ArrayIter::fold`] while using black_box on each item and the result of the cb to prevent compiler optimizations.
+fn fold_black_box_item_and_cb_res<ArrayAcc, F, B>(array: ArrayAcc, init: B, mut f: F)
+where
+    ArrayAcc: ArrayAccessor,
+    F: FnMut(B, Option<ArrayAcc::Item>) -> B,
+{
+    let result = ArrayIter::new(array).fold(hint::black_box(init), |acc, item| {
+        let res = f(acc, hint::black_box(item));
+        hint::black_box(res)
+    });
+
+    hint::black_box(result);
+}
+/// Run [`ArrayIter::fold`] while using black_box on each item to prevent compiler optimizations.
+fn fold_black_box_item<ArrayAcc, F, B>(array: ArrayAcc, init: B, mut f: F)
+where
+    ArrayAcc: ArrayAccessor,
+    F: FnMut(B, Option<ArrayAcc::Item>) -> B,
+{
+    let result = ArrayIter::new(array).fold(hint::black_box(init), |acc, item| {
+        f(acc, hint::black_box(item))
+    });
+
+    hint::black_box(result);
+}
+
+/// Run [`ArrayIter::fold`] without using black_box on each item, but only on the result
+/// to see if the compiler can do more optimizations.
+fn fold_black_box_result<ArrayAcc, F, B>(array: ArrayAcc, init: B, f: F)
+where
+    ArrayAcc: ArrayAccessor,
+    F: FnMut(B, Option<ArrayAcc::Item>) -> B,
+{
+    let result = ArrayIter::new(array).fold(hint::black_box(init), f);
+
+    hint::black_box(result);
+}
+
+/// Run [`ArrayIter::any`] while using black_box on each item and the predicate return value to prevent compiler optimizations.
+fn any_black_box_item_and_predicate<ArrayAcc>(
+    array: ArrayAcc,
+    mut any_predicate: impl FnMut(Option<ArrayAcc::Item>) -> bool,
+) where
+    ArrayAcc: ArrayAccessor,
+{
+    let any_res = ArrayIter::new(array).any(|item| {
+        let item = hint::black_box(item);
+        let res = any_predicate(item);
+        hint::black_box(res)
+    });
+
+    hint::black_box(any_res);
+}
+
+/// Run [`ArrayIter::any`] without using black_box in the loop, but only on the result
+/// to see if the compiler can do more optimizations.
+fn any_black_box_result<ArrayAcc>(
+    array: ArrayAcc,
+    any_predicate: impl FnMut(Option<ArrayAcc::Item>) -> bool,
+) where
+    ArrayAcc: ArrayAccessor,
+{
+    let any_res = ArrayIter::new(array).any(any_predicate);
+
+    hint::black_box(any_res);
+}
+
+/// Benchmark [`ArrayIter`] functions.
+///
+/// The passed `predicate_that_will_always_evaluate_to_false` function should be a predicate
+/// that always returns `false` to ensure that the full array is always iterated over.
+///
+/// The predicate function should:
+/// 1. always return false
+/// 2. be impossible for the compiler to optimize away
+/// 3. not use `hint::black_box` internally (unless impossible) to allow for more compiler optimizations
+///
+/// The way to achieve this is to make the predicate check for a value that is not present in the array.
+///
+/// The reason for these requirements is that we want to iterate over the entire array while
+/// letting the compiler have room for optimizations so it will be more representative of real world usage.
+fn benchmark_array_iter<ArrayAcc, FoldFn, FoldInit>(
+    c: &mut Criterion,
+    name: &str,
+    nonnull_array: ArrayAcc,
+    nullable_array: ArrayAcc,
+    fold_init: FoldInit,
+    fold_fn: FoldFn,
+    predicate_that_will_always_evaluate_to_false: impl Fn(Option<ArrayAcc::Item>) -> bool,
+) where
+    ArrayAcc: ArrayAccessor + Copy,
+    FoldInit: Copy,
+    FoldFn: Fn(FoldInit, Option<ArrayAcc::Item>) -> FoldInit,
+{
+    let predicate_that_will_always_evaluate_to_false =
+        &predicate_that_will_always_evaluate_to_false;
+    let fold_fn = &fold_fn;
+
+    // Sanity check: the "always false" predicate must return false for every item
+    {
+        let found = ArrayIter::new(nonnull_array).any(predicate_that_will_always_evaluate_to_false);
+        assert!(!found, "The predicate must always evaluate to false");
+    }
+    {
+        let found =
+            ArrayIter::new(nullable_array).any(predicate_that_will_always_evaluate_to_false);
+        assert!(!found, "The predicate must always evaluate to false");
+    }
+
+    c.benchmark_group(name)
+        .throughput(Throughput::Elements(BATCH_SIZE as u64))
+        // Most of the Rust default iterator functions are implemented on top of 2 functions:
+        // `fold` and `try_fold`
+        // so we are benchmarking `fold` first
+        .bench_function("nonnull fold black box item and fold result", |b| {
+            b.iter(|| fold_black_box_item_and_cb_res(nonnull_array, fold_init, fold_fn))
+        })
+        .bench_function("nonnull fold black box item", |b| {
+            b.iter(|| fold_black_box_item(nonnull_array, fold_init, fold_fn))
+        })
+        .bench_function("nonnull fold black box only result", |b| {
+            b.iter(|| fold_black_box_result(nonnull_array, fold_init, fold_fn))
+        })
+        .bench_function("null fold black box item and fold result", |b| {
+            b.iter(|| fold_black_box_item_and_cb_res(nullable_array, fold_init, fold_fn))
+        })
+        .bench_function("null fold black box item", |b| {
+            b.iter(|| fold_black_box_item(nullable_array, fold_init, fold_fn))
+        })
+        .bench_function("null fold black box only result", |b| {
+            b.iter(|| fold_black_box_result(nullable_array, fold_init, fold_fn))
+        })
+        // Due to `try_fold` not being available in stable Rust,
+        // we are benchmarking `any` instead, whose default Rust implementation
+        // uses `try_fold` under the hood.
+        .bench_function("nonnull any black box item and predicate", |b| {
+            b.iter(|| {
+                any_black_box_item_and_predicate(
+                    nonnull_array,
+                    predicate_that_will_always_evaluate_to_false,
+                )
+            })
+        })
+        .bench_function("nonnull any black box only result", |b| {
+            b.iter(|| {
+                any_black_box_result(nonnull_array, predicate_that_will_always_evaluate_to_false)
+            })
+        })
+        .bench_function("null any black box item and predicate", |b| {
+            b.iter(|| {
+                any_black_box_item_and_predicate(
+                    nullable_array,
+                    predicate_that_will_always_evaluate_to_false,
+                )
+            })
+        })
+        .bench_function("null any black box only result", |b| {
+            b.iter(|| {
+                any_black_box_result(nullable_array, predicate_that_will_always_evaluate_to_false)
+            })
+        });
+}
+
+/// Replace all occurrences of `item_to_replace` with `replace_with` in the given `PrimitiveArray`.
+/// Afterwards `item_to_replace` no longer occurs in the array, so a predicate searching for it never matches.
+fn replace_primitive_value<T>(
+    array: PrimitiveArray<T>,
+    item_to_replace: T::Native,
+    replace_with: T::Native,
+) -> PrimitiveArray<T>
+where
+    T: ArrowPrimitiveType,
+    <T as ArrowPrimitiveType>::Native: Eq,
+{
+    array.unary(|item| {
+        if item == item_to_replace {
+            replace_with
+        } else {
+            item
+        }
+    })
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    benchmark_array_iter(
+        c,
+        "int8",
+        &replace_primitive_value(create_primitive_array::<Int8Type>(BATCH_SIZE, 0.0), 42, 1),
+        &replace_primitive_value(create_primitive_array::<Int8Type>(BATCH_SIZE, 0.5), 42, 1),
+        // fold init
+        0i8,
+        // fold function
+        |acc, item| acc.wrapping_add(item.unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item == Some(42),
+    );
+    benchmark_array_iter(
+        c,
+        "int16",
+        &replace_primitive_value(create_primitive_array::<Int16Type>(BATCH_SIZE, 0.0), 42, 1),
+        &replace_primitive_value(create_primitive_array::<Int16Type>(BATCH_SIZE, 0.5), 42, 1),
+        // fold init
+        0i16,
+        // fold function
+        |acc, item| acc.wrapping_add(item.unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item == Some(42),
+    );
+    benchmark_array_iter(
+        c,
+        "int32",
+        &replace_primitive_value(create_primitive_array::<Int32Type>(BATCH_SIZE, 0.0), 42, 1),
+        &replace_primitive_value(create_primitive_array::<Int32Type>(BATCH_SIZE, 0.5), 42, 1),
+        // fold init
+        0i32,
+        // fold function
+        |acc, item| acc.wrapping_add(item.unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item == Some(42),
+    );
+    benchmark_array_iter(
+        c,
+        "int64",
+        &replace_primitive_value(create_primitive_array::<Int64Type>(BATCH_SIZE, 0.0), 42, 1),
+        &replace_primitive_value(create_primitive_array::<Int64Type>(BATCH_SIZE, 0.5), 42, 1),
+        // fold init
+        0i64,
+        // fold function
+        |acc, item| acc.wrapping_add(item.unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item == Some(42),
+    );
+
+    benchmark_array_iter(
+        c,
+        "string with len 16",
+        &create_string_array_with_len::<i32>(BATCH_SIZE, 0.0, 16),
+        &create_string_array_with_len::<i32>(BATCH_SIZE, 0.5, 16),
+        // fold init
+        0_usize,
+        // fold function
+        |acc, item| acc.wrapping_add(item.map(|item| item.len()).unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item.is_some_and(|item| item.is_empty()),
+    );
+
+    benchmark_array_iter(
+        c,
+        "string view with len 16",
+        &create_string_view_array_with_len(BATCH_SIZE, 0.0, 16, false),
+        &create_string_view_array_with_len(BATCH_SIZE, 0.5, 16, false),
+        // fold init
+        0_usize,
+        // fold function
+        |acc, item| acc.wrapping_add(item.map(|item| item.len()).unwrap_or_default()),
+        // predicate that will always evaluate to false while allowing us to avoid using hint::black_box and let the compiler optimize more
+        |item| item.is_some_and(|item| item.is_empty()),
+    );
+
+    benchmark_array_iter(
+        c,
+        "boolean mixed true and false",
+        &create_boolean_array(BATCH_SIZE, 0.0, 0.5),
+        &create_boolean_array(BATCH_SIZE, 0.5, 0.5),
+        // fold init
+        0_usize,
+        // fold function
+        |acc, item| acc.wrapping_add(item.unwrap_or_default() as usize),
+        // Must use black_box here as this can be optimized away
+        |_item| hint::black_box(false),
+    );
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/arrow/benches/boolean_append_packed.rs b/arrow/benches/boolean_append_packed.rs
index 508720eb346f..5bf98741bc83 100644
--- a/arrow/benches/boolean_append_packed.rs
+++ b/arrow/benches/boolean_append_packed.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use arrow::array::BooleanBufferBuilder;
-use criterion::{criterion_group, criterion_main, Criterion};
-use rand::{rng, Rng};
+use criterion::{Criterion, criterion_group, criterion_main};
+use rand::{Rng, rng};
 
 fn rand_bytes(len: usize) -> Vec<u8> {
     let mut rng = rng();
diff --git a/arrow/benches/boolean_kernels.rs b/arrow/benches/boolean_kernels.rs
index 8d1d51242aae..a7231c031aaf 100644
--- a/arrow/benches/boolean_kernels.rs
+++ b/arrow/benches/boolean_kernels.rs
@@ -40,31 +40,43 @@ fn bench_not(array: &BooleanArray) {
 }
 
 fn add_benchmark(c: &mut Criterion) {
+    // allocate arrays of 32K elements
     let size = 2usize.pow(15);
+
+    // Note we allocate all arrays before the benchmark to ensure the allocation of the arrays
+    // is not affected by allocations that happen during the benchmarked operation.
     let array1 = create_boolean_array(size, 0.0, 0.5);
     let array2 = create_boolean_array(size, 0.0, 0.5);
+
+    // Slice by 1 (not aligned to byte (8 bit) or word (64 bit) boundaries)
+    let offset = 1;
+    let array1_sliced_1 = array1.slice(offset, size - offset);
+    let array2_sliced_1 = array2.slice(offset, size - offset);
+
+    // Slice by 24 (aligned on byte (8 bit) but not word (64 bit) boundaries)
+    let offset = 24;
+    let array1_sliced_24 = array1.slice(offset, size - offset);
+    let array2_sliced_24 = array2.slice(offset, size - offset);
+
     c.bench_function("and", |b| b.iter(|| bench_and(&array1, &array2)));
     c.bench_function("or", |b| b.iter(|| bench_or(&array1, &array2)));
     c.bench_function("not", |b| b.iter(|| bench_not(&array1)));
 
-    let array1_slice = array1.slice(1, size - 1);
-    let array1_slice = array1_slice
-        .as_any()
-        .downcast_ref::<BooleanArray>()
-        .unwrap();
-    let array2_slice = array2.slice(1, size - 1);
-    let array2_slice = array2_slice
-        .as_any()
-        .downcast_ref::<BooleanArray>()
-        .unwrap();
+    c.bench_function("and_sliced_1", |b| {
+        b.iter(|| bench_and(&array1_sliced_1, &array2_sliced_1))
+    });
+    c.bench_function("or_sliced_1", |b| {
+        b.iter(|| bench_or(&array1_sliced_1, &array2_sliced_1))
+    });
+    c.bench_function("not_sliced_1", |b| b.iter(|| bench_not(&array1_sliced_1)));
 
-    c.bench_function("and_sliced", |b| {
-        b.iter(|| bench_and(array1_slice, array2_slice))
+    c.bench_function("and_sliced_24", |b| {
+        b.iter(|| bench_and(&array1_sliced_24, &array2_sliced_24))
     });
-    c.bench_function("or_sliced", |b| {
-        b.iter(|| bench_or(array1_slice, array2_slice))
+    c.bench_function("or_sliced_24", |b| {
+        b.iter(|| bench_or(&array1_sliced_24, &array2_sliced_24))
     });
-    c.bench_function("not_sliced", |b| b.iter(|| bench_not(array1_slice)));
+    c.bench_function("not_sliced_24", |b| b.iter(|| bench_not(&array1_sliced_24)));
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/benches/buffer_bit_ops.rs b/arrow/benches/buffer_bit_ops.rs
index b46544b1f8e7..c569224b0f9b 100644
--- a/arrow/benches/buffer_bit_ops.rs
+++ b/arrow/benches/buffer_bit_ops.rs
@@ -22,7 +22,7 @@ use criterion::{Criterion, Throughput};
 
 extern crate arrow;
 
-use arrow::buffer::{buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer};
+use arrow::buffer::{Buffer, MutableBuffer, buffer_bin_and, buffer_bin_or, buffer_unary_not};
 use std::hint;
 
 ///  Helper function to create arrays
diff --git a/arrow/benches/buffer_create.rs b/arrow/benches/buffer_create.rs
index 690327e86f68..be73b2ad218c 100644
--- a/arrow/benches/buffer_create.rs
+++ b/arrow/benches/buffer_create.rs
@@ -19,8 +19,8 @@
 extern crate criterion;
 use arrow::util::test_util::seedable_rng;
 use criterion::Criterion;
-use rand::distr::Uniform;
 use rand::Rng;
+use rand::distr::Uniform;
 
 extern crate arrow;
 
diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index 46dd18c0fa52..2374797961a1 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -108,6 +108,42 @@ fn bench_string(c: &mut Criterion) {
     group.finish();
 }
 
+fn bench_decimal32(c: &mut Criterion) {
+    c.bench_function("bench_decimal32_builder", |b| {
+        b.iter(|| {
+            let mut rng = rand::rng();
+            let mut decimal_builder = Decimal32Builder::with_capacity(BATCH_SIZE);
+            for _ in 0..BATCH_SIZE {
+                decimal_builder.append_value(rng.random_range::<i32, _>(0..999999999));
+            }
+            hint::black_box(
+                decimal_builder
+                    .finish()
+                    .with_precision_and_scale(9, 0)
+                    .unwrap(),
+            );
+        })
+    });
+}
+
+fn bench_decimal64(c: &mut Criterion) {
+    c.bench_function("bench_decimal64_builder", |b| {
+        b.iter(|| {
+            let mut rng = rand::rng();
+            let mut decimal_builder = Decimal64Builder::with_capacity(BATCH_SIZE);
+            for _ in 0..BATCH_SIZE {
+                decimal_builder.append_value(rng.random_range::<i64, _>(0..9999999999));
+            }
+            hint::black_box(
+                decimal_builder
+                    .finish()
+                    .with_precision_and_scale(18, 0)
+                    .unwrap(),
+            );
+        })
+    });
+}
+
 fn bench_decimal128(c: &mut Criterion) {
     c.bench_function("bench_decimal128_builder", |b| {
         b.iter(|| {
@@ -151,6 +187,8 @@ criterion_group!(
     bench_primitive_nulls,
     bench_bool,
     bench_string,
+    bench_decimal32,
+    bench_decimal64,
     bench_decimal128,
     bench_decimal256,
 );
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index d01031be5fd4..040c118a1e83 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -18,8 +18,8 @@
 #[macro_use]
 extern crate criterion;
 use criterion::Criterion;
-use rand::distr::{Distribution, StandardUniform, Uniform};
 use rand::Rng;
+use rand::distr::{Distribution, StandardUniform, Uniform};
 use std::hint;
 
 use chrono::DateTime;
@@ -83,6 +83,36 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
     Arc::new(builder.finish())
 }
 
+fn build_decimal32_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
+    let mut rng = seedable_rng();
+    let mut builder = Decimal32Builder::with_capacity(size);
+
+    for _ in 0..size {
+        builder.append_value(rng.random_range::<i32, _>(0..1000000));
+    }
+    Arc::new(
+        builder
+            .finish()
+            .with_precision_and_scale(precision, scale)
+            .unwrap(),
+    )
+}
+
+fn build_decimal64_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
+    let mut rng = seedable_rng();
+    let mut builder = Decimal64Builder::with_capacity(size);
+
+    for _ in 0..size {
+        builder.append_value(rng.random_range::<i64, _>(0..1000000000));
+    }
+    Arc::new(
+        builder
+            .finish()
+            .with_precision_and_scale(precision, scale)
+            .unwrap(),
+    )
+}
+
 fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
     let mut rng = seedable_rng();
     let mut builder = Decimal128Builder::with_capacity(size);
@@ -159,6 +189,8 @@ fn add_benchmark(c: &mut Criterion) {
     let utf8_date_array = build_utf8_date_array(512, true);
     let utf8_date_time_array = build_utf8_date_time_array(512, true);
 
+    let decimal32_array = build_decimal32_array(512, 9, 3);
+    let decimal64_array = build_decimal64_array(512, 10, 3);
     let decimal128_array = build_decimal128_array(512, 10, 3);
     let decimal256_array = build_decimal256_array(512, 50, 3);
     let string_array = build_string_array(512);
@@ -248,6 +280,22 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64))
     });
 
+    c.bench_function("cast decimal32 to decimal32 512", |b| {
+        b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(9, 4)))
+    });
+    c.bench_function("cast decimal32 to decimal32 512 lower precision", |b| {
+        b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(6, 5)))
+    });
+    c.bench_function("cast decimal32 to decimal64 512", |b| {
+        b.iter(|| cast_array(&decimal32_array, DataType::Decimal64(11, 5)))
+    });
+    c.bench_function("cast decimal64 to decimal32 512", |b| {
+        b.iter(|| cast_array(&decimal64_array, DataType::Decimal32(9, 2)))
+    });
+    c.bench_function("cast decimal64 to decimal64 512", |b| {
+        b.iter(|| cast_array(&decimal64_array, DataType::Decimal64(12, 4)))
+    });
+
     c.bench_function("cast decimal128 to decimal128 512", |b| {
         b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5)))
     });
@@ -311,6 +359,46 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("cast binary view to string view", |b| {
         b.iter(|| cast_array(&binary_view_array, DataType::Utf8View))
     });
+
+    c.bench_function("cast string single run to ree<int32>", |b| {
+        let source_array = StringArray::from(vec!["a"; 8192]);
+        let array_ref = Arc::new(source_array) as ArrayRef;
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Utf8, true)),
+        );
+        b.iter(|| cast(&array_ref, &target_type).unwrap());
+    });
+
+    c.bench_function("cast runs of 10 int32s to ree<int32>", |b| {
+        let source_array: Int32Array = (0..8192).map(|i| i / 10).collect(); // runs of 10
+        let array_ref = Arc::new(source_array) as ArrayRef;
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        b.iter(|| cast(&array_ref, &target_type).unwrap());
+    });
+
+    c.bench_function("cast runs of 1000 int32s to ree<int32>", |b| {
+        let source_array: Int32Array = (0..8192).map(|i| i / 1000).collect();
+        let array_ref = Arc::new(source_array) as ArrayRef;
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        b.iter(|| cast(&array_ref, &target_type).unwrap());
+    });
+
+    c.bench_function("cast no runs of int32s to ree<int32>", |b| {
+        let source_array: Int32Array = (0..8192).collect();
+        let array_ref = Arc::new(source_array) as ArrayRef;
+        let target_type = DataType::RunEndEncoded(
+            Arc::new(Field::new("run_ends", DataType::Int32, false)),
+            Arc::new(Field::new("values", DataType::Int32, true)),
+        );
+        b.iter(|| cast(&array_ref, &target_type).unwrap());
+    });
 }
 
 criterion_group!(benches, add_benchmark);
diff --git a/arrow/benches/coalesce_kernels.rs b/arrow/benches/coalesce_kernels.rs
index 1168d4b023cd..b85c5cc532db 100644
--- a/arrow/benches/coalesce_kernels.rs
+++ b/arrow/benches/coalesce_kernels.rs
@@ -21,10 +21,10 @@ use arrow::util::bench_util::*;
 use std::sync::Arc;
 
 use arrow::array::*;
-use arrow_array::types::{Float64Type, Int32Type};
-use arrow_schema::{DataType, Field, Schema, SchemaRef};
+use arrow_array::types::{Float64Type, Int32Type, TimestampNanosecondType};
+use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
 use arrow_select::coalesce::BatchCoalescer;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 
 /// Benchmarks for generating evently sized output RecordBatches
 /// from a sequence of filtered source batches
@@ -32,6 +32,17 @@ use criterion::{criterion_group, criterion_main, Criterion};
 fn add_all_filter_benchmarks(c: &mut Criterion) {
     let batch_size = 8192; // 8K rows is a commonly used size for batches
 
+    // Multiple primitive types
+    let primitive_schema = SchemaRef::new(Schema::new(vec![
+        Field::new("int32_val", DataType::Int32, true),
+        Field::new("float_val", DataType::Float64, true),
+        Field::new(
+            "timestamp_val",
+            DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())),
+            true,
+        ),
+    ]));
+
     // Single StringViewArray
     let single_schema = SchemaRef::new(Schema::new(vec![Field::new(
         "value",
@@ -70,6 +81,18 @@ fn add_all_filter_benchmarks(c: &mut Criterion) {
     for null_density in [0.0, 0.1] {
         // Selectivity: 0.1%, 1%, 10%, 80%
         for selectivity in [0.001, 0.01, 0.1, 0.8] {
+            FilterBenchmarkBuilder {
+                c,
+                name: "primitive",
+                batch_size,
+                num_output_batches: 50,
+                null_density,
+                selectivity,
+                max_string_len: 30,
+                schema: &primitive_schema,
+            }
+            .build();
+
             FilterBenchmarkBuilder {
                 c,
                 name: "single_utf8view",
@@ -413,6 +436,14 @@ impl DataStreamBuilder {
                 self.null_density,
                 seed,
             )),
+            DataType::Timestamp(TimeUnit::Nanosecond, Some(tz)) => Arc::new(
+                create_primitive_array_with_seed::<TimestampNanosecondType>(
+                    self.batch_size,
+                    self.null_density,
+                    seed,
+                )
+                .with_timezone(Arc::clone(tz)),
+            ),
             DataType::Utf8 => Arc::new(create_string_array::<i32>(
                 self.batch_size,
                 self.null_density,
diff --git a/arrow/benches/comparison_kernels.rs b/arrow/benches/comparison_kernels.rs
index 6a02deb41ad5..00c01374b625 100644
--- a/arrow/benches/comparison_kernels.rs
+++ b/arrow/benches/comparison_kernels.rs
@@ -27,8 +27,8 @@ use arrow_buffer::IntervalMonthDayNano;
 use arrow_string::like::*;
 use arrow_string::regexp::regexp_is_match_scalar;
 use criterion::Criterion;
-use rand::rngs::StdRng;
 use rand::Rng;
+use rand::rngs::StdRng;
 use std::hint;
 
 const SIZE: usize = 65536;
diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs
index dfa4f5992023..474b93737005 100644
--- a/arrow/benches/decimal_validate.rs
+++ b/arrow/benches/decimal_validate.rs
@@ -18,7 +18,10 @@
 #[macro_use]
 extern crate criterion;
 
-use arrow::array::{Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder};
+use arrow::array::{
+    Array, Decimal32Array, Decimal32Builder, Decimal64Array, Decimal64Builder, Decimal128Array,
+    Decimal128Builder, Decimal256Array, Decimal256Builder,
+};
 use criterion::Criterion;
 use rand::Rng;
 
@@ -26,6 +29,14 @@ extern crate arrow;
 
 use arrow_buffer::i256;
 
+fn validate_decimal32_array(array: Decimal32Array) {
+    array.with_precision_and_scale(8, 0).unwrap();
+}
+
+fn validate_decimal64_array(array: Decimal64Array) {
+    array.with_precision_and_scale(16, 0).unwrap();
+}
+
 fn validate_decimal128_array(array: Decimal128Array) {
     array.with_precision_and_scale(35, 0).unwrap();
 }
@@ -34,6 +45,46 @@ fn validate_decimal256_array(array: Decimal256Array) {
     array.with_precision_and_scale(35, 0).unwrap();
 }
 
+fn validate_decimal32_benchmark(c: &mut Criterion) {
+    let mut rng = rand::rng();
+    let size: i32 = 20000;
+    let mut decimal_builder = Decimal32Builder::with_capacity(size as usize);
+    for _ in 0..size {
+        decimal_builder.append_value(rng.random_range::<i32, _>(0..99999999));
+    }
+    let decimal_array = decimal_builder
+        .finish()
+        .with_precision_and_scale(9, 0)
+        .unwrap();
+    let data = decimal_array.into_data();
+    c.bench_function("validate_decimal32_array 20000", |b| {
+        b.iter(|| {
+            let array = Decimal32Array::from(data.clone());
+            validate_decimal32_array(array);
+        })
+    });
+}
+
+fn validate_decimal64_benchmark(c: &mut Criterion) {
+    let mut rng = rand::rng();
+    let size: i64 = 20000;
+    let mut decimal_builder = Decimal64Builder::with_capacity(size as usize);
+    for _ in 0..size {
+        decimal_builder.append_value(rng.random_range::<i64, _>(0..999999999999));
+    }
+    let decimal_array = decimal_builder
+        .finish()
+        .with_precision_and_scale(18, 0)
+        .unwrap();
+    let data = decimal_array.into_data();
+    c.bench_function("validate_decimal64_array 20000", |b| {
+        b.iter(|| {
+            let array = Decimal64Array::from(data.clone());
+            validate_decimal64_array(array);
+        })
+    });
+}
+
 fn validate_decimal128_benchmark(c: &mut Criterion) {
     let mut rng = rand::rng();
     let size: i128 = 20000;
@@ -78,6 +129,8 @@ fn validate_decimal256_benchmark(c: &mut Criterion) {
 
 criterion_group!(
     benches,
+    validate_decimal32_benchmark,
+    validate_decimal64_benchmark,
     validate_decimal128_benchmark,
     validate_decimal256_benchmark,
 );
diff --git a/arrow/benches/filter_kernels.rs b/arrow/benches/filter_kernels.rs
index 354fe606dd76..ff117f9d63f5 100644
--- a/arrow/benches/filter_kernels.rs
+++ b/arrow/benches/filter_kernels.rs
@@ -18,7 +18,7 @@ extern crate arrow;
 
 use std::sync::Arc;
 
-use arrow::compute::{filter_record_batch, FilterBuilder, FilterPredicate};
+use arrow::compute::{FilterBuilder, FilterPredicate, filter_record_batch};
 use arrow::util::bench_util::*;
 
 use arrow::array::*;
@@ -26,7 +26,7 @@ use arrow::compute::filter;
 use arrow::datatypes::{Field, Float32Type, Int32Type, Int64Type, Schema, UInt8Type};
 
 use arrow_array::types::Decimal128Type;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use std::hint;
 
 fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) {
diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs
index 60125a4ee364..8daf42a14414 100644
--- a/arrow/benches/interleave_kernels.rs
+++ b/arrow/benches/interleave_kernels.rs
@@ -30,6 +30,7 @@ use arrow::util::test_util::seedable_rng;
 use arrow::{array::*, util::bench_util::*};
 use arrow_select::interleave::interleave;
 use std::hint;
+use std::sync::Arc;
 
 fn do_bench(
     c: &mut Criterion,
@@ -74,11 +75,52 @@ fn add_benchmark(c: &mut Criterion) {
     let values = create_string_array_with_len::<i32>(10, 0.0, 20);
     let dict = create_dict_from_values::<Int32Type>(1024, 0.0, &values);
 
+    let struct_i32_no_nulls_i32_no_nulls = StructArray::new(
+        Fields::from(vec![
+            Field::new("a", Int32Type::DATA_TYPE, false),
+            Field::new("b", Int32Type::DATA_TYPE, false),
+        ]),
+        vec![
+            Arc::new(create_primitive_array::<Int32Type>(1024, 0.)),
+            Arc::new(create_primitive_array::<Int32Type>(1024, 0.)),
+        ],
+        None,
+    );
+
+    let struct_string_no_nulls_string_no_nulls = StructArray::new(
+        Fields::from(vec![
+            Field::new("a", DataType::Utf8, false),
+            Field::new("b", DataType::Utf8, false),
+        ]),
+        vec![
+            Arc::new(create_string_array_with_len::<i32>(1024, 0., 20)),
+            Arc::new(create_string_array_with_len::<i32>(1024, 0., 20)),
+        ],
+        None,
+    );
+
+    let struct_i32_no_nulls_string_no_nulls = StructArray::new(
+        Fields::from(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, false),
+        ]),
+        vec![
+            Arc::new(create_primitive_array::<Int32Type>(1024, 0.)),
+            Arc::new(create_string_array_with_len::<i32>(1024, 0., 20)),
+        ],
+        None,
+    );
+
     let values = create_string_array_with_len::<i32>(1024, 0.0, 20);
     let sparse_dict = create_sparse_dict_from_values::<Int32Type>(1024, 0.0, &values, 10..20);
 
     let string_view = create_string_view_array(1024, 0.0);
 
+    // use 8192 as a standard list size for better coverage
+    let list_i64 = create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42);
+    let list_i64_no_nulls =
+        create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);
+
     let cases: &[(&str, &dyn Array)] = &[
         ("i32(0.0)", &i32),
         ("i32(0.5)", &i32_opt),
@@ -87,6 +129,20 @@ fn add_benchmark(c: &mut Criterion) {
         ("dict(20, 0.0)", &dict),
         ("dict_sparse(20, 0.0)", &sparse_dict),
         ("str_view(0.0)", &string_view),
+        (
+            "struct(i32(0.0), i32(0.0))",
+            &struct_i32_no_nulls_i32_no_nulls,
+        ),
+        (
+            "struct(str(20, 0.0), str(20, 0.0))",
+            &struct_string_no_nulls_string_no_nulls,
+        ),
+        (
+            "struct(i32(0.0), str(20, 0.0))",
+            &struct_i32_no_nulls_string_no_nulls,
+        ),
+        ("list<i64>(0.1,0.1,20)", &list_i64),
+        ("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
     ];
 
     for (prefix, base) in cases {
diff --git a/arrow/benches/json_writer.rs b/arrow/benches/json_writer.rs
index ff76ecdd6253..c636c076ec9d 100644
--- a/arrow/benches/json_writer.rs
+++ b/arrow/benches/json_writer.rs
@@ -25,8 +25,9 @@ use arrow::util::bench_util::{
 use arrow::util::test_util::seedable_rng;
 use arrow_array::{Array, ListArray, RecordBatch, StructArray};
 use arrow_buffer::{BooleanBuffer, NullBuffer, OffsetBuffer};
-use arrow_json::LineDelimitedWriter;
+use arrow_json::{LineDelimitedWriter, ReaderBuilder};
 use rand::Rng;
+use serde::Serialize;
 use std::sync::Arc;
 
 const NUM_ROWS: usize = 65536;
@@ -181,6 +182,138 @@ fn bench_struct_list(c: &mut Criterion) {
     do_bench(c, "bench_struct_list", &batch)
 }
 
+fn do_number_to_string_bench<S: Serialize>(
+    name: &str,
+    c: &mut Criterion,
+    schema: Arc<Schema>,
+    rows: Vec<S>,
+) {
+    c.bench_function(name, |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_coerce_primitive(true) // important for coercion
+                .build_decoder()
+                .expect("Failed to build decoder");
+
+            decoder.serialize(&rows).expect("Failed to serialize rows");
+
+            decoder
+                .flush()
+                .expect("Failed to flush")
+                .expect("No RecordBatch produced");
+        })
+    });
+}
+
+fn bench_i64_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: i64,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Int64Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("i64_to_string", c, schema, rows)
+}
+
+fn bench_i32_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: i32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Int32Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("i32_to_string", c, schema, rows)
+}
+
+fn bench_f32_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: f32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Float32Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("f32_to_string", c, schema, rows)
+}
+
+fn bench_f64_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val: f64,
+    }
+
+    let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
+
+    let a_bunch_of_numbers = create_primitive_array::<Float64Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val: a_bunch_of_numbers.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("f64_to_string", c, schema, rows)
+}
+
+fn bench_mixed_numbers_to_string(c: &mut Criterion) {
+    #[derive(Serialize)]
+    struct TestRow {
+        val1: f64,
+        val2: f32,
+        val3: i64,
+        val4: i32,
+    }
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("val1", DataType::Utf8, false),
+        Field::new("val2", DataType::Utf8, false),
+        Field::new("val3", DataType::Utf8, false),
+        Field::new("val4", DataType::Utf8, false),
+    ]));
+
+    let f64_array = create_primitive_array::<Float64Type>(NUM_ROWS, 0.0);
+    let f32_array = create_primitive_array::<Float32Type>(NUM_ROWS, 0.0);
+    let i64_array = create_primitive_array::<Int64Type>(NUM_ROWS, 0.0);
+    let i32_array = create_primitive_array::<Int32Type>(NUM_ROWS, 0.0);
+
+    let rows: Vec<TestRow> = (0..NUM_ROWS)
+        .map(|i| TestRow {
+            val1: f64_array.value(i),
+            val2: f32_array.value(i),
+            val3: i64_array.value(i),
+            val4: i32_array.value(i),
+        })
+        .collect();
+
+    do_number_to_string_bench("mixed_numbers_to_string", c, schema, rows)
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     bench_integer(c);
     bench_float(c);
@@ -192,6 +325,11 @@ fn criterion_benchmark(c: &mut Criterion) {
     bench_list(c);
     bench_nullable_list(c);
     bench_struct_list(c);
+    bench_f64_to_string(c);
+    bench_f32_to_string(c);
+    bench_i64_to_string(c);
+    bench_i32_to_string(c);
+    bench_mixed_numbers_to_string(c);
 }
 
 criterion_group!(benches, criterion_benchmark);
diff --git a/arrow/benches/lexsort.rs b/arrow/benches/lexsort.rs
index 6e6f607f7b3f..16a2606b919a 100644
--- a/arrow/benches/lexsort.rs
+++ b/arrow/benches/lexsort.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::compute::{lexsort_to_indices, SortColumn};
+use arrow::compute::{SortColumn, lexsort_to_indices};
 use arrow::row::{RowConverter, SortField};
 use arrow::util::bench_util::{
     create_dict_from_values, create_primitive_array, create_string_array_with_len,
@@ -24,7 +24,7 @@ use arrow::util::data_gen::create_random_array;
 use arrow_array::types::Int32Type;
 use arrow_array::{Array, ArrayRef, UInt32Array};
 use arrow_schema::{DataType, Field};
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use std::{hint, sync::Arc};
 
 #[derive(Copy, Clone)]
diff --git a/arrow/benches/merge_kernels.rs b/arrow/benches/merge_kernels.rs
new file mode 100644
index 000000000000..f7a7fe1f8f67
--- /dev/null
+++ b/arrow/benches/merge_kernels.rs
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::measurement::WallTime;
+use criterion::{BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main};
+use rand::distr::{Distribution, StandardUniform};
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::hint;
+use std::sync::Arc;
+
+use arrow::array::*;
+use arrow::datatypes::*;
+use arrow::util::bench_util::*;
+use arrow_select::merge::merge;
+
+trait InputGenerator {
+    fn name(&self) -> &str;
+
+    /// Return an ArrayRef containing a single null value
+    fn generate_scalar_with_null_value(&self) -> ArrayRef;
+
+    /// Generate `number_of_scalars` non-null scalars, each returned as its own `ArrayRef`
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef>;
+
+    /// Generate an array with the specified length and null percentage
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef;
+}
+
+struct GeneratePrimitive<T: ArrowPrimitiveType> {
+    description: String,
+    _marker: std::marker::PhantomData<T>,
+}
+
+impl<T> InputGenerator for GeneratePrimitive<T>
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    fn name(&self) -> &str {
+        self.description.as_str()
+    }
+
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&T::DATA_TYPE, 1)
+    }
+
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        let rng = StdRng::seed_from_u64(seed);
+
+        rng.sample_iter::<T::Native, _>(StandardUniform)
+            .take(number_of_scalars)
+            .map(|v: T::Native| {
+                Arc::new(PrimitiveArray::<T>::new_scalar(v).into_inner()) as ArrayRef
+            })
+            .collect()
+    }
+
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        Arc::new(create_primitive_array_with_seed::<T>(
+            array_length,
+            null_percentage,
+            seed,
+        ))
+    }
+}
+
+struct GenerateBytes<Byte: ByteArrayType> {
+    range_length: std::ops::Range<usize>,
+    description: String,
+
+    _marker: std::marker::PhantomData<Byte>,
+}
+
+impl<Byte> InputGenerator for GenerateBytes<Byte>
+where
+    Byte: ByteArrayType,
+{
+    fn name(&self) -> &str {
+        self.description.as_str()
+    }
+
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&Byte::DATA_TYPE, 1)
+    }
+
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        let array = self.generate_array(seed, number_of_scalars, 0.0);
+
+        (0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
+    }
+
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        let is_binary =
+            Byte::DATA_TYPE == DataType::Binary || Byte::DATA_TYPE == DataType::LargeBinary;
+        if is_binary {
+            Arc::new(create_binary_array_with_len_range_and_prefix_and_seed::<
+                Byte::Offset,
+            >(
+                array_length,
+                null_percentage,
+                self.range_length.start,
+                self.range_length.end - 1,
+                &[],
+                seed,
+            ))
+        } else {
+            Arc::new(create_string_array_with_len_range_and_prefix_and_seed::<
+                Byte::Offset,
+            >(
+                array_length,
+                null_percentage,
+                self.range_length.start,
+                self.range_length.end - 1,
+                "",
+                seed,
+            ))
+        }
+    }
+}
+
+fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
+    vec![
+        ("all_true", create_boolean_array(len, 0.0, 1.0)),
+        ("99pct_true", create_boolean_array(len, 0.0, 0.99)),
+        ("90pct_true", create_boolean_array(len, 0.0, 0.9)),
+        ("50pct_true", create_boolean_array(len, 0.0, 0.5)),
+        ("10pct_true", create_boolean_array(len, 0.0, 0.1)),
+        ("1pct_true", create_boolean_array(len, 0.0, 0.01)),
+        ("all_false", create_boolean_array(len, 0.0, 0.0)),
+        ("50pct_nulls", create_boolean_array(len, 0.5, 0.5)),
+    ]
+}
+
+fn bench_merge_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) {
+    const ARRAY_LEN: usize = 8192;
+
+    let mut group =
+        c.benchmark_group(format!("merge_{ARRAY_LEN}_from_{}", input_generator.name()).as_str());
+
+    let null_scalar = input_generator.generate_scalar_with_null_value();
+    let [non_null_scalar_1, non_null_scalar_2]: [_; 2] = input_generator
+        .generate_non_null_scalars(42, 2)
+        .try_into()
+        .unwrap();
+
+    // For simplicity, we generate arrays with length ARRAY_LEN. Not all input values will be used.
+    let array_1_10pct_nulls = input_generator.generate_array(42, ARRAY_LEN, 0.1);
+    let array_2_10pct_nulls = input_generator.generate_array(18, ARRAY_LEN, 0.1);
+
+    let masks = mask_cases(ARRAY_LEN);
+
+    // Benchmarks for different scalar combinations
+    for (description, truthy, falsy) in &[
+        ("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1),
+        (
+            "non_null_scalar_vs_null_scalar",
+            &non_null_scalar_1,
+            &null_scalar,
+        ),
+        ("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2),
+    ] {
+        bench_merge_input_on_all_masks(
+            description,
+            &mut group,
+            &masks,
+            &Scalar::new(truthy),
+            &Scalar::new(falsy),
+        );
+    }
+
+    bench_merge_input_on_all_masks(
+        "array_vs_non_null_scalar",
+        &mut group,
+        &masks,
+        &array_1_10pct_nulls,
+        &non_null_scalar_1,
+    );
+
+    bench_merge_input_on_all_masks(
+        "non_null_scalar_vs_array",
+        &mut group,
+        &masks,
+        &non_null_scalar_1,
+        &array_1_10pct_nulls,
+    );
+
+    bench_merge_input_on_all_masks(
+        "array_vs_array",
+        &mut group,
+        &masks,
+        &array_1_10pct_nulls,
+        &array_2_10pct_nulls,
+    );
+
+    group.finish();
+}
+
+fn bench_merge_input_on_all_masks(
+    description: &str,
+    group: &mut BenchmarkGroup<WallTime>,
+    masks: &[(&str, BooleanArray)],
+    truthy: &impl Datum,
+    falsy: &impl Datum,
+) {
+    for (mask_description, mask) in masks {
+        let id = BenchmarkId::new(description, mask_description);
+        group.bench_with_input(id, mask, |b, mask| {
+            b.iter(|| hint::black_box(merge(mask, truthy, falsy)))
+        });
+    }
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    // Primitive
+    bench_merge_on_input_generator(
+        c,
+        &GeneratePrimitive::<Int32Type> {
+            description: "i32".to_string(),
+            _marker: std::marker::PhantomData,
+        },
+    );
+
+    // Short strings
+    bench_merge_on_input_generator(
+        c,
+        &GenerateBytes::<GenericStringType<i32>> {
+            description: "short strings (3..10)".to_string(),
+            range_length: 3..10,
+            _marker: std::marker::PhantomData,
+        },
+    );
+
+    // Long strings
+    bench_merge_on_input_generator(
+        c,
+        &GenerateBytes::<GenericStringType<i32>> {
+            description: "long strings (100..400)".to_string(),
+            range_length: 100..400,
+            _marker: std::marker::PhantomData,
+        },
+    );
+
+    // Short Bytes
+    bench_merge_on_input_generator(
+        c,
+        &GenerateBytes::<GenericBinaryType<i32>> {
+            description: "short bytes (3..10)".to_string(),
+            range_length: 3..10,
+            _marker: std::marker::PhantomData,
+        },
+    );
+
+    // Long Bytes
+    bench_merge_on_input_generator(
+        c,
+        &GenerateBytes::<GenericBinaryType<i32>> {
+            description: "long bytes (100..400)".to_string(),
+            range_length: 100..400,
+            _marker: std::marker::PhantomData,
+        },
+    );
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/arrow/benches/nullif_kernel.rs b/arrow/benches/nullif_kernel.rs
new file mode 100644
index 000000000000..61ae7d4eea56
--- /dev/null
+++ b/arrow/benches/nullif_kernel.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[macro_use]
+extern crate criterion;
+use criterion::Criterion;
+
+use arrow::util::bench_util::{create_boolean_array, create_primitive_array};
+
+use arrow::array::*;
+use arrow_array::types::Int64Type;
+use arrow_select::nullif::nullif;
+use std::hint;
+
+fn bench_nullif(left: &dyn Array, right: &BooleanArray) {
+    hint::black_box(nullif(left, right).unwrap());
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    let size = 8192usize;
+
+    // create input before benchmark to ensure allocations are consistent
+    let int64_no_nulls = create_primitive_array::<Int64Type>(size, 0.0);
+    let int64_nulls = create_primitive_array::<Int64Type>(size, 0.1);
+
+    let mask_10 = create_boolean_array(size, 0.0, 0.1);
+    let mask_10_sliced = create_boolean_array(size + 7, 0.0, 0.1).slice(7, size);
+    let mask_1 = create_boolean_array(size, 0.0, 0.01);
+
+    c.bench_function("nullif no-nulls mask(10%)", |b| {
+        b.iter(|| bench_nullif(&int64_no_nulls, &mask_10))
+    });
+    c.bench_function("nullif no-nulls mask(10%, sliced)", |b| {
+        b.iter(|| bench_nullif(&int64_no_nulls, &mask_10_sliced))
+    });
+    c.bench_function("nullif no-nulls mask(1%)", |b| {
+        b.iter(|| bench_nullif(&int64_no_nulls, &mask_1))
+    });
+
+    c.bench_function("nullif nulls mask(10%)", |b| {
+        b.iter(|| bench_nullif(&int64_nulls, &mask_10))
+    });
+    c.bench_function("nullif nulls mask(10%, sliced)", |b| {
+        b.iter(|| bench_nullif(&int64_nulls, &mask_10_sliced))
+    });
+    c.bench_function("nullif nulls mask(1%)", |b| {
+        b.iter(|| bench_nullif(&int64_nulls, &mask_1))
+    });
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/arrow/benches/partition_kernels.rs b/arrow/benches/partition_kernels.rs
index 82de6e0f00ba..f150d155c317 100644
--- a/arrow/benches/partition_kernels.rs
+++ b/arrow/benches/partition_kernels.rs
@@ -20,7 +20,7 @@ extern crate criterion;
 use criterion::Criterion;
 use std::sync::Arc;
 extern crate arrow;
-use arrow::compute::kernels::sort::{lexsort, SortColumn};
+use arrow::compute::kernels::sort::{SortColumn, lexsort};
 use arrow::util::bench_util::*;
 use arrow::{
     array::*,
@@ -28,7 +28,7 @@ use arrow::{
 };
 use arrow_ord::partition::partition;
 use rand::distr::{Distribution, StandardUniform};
-use std::{hint, iter};
+use std::hint;
 
 fn create_array<T: ArrowPrimitiveType>(size: usize, with_nulls: bool) -> ArrayRef
 where
@@ -45,11 +45,10 @@ fn bench_partition(sorted_columns: &[ArrayRef]) {
 
 fn create_sorted_low_cardinality_data(length: usize) -> Vec<ArrayRef> {
     let arr = Int64Array::from_iter_values(
-        iter::repeat(1)
-            .take(length / 4)
-            .chain(iter::repeat(2).take(length / 4))
-            .chain(iter::repeat(3).take(length / 4))
-            .chain(iter::repeat(4).take(length / 4)),
+        std::iter::repeat_n(1, length / 4)
+            .chain(std::iter::repeat_n(2, length / 4))
+            .chain(std::iter::repeat_n(3, length / 4))
+            .chain(std::iter::repeat_n(4, length / 4)),
     );
     lexsort(
         &[SortColumn {
diff --git a/arrow/benches/primitive_run_accessor.rs b/arrow/benches/primitive_run_accessor.rs
index 10c1e9ff39a9..a3cd20434060 100644
--- a/arrow/benches/primitive_run_accessor.rs
+++ b/arrow/benches/primitive_run_accessor.rs
@@ -18,7 +18,7 @@
 use arrow::datatypes::Int32Type;
 use arrow::{array::PrimitiveArray, util::bench_util::create_primitive_run_array};
 use arrow_array::ArrayAccessor;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 
 fn criterion_benchmark(c: &mut Criterion) {
     let mut group = c.benchmark_group("primitive_run_accessor");
diff --git a/arrow/benches/primitive_run_take.rs b/arrow/benches/primitive_run_take.rs
index 8e5dbced62bd..c394c37c6ccf 100644
--- a/arrow/benches/primitive_run_take.rs
+++ b/arrow/benches/primitive_run_take.rs
@@ -21,7 +21,7 @@ use arrow::datatypes::{Int32Type, Int64Type};
 use arrow::util::bench_util::*;
 use arrow::util::test_util::seedable_rng;
 use arrow_array::UInt32Array;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use rand::Rng;
 use std::hint;
 
diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs
index 00e5f52ca958..1c120bb2f24e 100644
--- a/arrow/benches/row_format.rs
+++ b/arrow/benches/row_format.rs
@@ -23,11 +23,16 @@ use arrow::array::ArrayRef;
 use arrow::datatypes::{Int64Type, UInt64Type};
 use arrow::row::{RowConverter, SortField};
 use arrow::util::bench_util::{
-    create_boolean_array, create_dict_from_values, create_primitive_array,
-    create_string_array_with_len, create_string_dict_array, create_string_view_array_with_len,
+    create_boolean_array, create_boolean_array_with_seed, create_dict_from_values,
+    create_f64_array_with_seed, create_primitive_array, create_primitive_array_with_seed,
+    create_string_array_with_len, create_string_array_with_len_range_and_prefix_and_seed,
+    create_string_dict_array, create_string_view_array_with_len,
+    create_string_view_array_with_max_len,
 };
-use arrow_array::types::Int32Type;
+use arrow::util::data_gen::create_random_array;
 use arrow_array::Array;
+use arrow_array::types::{Int8Type, Int32Type};
+use arrow_schema::{DataType, Field};
 use criterion::Criterion;
 use std::{hint, sync::Arc};
 
@@ -54,6 +59,16 @@ fn do_bench(c: &mut Criterion, name: &str, cols: Vec<ArrayRef>) {
     c.bench_function(&format!("convert_rows {name}"), |b| {
         b.iter(|| hint::black_box(converter.convert_rows(&rows).unwrap()));
     });
+
+    let mut rows = converter.empty_rows(0, 0);
+    c.bench_function(&format!("append_rows {name}"), |b| {
+        let cols = cols.clone();
+        b.iter(|| {
+            rows.clear();
+            converter.append(&mut rows, &cols).unwrap();
+            hint::black_box(&mut rows);
+        });
+    });
 }
 
 fn bench_iter(c: &mut Criterion) {
@@ -72,6 +87,102 @@ fn bench_iter(c: &mut Criterion) {
     });
 }
 
+/// Benchmarks one wide batch (53 columns, no nested types) meant to resemble real-world
+/// use cases. With this many columns, consecutive values of a single column end up far
+/// apart in the row format, and the whole batch is on the edge of not fitting in L3 on
+/// some machines.
+fn run_benchmark_on_medium_amount_and_types_of_columns_without_nesting(
+    batch_size: usize,
+    c: &mut Criterion,
+) {
+    let mut seed = 0;
+
+    let mut cols: Vec<ArrayRef> = vec![];
+
+    // Integer columns of three widths, each at four null fractions; every column has its own seed.
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_primitive_array_with_seed::<Int8Type>(batch_size, fraction, seed);
+        cols.push(Arc::new(array));
+    }
+
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_primitive_array_with_seed::<Int32Type>(batch_size, fraction, seed);
+        cols.push(Arc::new(array));
+    }
+
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_primitive_array_with_seed::<Int64Type>(batch_size, fraction, seed);
+        cols.push(Arc::new(array));
+    }
+
+    // Ten more Int64 columns, all created with a 0.0 null fraction.
+    for _ in 0..10 {
+        seed += 1;
+        let array = create_primitive_array_with_seed::<Int64Type>(batch_size, 0.0, seed);
+        cols.push(Arc::new(array));
+    }
+
+    // Strings created with length arguments (0, 50) at four null fractions.
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+            batch_size, fraction, 0, 50, "", seed,
+        );
+        cols.push(Arc::new(array));
+    }
+
+    // Strings created with a 0.0 null fraction in three length bands, three columns each.
+    for _ in 0..3 {
+        seed += 1;
+        let array = create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+            batch_size, 0.0, 0, 10, "", seed,
+        );
+        cols.push(Arc::new(array));
+    }
+    for _ in 0..3 {
+        seed += 1;
+        let array = create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+            batch_size, 0.0, 10, 20, "", seed,
+        );
+        cols.push(Arc::new(array));
+    }
+    for _ in 0..3 {
+        seed += 1;
+        let array = create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+            batch_size, 0.0, 20, 30, "", seed,
+        );
+        cols.push(Arc::new(array));
+    }
+
+    // Booleans at four null fractions; the third argument stays at 0.5.
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_boolean_array_with_seed(batch_size, fraction, 0.5, seed);
+        cols.push(Arc::new(array));
+    }
+
+    // A second run of ten Int64 columns created with a 0.0 null fraction.
+    for _ in 0..10 {
+        seed += 1;
+        let array = create_primitive_array_with_seed::<Int64Type>(batch_size, 0.0, seed);
+        cols.push(Arc::new(array));
+    }
+
+    // Float64 columns; the helper's second argument takes the same four fractions.
+    for fraction in [0.0, 0.1, 0.2, 0.5] {
+        seed += 1;
+        let array = create_f64_array_with_seed(batch_size, fraction, seed);
+        cols.push(Arc::new(array));
+    }
+
+    // The benchmark name below hard-codes the column count, so verify it here.
+    assert_eq!(cols.len(), 53);
+    do_bench(c, format!("{batch_size} 53 columns").as_str(), cols);
+}
+
 fn row_bench(c: &mut Criterion) {
     let cols = vec![Arc::new(create_primitive_array::<UInt64Type>(4096, 0.)) as ArrayRef];
     do_bench(c, "4096 u64(0)", cols);
@@ -115,6 +226,12 @@ fn row_bench(c: &mut Criterion) {
     let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0.5, 100, false)) as ArrayRef];
     do_bench(c, "4096 string view(100, 0.5)", cols);
 
+    let cols = vec![Arc::new(create_string_view_array_with_max_len(4096, 0., 100)) as ArrayRef];
+    do_bench(c, "4096 string view(1..100, 0)", cols);
+
+    let cols = vec![Arc::new(create_string_view_array_with_max_len(4096, 0.5, 100)) as ArrayRef];
+    do_bench(c, "4096 string view(1..100, 0.5)", cols);
+
     let cols = vec![Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 10)) as ArrayRef];
     do_bench(c, "4096 string_dictionary(10, 0)", cols);
 
@@ -160,7 +277,108 @@ fn row_bench(c: &mut Criterion) {
         Arc::new(create_string_dict_array::<Int32Type>(4096, 0., 100)) as ArrayRef,
         Arc::new(create_primitive_array::<Int64Type>(4096, 0.)) as ArrayRef,
     ];
-    do_bench(c, "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)", cols);
+    do_bench(
+        c,
+        "4096 4096 string_dictionary(20, 0.5), string_dictionary(30, 0), string_dictionary(100, 0), i64(0)",
+        cols,
+    );
+
+    // List
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            4096,
+            0.,
+            1.0,
+        )
+        .unwrap(),
+    ];
+    do_bench(c, "4096 list(0) of u64(0)", cols);
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::LargeList(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            4096,
+            0.,
+            1.0,
+        )
+        .unwrap(),
+    ];
+    do_bench(c, "4096 large_list(0) of u64(0)", cols);
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            10,
+            0.,
+            1.0,
+        )
+        .unwrap(),
+    ];
+    do_bench(c, "10 list(0) of u64(0)", cols);
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::LargeList(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            10,
+            0.,
+            1.0,
+        )
+        .unwrap(),
+    ];
+    do_bench(c, "10 large_list(0) of u64(0)", cols);
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            4096,
+            0.,
+            1.0,
+        )
+        .unwrap()
+        .slice(10, 20),
+    ];
+    do_bench(c, "4096 list(0) sliced to 10 of u64(0)", cols);
+
+    let cols = vec![
+        create_random_array(
+            &Field::new(
+                "list",
+                DataType::LargeList(Arc::new(Field::new_list_field(DataType::UInt64, false))),
+                false,
+            ),
+            4096,
+            0.,
+            1.0,
+        )
+        .unwrap()
+        .slice(10, 20),
+    ];
+    do_bench(c, "4096 large_list(0) sliced to 10 of u64(0)", cols);
+
+    run_benchmark_on_medium_amount_and_types_of_columns_without_nesting(4096, c);
+    run_benchmark_on_medium_amount_and_types_of_columns_without_nesting(8192, c);
 
     bench_iter(c);
 }
diff --git a/arrow/benches/sort_kernel.rs b/arrow/benches/sort_kernel.rs
index 012babd15d33..408d55b5cc6e 100644
--- a/arrow/benches/sort_kernel.rs
+++ b/arrow/benches/sort_kernel.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
 
 extern crate arrow;
 
-use arrow::compute::{lexsort, sort, sort_to_indices, SortColumn};
+use arrow::compute::{SortColumn, lexsort, sort, sort_to_indices};
 use arrow::datatypes::{Int16Type, Int32Type};
 use arrow::util::bench_util::*;
 use arrow::{array::*, datatypes::Float32Type};
@@ -103,6 +103,36 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
+    let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.0, 10);
+    c.bench_function("sort string[0-10] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.5, 10);
+    c.bench_function("sort string[0-10] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.0, 100);
+    c.bench_function("sort string[0-100] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_max_len::<i32>(2usize.pow(12), 0.5, 100);
+    c.bench_function("sort string[0-100] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array::<i32>(2usize.pow(12), 0.0);
+    c.bench_function("sort string[0-400] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array::<i32>(2usize.pow(12), 0.5);
+    c.bench_function("sort string[0-400] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
     let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 10);
     c.bench_function("sort string[10] to indices 2^12", |b| {
         b.iter(|| bench_sort_to_indices(&arr, None))
@@ -113,6 +143,63 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_sort_to_indices(&arr, None))
     });
 
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 100);
+    c.bench_function("sort string[100] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 100);
+    c.bench_function("sort string[100] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.0, 1000);
+    c.bench_function("sort string[1000] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    let arr = create_string_array_with_len::<i32>(2usize.pow(12), 0.5, 1000);
+    c.bench_function("sort string[1000] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This generates a string view array with 2^12 elements, each with a fixed length of 10, and without nulls.
+    let arr = create_string_view_array_with_fixed_len(2usize.pow(12), 0.0, 10);
+    c.bench_function("sort string_view[10] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This generates a string view array with 2^12 elements, each with a fixed length of 10, and with 50% nulls.
+    let arr = create_string_view_array_with_fixed_len(2usize.pow(12), 0.5, 10);
+    c.bench_function("sort string_view[10] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This will generate string view arrays with 2^12 elements, each with a length randomly chosen from 0 to max 400, and without nulls.
+    let arr = create_string_view_array(2usize.pow(12), 0.0);
+    c.bench_function("sort string_view[0-400] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This will generate string view arrays with 2^12 elements, each with a length randomly chosen from 0 to max 400, and with 50% nulls.
+    let arr = create_string_view_array(2usize.pow(12), 0.5);
+    c.bench_function("sort string_view[0-400] nulls to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This will generate string view arrays with 2^12 elements, each with a length < 12 bytes which is inlined data, and without nulls.
+    let arr = create_string_view_array_with_max_len(2usize.pow(12), 0.0, 12);
+    c.bench_function("sort string_view_inlined[0-12] to indices 2^12", |b| {
+        b.iter(|| bench_sort_to_indices(&arr, None))
+    });
+
+    // This will generate string view arrays with 2^12 elements, each with a length < 12 bytes which is inlined data, and with 50% nulls.
+    let arr = create_string_view_array_with_max_len(2usize.pow(12), 0.5, 12);
+    c.bench_function(
+        "sort string_view_inlined[0-12] nulls to indices 2^12",
+        |b| b.iter(|| bench_sort_to_indices(&arr, None)),
+    );
+
     let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.0, 10);
     c.bench_function("sort string[10] dict to indices 2^12", |b| {
         b.iter(|| bench_sort_to_indices(&arr, None))
diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs
index a39fd5d03847..a47995efb0e5 100644
--- a/arrow/benches/string_dictionary_builder.rs
+++ b/arrow/benches/string_dictionary_builder.rs
@@ -17,8 +17,8 @@
 
 use arrow::array::StringDictionaryBuilder;
 use arrow::datatypes::Int32Type;
-use criterion::{criterion_group, criterion_main, Criterion};
-use rand::{rng, Rng};
+use criterion::{Criterion, criterion_group, criterion_main};
+use rand::{Rng, rng};
 
 /// Note: this is best effort, not all keys are necessarily present or unique
 fn build_strings(dict_size: usize, total_size: usize, key_len: usize) -> Vec<String> {
diff --git a/arrow/benches/string_run_builder.rs b/arrow/benches/string_run_builder.rs
index b4457b74dada..ab4e645cf894 100644
--- a/arrow/benches/string_run_builder.rs
+++ b/arrow/benches/string_run_builder.rs
@@ -18,7 +18,7 @@
 use arrow::array::StringRunBuilder;
 use arrow::datatypes::Int32Type;
 use arrow::util::bench_util::create_string_array_for_runs;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 
 fn criterion_benchmark(c: &mut Criterion) {
     let mut group = c.benchmark_group("string_run_builder");
diff --git a/arrow/benches/string_run_iterator.rs b/arrow/benches/string_run_iterator.rs
index 32088573dc25..3008c09f09d4 100644
--- a/arrow/benches/string_run_iterator.rs
+++ b/arrow/benches/string_run_iterator.rs
@@ -17,8 +17,8 @@
 
 use arrow::array::{Int32RunArray, StringArray, StringRunBuilder};
 use arrow::datatypes::Int32Type;
-use criterion::{criterion_group, criterion_main, Criterion};
-use rand::{rng, Rng};
+use criterion::{Criterion, criterion_group, criterion_main};
+use rand::{Rng, rng};
 
 fn build_strings_runs(
     physical_array_len: usize,
@@ -29,7 +29,7 @@ fn build_strings_runs(
     let run_len = logical_array_len / physical_array_len;
     let mut values: Vec<String> = (0..physical_array_len)
         .map(|_| (0..string_len).map(|_| rng.random::<char>()).collect())
-        .flat_map(|s| std::iter::repeat(s).take(run_len))
+        .flat_map(|s| std::iter::repeat_n(s, run_len))
         .collect();
     while values.len() < logical_array_len {
         let last_val = values[values.len() - 1].clone();
diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs
index 8f6f92a375e3..37b83a5e33ed 100644
--- a/arrow/benches/take_kernels.rs
+++ b/arrow/benches/take_kernels.rs
@@ -23,7 +23,7 @@ use rand::Rng;
 
 extern crate arrow;
 
-use arrow::compute::{take, TakeOptions};
+use arrow::compute::{TakeOptions, take};
 use arrow::datatypes::*;
 use arrow::util::test_util::seedable_rng;
 use arrow::{array::*, util::bench_util::*};
diff --git a/arrow/benches/zip_kernels.rs b/arrow/benches/zip_kernels.rs
new file mode 100644
index 000000000000..65f6bb280f00
--- /dev/null
+++ b/arrow/benches/zip_kernels.rs
@@ -0,0 +1,327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use criterion::measurement::WallTime;
+use criterion::{BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main};
+use rand::distr::{Distribution, StandardUniform};
+use rand::prelude::StdRng;
+use rand::{Rng, SeedableRng};
+use std::hint;
+use std::ops::Range;
+use std::sync::Arc;
+
+use arrow::array::*;
+use arrow::datatypes::*;
+use arrow::util::bench_util::*;
+use arrow_select::zip::zip;
+
+trait InputGenerator {
+    fn name(&self) -> &str; // label embedded in the Criterion group name
+
+    /// Return an ArrayRef containing a single null value
+    fn generate_scalar_with_null_value(&self) -> ArrayRef;
+
+    /// Generate `number_of_scalars` non-null scalars (one `ArrayRef` each) from `seed`
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef>;
+
+    /// Generate `array_length` values; `null_percentage` is a fraction (0.1 means 10%)
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef;
+}
+
+/// Generator for primitive arrays of type `T`; scalar values are drawn from
+/// `StandardUniform` using a `StdRng` seeded by the caller.
+struct GeneratePrimitive<T: ArrowPrimitiveType> {
+    description: String,
+    _marker: std::marker::PhantomData<T>,
+}
+
+impl<T> InputGenerator for GeneratePrimitive<T>
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    fn name(&self) -> &str {
+        &self.description
+    }
+
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&T::DATA_TYPE, 1)
+    }
+
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        // Draw the values one at a time from the seeded generator, in order.
+        let mut rng = StdRng::seed_from_u64(seed);
+        let mut scalars: Vec<ArrayRef> = Vec::with_capacity(number_of_scalars);
+        for _ in 0..number_of_scalars {
+            let value: T::Native = rng.sample(StandardUniform);
+            let scalar = PrimitiveArray::<T>::new_scalar(value).into_inner();
+            scalars.push(Arc::new(scalar));
+        }
+        scalars
+    }
+
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        let array = create_primitive_array_with_seed::<T>(array_length, null_percentage, seed);
+        Arc::new(array)
+    }
+}
+
+/// Generator for variable-length byte arrays. `Byte` picks the concrete array type:
+/// `Binary`/`LargeBinary` data types use the binary helper, everything else the string helper.
+struct GenerateBytes<Byte: ByteArrayType> {
+    // `start` and `end - 1` are passed to the array helpers as the length bounds
+    range_length: Range<usize>,
+    description: String,
+
+    _marker: std::marker::PhantomData<Byte>,
+}
+
+impl<Byte> InputGenerator for GenerateBytes<Byte>
+where
+    Byte: ByteArrayType,
+{
+    fn name(&self) -> &str {
+        &self.description
+    }
+
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&Byte::DATA_TYPE, 1)
+    }
+
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        // Build one array with a 0.0 null fraction, then hand out each element as a length-1 slice.
+        let array = self.generate_array(seed, number_of_scalars, 0.0);
+        (0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
+    }
+
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        // Binary and string types need different helpers; both receive the same length bounds.
+        let (min_len, max_len) = (self.range_length.start, self.range_length.end - 1);
+        if matches!(Byte::DATA_TYPE, DataType::Binary | DataType::LargeBinary) {
+            let binary = create_binary_array_with_len_range_and_prefix_and_seed::<Byte::Offset>(
+                array_length,
+                null_percentage,
+                min_len,
+                max_len,
+                &[],
+                seed,
+            );
+            return Arc::new(binary);
+        }
+        let strings = create_string_array_with_len_range_and_prefix_and_seed::<Byte::Offset>(
+            array_length,
+            null_percentage,
+            min_len,
+            max_len,
+            "",
+            seed,
+        );
+        Arc::new(strings)
+    }
+}
+
+/// Generator for `Utf8View` arrays; `range` is forwarded to the array helper as the length range.
+struct GenerateStringView {
+    range: Range<usize>,
+    description: String,
+    _marker: std::marker::PhantomData<StringViewType>,
+}
+
+impl InputGenerator for GenerateStringView {
+    fn name(&self) -> &str {
+        &self.description
+    }
+    fn generate_scalar_with_null_value(&self) -> ArrayRef {
+        new_null_array(&DataType::Utf8View, 1)
+    }
+    fn generate_non_null_scalars(&self, seed: u64, number_of_scalars: usize) -> Vec<ArrayRef> {
+        let array = self.generate_array(seed, number_of_scalars, 0.0);
+        (0..number_of_scalars).map(|i| array.slice(i, 1)).collect()
+    }
+    fn generate_array(&self, seed: u64, array_length: usize, null_percentage: f32) -> ArrayRef {
+        let views = create_string_view_array_with_len_range_and_seed(
+            array_length,
+            null_percentage,
+            self.range.clone(),
+            seed,
+        );
+        Arc::new(views)
+    }
+}
+
+fn mask_cases(len: usize) -> Vec<(&'static str, BooleanArray)> {
+    vec![
+        ("all_true", create_boolean_array(len, 0.0, 1.0)), // third argument tracks the label
+        ("99pct_true", create_boolean_array(len, 0.0, 0.99)),
+        ("90pct_true", create_boolean_array(len, 0.0, 0.9)),
+        ("50pct_true", create_boolean_array(len, 0.0, 0.5)),
+        ("10pct_true", create_boolean_array(len, 0.0, 0.1)),
+        ("1pct_true", create_boolean_array(len, 0.0, 0.01)),
+        ("all_false", create_boolean_array(len, 0.0, 0.0)),
+        ("50pct_nulls", create_boolean_array(len, 0.5, 0.5)), // only non-zero second argument
+    ]
+}
+
+/// Benchmarks `zip` on scalar and array inputs from `input_generator`, once per mask case.
+fn bench_zip_on_input_generator(c: &mut Criterion, input_generator: &impl InputGenerator) {
+    const ARRAY_LEN: usize = 8192;
+    let mut group =
+        c.benchmark_group(format!("zip_{ARRAY_LEN}_from_{}", input_generator.name()).as_str());
+
+    let null_scalar = input_generator.generate_scalar_with_null_value();
+    let [non_null_scalar_1, non_null_scalar_2]: [_; 2] = input_generator
+        .generate_non_null_scalars(42, 2)
+        .try_into()
+        .unwrap();
+    let array_1_10pct_nulls = input_generator.generate_array(42, ARRAY_LEN, 0.1);
+    let array_2_10pct_nulls = input_generator.generate_array(18, ARRAY_LEN, 0.1);
+    let masks = mask_cases(ARRAY_LEN);
+
+    // Benchmarks for different scalar combinations
+    for (description, truthy, falsy) in &[
+        ("null_vs_non_null_scalar", &null_scalar, &non_null_scalar_1),
+        (
+            "non_null_scalar_vs_null_scalar",
+            &non_null_scalar_1,
+            &null_scalar,
+        ),
+        ("non_nulls_scalars", &non_null_scalar_1, &non_null_scalar_2),
+    ] {
+        bench_zip_input_on_all_masks(
+            description,
+            &mut group,
+            &masks,
+            &Scalar::new(truthy),
+            &Scalar::new(falsy),
+        );
+    }
+
+    // A bare one-element `ArrayRef` is a non-scalar `Datum`, so `zip` would reject it for not
+    // matching the mask length; wrap the scalar side in `Scalar` exactly like the loop above.
+    bench_zip_input_on_all_masks(
+        "array_vs_non_null_scalar",
+        &mut group,
+        &masks,
+        &array_1_10pct_nulls,
+        &Scalar::new(&non_null_scalar_1),
+    );
+    bench_zip_input_on_all_masks(
+        "non_null_scalar_vs_array",
+        &mut group,
+        &masks,
+        &Scalar::new(&non_null_scalar_1),
+        &array_1_10pct_nulls,
+    );
+
+    bench_zip_input_on_all_masks(
+        "array_vs_array",
+        &mut group,
+        &masks,
+        &array_1_10pct_nulls,
+        &array_2_10pct_nulls,
+    );
+
+    group.finish();
+}
+
+/// Registers one `zip` benchmark per mask; `unwrap` turns a failing kernel call into a panic.
+fn bench_zip_input_on_all_masks(
+    description: &str,
+    group: &mut BenchmarkGroup<WallTime>,
+    masks: &[(&str, BooleanArray)],
+    truthy: &impl Datum,
+    falsy: &impl Datum,
+) {
+    for (mask_description, mask) in masks {
+        let id = BenchmarkId::new(description, mask_description);
+        group.bench_with_input(id, mask, |b, mask| {
+            b.iter(|| hint::black_box(zip(mask, truthy, falsy).unwrap()))
+        });
+    }
+}
+
+/// Registers the zip benchmarks for every input family: i32 primitives, short and long
+/// strings, short and long binary values, and two sizes of string views. Each family is
+/// handed to `bench_zip_on_input_generator`, which creates a Criterion group for it.
+fn add_benchmark(c: &mut Criterion) {
+    use std::marker::PhantomData;
+
+    // Primitive
+    // (`Int32Type` values)
+    let primitive = GeneratePrimitive::<Int32Type> {
+        description: "i32".to_string(),
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &primitive);
+
+    // Short strings
+    // (`GenericStringType<i32>` with the length range 3..10)
+    let short_strings = GenerateBytes::<GenericStringType<i32>> {
+        description: "short strings (3..10)".to_string(),
+        range_length: 3..10,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &short_strings);
+
+    // Long strings
+    // (`GenericStringType<i32>` with the length range 100..400)
+    let long_strings = GenerateBytes::<GenericStringType<i32>> {
+        description: "long strings (100..400)".to_string(),
+        range_length: 100..400,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &long_strings);
+
+    // Short Bytes
+    // (`GenericBinaryType<i32>` with the length range 3..10)
+    let short_bytes = GenerateBytes::<GenericBinaryType<i32>> {
+        description: "short bytes (3..10)".to_string(),
+        range_length: 3..10,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &short_bytes);
+
+    // Long Bytes
+    // (`GenericBinaryType<i32>` with the length range 100..400)
+    let long_bytes = GenerateBytes::<GenericBinaryType<i32>> {
+        description: "long bytes (100..400)".to_string(),
+        range_length: 100..400,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &long_bytes);
+
+    // Short string views
+    // (length range 3..10)
+    let short_views = GenerateStringView {
+        description: "string_views size (3..10)".to_string(),
+        range: 3..10,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &short_views);
+
+    // Longer string views
+    // (length range 10..100)
+    let long_views = GenerateStringView {
+        description: "string_views size (10..100)".to_string(),
+        range: 10..100,
+        _marker: PhantomData,
+    };
+    bench_zip_on_input_generator(c, &long_views);
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
diff --git a/arrow/examples/collect.rs b/arrow/examples/collect.rs
index ced4640d600f..57b78a822ae6 100644
--- a/arrow/examples/collect.rs
+++ b/arrow/examples/collect.rs
@@ -20,7 +20,7 @@
 
 use arrow::array::Array;
 use arrow_array::types::Int32Type;
-use arrow_array::{Float32Array, Int32Array, Int8Array, ListArray};
+use arrow_array::{Float32Array, Int8Array, Int32Array, ListArray};
 
 fn main() {
     // Primitive Arrays
@@ -71,11 +71,13 @@ fn main() {
             .unwrap()
             .values()
     );
-    assert!(!list2
-        .as_any()
-        .downcast_ref::<Int32Array>()
-        .unwrap()
-        .is_valid(1));
+    assert!(
+        !list2
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap()
+            .is_valid(1)
+    );
     assert_eq!(
         &[6, 7],
         list3
diff --git a/arrow/examples/dynamic_types.rs b/arrow/examples/dynamic_types.rs
index b866cb7e6b1a..df5fe5ae654e 100644
--- a/arrow/examples/dynamic_types.rs
+++ b/arrow/examples/dynamic_types.rs
@@ -63,7 +63,7 @@ fn main() -> Result<()> {
     // build a record batch
     let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id), Arc::new(nested)])?;
 
-    print_batches(&[batch.clone()]).unwrap();
+    print_batches(std::slice::from_ref(&batch)).unwrap();
 
     process(&batch);
     Ok(())
diff --git a/arrow/examples/zero_copy_ipc.rs b/arrow/examples/zero_copy_ipc.rs
index 15fc477c59cf..0c80572cf468 100644
--- a/arrow/examples/zero_copy_ipc.rs
+++ b/arrow/examples/zero_copy_ipc.rs
@@ -20,14 +20,14 @@
 //! Zero copy in this case means the Arrow arrays refer directly to a user
 //! provided buffer or memory region.
 
-use arrow::array::{record_batch, RecordBatch};
+use arrow::array::{RecordBatch, record_batch};
 use arrow::error::Result;
 use arrow_buffer::Buffer;
 use arrow_cast::pretty::pretty_format_batches;
 use arrow_ipc::convert::fb_to_schema;
-use arrow_ipc::reader::{read_footer_length, FileDecoder};
+use arrow_ipc::reader::{FileDecoder, read_footer_length};
 use arrow_ipc::writer::FileWriter;
-use arrow_ipc::{root_as_footer, Block};
+use arrow_ipc::{Block, root_as_footer};
 use std::path::PathBuf;
 use std::sync::Arc;
 
diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
index 410e9d5af2a6..f95afc4928df 100644
--- a/arrow/src/array/mod.rs
+++ b/arrow/src/array/mod.rs
@@ -25,7 +25,7 @@ pub use arrow_array::cast::*;
 pub use arrow_array::iterator::*;
 pub use arrow_array::*;
 pub use arrow_data::{
-    layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, ByteView, DataTypeLayout,
+    ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, ByteView, DataTypeLayout, layout,
 };
 
 pub use arrow_data::transform::{Capacities, MutableArrayData};
@@ -35,6 +35,4 @@ pub use arrow_data::transform::{Capacities, MutableArrayData};
 pub use arrow_array::ffi::export_array_into_raw;
 
 // --------------------- Array's values comparison ---------------------
-
-#[allow(deprecated)]
-pub use arrow_ord::ord::{build_compare, make_comparator, DynComparator};
+pub use arrow_ord::ord::{DynComparator, make_comparator};
diff --git a/arrow/src/compute/kernels.rs b/arrow/src/compute/kernels.rs
index 6317a4229f5e..466f24205339 100644
--- a/arrow/src/compute/kernels.rs
+++ b/arrow/src/compute/kernels.rs
@@ -22,7 +22,7 @@ pub use arrow_cast::cast;
 pub use arrow_cast::parse as cast_utils;
 pub use arrow_ord::{cmp, partition, rank, sort};
 pub use arrow_select::{
-    coalesce, concat, filter, interleave, nullif, take, union_extract, window, zip,
+    coalesce, concat, filter, interleave, merge, nullif, take, union_extract, window, zip,
 };
 pub use arrow_string::{concat_elements, length, regexp, substring};
 
@@ -30,8 +30,5 @@ pub use arrow_string::{concat_elements, length, regexp, substring};
 pub mod comparison {
     pub use arrow_ord::comparison::*;
     pub use arrow_string::like::*;
-    // continue to export deprecated methods until they are removed
     pub use arrow_string::regexp::{regexp_is_match, regexp_is_match_scalar};
-    #[allow(deprecated)]
-    pub use arrow_string::regexp::{regexp_is_match_utf8, regexp_is_match_utf8_scalar};
 }
diff --git a/arrow/src/datatypes/mod.rs b/arrow/src/datatypes/mod.rs
index d41289d52e2a..4286128a76e1 100644
--- a/arrow/src/datatypes/mod.rs
+++ b/arrow/src/datatypes/mod.rs
@@ -24,7 +24,7 @@
 
 pub use arrow_array::types::*;
 pub use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType};
-pub use arrow_buffer::{i256, ArrowNativeType, ToByteSlice};
+pub use arrow_buffer::{ArrowNativeType, ToByteSlice, i256};
 pub use arrow_data::decimal::*;
 pub use arrow_schema::{
     DataType, Field, FieldRef, Fields, IntervalUnit, Schema, SchemaBuilder, SchemaRef, TimeUnit,
diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs
index e9e94540d9e7..f9b0c717f0b3 100644
--- a/arrow/src/lib.rs
+++ b/arrow/src/lib.rs
@@ -255,7 +255,9 @@
 //! * CSV: [`Reader`](csv::reader::Reader) and [`Writer`](csv::writer::Writer)
 //! * IPC: [`Reader`](ipc::reader::StreamReader) and [`Writer`](ipc::writer::FileWriter)
 //!
-//! Parquet is published as a [separate crate](https://crates.io/crates/parquet)
+//! Support for [Apache Parquet] is published as a [separate parquet crate](https://crates.io/crates/parquet)
+//!
+//! Support for [Apache Avro] is published as a [separate arrow-avro crate](https://crates.io/crates/arrow-avro)
 //!
 //! # Serde Compatibility
 //!
@@ -330,8 +332,8 @@
 //! Some functionality is also distributed independently of this crate:
 //!
 //! * [`arrow-flight`] - support for [Arrow Flight RPC]
-//! * [`arrow-integration-test`] - support for [Arrow JSON Test Format]
-//! * [`parquet`](https://docs.rs/parquet/latest/parquet/) - support for [Apache Parquet]
+//! * [`parquet`](https://docs.rs/parquet) - support for [Apache Parquet]
+//! * [`arrow-avro`](https://docs.rs/arrow-avro) - support for [Apache Avro]
 //!
 //! # Safety and Security
 //!
@@ -358,11 +360,11 @@
 //! [`Buffer`]: buffer::Buffer
 //! [`RecordBatch`]: record_batch::RecordBatch
 //! [`arrow-flight`]: https://docs.rs/arrow-flight/latest/arrow_flight/
-//! [`arrow-integration-test`]: https://docs.rs/arrow-integration-test/latest/arrow_integration_test/
 //! [`parquet`]: https://docs.rs/parquet/latest/parquet/
 //! [Arrow Flight RPC]: https://arrow.apache.org/docs/format/Flight.html
 //! [Arrow JSON Test Format]: https://github.com/apache/arrow/blob/master/docs/source/format/Integration.rst#json-test-data-format
 //! [Apache Parquet]: https://parquet.apache.org/
+//! [Apache Avro]: https://avro.apache.org/
 //! [DataFusion]: https://github.com/apache/arrow-datafusion
 //! [issue tracker]: https://github.com/apache/arrow-rs/issues
 
@@ -370,7 +372,7 @@
     html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
     html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![deny(clippy::redundant_clone)]
 #![warn(missing_debug_implementations)]
 #![warn(missing_docs)]
diff --git a/arrow/src/tensor.rs b/arrow/src/tensor.rs
index cd135a2f04df..3b65ea7b52f9 100644
--- a/arrow/src/tensor.rs
+++ b/arrow/src/tensor.rs
@@ -86,6 +86,10 @@ pub type BooleanTensor<'a> = Tensor<'a, BooleanType>;
 pub type Date32Tensor<'a> = Tensor<'a, Date32Type>;
 /// [Tensor] of type [Int16Type]
 pub type Date64Tensor<'a> = Tensor<'a, Date64Type>;
+/// [Tensor] of type [Decimal32Type]
+pub type Decimal32Tensor<'a> = Tensor<'a, Decimal32Type>;
+/// [Tensor] of type [Decimal64Type]
+pub type Decimal64Tensor<'a> = Tensor<'a, Decimal64Type>;
 /// [Tensor] of type [Decimal128Type]
 pub type Decimal128Tensor<'a> = Tensor<'a, Decimal128Type>;
 /// [Tensor] of type [Decimal256Type]
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index 94c6adfb83da..bcf7a559e960 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -22,10 +22,10 @@ use crate::datatypes::*;
 use crate::util::test_util::seedable_rng;
 use arrow_buffer::{Buffer, IntervalMonthDayNano};
 use half::f16;
-use rand::distr::uniform::SampleUniform;
-use rand::rng;
 use rand::Rng;
 use rand::SeedableRng;
+use rand::distr::uniform::SampleUniform;
+use rand::rng;
 use rand::{
     distr::{Alphanumeric, Distribution, StandardUniform},
     prelude::StdRng,
@@ -117,6 +117,29 @@ where
         .collect()
 }
 
+/// Creates a random [`BooleanArray`] with the given size, null/true densities and seed
+pub fn create_boolean_array_with_seed(
+    size: usize,
+    null_density: f32,
+    true_density: f32,
+    seed: u64,
+) -> BooleanArray
+where
+    StandardUniform: Distribution<bool>,
+{
+    let mut rng = StdRng::seed_from_u64(seed);
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                let value = rng.random::<f32>() < true_density;
+                Some(value)
+            }
+        })
+        .collect()
+}
+
 /// Creates a random (but fixed-seeded) string array of a given size and null density.
 ///
 /// Strings have a random length
@@ -155,6 +178,27 @@ fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
     min_str_len: usize,
     max_str_len: usize,
     prefix: &str,
+) -> GenericStringArray<Offset> {
+    create_string_array_with_len_range_and_prefix_and_seed(
+        size,
+        null_density,
+        min_str_len,
+        max_str_len,
+        prefix,
+        42,
+    )
+}
+
+/// Creates a random [`GenericStringArray`] of a given `size` and `null_density`
+/// filling it with random strings with lengths in the specified range,
+/// all starting with the provided `prefix`, generated using the provided `seed`.
+pub fn create_string_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
+    size: usize,
+    null_density: f32,
+    min_str_len: usize,
+    max_str_len: usize,
+    prefix: &str,
+    seed: u64,
 ) -> GenericStringArray<Offset> {
     assert!(
         min_str_len <= max_str_len,
@@ -165,7 +209,7 @@ fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
         "Prefix length must be <= max_str_len"
     );
 
-    let rng = &mut seedable_rng();
+    let rng = &mut StdRng::seed_from_u64(seed);
     (0..size)
         .map(|_| {
             if rng.random::<f32>() < null_density {
@@ -187,6 +231,33 @@ fn create_string_array_with_len_range_and_prefix<Offset: OffsetSizeTrait>(
         })
         .collect()
 }
+/// Creates a random [`StringViewArray`] with the given size, null density and string length range
+///
+/// Arguments:
+/// - `size`: number of elements in the string view array
+/// - `null_density`: density of nulls in the string view array
+/// - `range`: range from which the length of each string is drawn
+/// - `seed`: seed for the random number generator
+pub fn create_string_view_array_with_len_range_and_seed(
+    size: usize,
+    null_density: f32,
+    range: Range<usize>,
+    seed: u64,
+) -> StringViewArray {
+    let rng = &mut StdRng::seed_from_u64(seed);
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                let str_len = rng.random_range(range.clone());
+                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
+                let value = String::from_utf8(value).unwrap();
+                Some(value)
+            }
+        })
+        .collect()
+}
 
 fn create_string_view_array_with_len_range_and_prefix(
     size: usize,
@@ -228,7 +299,7 @@ fn create_string_view_array_with_len_range_and_prefix(
 }
 
 /// Creates a random (but fixed-seeded) array of rand size with a given max size, null density and length
-fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
+pub fn create_string_array_with_max_len<Offset: OffsetSizeTrait>(
     size: usize,
     null_density: f32,
     max_str_len: usize,
@@ -297,6 +368,26 @@ pub fn create_string_view_array_with_max_len(
         .collect()
 }
 
+/// Creates a random (but fixed-seeded) string view array whose values all have length `str_len`
+pub fn create_string_view_array_with_fixed_len(
+    size: usize,
+    null_density: f32,
+    str_len: usize,
+) -> StringViewArray {
+    let rng = &mut seedable_rng();
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
+                let value = String::from_utf8(value).unwrap();
+                Some(value)
+            }
+        })
+        .collect()
+}
+
 /// Creates a random (but fixed-seeded) array of a given size, null density and length
 pub fn create_string_view_array_with_len(
     size: usize,
@@ -357,6 +448,49 @@ pub fn create_string_dict_array<K: ArrowDictionaryKeyType>(
     data.iter().map(|x| x.as_deref()).collect()
 }
 
+/// Creates a random List/LargeList array of primitive values
+///
+/// Arguments:
+/// - `size`: number of lists in the array
+/// - `null_density`: density of null lists in the list array
+/// - `list_null_density`: density of nulls among the primitive values inside the lists
+/// - `max_list_size`: maximum size of each list (actual size is random in `0..=max_list_size`)
+/// - `seed`: seed for the random number generator
+pub fn create_primitive_list_array_with_seed<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+    seed: u64,
+) -> GenericListArray<O>
+where
+    O: OffsetSizeTrait,
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    let mut rng = StdRng::seed_from_u64(seed);
+
+    let values = (0..size).map(|_| {
+        if rng.random::<f32>() < null_density {
+            None
+        } else {
+            let list_size = rng.random_range(0..=max_list_size);
+            let list_values: Vec<Option<T::Native>> = (0..list_size)
+                .map(|_| {
+                    if rng.random::<f32>() < list_null_density {
+                        None
+                    } else {
+                        Some(rng.random())
+                    }
+                })
+                .collect();
+            Some(list_values)
+        }
+    });
+
+    GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
+}
+
 /// Create primitive run array for given logical and physical array lengths
 pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
     logical_array_len: usize,
@@ -376,7 +510,7 @@ pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
                 take_len += 1;
                 run_len_extra -= 1;
             }
-            std::iter::repeat(V::Native::from_usize(s).unwrap()).take(take_len)
+            std::iter::repeat_n(V::Native::from_usize(s).unwrap(), take_len)
         })
         .collect();
     while values.len() < logical_array_len {
@@ -414,7 +548,7 @@ pub fn create_string_array_for_runs(
                 take_len += 1;
                 run_len_extra -= 1;
             }
-            std::iter::repeat(s).take(take_len)
+            std::iter::repeat_n(s, take_len)
         })
         .collect();
     while values.len() < logical_array_len {
@@ -429,8 +563,29 @@ pub fn create_binary_array<Offset: OffsetSizeTrait>(
     size: usize,
     null_density: f32,
 ) -> GenericBinaryArray<Offset> {
-    let rng = &mut seedable_rng();
-    let range_rng = &mut seedable_rng();
+    create_binary_array_with_seed(
+        size,
+        null_density,
+        42, // bytes_seed
+        42, // bytes_length_seed
+    )
+}
+
+/// Creates a random [`GenericBinaryArray`] of a given `size` and `null_density`
+/// filling it with random bytes, generated using the provided `seed`s.
+///
+/// The `bytes_seed` is used to seed the RNG for generating the byte values,
+/// while the `bytes_length_seed` is used to seed the RNG for generating the length of each item.
+///
+/// These values can be the same as they are used to seed different RNGs internally.
+pub fn create_binary_array_with_seed<Offset: OffsetSizeTrait>(
+    size: usize,
+    null_density: f32,
+    bytes_seed: u64,
+    bytes_length_seed: u64,
+) -> GenericBinaryArray<Offset> {
+    let rng = &mut StdRng::seed_from_u64(bytes_seed);
+    let range_rng = &mut StdRng::seed_from_u64(bytes_length_seed);
 
     (0..size)
         .map(|_| {
@@ -447,6 +602,41 @@ pub fn create_binary_array<Offset: OffsetSizeTrait>(
         .collect()
 }
 
+/// Creates a random [`GenericBinaryArray`] of a given `size` and `null_density`, where each value
+/// starts with `prefix` and has a total length in `min_len..=max_len`, generated using `seed`.
+///
+/// Panics if `min_len > max_len` or if `prefix` is longer than `max_len`.
+pub fn create_binary_array_with_len_range_and_prefix_and_seed<Offset: OffsetSizeTrait>(
+    size: usize,
+    null_density: f32,
+    min_len: usize,
+    max_len: usize,
+    prefix: &[u8],
+    seed: u64,
+) -> GenericBinaryArray<Offset> {
+    assert!(min_len <= max_len, "min_len must be <= max_len");
+    assert!(prefix.len() <= max_len, "Prefix length must be <= max_len");
+
+    let rng = &mut StdRng::seed_from_u64(seed);
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                let remaining_len = rng
+                    .random_range(min_len.saturating_sub(prefix.len())..=(max_len - prefix.len()));
+
+                let remaining = rng
+                    .sample_iter::<u8, _>(StandardUniform)
+                    .take(remaining_len);
+
+                let value = prefix.iter().copied().chain(remaining).collect::<Vec<u8>>();
+                Some(value)
+            }
+        })
+        .collect()
+}
+
 /// Creates an random (but fixed-seeded) array of a given size and null density
 pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> FixedSizeBinaryArray {
     let rng = &mut seedable_rng();
@@ -567,3 +757,18 @@ pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array {
         })
         .collect()
 }
+
+/// Creates a random, null-free f64 array of a given size and NaN density based on a given seed
+pub fn create_f64_array_with_seed(size: usize, nan_density: f32, seed: u64) -> Float64Array {
+    let mut rng = StdRng::seed_from_u64(seed);
+
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < nan_density {
+                Some(f64::NAN)
+            } else {
+                Some(rng.random())
+            }
+        })
+        .collect()
+}
diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs
index 7ea05811d55b..023436e0a7f7 100644
--- a/arrow/src/util/data_gen.rs
+++ b/arrow/src/util/data_gen.rs
@@ -20,8 +20,8 @@
 use std::sync::Arc;
 
 use rand::{
-    distr::uniform::{SampleRange, SampleUniform},
     Rng,
+    distr::uniform::{SampleRange, SampleUniform},
 };
 
 use crate::array::*;
@@ -66,149 +66,106 @@ pub fn create_random_batch(
 pub fn create_random_array(
     field: &Field,
     size: usize,
-    null_density: f32,
+    mut null_density: f32,
     true_density: f32,
 ) -> Result<ArrayRef> {
-    // Override null density with 0.0 if the array is non-nullable
-    // and a primitive type in case a nested field is nullable
-    let primitive_null_density = match field.is_nullable() {
-        true => null_density,
-        false => 0.0,
-    };
+    // Override the null density only for types that are neither nested nor dictionaries.
+    // For nested types we keep it so that nullability is preserved for the children.
+    // Dictionary types handle nullability internally.
+    if !field.data_type().is_nested() && !matches!(field.data_type(), Dictionary(_, _)) {
+        // Override null density with 0.0 if the array is non-nullable
+        null_density = match field.is_nullable() {
+            true => null_density,
+            false => 0.0,
+        };
+    }
+
     use DataType::*;
-    Ok(match field.data_type() {
+    let array = match field.data_type() {
         Null => Arc::new(NullArray::new(size)) as ArrayRef,
-        Boolean => Arc::new(create_boolean_array(
-            size,
-            primitive_null_density,
-            true_density,
-        )),
-        Int8 => Arc::new(create_primitive_array::<Int8Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int16 => Arc::new(create_primitive_array::<Int16Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int32 => Arc::new(create_primitive_array::<Int32Type>(
-            size,
-            primitive_null_density,
-        )),
-        Int64 => Arc::new(create_primitive_array::<Int64Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt8 => Arc::new(create_primitive_array::<UInt8Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt16 => Arc::new(create_primitive_array::<UInt16Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt32 => Arc::new(create_primitive_array::<UInt32Type>(
-            size,
-            primitive_null_density,
-        )),
-        UInt64 => Arc::new(create_primitive_array::<UInt64Type>(
-            size,
-            primitive_null_density,
-        )),
+        Boolean => Arc::new(create_boolean_array(size, null_density, true_density)),
+        Int8 => Arc::new(create_primitive_array::<Int8Type>(size, null_density)),
+        Int16 => Arc::new(create_primitive_array::<Int16Type>(size, null_density)),
+        Int32 => Arc::new(create_primitive_array::<Int32Type>(size, null_density)),
+        Int64 => Arc::new(create_primitive_array::<Int64Type>(size, null_density)),
+        UInt8 => Arc::new(create_primitive_array::<UInt8Type>(size, null_density)),
+        UInt16 => Arc::new(create_primitive_array::<UInt16Type>(size, null_density)),
+        UInt32 => Arc::new(create_primitive_array::<UInt32Type>(size, null_density)),
+        UInt64 => Arc::new(create_primitive_array::<UInt64Type>(size, null_density)),
         Float16 => {
             return Err(ArrowError::NotYetImplemented(
                 "Float16 is not implemented".to_string(),
-            ))
+            ));
         }
-        Float32 => Arc::new(create_primitive_array::<Float32Type>(
-            size,
-            primitive_null_density,
-        )),
-        Float64 => Arc::new(create_primitive_array::<Float64Type>(
-            size,
-            primitive_null_density,
-        )),
+        Float32 => Arc::new(create_primitive_array::<Float32Type>(size, null_density)),
+        Float64 => Arc::new(create_primitive_array::<Float64Type>(size, null_density)),
         Timestamp(unit, tz) => match unit {
             TimeUnit::Second => Arc::new(
-                create_random_temporal_array::<TimestampSecondType>(size, primitive_null_density)
+                create_random_temporal_array::<TimestampSecondType>(size, null_density)
                     .with_timezone_opt(tz.clone()),
-            ),
+            ) as ArrayRef,
             TimeUnit::Millisecond => Arc::new(
-                create_random_temporal_array::<TimestampMillisecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampMillisecondType>(size, null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
             TimeUnit::Microsecond => Arc::new(
-                create_random_temporal_array::<TimestampMicrosecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampMicrosecondType>(size, null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
             TimeUnit::Nanosecond => Arc::new(
-                create_random_temporal_array::<TimestampNanosecondType>(
-                    size,
-                    primitive_null_density,
-                )
-                .with_timezone_opt(tz.clone()),
+                create_random_temporal_array::<TimestampNanosecondType>(size, null_density)
+                    .with_timezone_opt(tz.clone()),
             ),
         },
         Date32 => Arc::new(create_random_temporal_array::<Date32Type>(
             size,
-            primitive_null_density,
+            null_density,
         )),
         Date64 => Arc::new(create_random_temporal_array::<Date64Type>(
             size,
-            primitive_null_density,
+            null_density,
         )),
         Time32(unit) => match unit {
             TimeUnit::Second => Arc::new(create_random_temporal_array::<Time32SecondType>(
                 size,
-                primitive_null_density,
+                null_density,
             )) as ArrayRef,
             TimeUnit::Millisecond => Arc::new(
-                create_random_temporal_array::<Time32MillisecondType>(size, primitive_null_density),
+                create_random_temporal_array::<Time32MillisecondType>(size, null_density),
             ),
             _ => {
                 return Err(ArrowError::InvalidArgumentError(format!(
                     "Unsupported unit {unit:?} for Time32"
-                )))
+                )));
             }
         },
         Time64(unit) => match unit {
             TimeUnit::Microsecond => Arc::new(
-                create_random_temporal_array::<Time64MicrosecondType>(size, primitive_null_density),
+                create_random_temporal_array::<Time64MicrosecondType>(size, null_density),
             ) as ArrayRef,
             TimeUnit::Nanosecond => Arc::new(create_random_temporal_array::<Time64NanosecondType>(
                 size,
-                primitive_null_density,
+                null_density,
             )),
             _ => {
                 return Err(ArrowError::InvalidArgumentError(format!(
                     "Unsupported unit {unit:?} for Time64"
-                )))
+                )));
             }
         },
-        Utf8 => Arc::new(create_string_array::<i32>(size, primitive_null_density)),
-        LargeUtf8 => Arc::new(create_string_array::<i64>(size, primitive_null_density)),
+        Utf8 => Arc::new(create_string_array::<i32>(size, null_density)),
+        LargeUtf8 => Arc::new(create_string_array::<i64>(size, null_density)),
         Utf8View => Arc::new(create_string_view_array_with_len(
             size,
-            primitive_null_density,
+            null_density,
             4,
             false,
         )),
-        Binary => Arc::new(create_binary_array::<i32>(size, primitive_null_density)),
-        LargeBinary => Arc::new(create_binary_array::<i64>(size, primitive_null_density)),
-        FixedSizeBinary(len) => Arc::new(create_fsb_array(
-            size,
-            primitive_null_density,
-            *len as usize,
-        )),
+        Binary => Arc::new(create_binary_array::<i32>(size, null_density)),
+        LargeBinary => Arc::new(create_binary_array::<i64>(size, null_density)),
+        FixedSizeBinary(len) => Arc::new(create_fsb_array(size, null_density, *len as usize)),
         BinaryView => Arc::new(
-            create_string_view_array_with_len(size, primitive_null_density, 4, false)
-                .to_binary_view(),
+            create_string_view_array_with_len(size, null_density, 4, false).to_binary_view(),
         ),
         List(_) => create_random_list_array(field, size, null_density, true_density)?,
         LargeList(_) => create_random_list_array(field, size, null_density, true_density)?,
@@ -228,9 +185,15 @@ pub fn create_random_array(
         other => {
             return Err(ArrowError::NotYetImplemented(format!(
                 "Generating random arrays not yet implemented for {other:?}"
-            )))
+            )));
         }
-    })
+    };
+
+    if !field.is_nullable() {
+        assert_eq!(array.null_count(), 0);
+    }
+
+    Ok(array)
 }
 
 #[inline]
@@ -267,7 +230,7 @@ fn create_random_decimal_array(field: &Field, size: usize, null_density: f32) ->
             ))
         }
         _ => Err(ArrowError::InvalidArgumentError(format!(
-            "Cannot create decimal array for field {field:?}"
+            "Cannot create decimal array for field {field}"
         ))),
     }
 }
@@ -298,8 +261,8 @@ fn create_random_list_array(
         }
         _ => {
             return Err(ArrowError::InvalidArgumentError(format!(
-                "Cannot create list array for field {field:?}"
-            )))
+                "Cannot create list array for field {field}"
+            )));
         }
     };
 
@@ -336,8 +299,8 @@ fn create_random_struct_array(
         DataType::Struct(fields) => fields,
         _ => {
             return Err(ArrowError::InvalidArgumentError(format!(
-                "Cannot create struct array for field {field:?}"
-            )))
+                "Cannot create struct array for field {field}"
+            )));
         }
     };
 
@@ -383,7 +346,7 @@ fn create_random_map_array(
         _ => {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "Cannot create map array for field {field:?}"
-            )))
+            )));
         }
     };
 
@@ -812,4 +775,23 @@ mod tests {
             assert_eq!(array.len(), size);
         }
     }
+
+    #[test]
+    fn create_non_nullable_decimal_array_with_null_density() {
+        let size = 10;
+        let fields = vec![
+            Field::new("a", DataType::Decimal128(10, -2), false),
+            Field::new("b", DataType::Decimal256(10, -2), false),
+        ];
+        let schema = Schema::new(fields);
+        let schema_ref = Arc::new(schema);
+        let batch = create_random_batch(schema_ref.clone(), size, 0.35, 0.7).unwrap();
+
+        assert_eq!(batch.schema(), schema_ref);
+        assert_eq!(batch.num_columns(), schema_ref.fields().len());
+        for array in batch.columns() {
+            assert_eq!(array.len(), size);
+            assert_eq!(array.null_count(), 0); // both fields are non-nullable
+        }
+    }
 }
diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs
index 566ccc6ab536..dbcea03ee74d 100644
--- a/arrow/src/util/test_util.rs
+++ b/arrow/src/util/test_util.rs
@@ -17,7 +17,7 @@
 
 //! Utils to make testing easier
 
-use rand::{rngs::StdRng, Rng, SeedableRng};
+use rand::{Rng, SeedableRng, rngs::StdRng};
 use std::{env, error::Error, fs, io::Write, path::PathBuf};
 
 /// Returns a vector of size `n`, filled with randomly generated bytes.
@@ -216,26 +216,26 @@ mod tests {
         let non_existing = cwd.join("non-existing-dir").display().to_string();
         let non_existing_str = non_existing.as_str();
 
-        env::set_var(udf_env, non_existing_str);
+        unsafe { env::set_var(udf_env, non_existing_str) };
         let res = get_data_dir(udf_env, existing_str);
         assert!(res.is_err());
 
-        env::set_var(udf_env, "");
+        unsafe { env::set_var(udf_env, "") };
         let res = get_data_dir(udf_env, existing_str);
         assert!(res.is_ok());
         assert_eq!(res.unwrap(), existing_pb);
 
-        env::set_var(udf_env, " ");
+        unsafe { env::set_var(udf_env, " ") };
         let res = get_data_dir(udf_env, existing_str);
         assert!(res.is_ok());
         assert_eq!(res.unwrap(), existing_pb);
 
-        env::set_var(udf_env, existing_str);
+        unsafe { env::set_var(udf_env, existing_str) };
         let res = get_data_dir(udf_env, existing_str);
         assert!(res.is_ok());
         assert_eq!(res.unwrap(), existing_pb);
 
-        env::remove_var(udf_env);
+        unsafe { env::remove_var(udf_env) };
         let res = get_data_dir(udf_env, non_existing_str);
         assert!(res.is_err());
 
diff --git a/arrow/tests/arithmetic.rs b/arrow/tests/arithmetic.rs
index 59a162ef6dc0..cc6a97e123f8 100644
--- a/arrow/tests/arithmetic.rs
+++ b/arrow/tests/arithmetic.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use arrow_arith::numeric::{add, sub};
-use arrow_arith::temporal::{date_part, DatePart};
+use arrow_arith::temporal::{DatePart, date_part};
 use arrow_array::cast::AsArray;
 use arrow_array::temporal_conversions::as_datetime_with_timezone;
 use arrow_array::timezone::Tz;
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index da7d37fc48a4..0e3b9c597b0d 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -18,21 +18,23 @@
 use arrow_array::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder, UnionBuilder};
 use arrow_array::cast::AsArray;
 use arrow_array::types::{
-    ArrowDictionaryKeyType, Decimal128Type, Decimal256Type, Int16Type, Int32Type, Int64Type,
-    Int8Type, TimestampMicrosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    ArrowDictionaryKeyType, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, Int8Type,
+    Int16Type, Int32Type, Int64Type, TimestampMicrosecondType, UInt8Type, UInt16Type, UInt32Type,
+    UInt64Type,
 };
 use arrow_array::{
     Array, ArrayRef, ArrowPrimitiveType, BinaryArray, BooleanArray, Date32Array, Date64Array,
-    Decimal128Array, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
-    DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array,
-    Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
-    IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
-    LargeStringArray, ListArray, NullArray, PrimitiveArray, StringArray, StructArray,
-    Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
-    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-    TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray,
+    Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, DurationMicrosecondArray,
+    DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray,
+    FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int16Array,
+    Int32Array, Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray,
+    IntervalYearMonthArray, LargeBinaryArray, LargeListArray, LargeStringArray, ListArray,
+    NullArray, PrimitiveArray, StringArray, StructArray, Time32MillisecondArray, Time32SecondArray,
+    Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
+    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt8Array,
+    UInt16Array, UInt32Array, UInt64Array, UnionArray,
 };
-use arrow_buffer::{i256, Buffer, IntervalDayTime, IntervalMonthDayNano};
+use arrow_buffer::{Buffer, IntervalDayTime, IntervalMonthDayNano, i256};
 use arrow_cast::pretty::pretty_format_columns;
 use arrow_cast::{can_cast_types, cast};
 use arrow_data::ArrayData;
@@ -162,13 +164,22 @@ fn test_can_cast_types() {
             // check for mismatch
             match (cast_result, reported_cast_ability) {
                 (Ok(_), false) => {
-                    panic!("Was able to cast array {:?} from {:?} to {:?} but can_cast_types reported false",
-                           array, array.data_type(), to_type)
+                    panic!(
+                        "Was able to cast array {:?} from {:?} to {:?} but can_cast_types reported false",
+                        array,
+                        array.data_type(),
+                        to_type
+                    )
                 }
                 (Err(e), true) => {
-                    panic!("Was not able to cast array {:?} from {:?} to {:?} but can_cast_types reported true. \
+                    panic!(
+                        "Was not able to cast array {:?} from {:?} to {:?} but can_cast_types reported true. \
                                 Error was {:?}",
-                           array, array.data_type(), to_type, e)
+                        array,
+                        array.data_type(),
+                        to_type,
+                        e
+                    )
                 }
                 // otherwise it was a match
                 _ => {}
@@ -261,7 +272,37 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
         Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
         Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
         Arc::new(DurationNanosecondArray::from(vec![1000, 2000])),
+        Arc::new(create_decimal32_array(vec![Some(1), Some(2), Some(3)], 9, 0).unwrap()),
+        Arc::new(create_decimal64_array(vec![Some(1), Some(2), Some(3)], 18, 0).unwrap()),
         Arc::new(create_decimal128_array(vec![Some(1), Some(2), Some(3)], 38, 0).unwrap()),
+        Arc::new(
+            create_decimal256_array(
+                vec![
+                    Some(i256::from_i128(1)),
+                    Some(i256::from_i128(2)),
+                    Some(i256::from_i128(3)),
+                ],
+                40,
+                0,
+            )
+            .unwrap(),
+        ),
+        make_dictionary_primitive::<Int8Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int16Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int32Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int64Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt8Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt16Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt32Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt64Type, Decimal32Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int8Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int16Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int32Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<Int64Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt8Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt16Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt32Type, Decimal64Type>(vec![1, 2]),
+        make_dictionary_primitive::<UInt64Type, Decimal64Type>(vec![1, 2]),
         make_dictionary_primitive::<Int8Type, Decimal128Type>(vec![1, 2]),
         make_dictionary_primitive::<Int16Type, Decimal128Type>(vec![1, 2]),
         make_dictionary_primitive::<Int32Type, Decimal128Type>(vec![1, 2]),
@@ -411,6 +452,28 @@ fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
     Arc::new(b.finish())
 }
 
+fn create_decimal32_array(
+    array: Vec<Option<i32>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal32Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal32Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
+fn create_decimal64_array(
+    array: Vec<Option<i64>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal64Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal64Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
 fn create_decimal128_array(
     array: Vec<Option<i128>>,
     precision: u8,
@@ -422,6 +485,17 @@ fn create_decimal128_array(
         .with_precision_and_scale(precision, scale)
 }
 
+fn create_decimal256_array(
+    array: Vec<Option<i256>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal256Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal256Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
 // Get a selection of datatypes to try and cast to
 fn get_all_types() -> Vec<DataType> {
     use DataType::*;
@@ -478,13 +552,14 @@ fn get_all_types() -> Vec<DataType> {
             Field::new("f2", DataType::Utf8, true),
         ])),
         Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("f1", DataType::Int32, false),
                     Field::new("f2", DataType::Utf8, true),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         ),
         Decimal128(38, 0),
@@ -501,6 +576,8 @@ fn get_all_types() -> Vec<DataType> {
                 Dictionary(Box::new(key_type.clone()), Box::new(LargeUtf8)),
                 Dictionary(Box::new(key_type.clone()), Box::new(Binary)),
                 Dictionary(Box::new(key_type.clone()), Box::new(LargeBinary)),
+                Dictionary(Box::new(key_type.clone()), Box::new(Decimal32(9, 0))),
+                Dictionary(Box::new(key_type.clone()), Box::new(Decimal64(18, 0))),
                 Dictionary(Box::new(key_type.clone()), Box::new(Decimal128(38, 0))),
                 Dictionary(Box::new(key_type), Box::new(Decimal256(76, 0))),
             ]
diff --git a/arrow/tests/array_equal.rs b/arrow/tests/array_equal.rs
index 94fb85030bf3..381054a25df5 100644
--- a/arrow/tests/array_equal.rs
+++ b/arrow/tests/array_equal.rs
@@ -16,17 +16,23 @@
 // under the License.
 
 use arrow::array::{
-    make_array, Array, ArrayRef, BooleanArray, Decimal128Array, FixedSizeBinaryArray,
-    FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, GenericStringArray,
-    Int32Array, Int32Builder, Int64Builder, ListArray, ListBuilder, NullArray, OffsetSizeTrait,
-    StringArray, StringDictionaryBuilder, StructArray, UnionBuilder,
+    Array, ArrayRef, BooleanArray, Decimal128Array, FixedSizeBinaryArray, FixedSizeBinaryBuilder,
+    FixedSizeListBuilder, GenericBinaryArray, GenericStringArray, Int32Array, Int32Builder,
+    Int64Builder, ListArray, ListBuilder, NullArray, OffsetSizeTrait, StringArray,
+    StringDictionaryBuilder, StructArray, UnionBuilder, make_array,
 };
 use arrow::datatypes::{Int16Type, Int32Type};
-use arrow_array::builder::{StringBuilder, StringViewBuilder, StructBuilder};
-use arrow_array::{DictionaryArray, FixedSizeListArray, StringViewArray};
+use arrow_array::builder::{
+    GenericListViewBuilder, StringBuilder, StringViewBuilder, StructBuilder,
+};
+use arrow_array::cast::AsArray;
+use arrow_array::{
+    DictionaryArray, FixedSizeListArray, GenericListViewArray, PrimitiveArray, StringViewArray,
+};
 use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{DataType, Field, Fields};
+use arrow_select::take::take;
 use std::sync::Arc;
 
 #[test]
@@ -756,6 +762,125 @@ fn test_fixed_list_offsets() {
     test_equal(&a_slice, &b_slice, true);
 }
 
+fn create_list_view_array<
+    O: OffsetSizeTrait,
+    U: IntoIterator<Item = Option<i32>>,
+    T: IntoIterator<Item = Option<U>>,
+>(
+    data: T,
+) -> GenericListViewArray<O> {
+    let mut builder = GenericListViewBuilder::<O, _>::new(Int32Builder::new());
+    for d in data {
+        if let Some(v) = d {
+            builder.append_value(v);
+        } else {
+            builder.append_null();
+        }
+    }
+
+    builder.finish()
+}
+
+fn test_test_list_view_array<T: OffsetSizeTrait>() {
+    let a = create_list_view_array::<T, _, _>([
+        None,
+        Some(vec![Some(1), None, Some(2)]),
+        Some(vec![Some(3), Some(4), Some(5), None]),
+    ]);
+    let b = create_list_view_array::<T, _, _>([
+        None,
+        Some(vec![Some(1), None, Some(2)]),
+        Some(vec![Some(3), Some(4), Some(5), None]),
+    ]);
+
+    test_equal(&a, &b, true);
+
+    // Non-matching: b drops the leading null and holds the two lists in reverse order
+    let b = create_list_view_array::<T, _, _>([
+        Some(vec![Some(3), Some(4), Some(5), None]),
+        Some(vec![Some(1), None, Some(2)]),
+    ]);
+    test_equal(&a, &b, false);
+
+    // take with indices [null, 1, 0] restores the null and the original order
+    let indices: PrimitiveArray<Int32Type> = vec![None, Some(1), Some(0)].into();
+    let b = take(&b, &indices, None)
+        .unwrap()
+        .as_list_view::<T>()
+        .clone();
+
+    test_equal(&a, &b, true);
+
+    // Slicing one side yields unequal again
+    let a = a.slice(1, 2);
+
+    test_equal(&a, &b, false);
+
+    // Slicing the other to match makes them equal again
+    let b = b.slice(1, 2);
+
+    test_equal(&a, &b, true);
+}
+
+// Special test for List<ListView<i32>>.
+// This tests the equal_ranges kernel
+fn test_sliced_list_of_list_view<T: OffsetSizeTrait>() {
+    // First list view is created using the builder, with elements not deduplicated.
+    let mut a = ListBuilder::new(GenericListViewBuilder::<T, _>::new(Int32Builder::new()));
+
+    a.append_value([Some(vec![Some(1), Some(2), Some(3)]), Some(vec![])]);
+    a.append_null();
+    a.append_value([
+        Some(vec![Some(1), Some(2), Some(3)]),
+        None,
+        Some(vec![Some(6)]),
+    ]);
+
+    let a = a.finish();
+    // a = [[[1,2,3], []], null, [[1,2,3], null, [6]]]
+
+    // Second list view is built the same way, again with elements not deduplicated.
+    let mut b = ListBuilder::new(GenericListViewBuilder::<T, _>::new(Int32Builder::new()));
+
+    // Add an extra row that we will slice off, adjust the List offsets
+    b.append_value([Some(vec![Some(0), Some(0), Some(0)])]);
+    b.append_value([Some(vec![Some(1), Some(2), Some(3)]), Some(vec![])]);
+    b.append_null();
+    b.append_value([
+        Some(vec![Some(1), Some(2), Some(3)]),
+        None,
+        Some(vec![Some(6)]),
+    ]);
+
+    let b = b.finish();
+    // b = [[[0, 0, 0]], [[1,2,3], []], null, [[1,2,3], null, [6]]]
+    let b = b.slice(1, 3);
+    // b = [[[1,2,3], []], null, [[1,2,3], null, [6]]] but the outer ListArray
+    // has an offset
+
+    test_equal(&a, &b, true);
+}
+
+#[test]
+fn test_list_view_array() {
+    test_test_list_view_array::<i32>();
+}
+
+#[test]
+fn test_large_list_view_array() {
+    test_test_list_view_array::<i64>();
+}
+
+#[test]
+fn test_nested_list_view_array() {
+    test_sliced_list_of_list_view::<i32>();
+}
+
+#[test]
+fn test_nested_large_list_view_array() {
+    test_sliced_list_of_list_view::<i64>();
+}
+
 #[test]
 fn test_struct_equal() {
     let strings: ArrayRef = Arc::new(StringArray::from(vec![
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index c6de9f4a3417..511dc1e8bfcd 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -19,13 +19,13 @@ use arrow::array::{
     Array, ArrayRef, BooleanArray, Decimal128Array, DictionaryArray, FixedSizeBinaryArray,
     FixedSizeListBuilder, Int16Array, Int32Array, Int64Array, Int64Builder, ListArray, ListBuilder,
     MapBuilder, NullArray, StringArray, StringBuilder, StringDictionaryBuilder, StructArray,
-    UInt16Array, UInt16Builder, UInt8Array, UnionArray,
+    UInt8Array, UInt16Array, UInt16Builder, UnionArray,
 };
 use arrow::datatypes::Int16Type;
 use arrow_array::StringViewArray;
 use arrow_buffer::{Buffer, ScalarBuffer};
-use arrow_data::transform::MutableArrayData;
 use arrow_data::ArrayData;
+use arrow_data::transform::MutableArrayData;
 use arrow_schema::{DataType, Field, Fields, UnionFields};
 use std::sync::Arc;
 
diff --git a/arrow/tests/array_validation.rs b/arrow/tests/array_validation.rs
index 62cda6b8ec79..62e7241f5e48 100644
--- a/arrow/tests/array_validation.rs
+++ b/arrow/tests/array_validation.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use arrow::array::{
-    make_array, Array, BooleanBuilder, Decimal128Builder, Int32Array, Int32Builder, Int64Array,
-    StringArray, StructBuilder, UInt64Array,
+    Array, BooleanBuilder, Decimal128Builder, Int32Array, Int32Builder, Int64Array, StringArray,
+    StructBuilder, UInt64Array, make_array,
 };
 use arrow_array::Decimal128Array;
 use arrow_buffer::{ArrowNativeType, Buffer};
@@ -825,13 +825,14 @@ fn test_validate_union_different_types() {
 
     ArrayData::try_new(
         DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("field1", DataType::Int32, true),
                     Field::new("field2", DataType::Int64, true), // data is int32
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         ),
         2,
@@ -858,13 +859,14 @@ fn test_validate_union_sparse_different_child_len() {
 
     ArrayData::try_new(
         DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("field1", DataType::Int32, true),
                     Field::new("field2", DataType::Int64, true),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Sparse,
         ),
         2,
@@ -887,13 +889,14 @@ fn test_validate_union_dense_without_offsets() {
 
     ArrayData::try_new(
         DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("field1", DataType::Int32, true),
                     Field::new("field2", DataType::Int64, true),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         ),
         2,
@@ -917,13 +920,14 @@ fn test_validate_union_dense_with_bad_len() {
 
     ArrayData::try_new(
         DataType::Union(
-            UnionFields::new(
+            UnionFields::try_new(
                 vec![0, 1],
                 vec![
                     Field::new("field1", DataType::Int32, true),
                     Field::new("field2", DataType::Int64, true),
                 ],
-            ),
+            )
+            .unwrap(),
             UnionMode::Dense,
         ),
         2,
@@ -1056,10 +1060,19 @@ fn test_string_data_from_foreign() {
 
 #[test]
 fn test_decimal_full_validation() {
+    let array = Decimal128Array::from(vec![123456_i128])
+        .with_precision_and_scale(5, 2)
+        .unwrap();
+    let error = array.validate_decimal_precision(5).unwrap_err();
+    assert_eq!(
+        "Invalid argument error: 1234.56 is too large to store in a Decimal128 of precision 5. Max is 999.99",
+        error.to_string()
+    );
+
     let array = Decimal128Array::from(vec![123456_i128]);
     let error = array.validate_decimal_precision(5).unwrap_err();
     assert_eq!(
-        "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999",
+        "Invalid argument error: Decimal precision 5 is less than scale 10",
         error.to_string()
     );
 }
@@ -1097,5 +1110,8 @@ fn test_sliced_array_child() {
     };
 
     let err = data.validate_values().unwrap_err();
-    assert_eq!(err.to_string(), "Invalid argument error: Offset invariant failure: offset at position 1 out of bounds: 3 > 2");
+    assert_eq!(
+        err.to_string(),
+        "Invalid argument error: Offset invariant failure: offset at position 1 out of bounds: 3 > 2"
+    );
 }
diff --git a/arrow/tests/shrink_to_fit.rs b/arrow/tests/shrink_to_fit.rs
index 5d7c2cf98bc9..1613fefeda98 100644
--- a/arrow/tests/shrink_to_fit.rs
+++ b/arrow/tests/shrink_to_fit.rs
@@ -50,7 +50,9 @@ fn test_shrink_to_fit_after_concat() {
         });
     let expected_len = num_concats * array_len;
     assert_eq!(bytes_used(concatenated.clone()), expected_len);
-    eprintln!("The concatenated array is {expected_len} B long. Amount of memory used by this thread: {bytes_allocated_by_this_thread} B");
+    eprintln!(
+        "The concatenated array is {expected_len} B long. Amount of memory used by this thread: {bytes_allocated_by_this_thread} B"
+    );
 
     assert!(
         expected_len <= bytes_allocated_by_this_thread,
@@ -91,8 +93,8 @@ fn bytes_used(array: ArrayRef) -> usize {
 use std::{
     alloc::Layout,
     sync::{
-        atomic::{AtomicUsize, Ordering::Relaxed},
         Arc,
+        atomic::{AtomicUsize, Ordering::Relaxed},
     },
 };
 
diff --git a/arrow/tests/timezone.rs b/arrow/tests/timezone.rs
index d0db1d76e422..7b5ec8fbecb0 100644
--- a/arrow/tests/timezone.rs
+++ b/arrow/tests/timezone.rs
@@ -58,7 +58,7 @@ fn test_parse_timezone_invalid() {
         ),
         (
             "2023-01-01 04:05:06.789 +07:30:00",
-            "Parser error: Invalid timezone \"+07:30:00\": failed to parse timezone"
+            "Parser error: Invalid timezone \"+07:30:00\": failed to parse timezone",
         ),
         (
             // Sunday, 12 March 2023, 02:00:00 clocks are turned forward 1 hour to
diff --git a/dev/release/README.md b/dev/release/README.md
index 5b521368ea44..c89968b3ee69 100644
--- a/dev/release/README.md
+++ b/dev/release/README.md
@@ -84,7 +84,7 @@ python dev/release/label_issues.py
 
 # review change log / edit issues and labels if needed, rerun, repeat as necessary
 # note you need to revert changes to CHANGELOG-old.md if you want to rerun the script
-CHANGELOG_GITHUB_TOKEN=<TOKEN> ./dev/release/update_change_log.sh
+./dev/release/update_change_log.sh
 
 # Commit the changes
 git commit -a -m 'Update changelog'
@@ -105,25 +105,25 @@ create a release candidate using the following steps. Note you need to
 be a committer to run these scripts as they upload to the apache `svn`
 distribution servers.
 
+### Pick a Release Candidate (RC) number
+
+Pick numbers in sequential order, with `1` for `rc1`, `2` for `rc2`, etc.
+
 ### Create git tag for the release:
 
 While the official release artifact is a signed tarball, we also tag the commit it was created for convenience and code archaeology.
 
 Use a string such as `43.0.0` as the `<version>`.
 
-Create and push the tag thusly:
+Create and push the tag as follows (for example, for version `4.1.0` and release candidate `rc2`, the tag is `4.1.0-rc2`):
 
 ```shell
 git fetch apache
-git tag <version> apache/main
+git tag <version>-<rc> apache/main
 # push tag to apache
-git push apache <version>
+git push apache <version>-<rc>
 ```
 
-### Pick an Release Candidate (RC) number
-
-Pick numbers in sequential order, with `1` for `rc1`, `2` for `rc2`, etc.
-
 ### Create, sign, and upload tarball
 
 Run `create-tarball.sh` with the `<version>` tag and `<rc>` and you found in previous steps.
@@ -191,9 +191,16 @@ If the release is not approved, fix whatever the problem is and try again with t
 
 ### If the release is approved,
 
-Move tarball to the release location in SVN, e.g. https://dist.apache.org/repos/dist/release/arrow/arrow-4.1.0/, using the `release-tarball.sh` script:
+Then, create a new release on GitHub using the tag `<version>` (e.g. `4.1.0`).
 
-Rust Arrow Crates:
+Push the release tag to GitHub:
+
+```shell
+git tag <version> <version>-<rc>
+git push apache <version>
+```
+
+Move tarball to the release location in SVN, e.g. https://dist.apache.org/repos/dist/release/arrow/arrow-rs-4.1.0/, using the `release-tarball.sh` script:
 
 ```shell
 ./dev/release/release-tarball.sh 4.1.0 2
@@ -237,17 +244,23 @@ Rust Arrow Crates:
 (cd arrow-data && cargo publish)
 (cd arrow-array && cargo publish)
 (cd arrow-select && cargo publish)
+(cd arrow-ord && cargo publish)
 (cd arrow-cast && cargo publish)
 (cd arrow-ipc && cargo publish)
 (cd arrow-csv && cargo publish)
 (cd arrow-json && cargo publish)
 (cd arrow-avro && cargo publish)
-(cd arrow-ord && cargo publish)
 (cd arrow-arith && cargo publish)
 (cd arrow-string && cargo publish)
 (cd arrow-row && cargo publish)
+(cd arrow-pyarrow && cargo publish)
 (cd arrow && cargo publish)
+(cd arrow-avro && cargo publish)
 (cd arrow-flight && cargo publish)
+(cd parquet-variant && cargo publish)
+(cd parquet-variant-json && cargo publish)
+(cd parquet-variant-compute && cargo publish)
+(cd parquet-geospatial && cargo publish)
 (cd parquet && cargo publish)
 (cd parquet_derive && cargo publish)
 (cd arrow-integration-test && cargo publish)
diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh
index 8b92509104c8..b75313b6f0d6 100755
--- a/dev/release/create-tarball.sh
+++ b/dev/release/create-tarball.sh
@@ -45,13 +45,14 @@ SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
 
 if [ "$#" -ne 2 ]; then
-    echo "Usage: $0 <tag> <rc>"
+    echo "Usage: $0 <version> <rc>"
     echo "ex. $0 4.1.0 2"
   exit
 fi
 
-tag=$1
+version=$1
 rc=$2
+tag="${version}-rc${rc}"
 
 
 # mac tar doesn't have --delete, so use gnutar
@@ -64,9 +65,12 @@ else
     tar=tar
 fi
 
-release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag})
+if ! git -C "${SOURCE_TOP_DIR}" rev-list --max-count=1 "${tag}"; then
+    echo "Cannot continue: unknown git tag: $tag"
+    exit 1  # abort here; otherwise the script goes on to archive a tag that does not exist
+fi
 
-release=apache-arrow-rs-${tag}
+release=apache-arrow-rs-${version}
 distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc}
 tarname=${release}.tar.gz
 tarball=${distdir}/${tarname}
@@ -75,22 +79,18 @@ url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}"
 echo "Attempting to create ${tarball} from tag ${tag}"
 
 
-if [ -z "$release_hash" ]; then
-    echo "Cannot continue: unknown git tag: $tag"
-fi
-
 echo "Draft email for dev@arrow.apache.org mailing list"
 echo ""
 echo "---------------------------------------------------------"
 cat <<MAIL
 To: dev@arrow.apache.org
-Subject: [VOTE][RUST] Release Apache Arrow Rust ${tag} RC${rc}
+Subject: [VOTE][RUST] Release Apache Arrow Rust ${version} RC${rc}
 
 Hi,
 
 I would like to propose a release of Apache Arrow Rust Implementation, version ${tag}.
 
-This release candidate is based on commit: ${release_hash} [1]
+This release candidate is based on commit: ${tag} [1]
 
 The proposed release tarball and signatures are hosted at [2].
 
@@ -106,22 +106,21 @@ The vote will be open for at least 72 hours.
 [ ] +0
 [ ] -1 Do not release this as Apache Arrow Rust ${version} because...
 
-[1]: https://github.com/apache/arrow-rs/tree/${release_hash}
+[1]: https://github.com/apache/arrow-rs/tree/${tag}
 [2]: ${url}
-[3]: https://github.com/apache/arrow-rs/blob/${release_hash}/CHANGELOG.md
-[4]: https://github.com/apache/arrow-rs/blob/main/dev/release/verify-release-candidate.sh
+[3]: https://github.com/apache/arrow-rs/blob/${tag}/CHANGELOG.md
+[4]: https://github.com/apache/arrow-rs/blob/master/dev/release/verify-release-candidate.sh
 MAIL
 echo "---------------------------------------------------------"
 
 
 
-# create <tarball> containing the files in git at $release_hash
-# the files in the tarball are prefixed with {tag} (e.g. 4.0.1)
-# use --delete to filter out `object_store` files
+# create <tarball> containing the files in git at $tag
+# the files in the tarball are prefixed with {release}
+# (e.g. apache-arrow-rs-4.0.1)
 mkdir -p ${distdir}
 (cd "${SOURCE_TOP_DIR}" && \
-     git archive ${release_hash} --prefix ${release}/ \
-         | $tar --delete ${release}/'object_store' \
+     git archive ${tag} --prefix ${release}/ \
          | gzip > ${tarball})
 
 echo "Running rat license checker on ${tarball}"
@@ -138,4 +137,4 @@ gpg --armor --output ${tarball}.asc --detach-sig ${tarball}
 echo "Uploading to apache dist/dev to ${url}"
 svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist
 svn add ${distdir}
-svn ci -m "Apache Arrow Rust ${tag} ${rc}" ${distdir}
+svn ci -m "Apache Arrow Rust ${version} ${rc}" ${distdir}
\ No newline at end of file
diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh
index b1ae6112a0b7..7f0195bbd7bb 100755
--- a/dev/release/update_change_log.sh
+++ b/dev/release/update_change_log.sh
@@ -29,45 +29,13 @@
 
 set -e
 
-SINCE_TAG="55.1.0"
-FUTURE_RELEASE="55.2.0"
+SINCE_TAG="57.1.0"
+FUTURE_RELEASE="57.2.0"
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
 
 OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG.md"
-OLD_OUTPUT_PATH="${SOURCE_TOP_DIR}/CHANGELOG-old.md"
-
-# remove license header so github-changelog-generator has a clean base to append
-sed -i.bak '1,21d' "${OUTPUT_PATH}"
-sed -i.bak '1,21d' "${OLD_OUTPUT_PATH}"
-# remove the github-changelog-generator footer from the old CHANGELOG.md
-LINE_COUNT=$(wc -l <"${OUTPUT_PATH}")
-sed -i.bak2 "$(( $LINE_COUNT-4+1 )),$ d" "${OUTPUT_PATH}"
-
-# Copy the previous CHANGELOG.md to CHANGELOG-old.md
-echo '<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Historical Changelog
-' | cat - "${OUTPUT_PATH}" "${OLD_OUTPUT_PATH}" > "${OLD_OUTPUT_PATH}".tmp
-mv "${OLD_OUTPUT_PATH}".tmp "${OLD_OUTPUT_PATH}"
 
 # use exclude-tags-regex to filter out tags used for object_store
 # crates and only only look at tags that DO NOT begin with `object_store_`
diff --git a/parquet-geospatial/Cargo.toml b/parquet-geospatial/Cargo.toml
new file mode 100644
index 000000000000..471b355dc6eb
--- /dev/null
+++ b/parquet-geospatial/Cargo.toml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "parquet-geospatial"
+version = { workspace = true }
+license = { workspace = true }
+description = "Apache Parquet Geometry and Geography implementation in Rust"
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+keywords = ["arrow", "parquet", "geometry", "geography"]
+readme = "README.md"
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+[dependencies]
+arrow-schema = { workspace = true }
+geo-traits = { version = "0.3" }
+serde = { version = "1.0", default-features = false, features = ["derive"]}
+serde_json = { version = "1.0", default-features = false, features = ["std"]}
+wkb = { version = "0.9.1" }
+
+[dev-dependencies]
+wkt = { version = "0.14" }
+
+[lib]
+name = "parquet_geospatial"
+bench = false
diff --git a/parquet-geospatial/README.md b/parquet-geospatial/README.md
new file mode 100644
index 000000000000..67bdc51ddfd0
--- /dev/null
+++ b/parquet-geospatial/README.md
@@ -0,0 +1,37 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Apache Parquet Geometry/Geography Rust Implementation Details
+
+[![crates.io](https://img.shields.io/crates/v/parquet-geospatial.svg)](https://crates.io/crates/parquet-geospatial)
+[![docs.rs](https://img.shields.io/docsrs/parquet-geospatial.svg)](https://docs.rs/parquet-geospatial/latest/parquet_geospatial/)
+
+This crate contains implementation details for the [Geometry and Geography Encoding] from
+[Apache Parquet], including utilities for calculating geospatial column statistics and writing
+tests. This software is developed as part of the [Apache Arrow] project.
+
+[Geometry and Geography Encoding]: https://github.com/apache/parquet-format/blob/master/Geospatial.md
+[Apache Parquet]: https://parquet.apache.org/
+[Apache Arrow]: https://arrow.apache.org/
+
+Please see the [API documentation](https://docs.rs/parquet-geospatial/latest) for more details.
+
+## License
+
+Licensed under the Apache License, Version 2.0: <http://www.apache.org/licenses/LICENSE-2.0>.
diff --git a/parquet-geospatial/src/bounding.rs b/parquet-geospatial/src/bounding.rs
new file mode 100644
index 000000000000..9726c16ba4b6
--- /dev/null
+++ b/parquet-geospatial/src/bounding.rs
@@ -0,0 +1,602 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::HashSet;
+
+use arrow_schema::ArrowError;
+use geo_traits::{
+    CoordTrait, Dimensions, GeometryCollectionTrait, GeometryTrait, GeometryType, LineStringTrait,
+    MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use wkb::reader::Wkb;
+
+use crate::interval::{Interval, IntervalTrait, WraparoundInterval};
+
+/// Geometry bounder
+///
+/// Utility to accumulate statistics for geometries as they are written.
+/// This bounder is designed to output statistics accumulated according
+/// to the Parquet specification such that the output can be written to
+/// Parquet statistics with minimal modification.
+///
+/// See the [IntervalTrait] for an in-depth discussion of wraparound bounding
+/// (which adds some complexity to this implementation).
+#[derive(Debug)]
+pub struct GeometryBounder {
+    /// Union of all contiguous x intervals to the left of the wraparound midpoint
+    x_left: Interval,
+    /// Union of all contiguous x intervals that intersect the wraparound midpoint
+    x_mid: Interval,
+    /// Union of all contiguous x intervals to the right of the wraparound midpoint
+    x_right: Interval,
+    /// Union of all y intervals
+    y: Interval,
+    /// Union of all z intervals
+    z: Interval,
+    /// Union of all m intervals
+    m: Interval,
+    /// Unique geometry type codes encountered by the bounder
+    ///
+    /// The integer codes are identical to the ISO WKB geometry type codes and
+    /// are documented as part of the Parquet specification:
+    /// <https://github.com/apache/parquet-format/blob/master/Geospatial.md#geospatial-types>
+    geometry_types: HashSet<i32>,
+    wraparound_hint: Interval,
+}
+
+impl GeometryBounder {
+    /// Create a new, empty bounder that represents empty input
+    pub fn empty() -> Self {
+        Self {
+            x_left: Interval::empty(),
+            x_mid: Interval::empty(),
+            x_right: Interval::empty(),
+            y: Interval::empty(),
+            z: Interval::empty(),
+            m: Interval::empty(),
+            geometry_types: HashSet::<i32>::default(),
+            wraparound_hint: Interval::empty(),
+        }
+    }
+
+    /// Set the hint to use for generation of potential wraparound xmin/xmax output
+    ///
+    /// Usually this value should be set to (-180, 180), as wraparound is primarily
+    /// targeted at lon/lat coordinate systems where collections of features with
+    /// components at the very far left and very far right of the coordinate system
+    /// are actually very close to each other.
+    ///
+    /// It is safe to set this value even when the actual coordinate system of the
+    /// input is unknown: if the input has coordinate values that are outside the
+    /// range of the wraparound hint, wraparound xmin/xmax values will not be
+    /// generated. If the input has coordinate values that are well inside of the
+    /// range of the wraparound hint, the wraparound xmin/xmax value will be
+    /// substantially wider than the non-wraparound version and will not be returned.
+    pub fn with_wraparound_hint(self, wraparound_hint: impl Into<Interval>) -> Self {
+        Self {
+            wraparound_hint: wraparound_hint.into(),
+            ..self
+        }
+    }
+
+    /// Calculate the final xmin and xmax for geometries encountered by this bounder
+    ///
+    /// The interval returned may wraparound if a hint was set and the input
+    /// encountered by this bounder was exclusively at the far left and far right
+    /// of the hint range; otherwise the regular (Cartesian) bounds are returned.
+    /// See [IntervalTrait] for an in-depth description of wraparound intervals.
+    pub fn x(&self) -> WraparoundInterval {
+        let out_all = Interval::empty()
+            .merge_interval(&self.x_left)
+            .merge_interval(&self.x_mid)
+            .merge_interval(&self.x_right);
+
+        // Wraparound only makes sense with input on both sides of the hint midpoint,
+        // nothing covering the midpoint, and bounds that fit inside the hint.
+        let two_sided = !self.x_left.is_empty() && !self.x_right.is_empty();
+        let in_hint = self.wraparound_hint.contains_interval(&out_all);
+        if !two_sided || !self.x_mid.is_empty() || !in_hint {
+            return out_all.into();
+        }
+
+        // Width covered inside the hint by the wrapped bounds; keep Cartesian if tighter.
+        let out_width = (self.x_left.hi() - self.wraparound_hint.lo())
+            + (self.wraparound_hint.hi() - self.x_right.lo());
+        if out_all.width() < out_width {
+            return out_all.into();
+        }
+
+        // Wraparound!
+        WraparoundInterval::new(self.x_right.lo(), self.x_left.hi())
+    }
+
+    /// Calculate the final ymin and ymax for geometries encountered by this bounder
+    pub fn y(&self) -> Interval {
+        self.y
+    }
+
+    /// Calculate the final zmin and zmax for geometries encountered by this bounder
+    pub fn z(&self) -> Interval {
+        self.z
+    }
+
+    /// Calculate the final mmin and mmax values for geometries encountered by this bounder
+    pub fn m(&self) -> Interval {
+        self.m
+    }
+
+    /// Collect the geometry type codes seen so far
+    ///
+    /// Produces a freshly allocated vector holding each distinct ISO WKB geometry
+    /// type code (geometry type plus dimension offset, e.g., 1001 for PointZ)
+    /// recorded by this bounder, in ascending order.
+    pub fn geometry_types(&self) -> Vec<i32> {
+        let mut codes: Vec<i32> = self.geometry_types.iter().copied().collect();
+        codes.sort_unstable();
+        codes
+    }
+
+    /// Update this bounder with one WKB-encoded geometry
+    ///
+    /// Parses and accumulates the bounds of one WKB-encoded geometry. This function
+    /// will error for invalid WKB input; however, clients may wish to ignore such
+    /// an error for the purposes of writing statistics.
+    pub fn update_wkb(&mut self, wkb: &[u8]) -> Result<(), ArrowError> {
+        let wkb = Wkb::try_new(wkb).map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+        self.update_geometry(&wkb)?;
+        Ok(())
+    }
+
+    fn update_geometry(&mut self, geom: &impl GeometryTrait<T = f64>) -> Result<(), ArrowError> {
+        let geometry_type = geometry_type(geom)?;
+        self.geometry_types.insert(geometry_type);
+
+        visit_intervals(geom, 'x', &mut |x| self.update_x(&x))?;
+        visit_intervals(geom, 'y', &mut |y| self.y.update_interval(&y))?;
+        visit_intervals(geom, 'z', &mut |z| self.z.update_interval(&z))?;
+        visit_intervals(geom, 'm', &mut |m| self.m.update_interval(&m))?;
+
+        Ok(())
+    }
+
+    fn update_x(&mut self, x: &Interval) {
+        // Route the interval to the accumulator for its side of the wraparound
+        // hint midpoint; anything touching or straddling the midpoint goes to
+        // x_mid (as does everything when the midpoint is NaN, e.g., an empty hint).
+        let mid = self.wraparound_hint.mid();
+        let target = if x.hi() < mid {
+            &mut self.x_left
+        } else if x.lo() > mid {
+            &mut self.x_right
+        } else {
+            &mut self.x_mid
+        };
+        target.update_interval(x);
+    }
+}
+
+/// Visit contiguous intervals for a given dimension within a [GeometryTrait]
+///
+/// Here, contiguous intervals refers to intervals that must not be separated
+/// by wraparound bounding. Point components of a geometry are visited as
+/// degenerate intervals of a single value; linestring or polygon ring components
+/// are visited as single intervals.
+fn visit_intervals(
+    geom: &impl GeometryTrait<T = f64>,
+    dimension: char,
+    func: &mut impl FnMut(Interval),
+) -> Result<(), ArrowError> {
+    let n = if let Some(n) = dimension_index(geom.dim(), dimension) {
+        n
+    } else {
+        return Ok(());
+    };
+
+    match geom.as_type() {
+        GeometryType::Point(pt) => {
+            if let Some(coord) = PointTrait::coord(pt) {
+                visit_point(coord, n, func);
+            }
+        }
+        GeometryType::LineString(ls) => {
+            visit_sequence(ls.coords(), n, func);
+        }
+        GeometryType::Polygon(pl) => {
+            if let Some(exterior) = pl.exterior() {
+                visit_sequence(exterior.coords(), n, func);
+            }
+
+            for interior in pl.interiors() {
+                visit_sequence(interior.coords(), n, func);
+            }
+        }
+        GeometryType::MultiPoint(multi_pt) => {
+            visit_collection(multi_pt.points(), dimension, func)?;
+        }
+        GeometryType::MultiLineString(multi_ls) => {
+            visit_collection(multi_ls.line_strings(), dimension, func)?;
+        }
+        GeometryType::MultiPolygon(multi_pl) => {
+            visit_collection(multi_pl.polygons(), dimension, func)?;
+        }
+        GeometryType::GeometryCollection(collection) => {
+            visit_collection(collection.geometries(), dimension, func)?;
+        }
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(
+                "GeometryType not supported for dimension bounds".to_string(),
+            ));
+        }
+    }
+
+    Ok(())
+}
+
+/// Visit a point
+///
+/// Points can be separated by wraparound bounding even if they occur within
+/// the same feature, so we visit them as individual degenerate intervals.
+fn visit_point(coord: impl CoordTrait<T = f64>, n: usize, func: &mut impl FnMut(Interval)) {
+    let val = unsafe { coord.nth_unchecked(n) };
+    func((val, val).into());
+}
+
+/// Visit contiguous sequences
+///
+/// Sequences (e.g., linestrings or polygon rings) must always be considered
+/// together (i.e., are never separated by wraparound bounding).
+fn visit_sequence(
+    coords: impl IntoIterator<Item = impl CoordTrait<T = f64>>,
+    n: usize,
+    func: &mut impl FnMut(Interval),
+) {
+    let mut interval = Interval::empty();
+    for coord in coords {
+        interval.update_value(unsafe { coord.nth_unchecked(n) });
+    }
+
+    func(interval);
+}
+
+/// Visit intervals in a collection of geometries
+fn visit_collection(
+    collection: impl IntoIterator<Item = impl GeometryTrait<T = f64>>,
+    target: char,
+    func: &mut impl FnMut(Interval),
+) -> Result<(), ArrowError> {
+    for geom in collection {
+        visit_intervals(&geom, target, func)?;
+    }
+
+    Ok(())
+}
+
+/// Extract the geometry type code encountered by the bounder
+///
+/// The integer code is an ISO WKB geometry type code, documented as part
+/// of the Parquet specification:
+/// <https://github.com/apache/parquet-format/blob/master/Geospatial.md#geospatial-types>
+///
+/// This can also be derived from bytes 2-5 (possibly endian-swapped according to byte 1)
+/// of the input WKB buffer but is slightly clearer recomputed.
+fn geometry_type(geom: &impl GeometryTrait<T = f64>) -> Result<i32, ArrowError> {
+    let dimension_type = match geom.dim() {
+        Dimensions::Xy => 0,
+        Dimensions::Xyz => 1000,
+        Dimensions::Xym => 2000,
+        Dimensions::Xyzm => 3000,
+        Dimensions::Unknown(_) => {
+            return Err(ArrowError::InvalidArgumentError(
+                "Unsupported dimensions".to_string(),
+            ));
+        }
+    };
+
+    let geometry_type = match geom.as_type() {
+        GeometryType::Point(_) => 1,
+        GeometryType::LineString(_) => 2,
+        GeometryType::Polygon(_) => 3,
+        GeometryType::MultiPoint(_) => 4,
+        GeometryType::MultiLineString(_) => 5,
+        GeometryType::MultiPolygon(_) => 6,
+        GeometryType::GeometryCollection(_) => 7,
+        _ => {
+            return Err(ArrowError::InvalidArgumentError(
+                "GeometryType not supported for dimension bounds".to_string(),
+            ));
+        }
+    };
+
+    Ok(dimension_type + geometry_type)
+}
+
+/// Map a dimension name to its ordinate index for the given coordinate layout
+///
+/// `x` and `y` always map to ordinates 0 and 1 regardless of the layout. `z` maps
+/// to ordinate 2 for XYZ and XYZM; `m` maps to ordinate 2 for XYM and 3 for XYZM.
+/// Any other combination of layout and target yields `None`.
+fn dimension_index(dim: Dimensions, target: char) -> Option<usize> {
+    match (target, dim) {
+        ('x', _) => Some(0),
+        ('y', _) => Some(1),
+        ('z', Dimensions::Xyz | Dimensions::Xyzm) => Some(2),
+        ('m', Dimensions::Xym) => Some(2),
+        ('m', Dimensions::Xyzm) => Some(3),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    use std::str::FromStr;
+
+    use wkt::Wkt;
+
+    use super::*;
+
+    fn wkt_bounds(
+        wkt_values: impl IntoIterator<Item = impl AsRef<str>>,
+    ) -> Result<GeometryBounder, ArrowError> {
+        wkt_bounds_with_wraparound(wkt_values, Interval::empty())
+    }
+
+    fn wkt_bounds_with_wraparound(
+        wkt_values: impl IntoIterator<Item = impl AsRef<str>>,
+        wraparound: impl Into<Interval>,
+    ) -> Result<GeometryBounder, ArrowError> {
+        let mut bounder = GeometryBounder::empty().with_wraparound_hint(wraparound);
+        for wkt_value in wkt_values {
+            let wkt: Wkt = Wkt::from_str(wkt_value.as_ref())
+                .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
+            bounder.update_geometry(&wkt)?;
+        }
+        Ok(bounder)
+    }
+
+    #[test]
+    fn test_wkb() {
+        let wkt: Wkt = Wkt::from_str("LINESTRING (0 1, 2 3)").unwrap();
+        let mut wkb = Vec::new();
+        wkb::writer::write_geometry(&mut wkb, &wkt, &Default::default()).unwrap();
+
+        let mut bounds = GeometryBounder::empty();
+        bounds.update_wkb(&wkb).unwrap();
+
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+    }
+
+    #[test]
+    fn test_geometry_types() {
+        let empties = [
+            "POINT EMPTY",
+            "LINESTRING EMPTY",
+            "POLYGON EMPTY",
+            "MULTIPOINT EMPTY",
+            "MULTILINESTRING EMPTY",
+            "MULTIPOLYGON EMPTY",
+            "GEOMETRYCOLLECTION EMPTY",
+        ];
+
+        assert_eq!(
+            wkt_bounds(empties).unwrap().geometry_types(),
+            vec![1, 2, 3, 4, 5, 6, 7]
+        );
+
+        let empties_z = [
+            "POINT Z EMPTY",
+            "LINESTRING Z EMPTY",
+            "POLYGON Z EMPTY",
+            "MULTIPOINT Z EMPTY",
+            "MULTILINESTRING Z EMPTY",
+            "MULTIPOLYGON Z EMPTY",
+            "GEOMETRYCOLLECTION Z EMPTY",
+        ];
+
+        assert_eq!(
+            wkt_bounds(empties_z).unwrap().geometry_types(),
+            vec![1001, 1002, 1003, 1004, 1005, 1006, 1007]
+        );
+
+        let empties_m = [
+            "POINT M EMPTY",
+            "LINESTRING M EMPTY",
+            "POLYGON M EMPTY",
+            "MULTIPOINT M EMPTY",
+            "MULTILINESTRING M EMPTY",
+            "MULTIPOLYGON M EMPTY",
+            "GEOMETRYCOLLECTION M EMPTY",
+        ];
+
+        assert_eq!(
+            wkt_bounds(empties_m).unwrap().geometry_types(),
+            vec![2001, 2002, 2003, 2004, 2005, 2006, 2007]
+        );
+
+        let empties_zm = [
+            "POINT ZM EMPTY",
+            "LINESTRING ZM EMPTY",
+            "POLYGON ZM EMPTY",
+            "MULTIPOINT ZM EMPTY",
+            "MULTILINESTRING ZM EMPTY",
+            "MULTIPOLYGON ZM EMPTY",
+            "GEOMETRYCOLLECTION ZM EMPTY",
+        ];
+
+        assert_eq!(
+            wkt_bounds(empties_zm).unwrap().geometry_types(),
+            vec![3001, 3002, 3003, 3004, 3005, 3006, 3007]
+        );
+    }
+
+    #[test]
+    fn test_bounds_empty() {
+        let empties = [
+            "POINT EMPTY",
+            "LINESTRING EMPTY",
+            "POLYGON EMPTY",
+            "MULTIPOINT EMPTY",
+            "MULTILINESTRING EMPTY",
+            "MULTIPOLYGON EMPTY",
+            "GEOMETRYCOLLECTION EMPTY",
+        ];
+
+        let bounds = wkt_bounds(empties).unwrap();
+        assert!(bounds.x().is_empty());
+        assert!(bounds.y().is_empty());
+        assert!(bounds.z().is_empty());
+        assert!(bounds.m().is_empty());
+
+        // With wraparound, still empty
+        let bounds = wkt_bounds_with_wraparound(empties, (-180, 180)).unwrap();
+        assert!(bounds.x().is_empty());
+        assert!(bounds.y().is_empty());
+        assert!(bounds.z().is_empty());
+        assert!(bounds.m().is_empty());
+    }
+
+    #[test]
+    fn test_bounds_coord() {
+        let bounds = wkt_bounds(["POINT (0 1)", "POINT (2 3)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+        assert!(bounds.z().is_empty());
+        assert!(bounds.m().is_empty());
+
+        let bounds = wkt_bounds(["POINT Z (0 1 2)", "POINT Z (3 4 5)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 3).into());
+        assert_eq!(bounds.y(), (1, 4).into());
+        assert_eq!(bounds.z(), (2, 5).into());
+        assert!(bounds.m().is_empty());
+
+        let bounds = wkt_bounds(["POINT M (0 1 2)", "POINT M (3 4 5)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 3).into());
+        assert_eq!(bounds.y(), (1, 4).into());
+        assert!(bounds.z().is_empty());
+        assert_eq!(bounds.m(), (2, 5).into());
+
+        let bounds = wkt_bounds(["POINT ZM (0 1 2 3)", "POINT ZM (4 5 6 7)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 4).into());
+        assert_eq!(bounds.y(), (1, 5).into());
+        assert_eq!(bounds.z(), (2, 6).into());
+        assert_eq!(bounds.m(), (3, 7).into());
+    }
+
+    #[test]
+    fn test_bounds_sequence() {
+        let bounds = wkt_bounds(["LINESTRING (0 1, 2 3)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+        assert!(bounds.z().is_empty());
+        assert!(bounds.m().is_empty());
+
+        let bounds = wkt_bounds(["LINESTRING Z (0 1 2, 3 4 5)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 3).into());
+        assert_eq!(bounds.y(), (1, 4).into());
+        assert_eq!(bounds.z(), (2, 5).into());
+        assert!(bounds.m().is_empty());
+
+        let bounds = wkt_bounds(["LINESTRING M (0 1 2, 3 4 5)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 3).into());
+        assert_eq!(bounds.y(), (1, 4).into());
+        assert!(bounds.z().is_empty());
+        assert_eq!(bounds.m(), (2, 5).into());
+
+        let bounds = wkt_bounds(["LINESTRING ZM (0 1 2 3, 4 5 6 7)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 4).into());
+        assert_eq!(bounds.y(), (1, 5).into());
+        assert_eq!(bounds.z(), (2, 6).into());
+        assert_eq!(bounds.m(), (3, 7).into());
+    }
+
+    #[test]
+    fn test_bounds_geometry_type() {
+        let bounds = wkt_bounds(["POINT (0 1)", "POINT (2 3)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+
+        let bounds = wkt_bounds(["LINESTRING (0 1, 2 3)"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+
+        // Normally interiors are supposed to be inside the exterior; however, we
+        // include a poorly formed polygon just to make sure they are considered
+        let bounds =
+            wkt_bounds(["POLYGON ((0 0, 0 1, 1 0, 0 0), (10 10, 10 11, 11 10, 10 10))"]).unwrap();
+        assert_eq!(bounds.x(), (0, 11).into());
+        assert_eq!(bounds.y(), (0, 11).into());
+
+        let bounds = wkt_bounds(["MULTIPOINT ((0 1), (2 3))"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+
+        let bounds = wkt_bounds(["MULTILINESTRING ((0 1, 2 3))"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+
+        let bounds = wkt_bounds(["MULTIPOLYGON (((0 0, 0 1, 1 0, 0 0)))"]).unwrap();
+        assert_eq!(bounds.x(), (0, 1).into());
+        assert_eq!(bounds.y(), (0, 1).into());
+
+        let bounds = wkt_bounds(["GEOMETRYCOLLECTION (POINT (0 1), POINT (2 3))"]).unwrap();
+        assert_eq!(bounds.x(), (0, 2).into());
+        assert_eq!(bounds.y(), (1, 3).into());
+    }
+
+    #[test]
+    fn test_bounds_wrap_basic() {
+        let geoms = ["POINT (-170 0)", "POINT (170 0)"];
+
+        // No wraparound because it was disabled
+        let bounds = wkt_bounds_with_wraparound(geoms, Interval::empty()).unwrap();
+        assert_eq!(bounds.x(), (-170, 170).into());
+
+        // Wraparound that can't happen because something is covering
+        // the midpoint.
+        let mut geoms_with_mid = geoms.to_vec();
+        geoms_with_mid.push("LINESTRING (-10 0, 10 0)");
+        let bounds = wkt_bounds_with_wraparound(geoms_with_mid, (-180, 180)).unwrap();
+        assert_eq!(bounds.x(), (-170, 170).into());
+
+        // Wraparound where the wrapped box is *not* better
+        let bounds = wkt_bounds_with_wraparound(geoms, (-1000, 1000)).unwrap();
+        assert_eq!(bounds.x(), (-170, 170).into());
+
+        // Wraparound where the wrapped box is inappropriate because it is
+        // outside the wrap hint
+        let bounds = wkt_bounds_with_wraparound(geoms, (-10, 10)).unwrap();
+        assert_eq!(bounds.x(), (-170, 170).into());
+
+        // Wraparound where the wrapped box *is* better
+        let bounds = wkt_bounds_with_wraparound(geoms, (-180, 180)).unwrap();
+        assert_eq!(bounds.x(), (170, -170).into());
+    }
+
+    #[test]
+    fn test_bounds_wrap_multipart() {
+        let fiji = "MULTIPOLYGON (
+        ((-180 -15.51, -180 -19.78, -178.61 -21.14, -178.02 -18.22, -178.57 -16.04, -180 -15.51)),
+        ((180 -15.51, 177.98 -16.25, 176.67 -17.14, 177.83 -19.31, 180 -19.78, 180 -15.51))
+        )";
+
+        let bounds = wkt_bounds_with_wraparound([fiji], (-180, 180)).unwrap();
+        assert!(bounds.x().is_wraparound());
+        assert_eq!(bounds.x(), (176.67, -178.02).into());
+        assert_eq!(bounds.y(), (-21.14, -15.51).into());
+    }
+}
diff --git a/parquet-geospatial/src/interval.rs b/parquet-geospatial/src/interval.rs
new file mode 100644
index 000000000000..72abcc86c127
--- /dev/null
+++ b/parquet-geospatial/src/interval.rs
@@ -0,0 +1,1081 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_schema::ArrowError;
+
+/// Generic 1D intervals with wraparound support
+///
+/// This trait specifies common behaviour implemented by the [Interval] and
+/// [WraparoundInterval].
+///
+/// Briefly, "wraparound" support was included in the Parquet specification
+/// to ensure that geometries or geographies with components on both sides of
+/// the antimeridian (180 degrees longitude) can be reasonably summarized. This
+/// concept was borrowed from the widely used GeoJSON and matches identical
+/// bounding box specifications for GeoParquet and STAC, among others.
+///
+/// Because the Parquet specification also states that longitude values
+/// are always stored as `x` values (i.e., the first coordinate component),
+/// this contingency is only available for the `xmin`/`xmax` component of the
+/// GeoStatistics. Thus, the `xmin`/`xmax` pair may either represent a regular
+/// interval (specified by xmin = 10 and xmax = 20):
+///
+/// ```text
+///           10         20
+///            |==========|
+/// ```
+///
+/// ...or a "wraparound" interval (specified by xmin = 20 and xmax = 10). This
+/// interval is the union of the two regular intervals (-Inf, 10] and [20, Inf).
+/// Infinity was chosen rather than any particular value to ensure that Parquet
+/// implementations did not have to consider the value of the coordinate
+/// reference system when comparing intervals.
+///
+/// ```text
+///           10         20
+/// <==========|          |============>
+/// ```
+///
+/// In general, one should use [Interval] unless specifically working with
+/// wraparound, as the contingency of wraparound incurs overhead (particularly
+/// in a loop). This trait is mostly used to simplify testing and unify
+/// documentation for the two concrete implementations.
+pub trait IntervalTrait: std::fmt::Debug + PartialEq {
+    /// Create an interval from lo and hi values
+    fn new(lo: f64, hi: f64) -> Self;
+
+    /// Create an empty interval that intersects nothing (except the full interval)
+    fn empty() -> Self;
+
+    /// Create the full interval (that intersects everything, including the empty interval)
+    fn full() -> Self;
+
+    /// Lower bound
+    ///
+    /// If `is_wraparound()` returns false, this is also the minimum value. When empty,
+    /// this value is Infinity; when full, this value is -Infinity.
+    fn lo(&self) -> f64;
+
+    /// Upper bound
+    ///
+    /// If `is_wraparound()` returns false, this is also the maximum value. When empty,
+    /// this value is -Infinity; when full, this value is Infinity.
+    fn hi(&self) -> f64;
+
+    /// Check for wraparound
+    ///
+    /// If `is_wraparound()` returns false, this interval represents the values that are
+    /// between lo and hi. If `is_wraparound()` returns true, this interval represents
+    /// the values that are *not* between lo and hi.
+    ///
+    /// It is recommended to work directly with an [Interval] where this is guaranteed to
+    /// return false unless wraparound support is specifically required.
+    fn is_wraparound(&self) -> bool;
+
+    /// Check for potential intersection with a value
+    ///
+    /// Note that intervals always contain their endpoints (for both the wraparound and
+    /// non-wraparound case).
+    fn intersects_value(&self, value: f64) -> bool;
+
+    /// Check for potential intersection with an interval
+    ///
+    /// Note that intervals always contain their endpoints (for both the wraparound and
+    /// non-wraparound case).
+    ///
+    /// This method accepts Self for performance reasons to prevent unnecessary checking of
+    /// `is_wraparound()` when not required for an implementation.
+    fn intersects_interval(&self, other: &Self) -> bool;
+
+    /// Check for potential containment of an interval
+    ///
+    /// Note that intervals always contain their endpoints (for both the wraparound and
+    /// non-wraparound case).
+    ///
+    /// This method accepts Self for performance reasons to prevent unnecessary checking of
+    /// `is_wraparound()` when not required for an implementation.
+    fn contains_interval(&self, other: &Self) -> bool;
+
+    /// The width of the interval
+    ///
+    /// For the non-wraparound case, this is the distance between lo and hi. For the wraparound
+    /// case, this is infinity.
+    fn width(&self) -> f64;
+
+    /// The midpoint of the interval
+    ///
+    /// For the non-wraparound case, this is the point exactly between lo and hi. For the wraparound
+    /// case, this is arbitrarily chosen as infinity (to preserve the property that intervals intersect
+    /// their midpoint).
+    fn mid(&self) -> f64;
+
+    /// True if this interval is empty (i.e. intersects no values)
+    fn is_empty(&self) -> bool;
+
+    /// Compute a new interval that is the union of both
+    ///
+    /// When accumulating intervals in a loop, use [Interval::update_interval].
+    fn merge_interval(&self, other: &Self) -> Self;
+
+    /// Compute a new interval that is the union of this interval and a single value
+    ///
+    /// When accumulating intervals in a loop, use [Interval::update_value].
+    fn merge_value(&self, other: f64) -> Self;
+
+    /// Expand this interval by a given distance
+    ///
+    /// Returns a new interval where both endpoints are moved outward by the given distance.
+    /// For regular intervals, this expands both lo and hi by the distance.
+    /// For wraparound intervals, this may result in the full interval if expansion is large enough.
+    fn expand_by(&self, distance: f64) -> Self;
+}
+
+/// 1D Interval that never wraps around
+///
+/// Represents a minimum and maximum value without wraparound logic (see [WraparoundInterval]
+/// for a wraparound implementation).
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct Interval {
+    /// Lower bound
+    lo: f64,
+
+    /// Upper bound
+    hi: f64,
+}
+
+impl Interval {
+    /// Expand this interval to the union of self and other in place
+    ///
+    /// Note that NaN values are ignored when updating bounds.
+    pub fn update_interval(&mut self, other: &Self) {
+        self.lo = self.lo.min(other.lo);
+        self.hi = self.hi.max(other.hi);
+    }
+
+    /// Expand this interval in place so that it also includes the value `other`
+    ///
+    /// Note that NaN values are ignored when updating bounds.
+    pub fn update_value(&mut self, other: f64) {
+        self.lo = self.lo.min(other);
+        self.hi = self.hi.max(other);
+    }
+}
+
+impl From<(f64, f64)> for Interval {
+    fn from(value: (f64, f64)) -> Self {
+        Interval::new(value.0, value.1)
+    }
+}
+
+impl From<(i32, i32)> for Interval {
+    fn from(value: (i32, i32)) -> Self {
+        Interval::new(value.0 as f64, value.1 as f64)
+    }
+}
+
+impl TryFrom<WraparoundInterval> for Interval {
+    type Error = ArrowError;
+
+    fn try_from(value: WraparoundInterval) -> Result<Self, Self::Error> {
+        if value.is_wraparound() {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "Can't convert wraparound interval {value:?} to Interval"
+            )))
+        } else {
+            Ok(Interval::new(value.lo(), value.hi()))
+        }
+    }
+}
+
+impl IntervalTrait for Interval {
+    /// Creates an interval from its lower and upper bound as given.
+    fn new(lo: f64, hi: f64) -> Self {
+        Self { lo, hi }
+    }
+
+    /// The empty interval, encoded as `(+inf, -inf)`.
+    fn empty() -> Self {
+        Self {
+            lo: f64::INFINITY,
+            hi: f64::NEG_INFINITY,
+        }
+    }
+
+    /// The interval covering every value, encoded as `(-inf, +inf)`.
+    fn full() -> Self {
+        Self {
+            lo: f64::NEG_INFINITY,
+            hi: f64::INFINITY,
+        }
+    }
+
+    /// Lower bound.
+    fn lo(&self) -> f64 {
+        self.lo
+    }
+
+    /// Upper bound.
+    fn hi(&self) -> f64 {
+        self.hi
+    }
+
+    /// A plain [Interval] never wraps around.
+    fn is_wraparound(&self) -> bool {
+        false
+    }
+
+    /// True when `lo <= value <= hi`; always false for NaN.
+    fn intersects_value(&self, value: f64) -> bool {
+        (self.lo..=self.hi).contains(&value)
+    }
+
+    /// True when the two closed intervals share at least one value
+    /// (touching endpoints count).
+    fn intersects_interval(&self, other: &Self) -> bool {
+        let other_starts_before_self_ends = other.lo <= self.hi;
+        let self_starts_before_other_ends = self.lo <= other.hi;
+        self_starts_before_other_ends && other_starts_before_self_ends
+    }
+
+    /// True when both bounds of `other` lie within `self`.
+    fn contains_interval(&self, other: &Self) -> bool {
+        let lower_ok = other.lo >= self.lo;
+        let upper_ok = other.hi <= self.hi;
+        lower_ok && upper_ok
+    }
+
+    /// Distance between the bounds (`hi - lo`).
+    fn width(&self) -> f64 {
+        self.hi - self.lo
+    }
+
+    /// Point halfway between the bounds.
+    fn mid(&self) -> f64 {
+        let half_width = self.width() / 2.0;
+        self.lo + half_width
+    }
+
+    /// An interval is empty when its width is negative infinity, which is
+    /// the width of the `(+inf, -inf)` encoding used by `empty()`.
+    fn is_empty(&self) -> bool {
+        self.width() == f64::NEG_INFINITY
+    }
+
+    /// Returns a copy of `self` updated with `other` via `update_interval`.
+    fn merge_interval(&self, other: &Self) -> Self {
+        let mut merged = *self;
+        merged.update_interval(other);
+        merged
+    }
+
+    /// Returns a copy of `self` updated with `other` via `update_value`.
+    fn merge_value(&self, other: f64) -> Self {
+        let mut merged = *self;
+        merged.update_value(other);
+        merged
+    }
+
+    /// Widens both bounds by `distance`.
+    ///
+    /// Empty intervals and NaN or negative distances return `self` unchanged.
+    fn expand_by(&self, distance: f64) -> Self {
+        // `distance >= 0.0` is false for NaN as well as for negative values.
+        let is_valid_distance = distance >= 0.0;
+        if self.is_empty() || !is_valid_distance {
+            return *self;
+        }
+
+        let (lo, hi) = (self.lo - distance, self.hi + distance);
+        Self::new(lo, hi)
+    }
+}
+
+/// 1D Interval that may or may not wrap around
+///
+/// Concrete implementation that handles both the wraparound and regular
+/// interval case. This is separated from the [Interval] because the
+/// [Interval] is faster and most operations will use it directly (invoking
+/// this struct when it is specifically required).
+///
+/// A non-empty interval whose `lo` is greater than its `hi` wraps around: it
+/// covers everything from negative infinity up to `hi` and everything from
+/// `lo` up to positive infinity, i.e. all values except those strictly
+/// between `hi` and `lo`.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct WraparoundInterval {
+    // Raw `lo`/`hi` pair; `lo > hi` (with a non-empty interval) marks the
+    // wraparound case.
+    inner: Interval,
+}
+
+impl WraparoundInterval {
+    /// Decomposes this interval into exactly two regular intervals
+    ///
+    /// A wraparound interval yields `(-inf, hi)` and `(lo, +inf)`. Otherwise
+    /// the first element is the interval itself and the second is empty.
+    fn split(&self) -> (Interval, Interval) {
+        if !self.is_wraparound() {
+            return (self.inner, Interval::empty());
+        }
+
+        let Interval { lo, hi } = self.inner;
+        let below = Interval {
+            lo: -f64::INFINITY,
+            hi,
+        };
+        let above = Interval {
+            lo,
+            hi: f64::INFINITY,
+        };
+        (below, above)
+    }
+}
+
+impl From<(f64, f64)> for WraparoundInterval {
+    /// Builds a [WraparoundInterval] from a `(lo, hi)` pair of bounds.
+    fn from((lo, hi): (f64, f64)) -> Self {
+        WraparoundInterval::new(lo, hi)
+    }
+}
+
+impl From<(i32, i32)> for WraparoundInterval {
+    /// Builds a [WraparoundInterval] from a `(lo, hi)` pair of integer
+    /// bounds; every `i32` converts to `f64` without loss.
+    fn from((lo, hi): (i32, i32)) -> Self {
+        WraparoundInterval::new(f64::from(lo), f64::from(hi))
+    }
+}
+
+impl From<Interval> for WraparoundInterval {
+    /// Builds a [WraparoundInterval] with the same bounds as `value`.
+    fn from(value: Interval) -> Self {
+        let (lo, hi) = (value.lo(), value.hi());
+        WraparoundInterval::new(lo, hi)
+    }
+}
+
+impl IntervalTrait for WraparoundInterval {
+    /// Creates an interval from `lo` and `hi`.
+    ///
+    /// The result wraps around when `lo > hi` (unless the pair is the empty
+    /// interval encoding).
+    fn new(lo: f64, hi: f64) -> Self {
+        Self {
+            inner: Interval::new(lo, hi),
+        }
+    }
+
+    /// The empty interval, backed by [Interval::empty].
+    fn empty() -> Self {
+        Self {
+            inner: Interval::empty(),
+        }
+    }
+
+    /// The interval covering every value, backed by [Interval::full].
+    fn full() -> Self {
+        Self {
+            inner: Interval::full(),
+        }
+    }
+
+    /// The stored `lo` bound (greater than `hi` for a wraparound interval).
+    fn lo(&self) -> f64 {
+        self.inner.lo
+    }
+
+    /// The stored `hi` bound (less than `lo` for a wraparound interval).
+    fn hi(&self) -> f64 {
+        self.inner.hi
+    }
+
+    /// True when the interval is not empty and `hi - lo` is negative.
+    fn is_wraparound(&self) -> bool {
+        !self.is_empty() && self.inner.width() < 0.0
+    }
+
+    /// True when `value` lies in either half returned by `split()`.
+    fn intersects_value(&self, value: f64) -> bool {
+        let (left, right) = self.split();
+        left.intersects_value(value) || right.intersects_value(value)
+    }
+
+    /// True when any half of `self` intersects any half of `other`.
+    fn intersects_interval(&self, other: &Self) -> bool {
+        let (left, right) = self.split();
+        let (other_left, other_right) = other.split();
+        left.intersects_interval(&other_left)
+            || left.intersects_interval(&other_right)
+            || right.intersects_interval(&other_left)
+            || right.intersects_interval(&other_right)
+    }
+
+    /// True when the left half of `self` contains the left half of `other`
+    /// and the right half of `self` contains the right half of `other`.
+    ///
+    /// NOTE(review): halves are compared pairwise, and a non-wraparound
+    /// interval splits into `(itself, empty)`. A non-wraparound `other` is
+    /// therefore only checked against the left half of a wraparound `self`;
+    /// one lying entirely within the right half is reported as not contained.
+    /// The unit tests currently assert this behavior.
+    fn contains_interval(&self, other: &Self) -> bool {
+        let (left, right) = self.split();
+        let (other_left, other_right) = other.split();
+        left.contains_interval(&other_left) && right.contains_interval(&other_right)
+    }
+
+    /// Infinite for a wraparound interval, otherwise `hi - lo`.
+    fn width(&self) -> f64 {
+        if self.is_wraparound() {
+            f64::INFINITY
+        } else {
+            self.inner.width()
+        }
+    }
+
+    /// Infinite for a wraparound interval, otherwise the midpoint of the
+    /// inner interval.
+    fn mid(&self) -> f64 {
+        if self.is_wraparound() {
+            f64::INFINITY
+        } else {
+            self.inner.mid()
+        }
+    }
+
+    /// True when the inner interval is empty.
+    fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// Returns an interval covering both `self` and `other`.
+    ///
+    /// When exactly one operand wraps around, the regular interval is merged
+    /// into whichever half of the wraparound interval is closer to its
+    /// midpoint. If the resulting halves touch or overlap, the result is the
+    /// full interval.
+    fn merge_interval(&self, other: &Self) -> Self {
+        if self.is_empty() {
+            return *other;
+        }
+
+        if other.is_empty() {
+            return *self;
+        }
+
+        let (wraparound, not_wraparound) = match (self.is_wraparound(), other.is_wraparound()) {
+            // Handle wraparound/not wraparound below
+            (true, false) => (self, other),
+            (false, true) => (other, self),
+            // Both are wraparound: Merge the two left intervals, then merge the two right intervals
+            // and check if we need the full interval
+            (true, true) => {
+                let (left, right) = self.split();
+                let (other_left, other_right) = other.split();
+
+                let new_left = left.merge_interval(&other_left);
+                let new_right = right.merge_interval(&other_right);
+
+                // If the left and right intervals intersect each other, we need the full interval
+                if new_left.intersects_interval(&new_right) {
+                    return WraparoundInterval::full();
+                } else {
+                    return WraparoundInterval::new(new_right.lo(), new_left.hi());
+                }
+            }
+            // Neither are wraparound: just merge the inner intervals
+            (false, false) => {
+                return Self {
+                    inner: self.inner.merge_interval(&other.inner),
+                };
+            }
+        };
+
+        let (left, right) = wraparound.split();
+        let distance_not_wraparound_left = (not_wraparound.mid() - left.hi()).abs();
+        let distance_not_wraparound_right = (not_wraparound.mid() - right.lo()).abs();
+
+        // An interval that starts at negative infinity has a NaN midpoint
+        // (-inf + inf), so both distances are NaN and the comparison below is
+        // always false. That used to merge it into the right half and widen
+        // the result to the full interval. Such an interval already overlaps
+        // the left half, which also extends to negative infinity, so merge it
+        // there instead.
+        let merge_into_left = not_wraparound.lo() == f64::NEG_INFINITY
+            || distance_not_wraparound_left < distance_not_wraparound_right;
+        let (new_left, new_right) = if merge_into_left {
+            (left.merge_interval(&not_wraparound.inner), right)
+        } else {
+            (left, right.merge_interval(&not_wraparound.inner))
+        };
+
+        // If the left and right intervals intersect each other, we need the full interval
+        if new_left.intersects_interval(&new_right) {
+            WraparoundInterval::full()
+        } else {
+            WraparoundInterval::new(new_right.lo(), new_left.hi())
+        }
+    }
+
+    /// Returns an interval covering both `self` and `value`.
+    ///
+    /// NaN and already-covered values return `self` unchanged. For a
+    /// wraparound interval only the endpoint nearer to `value` is moved.
+    fn merge_value(&self, value: f64) -> Self {
+        if self.intersects_value(value) || value.is_nan() {
+            return *self;
+        }
+
+        if !self.is_wraparound() {
+            return Self {
+                inner: self.inner.merge_value(value),
+            };
+        }
+
+        // Move only one of the endpoints
+        // (value is strictly between hi and lo here, so both distances are positive)
+        let distance_left = value - self.inner.hi;
+        let distance_right = self.inner.lo - value;
+        debug_assert!(distance_left > 0.0);
+        debug_assert!(distance_right > 0.0);
+        if distance_left < distance_right {
+            Self {
+                inner: Interval {
+                    lo: self.inner.lo,
+                    hi: value,
+                },
+            }
+        } else {
+            Self {
+                inner: Interval {
+                    lo: value,
+                    hi: self.inner.hi,
+                },
+            }
+        }
+    }
+
+    /// Grows the covered region by `distance` on each side.
+    ///
+    /// Empty intervals and NaN or negative distances return `self`
+    /// unchanged. A wraparound interval whose excluded gap closes becomes
+    /// the full interval.
+    fn expand_by(&self, distance: f64) -> Self {
+        if self.is_empty() || distance.is_nan() || distance < 0.0 {
+            return *self;
+        }
+
+        if !self.is_wraparound() {
+            // For non-wraparound, just expand the inner interval
+            return Self {
+                inner: self.inner.expand_by(distance),
+            };
+        }
+
+        // For wraparound intervals, expanding means including more values
+        // Wraparound interval (a, b) where a > b excludes the region (b, a)
+        // To expand by distance d, we shrink the excluded region from (b, a) to (b+d, a-d)
+        // This means the new wraparound interval becomes (a-d, b+d)
+        let excluded_lo = self.inner.hi + distance; // b + d
+        let excluded_hi = self.inner.lo - distance; // a - d
+
+        // If the excluded region disappears (excluded_lo >= excluded_hi), we get the full interval
+        if excluded_lo >= excluded_hi {
+            return Self::full();
+        }
+
+        // The new wraparound interval excludes (excluded_lo, excluded_hi)
+        // So the interval itself is (excluded_hi, excluded_lo)
+        Self::new(excluded_hi, excluded_lo)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use core::f64;
+
+    use super::*;
+
+    /// Assertions shared by every [IntervalTrait] implementation for its
+    /// empty interval; `empty` is expected to be `T::empty()`.
+    fn test_empty<T: IntervalTrait>(empty: T) {
+        // Equals itself
+        #[allow(clippy::eq_op)]
+        {
+            assert_eq!(empty, empty);
+        }
+
+        // Empty intersects no values
+        assert!(!empty.intersects_value(0.0));
+        assert!(!empty.intersects_value(f64::INFINITY));
+        assert!(!empty.intersects_value(-f64::INFINITY));
+        assert!(!empty.intersects_value(f64::NAN));
+
+        // Empty intersects no intervals
+        assert!(!empty.intersects_interval(&T::new(-10.0, 10.0)));
+        assert!(!empty.intersects_interval(&T::empty()));
+
+        // ...except the full interval
+        assert!(empty.intersects_interval(&T::full()));
+
+        // Empty contains no intervals
+        assert!(!empty.contains_interval(&T::new(-10.0, 10.0)));
+        assert!(!empty.contains_interval(&T::full()));
+
+        // ...except empty itself (empty set is subset of itself)
+        assert!(empty.contains_interval(&T::empty()));
+
+        // Merging NaN is still empty
+        assert_eq!(empty.merge_value(f64::NAN), empty);
+
+        // Merging an empty interval results in an empty interval
+        assert_eq!(empty.merge_interval(&empty), empty);
+
+        // Merging a value results in an interval with equal lo/hi
+        assert_eq!(empty.merge_value(12.0), T::new(12.0, 12.0));
+
+        // Merging a non-empty interval results in the other interval
+        assert_eq!(
+            empty.merge_interval(&T::new(10.0, 20.0)),
+            T::new(10.0, 20.0)
+        );
+
+        // Expanding empty interval keeps it empty
+        assert_eq!(empty.expand_by(5.0), empty);
+        assert_eq!(empty.expand_by(0.0), empty);
+        assert_eq!(empty.expand_by(-1.0), empty);
+        assert_eq!(empty.expand_by(f64::NAN), empty);
+    }
+
+    /// Runs the shared empty-interval assertions against [Interval].
+    #[test]
+    fn interval_empty() {
+        test_empty(Interval::empty());
+    }
+
+    /// Runs the shared empty-interval assertions against
+    /// [WraparoundInterval], plus the wraparound-specific checks.
+    #[test]
+    fn wraparound_interval_empty() {
+        let empty = WraparoundInterval::empty();
+
+        // Should pass all the regular interval tests
+        test_empty(empty);
+
+        // Empty shouldn't be treated as wraparound
+        assert!(!empty.is_wraparound());
+
+        // When merging the empty interval with an interval that
+        // wraps around, we should get the other interval back
+        assert_eq!(
+            empty.merge_interval(&WraparoundInterval::new(20.0, 10.0)),
+            WraparoundInterval::new(20.0, 10.0)
+        );
+    }
+
+    /// Assertions shared by every [IntervalTrait] implementation for a
+    /// finite, non-wraparound interval; `finite` must have `lo == 10.0` and
+    /// `hi == 20.0`.
+    fn test_finite<T: IntervalTrait>(finite: T) {
+        // Check accessors
+        assert_eq!(finite.lo(), 10.0);
+        assert_eq!(finite.hi(), 20.0);
+        assert_eq!(finite.mid(), 15.0);
+        assert_eq!(finite.width(), 10.0);
+        assert!(!finite.is_wraparound());
+        assert!(!finite.is_empty());
+
+        // Intersects endpoints and midpoint
+        assert!(finite.intersects_value(10.0));
+        assert!(finite.intersects_value(15.0));
+        assert!(finite.intersects_value(20.0));
+
+        // Doesn't intersect infinite values, NaN, or finite values outside
+        // the range
+        assert!(!finite.intersects_value(0.0));
+        assert!(!finite.intersects_value(f64::INFINITY));
+        assert!(!finite.intersects_value(-f64::INFINITY));
+        assert!(!finite.intersects_value(f64::NAN));
+
+        // Intervals that intersect
+        assert!(finite.intersects_interval(&T::new(14.0, 16.0)));
+        assert!(finite.intersects_interval(&T::new(5.0, 15.0)));
+        assert!(finite.intersects_interval(&T::new(15.0, 25.0)));
+        assert!(finite.intersects_interval(&T::new(5.0, 25.0)));
+        assert!(finite.intersects_interval(&T::full()));
+
+        // Barely touching ones count
+        assert!(finite.intersects_interval(&T::new(5.0, 10.0)));
+        assert!(finite.intersects_interval(&T::new(20.0, 25.0)));
+
+        // Intervals that don't intersect
+        assert!(!finite.intersects_interval(&T::new(0.0, 5.0)));
+        assert!(!finite.intersects_interval(&T::new(25.0, 30.0)));
+        assert!(!finite.intersects_interval(&T::empty()));
+
+        // Intervals that are contained
+        assert!(finite.contains_interval(&T::new(14.0, 16.0)));
+        assert!(finite.contains_interval(&T::new(10.0, 15.0)));
+        assert!(finite.contains_interval(&T::new(15.0, 20.0)));
+        assert!(finite.contains_interval(&T::new(10.0, 20.0))); // itself
+        assert!(finite.contains_interval(&T::empty()));
+
+        // Intervals that are not contained
+        assert!(!finite.contains_interval(&T::new(5.0, 15.0))); // extends below
+        assert!(!finite.contains_interval(&T::new(15.0, 25.0))); // extends above
+        assert!(!finite.contains_interval(&T::new(5.0, 25.0))); // extends both ways
+        assert!(!finite.contains_interval(&T::new(0.0, 5.0))); // completely below
+        assert!(!finite.contains_interval(&T::new(25.0, 30.0))); // completely above
+        assert!(!finite.contains_interval(&T::full())); // full interval is larger
+
+        // Merging NaN
+        assert_eq!(finite.merge_value(f64::NAN), finite);
+
+        // Merging Infinities
+        assert_eq!(
+            finite.merge_value(f64::INFINITY),
+            T::new(finite.lo(), f64::INFINITY)
+        );
+        assert_eq!(
+            finite.merge_value(-f64::INFINITY),
+            T::new(-f64::INFINITY, finite.hi())
+        );
+
+        // Merging a value within the interval
+        assert_eq!(finite.merge_value(15.0), finite);
+
+        // Merging a value above
+        assert_eq!(finite.merge_value(25.0), T::new(10.0, 25.0));
+
+        // Merging a value below
+        assert_eq!(finite.merge_value(5.0), T::new(5.0, 20.0));
+
+        // Merging an empty interval
+        assert_eq!(finite.merge_interval(&T::empty()), finite);
+
+        // Merging an interval with itself
+        assert_eq!(finite.merge_interval(&finite), finite);
+
+        // Merging an interval with the full interval
+        assert_eq!(finite.merge_interval(&T::full()), T::full());
+
+        // Merging an interval within the interval
+        assert_eq!(finite.merge_interval(&T::new(14.0, 16.0)), finite);
+
+        // Merging a partially overlapping interval below
+        assert_eq!(finite.merge_interval(&T::new(5.0, 15.0)), T::new(5.0, 20.0));
+
+        // Merging a partially overlapping interval above
+        assert_eq!(
+            finite.merge_interval(&T::new(15.0, 25.0)),
+            T::new(10.0, 25.0)
+        );
+
+        // Merging a disjoint interval below
+        assert_eq!(finite.merge_interval(&T::new(0.0, 5.0)), T::new(0.0, 20.0));
+
+        // Merging a disjoint interval above
+        assert_eq!(
+            finite.merge_interval(&T::new(25.0, 30.0)),
+            T::new(10.0, 30.0)
+        );
+
+        // Expanding by positive distance
+        assert_eq!(finite.expand_by(2.0), T::new(8.0, 22.0));
+        assert_eq!(finite.expand_by(5.0), T::new(5.0, 25.0));
+
+        // Expanding by zero does nothing
+        assert_eq!(finite.expand_by(0.0), finite);
+
+        // Expanding by negative distance does nothing
+        assert_eq!(finite.expand_by(-1.0), finite);
+
+        // Expanding by NaN does nothing
+        assert_eq!(finite.expand_by(f64::NAN), finite);
+    }
+
+    /// Runs the shared finite-interval assertions against [Interval].
+    #[test]
+    fn interval_finite() {
+        test_finite(Interval::new(10.0, 20.0));
+    }
+
+    /// Runs the shared finite-interval assertions against a
+    /// [WraparoundInterval] that does not wrap around, then checks that it
+    /// converts to an equivalent [Interval].
+    #[test]
+    fn wraparound_interval_finite() {
+        let finite = WraparoundInterval::new(10.0, 20.0);
+        test_finite(finite);
+
+        // Convert to an Interval
+        let interval: Interval = finite.try_into().unwrap();
+        assert_eq!(interval, Interval::new(10.0, 20.0));
+    }
+
+    /// Checks the accessors of an interval that actually wraps around.
+    #[test]
+    fn wraparound_interval_actually_wraparound_accessors() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+        assert!(wraparound.is_wraparound());
+        assert!(!wraparound.is_empty());
+        // The midpoint of a wraparound interval is reported as infinity
+        assert_eq!(wraparound.mid(), f64::INFINITY);
+    }
+
+    /// Checks `intersects_value` for an interval that actually wraps around.
+    #[test]
+    fn wraparound_interval_actually_wraparound_intersects_value() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Intersects endpoints but not a point between them
+        assert!(wraparound.intersects_value(10.0));
+        assert!(wraparound.intersects_value(20.0));
+        assert!(!wraparound.intersects_value(15.0));
+
+        // Intersects positive and negative infinity
+        assert!(wraparound.intersects_value(f64::INFINITY));
+        assert!(wraparound.intersects_value(-f64::INFINITY));
+
+        // ...but not NaN
+        assert!(!wraparound.intersects_value(f64::NAN));
+    }
+
+    /// Checks `intersects_interval` for an interval that actually wraps
+    /// around, against both regular and wraparound operands.
+    #[test]
+    fn wraparound_interval_actually_wraparound_intersects_interval() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Intersects itself
+        assert!(wraparound.intersects_interval(&wraparound));
+
+        // Intersects the full interval
+        assert!(wraparound.intersects_interval(&WraparoundInterval::full()));
+
+        // Interval completely between endpoints doesn't intersect
+        assert!(!wraparound.intersects_interval(&WraparoundInterval::new(14.0, 16.0)));
+        // ...unless it's also wraparound
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(16.0, 14.0)));
+
+        // Intervals overlapping endpoints intersect whether they are or aren't wraparound
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0, 15.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(15.0, 5.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(15.0, 25.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0, 15.0)));
+
+        // Barely touching ones still intersect whether they are or aren't wraparound
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0, 10.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(10.0, 5.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(20.0, 25.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0, 20.0)));
+
+        // Intervals completely above and below endpoints do intersect whether they
+        // are or aren't wraparound
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(0.0, 5.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0, 0.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0, 30.0)));
+        assert!(wraparound.intersects_interval(&WraparoundInterval::new(30.0, 25.0)));
+    }
+
+    /// Checks `contains_interval` for an interval that actually wraps
+    /// around, including the asymmetric handling of regular intervals that
+    /// follows from comparing the split halves pairwise.
+    #[test]
+    fn wraparound_interval_actually_wraparound_contains_interval() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Contains itself
+        assert!(wraparound.contains_interval(&wraparound));
+
+        // Empty is contained by everything
+        assert!(wraparound.contains_interval(&WraparoundInterval::empty()));
+
+        // Does not contain the full interval
+        assert!(!wraparound.contains_interval(&WraparoundInterval::full()));
+
+        // Regular interval completely between endpoints is not contained
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(14.0, 16.0)));
+
+        // Wraparound intervals that exclude more (narrower included regions) are contained
+        assert!(wraparound.contains_interval(&WraparoundInterval::new(22.0, 8.0))); // excludes (8,22) which is larger than (10,20)
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(18.0, 12.0))); // excludes (12,18) which is smaller than (10,20)
+
+        // Regular intervals don't work the same way due to the split logic
+        // For a regular interval (a, b), split gives (left=(a,b), right=empty)
+        // For wraparound to contain it, we need both parts to be contained
+        // This means (-inf, 10] must contain (a,b) AND [20, inf) must contain empty
+        // The second is always true, but the first requires b <= 10
+        assert!(wraparound.contains_interval(&WraparoundInterval::new(0.0, 5.0))); // completely within left part
+        assert!(wraparound.contains_interval(&WraparoundInterval::new(-5.0, 10.0))); // fits in left part
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(25.0, 30.0))); // doesn't fit in left part
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(20.0, 25.0))); // doesn't fit in left part
+
+        // Regular intervals that overlap the excluded zone are not contained
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(5.0, 15.0))); // overlaps excluded zone
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(15.0, 25.0))); // overlaps excluded zone
+
+        // Wraparound intervals that exclude less (wider included regions) are not contained
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(15.0, 5.0))); // excludes (5,15) which is smaller
+        assert!(!wraparound.contains_interval(&WraparoundInterval::new(25.0, 15.0)));
+        // the line above excludes (15,25) which is smaller
+    }
+
+    /// Checks `merge_value` for an interval that actually wraps around.
+    #[test]
+    fn wraparound_interval_actually_wraparound_merge_value() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Merging NaN
+        assert_eq!(wraparound.merge_value(f64::NAN), wraparound);
+
+        // Merging a value closer to the end of the left part (10) should
+        // move that endpoint
+        assert_eq!(
+            wraparound.merge_value(12.0),
+            WraparoundInterval::new(20.0, 12.0)
+        );
+
+        // Merging a value closer to the start of the right part (20) should
+        // move that endpoint
+        assert_eq!(
+            wraparound.merge_value(18.0),
+            WraparoundInterval::new(18.0, 10.0)
+        );
+
+        // Merging a value that is already intersecting shouldn't change the interval
+        assert_eq!(wraparound.merge_value(5.0), wraparound);
+        assert_eq!(wraparound.merge_value(10.0), wraparound);
+        assert_eq!(wraparound.merge_value(20.0), wraparound);
+        assert_eq!(wraparound.merge_value(25.0), wraparound);
+    }
+
+    /// Checks `merge_interval` for an interval that actually wraps around,
+    /// against empty, wraparound and regular operands. The diagrams show the
+    /// operands and, on the last line of each, the expected result.
+    #[test]
+    fn wraparound_interval_actually_wraparound_merge_interval() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Merging an empty interval
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::empty()),
+            wraparound
+        );
+
+        // Merging an interval with itself
+        assert_eq!(wraparound.merge_interval(&wraparound), wraparound);
+
+        // Merging a wraparound interval with a "larger" wraparound interval
+        //           10         20
+        // <==========|          |============>
+        // <==============|  |================>
+        //               14  16
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(16.0, 14.0)),
+            WraparoundInterval::new(16.0, 14.0)
+        );
+
+        // Merging a wraparound interval with a "smaller" wraparound interval
+        //           10         20
+        // <==========|          |============>
+        // <=====|                    |=======>
+        //       5                    25
+        // <==========|          |============>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(25.0, 5.0)),
+            wraparound
+        );
+
+        // Merge with partially intersecting wraparounds
+        //           10         20
+        // <==========|          |============>
+        // <=====|          |=================>
+        //       5          15
+        // <==========|     |=================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(15.0, 5.0)),
+            WraparoundInterval::new(15.0, 10.0)
+        );
+
+        //           10         20
+        // <==========|          |============>
+        // <================|          |======>
+        //                  15         25
+        // <================|    |============>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(25.0, 15.0)),
+            WraparoundInterval::new(20.0, 15.0)
+        );
+
+        // Merge wraparound with wraparound whose union is the full interval
+        //           10         20
+        // <==========|          |=========================>
+        // <=============================|          |======>
+        //                               25         30
+        // <===============================================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(30.0, 25.0)),
+            WraparoundInterval::full()
+        );
+
+        //                    10         20
+        // <===================|          |================>
+        // <==|          |=================================>
+        //    0          5
+        // <===============================================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(5.0, 0.0)),
+            WraparoundInterval::full()
+        );
+
+        // Merge wraparound with a regular interval completely contained by the original
+        //                  10         20
+        // <=================|          |==================>
+        //                                   |=========|
+        //                                  25         30
+        // <=================|          |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(25.0, 30.0)),
+            wraparound
+        );
+
+        //                  10         20
+        // <=================|          |==================>
+        //  |=========|
+        //  0         5
+        // <=================|          |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(0.0, 5.0)),
+            wraparound
+        );
+
+        // Merge wraparound with a partially intersecting regular interval that
+        // should extend the left side
+        //                  10         20
+        // <=================|          |==================>
+        //              |=========|
+        //              5         15
+        // <======================|     |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(5.0, 15.0)),
+            WraparoundInterval::new(20.0, 15.0)
+        );
+
+        // Merge wraparound with a partially intersecting regular interval that
+        // should extend the right side
+        //                  10         20
+        // <=================|          |==================>
+        //                         |=========|
+        //                         15        25
+        // <=================|     |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(15.0, 25.0)),
+            WraparoundInterval::new(15.0, 10.0)
+        );
+
+        // Merge wraparound with a disjoint regular interval that should extend the left side
+        //                  10         20
+        // <=================|          |==================>
+        //                     |==|
+        //                    12  15
+        // <======================|     |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(12.0, 15.0)),
+            WraparoundInterval::new(20.0, 15.0)
+        );
+
+        // Merge wraparound with a disjoint regular interval that should extend the right side
+        //                  10         20
+        // <=================|          |==================>
+        //                         |==|
+        //                        15  18
+        // <=================|     |==================>
+        assert_eq!(
+            wraparound.merge_interval(&WraparoundInterval::new(15.0, 18.0)),
+            WraparoundInterval::new(15.0, 10.0)
+        );
+    }
+
+    /// Checks `expand_by` for intervals that actually wrap around (expanding
+    /// shrinks the excluded gap) and for one that does not.
+    #[test]
+    fn wraparound_interval_actually_wraparound_expand_by() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Expanding by a small amount shrinks the excluded region
+        // Original excludes (10, 20), expanding by 2 should exclude (12, 18)
+        // So the new interval should be (18, 12) = everything except (12, 18)
+        assert_eq!(
+            wraparound.expand_by(2.0),
+            WraparoundInterval::new(18.0, 12.0)
+        ); // now excludes (12, 18)
+
+        // Expanding by 4 should exclude (14, 16)
+        assert_eq!(
+            wraparound.expand_by(4.0),
+            WraparoundInterval::new(16.0, 14.0)
+        ); // now excludes (14, 16)
+
+        // Expanding by 5.0 should exactly eliminate the excluded region
+        // excluded region (10, 20) shrinks to (15, 15) which is empty
+        assert_eq!(wraparound.expand_by(5.0), WraparoundInterval::full()); // excluded region disappears
+
+        // Any expansion greater than 5.0 should also give full interval
+        assert_eq!(wraparound.expand_by(6.0), WraparoundInterval::full());
+
+        assert_eq!(wraparound.expand_by(100.0), WraparoundInterval::full());
+
+        // Expanding by zero does nothing
+        assert_eq!(wraparound.expand_by(0.0), wraparound);
+
+        // Expanding by negative distance does nothing
+        assert_eq!(wraparound.expand_by(-1.0), wraparound);
+
+        // Expanding by NaN does nothing
+        assert_eq!(wraparound.expand_by(f64::NAN), wraparound);
+
+        // Test a WraparoundInterval that does not actually wrap around
+        let non_wraparound = WraparoundInterval::new(10.0, 20.0);
+        assert!(!non_wraparound.is_wraparound());
+        assert_eq!(
+            non_wraparound.expand_by(2.0),
+            WraparoundInterval::new(8.0, 22.0)
+        );
+
+        // Test another wraparound case - excludes (5, 15) with width 10
+        let wraparound2 = WraparoundInterval::new(15.0, 5.0);
+        // Expanding by 3 should shrink excluded region from (5, 15) to (8, 12)
+        assert_eq!(
+            wraparound2.expand_by(3.0),
+            WraparoundInterval::new(12.0, 8.0)
+        );
+
+        // Expanding by 5 should make excluded region disappear: (5+5, 15-5) = (10, 10)
+        assert_eq!(wraparound2.expand_by(5.0), WraparoundInterval::full());
+    }
+
+    /// Checks that converting an interval that actually wraps around into an
+    /// [Interval] fails with a descriptive error.
+    #[test]
+    fn wraparound_interval_actually_wraparound_convert() {
+        // Everything *except* the interval (10, 20)
+        let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+        // Can't convert a wraparound interval that actually wraps around to an Interval
+        let err = Interval::try_from(wraparound).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Can't convert wraparound interval")
+        );
+    }
+}
diff --git a/arrow-array/benches/gc_view_types.rs b/parquet-geospatial/src/lib.rs
similarity index 50%
rename from arrow-array/benches/gc_view_types.rs
rename to parquet-geospatial/src/lib.rs
index cab60b47af79..7b2b6166a4b0 100644
--- a/arrow-array/benches/gc_view_types.rs
+++ b/parquet-geospatial/src/lib.rs
@@ -15,35 +15,25 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_array::StringViewArray;
-use criterion::*;
-use std::hint;
+//! Implementation of [Geometry and Geography Encoding] from [Apache Parquet].
+//!
+//! [Geometry and Geography Encoding]: https://github.com/apache/parquet-format/blob/master/Geospatial.md
+//! [Apache Parquet]: https://parquet.apache.org/
+//!
+//! ## 🚧 Work In Progress
+//!
+//! This crate is under active development and is not yet ready for production use.
+//! If you are interested in helping, you can find more information on the GitHub [Geometry issue].
+//!
+//! [Geometry issue]: https://github.com/apache/arrow-rs/issues/8373
 
-fn gen_view_array(size: usize) -> StringViewArray {
-    StringViewArray::from_iter((0..size).map(|v| match v % 3 {
-        0 => Some("small"),
-        1 => Some("larger than 12 bytes array"),
-        2 => None,
-        _ => unreachable!("unreachable"),
-    }))
-}
+pub mod bounding;
+pub mod interval;
+pub mod testing;
 
-fn criterion_benchmark(c: &mut Criterion) {
-    let array = gen_view_array(100_000);
+mod types;
 
-    c.bench_function("gc view types all", |b| {
-        b.iter(|| {
-            hint::black_box(array.gc());
-        });
-    });
-
-    let sliced = array.slice(0, 100_000 / 2);
-    c.bench_function("gc view types slice half", |b| {
-        b.iter(|| {
-            hint::black_box(sliced.gc());
-        });
-    });
-}
-
-criterion_group!(benches, criterion_benchmark);
-criterion_main!(benches);
+pub use types::Edges as WkbEdges;
+pub use types::Hint as WkbTypeHint;
+pub use types::Metadata as WkbMetadata;
+pub use types::WkbType;
diff --git a/parquet-geospatial/src/testing.rs b/parquet-geospatial/src/testing.rs
new file mode 100644
index 000000000000..2807a53ac9dc
--- /dev/null
+++ b/parquet-geospatial/src/testing.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Testing utilities for geospatial Parquet types
+
+/// Build little-endian well-known binary representing a point with the given XY coordinate
+pub fn wkb_point_xy(x: f64, y: f64) -> Vec<u8> {
+    let mut wkb = Vec::with_capacity(21);
+    wkb.push(0x01); // byte order flag: little endian
+    wkb.extend_from_slice(&1_u32.to_le_bytes()); // geometry type code 1 (point)
+    wkb.extend_from_slice(&x.to_le_bytes());
+    wkb.extend_from_slice(&y.to_le_bytes());
+    wkb
+}
+
+/// Build little-endian well-known binary representing a point with the given XYZM coordinate
+pub fn wkb_point_xyzm(x: f64, y: f64, z: f64, m: f64) -> Vec<u8> {
+    let mut wkb = Vec::with_capacity(37);
+    wkb.push(0x01); // byte order flag: little endian
+    wkb.extend_from_slice(&3001_u32.to_le_bytes()); // geometry type code for POINT ZM
+    // ordinates follow in X, Y, Z, M order, 8 bytes each
+    for ordinate in [x, y, z, m] {
+        wkb.extend_from_slice(&ordinate.to_le_bytes());
+    }
+    wkb
+}
+
+#[cfg(test)]
+mod test {
+
+    use wkb::reader::Wkb;
+
+    use super::*;
+
+    #[test]
+    fn test_wkb_item() {
+        let bytes = wkb_point_xy(1.0, 2.0);
+        let geometry = Wkb::try_new(&bytes).unwrap();
+        let mut wkt = String::new();
+        wkt::to_wkt::write_geometry(&mut wkt, &geometry).unwrap();
+        assert_eq!(wkt, "POINT(1 2)");
+    }
+
+    #[test]
+    fn test_wkb_point_xyzm() {
+        let bytes = wkb_point_xyzm(1.0, 2.0, 3.0, 4.0);
+        let geometry = Wkb::try_new(&bytes).unwrap();
+        let mut wkt = String::new();
+        wkt::to_wkt::write_geometry(&mut wkt, &geometry).unwrap();
+        assert_eq!(wkt, "POINT ZM(1 2 3 4)");
+    }
+}
diff --git a/parquet-geospatial/src/types.rs b/parquet-geospatial/src/types.rs
new file mode 100644
index 000000000000..f19911ad055a
--- /dev/null
+++ b/parquet-geospatial/src/types.rs
@@ -0,0 +1,407 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_schema::{ArrowError, DataType, extension::ExtensionType};
+use serde::{Deserialize, Serialize};
+
+/// Hints at the likely Parquet geospatial logical type represented by a [`Metadata`].
+///
+/// Based on the `algorithm` field:
+/// - [`Hint::Geometry`]: WKB format with linear/planar edge interpolation
+/// - [`Hint::Geography`]: WKB format with explicit non-linear/non-planar edge interpolation
+///
+/// See the [Parquet Geospatial specification](https://github.com/apache/parquet-format/blob/master/Geospatial.md)
+/// for more details.
+#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
+pub enum Hint {
+    /// Geospatial features in WKB format with linear/planar edge interpolation
+    Geometry,
+    /// Geospatial features in WKB format with explicit non-linear/non-planar edge interpolation
+    Geography,
+}
+
+/// The edge interpolation algorithms used with `GEOGRAPHY` logical types.
+#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum Edges {
+    /// Edges are interpolated as geodesics on a sphere.
+    #[default]
+    Spherical,
+    /// <https://en.wikipedia.org/wiki/Vincenty%27s_formulae>
+    Vincenty,
+    /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970
+    Thomas,
+    /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965.
+    Andoyer,
+    /// Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55
+    Karney,
+}
+
+/// The metadata associated with a [`WkbType`].
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct Metadata {
+    /// The Coordinate Reference System (CRS) of the [`WkbType`], if present.
+    ///
+    /// This may be a raw string value (e.g., "EPSG:3857") or a JSON object (e.g., PROJJSON).
+    /// Note: Common lon/lat CRS representations (EPSG:4326, OGC:CRS84) are canonicalized
+    /// to `None` during serialization to match Parquet conventions.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub crs: Option<serde_json::Value>,
+    /// The edge interpolation algorithm of the [`WkbType`], if present.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub algorithm: Option<Edges>,
+}
+
+impl Metadata {
+    /// Constructs a new [`Metadata`] with the given CRS and algorithm.
+    ///
+    /// If a CRS is provided and parses as JSON, the parsed JSON value is stored; otherwise the
+    /// text is stored verbatim as a JSON string.
+    pub fn new(crs: Option<&str>, algorithm: Option<Edges>) -> Self {
+        let crs = crs.map(|c| match serde_json::from_str(c) {
+            Ok(crs) => crs,
+            Err(_) => serde_json::Value::String(c.to_string()),
+        });
+
+        Self { crs, algorithm }
+    }
+
+    /// Returns [`Hint::Geography`] when an edge algorithm is set, otherwise [`Hint::Geometry`].
+    pub fn type_hint(&self) -> Hint {
+        if self.algorithm.is_some() {
+            Hint::Geography
+        } else {
+            Hint::Geometry
+        }
+    }
+
+    /// Detect if the CRS is a common representation of lon/lat on the standard WGS84 ellipsoid
+    fn crs_is_lon_lat(&self) -> bool {
+        use serde_json::Value;
+
+        let Some(crs) = &self.crs else {
+            return false;
+        };
+
+        match crs {
+            Value::String(s) if s == "EPSG:4326" || s == "OGC:CRS84" => true,
+            Value::Object(_) => match (&crs["id"]["authority"], &crs["id"]["code"]) {
+                (Value::String(auth), Value::String(code)) if auth == "OGC" && code == "CRS84" => {
+                    true
+                }
+                (Value::String(auth), Value::String(code)) if auth == "EPSG" && code == "4326" => {
+                    true
+                }
+                (Value::String(auth), Value::Number(code))
+                    if auth == "EPSG" && code.as_i64() == Some(4326) =>
+                {
+                    true
+                }
+                _ => false,
+            },
+            _ => false,
+        }
+    }
+}
+
+/// Well-Known Binary (WKB) [`ExtensionType`] for geospatial data.
+///
+/// Represents the canonical Arrow Extension Type for storing
+/// [GeoArrow](https://github.com/geoarrow/geoarrow) data.
+#[derive(Debug, Default)]
+pub struct WkbType(Metadata);
+
+impl WkbType {
+    /// Constructs a new [`WkbType`] with the given [`Metadata`].
+    ///
+    /// If `None` is provided, default (empty) metadata is used.
+    pub fn new(metadata: Option<Metadata>) -> Self {
+        Self(metadata.unwrap_or_default())
+    }
+}
+
+type ArrowResult<T> = Result<T, ArrowError>;
+impl ExtensionType for WkbType {
+    const NAME: &'static str = "geoarrow.wkb";
+
+    type Metadata = Metadata;
+
+    fn metadata(&self) -> &Self::Metadata {
+        &self.0
+    }
+
+    fn serialize_metadata(&self) -> Option<String> {
+        let md = if self.0.crs_is_lon_lat() {
+            &Metadata {
+                crs: None, // lon/lat CRS is canonicalized as omitted (None) for Parquet
+                algorithm: self.0.algorithm,
+            }
+        } else {
+            &self.0
+        };
+
+        serde_json::to_string(md).ok()
+    }
+
+    fn deserialize_metadata(metadata: Option<&str>) -> ArrowResult<Self::Metadata> {
+        match metadata {
+            None => Ok(Self::Metadata::default()),
+            Some(json) => serde_json::from_str(json)
+                .map_err(|err| ArrowError::JsonError(err.to_string())),
+        }
+    }
+
+    fn supports_data_type(&self, data_type: &arrow_schema::DataType) -> ArrowResult<()> {
+        match data_type {
+            DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Ok(()),
+            dt => Err(ArrowError::InvalidArgumentError(format!(
+                "Geometry data type mismatch, expected one of Binary, LargeBinary, BinaryView. Found {dt}"
+            ))),
+        }
+    }
+
+    fn try_new(data_type: &arrow_schema::DataType, metadata: Self::Metadata) -> ArrowResult<Self> {
+        let wkb = Self(metadata);
+        wkb.supports_data_type(data_type)?;
+        Ok(wkb)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_schema::Field;
+
+    /// Test metadata serialization and deserialization with empty/default metadata
+    #[test]
+    fn test_metadata_empty_roundtrip() -> ArrowResult<()> {
+        let metadata = Metadata::default();
+        let wkb = WkbType::new(Some(metadata));
+
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
+        assert!(deserialized.crs.is_none());
+        assert!(deserialized.algorithm.is_none());
+
+        Ok(())
+    }
+
+    /// Test metadata serialization with CRS as a simple string
+    #[test]
+    fn test_metadata_crs_string_roundtrip() -> ArrowResult<()> {
+        let metadata = Metadata::new(Some("srid:1234"), None);
+        let wkb = WkbType::new(Some(metadata));
+
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, r#"{"crs":"srid:1234"}"#);
+
+        let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
+        assert_eq!(
+            deserialized.crs.unwrap(),
+            serde_json::Value::String(String::from("srid:1234"))
+        );
+        assert!(deserialized.algorithm.is_none());
+
+        Ok(())
+    }
+
+    /// Test metadata serialization with CRS as a JSON object
+    #[test]
+    fn test_metadata_crs_json_object_roundtrip() -> ArrowResult<()> {
+        let crs_json = r#"{"type":"custom_json","properties":{"name":"EPSG:4326"}}"#;
+        let metadata = Metadata::new(Some(crs_json), None);
+        let wkb = WkbType::new(Some(metadata));
+
+        let serialized = wkb.serialize_metadata().unwrap();
+        // Validate by parsing the JSON and checking structure (field order is not guaranteed)
+        let parsed: serde_json::Value = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(parsed["crs"]["type"], "custom_json");
+        assert_eq!(parsed["crs"]["properties"]["name"], "EPSG:4326");
+
+        let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
+
+        // Verify it's a JSON object with expected structure
+        let crs = deserialized.crs.unwrap();
+        assert!(crs.is_object());
+        assert_eq!(crs["type"], "custom_json");
+        assert_eq!(crs["properties"]["name"], "EPSG:4326");
+
+        Ok(())
+    }
+
+    /// Test metadata serialization with algorithm field
+    #[test]
+    fn test_metadata_algorithm_roundtrip() -> ArrowResult<()> {
+        let metadata = Metadata::new(None, Some(Edges::Spherical));
+        let wkb = WkbType::new(Some(metadata));
+
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, r#"{"algorithm":"spherical"}"#);
+
+        let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
+        assert!(deserialized.crs.is_none());
+        assert_eq!(deserialized.algorithm, Some(Edges::Spherical));
+
+        Ok(())
+    }
+
+    /// Test metadata serialization with both CRS and algorithm
+    #[test]
+    fn test_metadata_full_roundtrip() -> ArrowResult<()> {
+        let metadata = Metadata::new(Some("srid:1234"), Some(Edges::Spherical));
+        let wkb = WkbType::new(Some(metadata));
+
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, r#"{"crs":"srid:1234","algorithm":"spherical"}"#);
+
+        let deserialized = WkbType::deserialize_metadata(Some(&serialized))?;
+        assert_eq!(
+            deserialized.crs.unwrap(),
+            serde_json::Value::String("srid:1234".to_string())
+        );
+        assert_eq!(deserialized.algorithm, Some(Edges::Spherical));
+
+        Ok(())
+    }
+
+    /// Test deserialization of None metadata
+    #[test]
+    fn test_metadata_deserialize_none() -> ArrowResult<()> {
+        let deserialized = WkbType::deserialize_metadata(None)?;
+        assert!(deserialized.crs.is_none());
+        assert!(deserialized.algorithm.is_none());
+        Ok(())
+    }
+
+    /// Test deserialization of invalid JSON
+    #[test]
+    fn test_metadata_deserialize_invalid_json() {
+        let result = WkbType::deserialize_metadata(Some("not valid json {"));
+        assert!(matches!(result, Err(ArrowError::JsonError(_))));
+    }
+
+    /// Test metadata that results in a Geometry type hint
+    #[test]
+    fn test_type_hint_geometry() {
+        let metadata = Metadata::new(None, None);
+        assert!(matches!(metadata.type_hint(), Hint::Geometry));
+    }
+
+    /// Test metadata that results in a Geography type hint
+    #[test]
+    fn test_type_hint_edges_is_geography() {
+        let algorithms = vec![
+            Edges::Spherical,
+            Edges::Vincenty,
+            Edges::Thomas,
+            Edges::Andoyer,
+            Edges::Karney,
+        ];
+        for algo in algorithms {
+            let metadata = Metadata::new(None, Some(algo));
+            assert!(matches!(metadata.type_hint(), Hint::Geography));
+        }
+    }
+
+    /// Test extension type integration using a Field
+    #[test]
+    fn test_extension_type_with_field() -> ArrowResult<()> {
+        let metadata = Metadata::new(Some("srid:1234"), None);
+        let wkb_type = WkbType::new(Some(metadata));
+
+        let mut field = Field::new("geometry", DataType::Binary, false);
+        field.try_with_extension_type(wkb_type)?;
+
+        // Verify we can extract the extension type back
+        let extracted = field.try_extension_type::<WkbType>()?;
+        assert_eq!(
+            extracted.metadata().crs.as_ref().unwrap(),
+            &serde_json::Value::String(String::from("srid:1234"))
+        );
+
+        Ok(())
+    }
+
+    /// Test extension type DataType support
+    #[test]
+    fn test_extension_type_support() -> ArrowResult<()> {
+        let wkb = WkbType::default();
+        // supported types
+        wkb.supports_data_type(&DataType::Binary)?;
+        wkb.supports_data_type(&DataType::LargeBinary)?;
+        wkb.supports_data_type(&DataType::BinaryView)?;
+
+        // reject unsupported types with an error
+        let result = wkb.supports_data_type(&DataType::Utf8);
+        assert!(matches!(result, Err(ArrowError::InvalidArgumentError(_))));
+
+        Ok(())
+    }
+
+    /// Test CRS canonicalization logic for common lon/lat representations
+    #[test]
+    fn test_crs_canonicalization() -> ArrowResult<()> {
+        // EPSG:4326 as string should be omitted
+        let metadata = Metadata::new(Some("EPSG:4326"), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        // OGC:CRS84 as string should be omitted
+        let metadata = Metadata::new(Some("OGC:CRS84"), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        // A JSON object that reasonably looks like PROJJSON for EPSG:4326 should be omitted
+        // detect "4326" as a string
+        let crs_json = r#"{"id":{"authority":"EPSG","code":"4326"}}"#;
+        let metadata = Metadata::new(Some(crs_json), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        // detect 4326 as a number
+        let crs_json = r#"{"id":{"authority":"EPSG","code":4326}}"#;
+        let metadata = Metadata::new(Some(crs_json), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        // A JSON object that reasonably looks like PROJJSON for OGC:CRS84 should be omitted
+        let crs_json = r#"{"id":{"authority":"OGC","code":"CRS84"}}"#;
+        let metadata = Metadata::new(Some(crs_json), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, "{}");
+
+        // Other input types should be preserved
+        let metadata = Metadata::new(Some("srid:1234"), None);
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, r#"{"crs":"srid:1234"}"#);
+
+        // Canonicalization should work with algorithm field
+        let metadata = Metadata::new(Some("EPSG:4326"), Some(Edges::Spherical));
+        let wkb = WkbType::new(Some(metadata));
+        let serialized = wkb.serialize_metadata().unwrap();
+        assert_eq!(serialized, r#"{"algorithm":"spherical"}"#);
+
+        Ok(())
+    }
+}
diff --git a/parquet-testing b/parquet-testing
index b68bea40fed8..a3d96a65e11e 160000
--- a/parquet-testing
+++ b/parquet-testing
@@ -1 +1 @@
-Subproject commit b68bea40fed8d1a780a9e09dd2262017e04b19ad
+Subproject commit a3d96a65e11e2bbca7d22a894e8313ede90a33a3
diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml
new file mode 100644
index 000000000000..85d66a9cf706
--- /dev/null
+++ b/parquet-variant-compute/Cargo.toml
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "parquet-variant-compute"
+version = { workspace = true }
+license = { workspace = true }
+description = "Apache Parquet Variant Batch Processing"
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+keywords = ["arrow", "parquet", "variant"]
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+
+[dependencies]
+arrow = { workspace = true , features = ["canonical_extension_types"]}
+arrow-schema = { workspace = true }
+half = { version = "2.1", default-features = false }
+indexmap = "2.10.0"
+parquet-variant = { workspace = true }
+parquet-variant-json = { workspace = true }
+chrono = { workspace = true }
+uuid = { version = "1.18.0", features = ["v4"]}
+serde_json = "1.0"
+
+[lib]
+name = "parquet_variant_compute"
+bench = false
+
+[dev-dependencies]
+rand = "0.9.1"
+criterion = { workspace = true, default-features = false }
+arrow = { workspace = true, features = ["test_utils"] }
+
+
+[[bench]]
+name = "variant_kernels"
+harness = false
diff --git a/parquet-variant-compute/benches/variant_kernels.rs b/parquet-variant-compute/benches/variant_kernels.rs
new file mode 100644
index 000000000000..383697ab8cc6
--- /dev/null
+++ b/parquet-variant-compute/benches/variant_kernels.rs
@@ -0,0 +1,558 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, BinaryViewArray, StringArray, StructArray};
+use arrow::util::test_util::seedable_rng;
+use arrow_schema::{DataType, Field, FieldRef, Fields};
+use criterion::{Criterion, criterion_group, criterion_main};
+use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, Variant, VariantBuilder};
+use parquet_variant_compute::{
+    GetOptions, VariantArray, VariantArrayBuilder, json_to_variant, variant_get,
+};
+use parquet_variant_json::append_json;
+use rand::Rng;
+use rand::SeedableRng;
+use rand::distr::Alphanumeric;
+use rand::rngs::StdRng;
+use serde_json::Value;
+use std::fmt::Write;
+use std::sync::Arc;
+
+fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
+    let input_array = StringArray::from_iter_values(json_repeated_struct(8000));
+    let array_ref: ArrayRef = Arc::new(input_array);
+    c.bench_function(
+        "batch_json_string_to_variant repeated_struct 8k string",
+        |b| {
+            b.iter(|| {
+                let _ = json_to_variant(&array_ref).unwrap();
+            });
+        },
+    );
+
+    let input_array = StringArray::from_iter_values(json_repeated_list(8000));
+    let array_ref: ArrayRef = Arc::new(input_array);
+    c.bench_function("batch_json_string_to_variant json_list 8k string", |b| {
+        b.iter(|| {
+            let _ = json_to_variant(&array_ref).unwrap();
+        });
+    });
+
+    let input_array = StringArray::from_iter_values(random_json_structure(8000));
+    let total_input_bytes = input_array
+        .iter()
+        .flatten() // filter None
+        .map(|v| v.len())
+        .sum::<usize>();
+    let id = format!(
+        "batch_json_string_to_variant random_json({} bytes per document)",
+        total_input_bytes / input_array.len()
+    );
+    let array_ref: ArrayRef = Arc::new(input_array);
+    c.bench_function(&id, |b| {
+        b.iter(|| {
+            let _ = json_to_variant(&array_ref).unwrap();
+        });
+    });
+
+    let input_array = StringArray::from_iter_values(random_structure(8000, 200));
+    let total_input_bytes = input_array
+        .iter()
+        .flatten() // filter None
+        .map(|v| v.len())
+        .sum::<usize>();
+    let id = format!(
+        "batch_json_string_to_variant object - 1 depth(200 fields) random_json({} bytes per document)",
+        total_input_bytes / input_array.len()
+    );
+    let array_ref: ArrayRef = Arc::new(input_array);
+    let string_array = array_ref.as_any().downcast_ref::<StringArray>().unwrap();
+    let mut json_array: Vec<Value> = Vec::with_capacity(string_array.len());
+    for i in 0..string_array.len() {
+        json_array.push(serde_json::from_str(string_array.value(i)).unwrap());
+    }
+    c.bench_function(&id, |b| {
+        b.iter(|| {
+            let mut variant_array_builder = VariantArrayBuilder::new(string_array.len());
+            for json in &json_array {
+                append_json(json, &mut variant_array_builder).unwrap();
+            }
+            let _ = variant_array_builder.build();
+        });
+    });
+
+    let input_array = StringArray::from_iter_values(random_structure(8000, 100));
+    let total_input_bytes = input_array
+        .iter()
+        .flatten() // filter None
+        .map(|v| v.len())
+        .sum::<usize>();
+    let id = format!(
+        "batch_json_string_to_variant object - 1 depth(100 fields) random_json({} bytes per document)",
+        total_input_bytes / input_array.len()
+    );
+    let array_ref: ArrayRef = Arc::new(input_array);
+    let string_array = array_ref.as_any().downcast_ref::<StringArray>().unwrap();
+    let mut json_array: Vec<Value> = Vec::with_capacity(string_array.len());
+    for i in 0..string_array.len() {
+        json_array.push(serde_json::from_str(string_array.value(i)).unwrap());
+    }
+    c.bench_function(&id, |b| {
+        b.iter(|| {
+            let mut variant_array_builder = VariantArrayBuilder::new(string_array.len());
+            for json in &json_array {
+                append_json(json, &mut variant_array_builder).unwrap();
+            }
+            let _ = variant_array_builder.build();
+        });
+    });
+
+    let input_array = StringArray::from_iter_values(random_json_structure(8000));
+    let total_input_bytes = input_array
+        .iter()
+        .flatten() // filter None
+        .map(|v| v.len())
+        .sum::<usize>();
+    let id = format!(
+        "batch_json_string_to_variant random_json({} bytes per document)",
+        total_input_bytes / input_array.len()
+    );
+    let array_ref: ArrayRef = Arc::new(input_array);
+    c.bench_function(&id, |b| {
+        b.iter(|| {
+            let _ = json_to_variant(&array_ref).unwrap();
+        });
+    });
+}
+
+pub fn variant_get_bench(c: &mut Criterion) {
+    let variant_array = create_primitive_variant_array(8192);
+    let input = ArrayRef::from(variant_array);
+
+    let options = GetOptions {
+        path: vec![].into(),
+        as_type: None,
+        cast_options: Default::default(),
+    };
+
+    c.bench_function("variant_get_primitive", |b| {
+        b.iter(|| variant_get(&input.clone(), options.clone()))
+    });
+}
+
+pub fn variant_get_shredded_utf8_bench(c: &mut Criterion) {
+    let variant_array = create_shredded_utf8_variant_array(8192);
+    let input = ArrayRef::from(variant_array);
+
+    let field: FieldRef = Arc::new(Field::new("typed_value", DataType::Utf8, true));
+    let options = GetOptions {
+        path: vec![].into(),
+        as_type: Some(field),
+        cast_options: Default::default(),
+    };
+
+    c.bench_function("variant_get_shredded_utf8", |b| {
+        b.iter(|| variant_get(&input.clone(), options.clone()))
+    });
+}
+
+criterion_group!(
+    benches,
+    variant_get_bench,
+    variant_get_shredded_utf8_bench,
+    benchmark_batch_json_string_to_variant
+);
+criterion_main!(benches);
+
+/// Creates a `VariantArray` of `size` `Variant::Int64` values drawn from a fixed-seed RNG.
+fn create_primitive_variant_array(size: usize) -> VariantArray {
+    let mut rng = StdRng::seed_from_u64(42);
+    // Size the builder for every row up front, matching the other builders in this file
+    let mut variant_builder = VariantArrayBuilder::new(size);
+
+    for _ in 0..size {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(rng.random::<i64>());
+        let (metadata, value) = builder.finish();
+        variant_builder.append_variant(Variant::try_new(&metadata, &value).unwrap());
+    }
+
+    variant_builder.build()
+}
+
+/// Creates a `VariantArray` where the values are already shredded as UTF8.
+fn create_shredded_utf8_variant_array(size: usize) -> VariantArray {
+    let metadata =
+        BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, size));
+    let typed_value = StringArray::from_iter_values((0..size).map(|i| format!("value_{i}")));
+
+    let metadata_ref: ArrayRef = Arc::new(metadata);
+    let typed_value_ref: ArrayRef = Arc::new(typed_value);
+
+    let fields = Fields::from(vec![
+        Arc::new(Field::new(
+            "metadata",
+            metadata_ref.data_type().clone(),
+            false,
+        )),
+        Arc::new(Field::new(
+            "typed_value",
+            typed_value_ref.data_type().clone(),
+            true,
+        )),
+    ]);
+
+    let struct_array = StructArray::new(fields, vec![metadata_ref, typed_value_ref], None);
+    let struct_array_ref: ArrayRef = Arc::new(struct_array);
+
+    VariantArray::try_new(struct_array_ref.as_ref())
+        .expect("created struct should be a valid shredded variant")
+}
+
+/// Return an iterator of JSON strings, each representing a person
+/// with random first name, last name, and age.
+///
+/// Example:
+/// ```json
+/// {
+///   "first" : random_string_of_1_to_20_characters,
+///   "last" : random_string_of_1_to_20_characters,
+///   "age": random_value_between_20_and_80,
+/// }
+/// ```
+fn json_repeated_struct(count: usize) -> impl Iterator<Item = String> {
+    let mut rng = seedable_rng();
+    (0..count).map(move |_| {
+        let first: String = (0..rng.random_range(1..=20))
+            .map(|_| rng.sample(Alphanumeric) as char)
+            .collect();
+        let last: String = (0..rng.random_range(1..=20))
+            .map(|_| rng.sample(Alphanumeric) as char)
+            .collect();
+        let age: u8 = rng.random_range(20..=80);
+        format!("{{\"first\":\"{first}\",\"last\":\"{last}\",\"age\":{age}}}")
+    })
+}
+
+/// Returns an iterator of `count` JSON strings, each an array of numbers.
+///
+/// Each array holds between 0 and 100 values drawn from `0.0..10000.0` and
+/// printed with one decimal place.
+///
+/// Example:
+/// ```json
+/// [1.0,2.0,3.0,4.0,5.0]
+/// [5.0]
+/// []
+/// [1.0,2.0,3.0,4.0,5.0,6.0,7.0]
+/// ```
+fn json_repeated_list(count: usize) -> impl Iterator<Item = String> {
+    let mut rng = seedable_rng();
+    (0..count).map(move |_| {
+        let length = rng.random_range(0..=100);
+        let mut output = String::from("[");
+        for position in 0..length {
+            // Separate from the previous element instead of trailing the current one.
+            if position > 0 {
+                output.push(',');
+            }
+            let value: f64 = rng.random_range(0.0..10000.0);
+            write!(&mut output, "{value:.1}").unwrap();
+        }
+        output.push(']');
+        output
+    })
+}
+
+/// Returns an iterator of `count` JSON strings with a random, possibly nested
+/// structure (including random field names).
+fn random_json_structure(count: usize) -> impl Iterator<Item = String> {
+    // Scalars, objects, and arrays are all enabled; nesting stops at depth 5.
+    let mut json_source = RandomJsonGenerator {
+        max_depth: 5,
+        max_fields: 10,
+        max_array_length: 10,
+        null_weight: 5,
+        boolean_weight: 10,
+        string_weight: 25,
+        number_weight: 25,
+        object_weight: 25,
+        array_weight: 25,
+        ..Default::default()
+    };
+    std::iter::repeat_with(move || json_source.next().to_string()).take(count)
+}
+
+/// Returns an iterator of `count` flat JSON objects, each with exactly `max_fields`
+/// randomly named fields holding scalar values.
+fn random_structure(count: usize, max_fields: usize) -> impl Iterator<Item = String> {
+    let mut object_source = RandomJsonGenerator {
+        max_fields,
+        max_depth: 1,
+        max_array_length: 0,
+        null_weight: 5,
+        boolean_weight: 10,
+        string_weight: 25,
+        number_weight: 25,
+        // `next_object` only draws scalar values, so the two container weights
+        // below do not influence the output.
+        object_weight: 25,
+        array_weight: 0,
+        ..Default::default()
+    };
+    std::iter::repeat_with(move || object_source.next_object().to_string()).take(count)
+}
+
+/// Generates JSON text with random structure and field names.
+///
+/// The kind of each value is drawn in proportion to the `*_weight` fields,
+/// relative to the sum of the weights in play (they are not percentages).
+#[derive(Debug)]
+struct RandomJsonGenerator {
+    /// Source of randomness for every draw
+    rng: StdRng,
+    /// Relative weight of generating a null value
+    null_weight: usize,
+    /// Relative weight of generating a string value
+    string_weight: usize,
+    /// Relative weight of generating a number value (integer or float)
+    number_weight: usize,
+    /// Relative weight of generating a boolean value
+    boolean_weight: usize,
+    /// Relative weight of generating an object value (not used by `next_object`)
+    object_weight: usize,
+    /// Relative weight of generating an array value (not used by `next_object`)
+    array_weight: usize,
+
+    /// Max number of fields in an object (`next_object` always emits exactly this many)
+    max_fields: usize,
+    /// Max number of elements in a generated array
+    max_array_length: usize,
+
+    /// Nesting depth at which a placeholder string is written instead of a new value
+    max_depth: usize,
+    /// Reusable buffer holding the most recently generated JSON text
+    output_buffer: String,
+}
+
+impl Default for RandomJsonGenerator {
+    /// Creates a generator backed by `seedable_rng`, with every type weight set to
+    /// zero and every limit set to one.
+    ///
+    /// Callers override the weights and limits they need via struct update syntax.
+    fn default() -> Self {
+        Self {
+            rng: seedable_rng(),
+            output_buffer: String::new(),
+            // size limits
+            max_depth: 1,
+            max_fields: 1,
+            max_array_length: 1,
+            // value-kind weights
+            null_weight: 0,
+            boolean_weight: 0,
+            number_weight: 0,
+            string_weight: 0,
+            object_weight: 0,
+            array_weight: 0,
+        }
+    }
+}
+
+impl RandomJsonGenerator {
+    /// Generates the next random JSON value and returns it as text.
+    ///
+    /// The returned slice borrows the internal buffer, which is overwritten by the
+    /// next call to `next` or `next_object`.
+    fn next(&mut self) -> &str {
+        self.output_buffer.clear();
+        self.append_random_json(0);
+        &self.output_buffer
+    }
+
+    /// Generates the next flat JSON object and returns it as text.
+    ///
+    /// The returned slice borrows the internal buffer, which is overwritten by the
+    /// next call to `next` or `next_object`.
+    fn next_object(&mut self) -> &str {
+        self.output_buffer.clear();
+        self.append_random_json_for_object();
+        &self.output_buffer
+    }
+
+    /// Appends a flat JSON object with exactly `max_fields` randomly named fields.
+    ///
+    /// Field values are scalars only (null, string, number, or boolean), chosen in
+    /// proportion to the corresponding weights. `object_weight`, `array_weight`,
+    /// `max_array_length`, and `max_depth` are not consulted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `max_fields` is non-zero while all four scalar weights are zero,
+    /// because the range to sample from is then empty.
+    fn append_random_json_for_object(&mut self) {
+        // Only the scalar-related settings are needed here.
+        let Self {
+            rng,
+            null_weight,
+            string_weight,
+            number_weight,
+            boolean_weight,
+            max_fields,
+            output_buffer,
+            ..
+        } = self;
+
+        // Exclusive upper bounds of each type's slice of `0..total_weight`. A draw
+        // belongs to the first slice whose bound exceeds it, so every type owns
+        // exactly `weight` values and a weight of zero disables the type.
+        let null_end = *null_weight;
+        let string_end = null_end + *string_weight;
+        let number_end = string_end + *number_weight;
+        let total_weight = number_end + *boolean_weight;
+
+        write!(output_buffer, "{{").unwrap();
+        for i in 0..*max_fields {
+            let key_length = rng.random_range(1..=20);
+            let key: String = (0..key_length)
+                .map(|_| rng.sample(Alphanumeric) as char)
+                .collect();
+            write!(output_buffer, "\"{key}\":").unwrap();
+
+            // Generate a random number to determine the type
+            let random_value: usize = rng.random_range(0..total_weight);
+
+            if random_value < null_end {
+                write!(output_buffer, "null").unwrap();
+            } else if random_value < string_end {
+                // Generate a random string between 1 and 20 characters
+                let length = rng.random_range(1..=20);
+                let random_string: String = (0..length)
+                    .map(|_| rng.sample(Alphanumeric) as char)
+                    .collect();
+                write!(output_buffer, "\"{random_string}\"").unwrap();
+            } else if random_value < number_end {
+                // 50% chance of generating an integer or a float
+                if rng.random_bool(0.5) {
+                    // Generate a random integer
+                    let random_integer: i64 = rng.random_range(-1000..1000);
+                    write!(output_buffer, "{random_integer}").unwrap();
+                } else {
+                    // Generate a random float
+                    let random_float: f64 = rng.random_range(-1000.0..1000.0);
+                    write!(output_buffer, "{random_float}").unwrap();
+                }
+            } else {
+                // The remaining slice belongs to boolean. Writing a value
+                // unconditionally guarantees every key is followed by a value.
+                let random_boolean: bool = rng.random();
+                write!(output_buffer, "{random_boolean}").unwrap();
+            }
+            if i + 1 < *max_fields {
+                write!(output_buffer, ",").unwrap();
+            }
+        }
+        write!(output_buffer, "}}").unwrap();
+    }
+
+    /// Appends one random JSON value to the output buffer, recursing into objects
+    /// and arrays.
+    ///
+    /// Once `current_depth` reaches `max_depth`, the string `"max_depth reached"`
+    /// is written instead of a new value, which bounds the recursion.
+    ///
+    /// # Panics
+    ///
+    /// Panics if every weight is zero, or if an object / array is selected while
+    /// `max_fields` / `max_array_length` is zero (the sampled range is then empty).
+    fn append_random_json(&mut self, current_depth: usize) {
+        // use destructuring to ensure each field is used
+        let Self {
+            rng,
+            null_weight,
+            string_weight,
+            number_weight,
+            boolean_weight,
+            object_weight,
+            array_weight,
+            max_fields,
+            max_array_length,
+            max_depth,
+            output_buffer,
+        } = self;
+
+        if current_depth >= *max_depth {
+            write!(output_buffer, "\"max_depth reached\"").unwrap();
+            return;
+        }
+
+        let total_weight = *null_weight
+            + *string_weight
+            + *number_weight
+            + *boolean_weight
+            + *object_weight
+            + *array_weight;
+
+        // Generate a random number in `0..total_weight` to determine the type.
+        // Each type claims the next `weight` values, so the comparisons below are
+        // strict: with `<=` the first type would gain a value, the last would lose
+        // one, and a type with weight zero could still be selected.
+        let mut random_value: usize = rng.random_range(0..total_weight);
+
+        if random_value < *null_weight {
+            write!(output_buffer, "null").unwrap();
+            return;
+        }
+        random_value -= *null_weight;
+
+        if random_value < *string_weight {
+            // Generate a random string between 1 and 20 characters
+            let length = rng.random_range(1..=20);
+            let random_string: String = (0..length)
+                .map(|_| rng.sample(Alphanumeric) as char)
+                .collect();
+            write!(output_buffer, "\"{random_string}\"").unwrap();
+            return;
+        }
+        random_value -= *string_weight;
+
+        if random_value < *number_weight {
+            // 50% chance of generating an integer or a float
+            if rng.random_bool(0.5) {
+                // Generate a random integer
+                let random_integer: i64 = rng.random_range(-1000..1000);
+                write!(output_buffer, "{random_integer}").unwrap();
+            } else {
+                // Generate a random float
+                let random_float: f64 = rng.random_range(-1000.0..1000.0);
+                write!(output_buffer, "{random_float}").unwrap();
+            }
+            return;
+        }
+        random_value -= *number_weight;
+
+        if random_value < *boolean_weight {
+            // Generate a random boolean
+            let random_boolean: bool = rng.random();
+            write!(output_buffer, "{random_boolean}").unwrap();
+            return;
+        }
+        random_value -= *boolean_weight;
+
+        if random_value < *object_weight {
+            // Generate a random object
+            let num_fields = rng.random_range(1..=*max_fields);
+
+            write!(output_buffer, "{{").unwrap();
+            // From here on go through `self` directly: the recursive call needs
+            // exclusive access to the whole generator.
+            for i in 0..num_fields {
+                let key_length = self.rng.random_range(1..=20);
+                let key: String = (0..key_length)
+                    .map(|_| self.rng.sample(Alphanumeric) as char)
+                    .collect();
+                write!(&mut self.output_buffer, "\"{key}\":").unwrap();
+                self.append_random_json(current_depth + 1);
+                if i < num_fields - 1 {
+                    write!(&mut self.output_buffer, ",").unwrap();
+                }
+            }
+            write!(&mut self.output_buffer, "}}").unwrap();
+            return;
+        }
+        random_value -= *object_weight;
+
+        if random_value < *array_weight {
+            // Generate a random array
+            let length = rng.random_range(1..=*max_array_length);
+            write!(output_buffer, "[").unwrap();
+            for i in 0..length {
+                self.append_random_json(current_depth + 1);
+                if i < length - 1 {
+                    write!(&mut self.output_buffer, ",").unwrap();
+                }
+            }
+            write!(&mut self.output_buffer, "]").unwrap();
+            return;
+        }
+
+        // Unreachable as long as the draw is below `total_weight`; kept as a guard.
+        panic!("Random value did not match any type");
+    }
+}
diff --git a/parquet-variant-compute/src/arrow_to_variant.rs b/parquet-variant-compute/src/arrow_to_variant.rs
new file mode 100644
index 000000000000..be241a9a4e00
--- /dev/null
+++ b/parquet-variant-compute/src/arrow_to_variant.rs
@@ -0,0 +1,2077 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{
+    Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
+    GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray,
+};
+use arrow::compute::{CastOptions, kernels::cast};
+use arrow::datatypes::{
+    self as datatypes, ArrowNativeType, ArrowPrimitiveType, ArrowTemporalType, ArrowTimestampType,
+    DecimalType, RunEndIndexType,
+};
+use arrow::temporal_conversions::{as_date, as_datetime, as_time};
+use arrow_schema::{ArrowError, DataType, TimeUnit};
+use chrono::{DateTime, TimeZone, Utc};
+use parquet_variant::{
+    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
+    VariantDecimal16, VariantDecimalType,
+};
+use std::collections::HashMap;
+use std::ops::Range;
+
+// ============================================================================
+// Row-oriented builders for efficient Arrow-to-Variant conversion
+// ============================================================================
+
+/// Per-row Arrow-to-Variant converter, with one variant per supported Arrow array type
+pub(crate) enum ArrowToVariantRowBuilder<'a> {
+    Null(NullArrowToVariantBuilder),
+    Boolean(BooleanArrowToVariantBuilder<'a>),
+    PrimitiveInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::Int8Type>),
+    PrimitiveInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::Int16Type>),
+    PrimitiveInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::Int32Type>),
+    PrimitiveInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::Int64Type>),
+    PrimitiveUInt8(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt8Type>),
+    PrimitiveUInt16(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt16Type>),
+    PrimitiveUInt32(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt32Type>),
+    PrimitiveUInt64(PrimitiveArrowToVariantBuilder<'a, datatypes::UInt64Type>),
+    PrimitiveFloat16(PrimitiveArrowToVariantBuilder<'a, datatypes::Float16Type>),
+    PrimitiveFloat32(PrimitiveArrowToVariantBuilder<'a, datatypes::Float32Type>),
+    PrimitiveFloat64(PrimitiveArrowToVariantBuilder<'a, datatypes::Float64Type>),
+    Decimal32(DecimalArrowToVariantBuilder<'a, datatypes::Decimal32Type, VariantDecimal4>),
+    Decimal64(DecimalArrowToVariantBuilder<'a, datatypes::Decimal64Type, VariantDecimal8>),
+    Decimal128(DecimalArrowToVariantBuilder<'a, datatypes::Decimal128Type, VariantDecimal16>),
+    Decimal256(Decimal256ArrowToVariantBuilder<'a>),
+    TimestampSecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampSecondType>),
+    TimestampMillisecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMillisecondType>),
+    TimestampMicrosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampMicrosecondType>),
+    TimestampNanosecond(TimestampArrowToVariantBuilder<'a, datatypes::TimestampNanosecondType>),
+    Date32(DateArrowToVariantBuilder<'a, datatypes::Date32Type>),
+    Date64(DateArrowToVariantBuilder<'a, datatypes::Date64Type>),
+    Time32Second(TimeArrowToVariantBuilder<'a, datatypes::Time32SecondType>),
+    Time32Millisecond(TimeArrowToVariantBuilder<'a, datatypes::Time32MillisecondType>),
+    Time64Microsecond(TimeArrowToVariantBuilder<'a, datatypes::Time64MicrosecondType>),
+    Time64Nanosecond(TimeArrowToVariantBuilder<'a, datatypes::Time64NanosecondType>),
+    Binary(BinaryArrowToVariantBuilder<'a, i32>),
+    LargeBinary(BinaryArrowToVariantBuilder<'a, i64>),
+    BinaryView(BinaryViewArrowToVariantBuilder<'a>),
+    FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder<'a>),
+    Utf8(StringArrowToVariantBuilder<'a, i32>),
+    LargeUtf8(StringArrowToVariantBuilder<'a, i64>),
+    Utf8View(StringViewArrowToVariantBuilder<'a>),
+    List(ListArrowToVariantBuilder<'a, GenericListArray<i32>>),
+    LargeList(ListArrowToVariantBuilder<'a, GenericListArray<i64>>),
+    ListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i32>>),
+    LargeListView(ListArrowToVariantBuilder<'a, GenericListViewArray<i64>>),
+    FixedSizeList(ListArrowToVariantBuilder<'a, FixedSizeListArray>),
+    Struct(StructArrowToVariantBuilder<'a>),
+    Map(MapArrowToVariantBuilder<'a>),
+    Union(UnionArrowToVariantBuilder<'a>),
+    Dictionary(DictionaryArrowToVariantBuilder<'a>),
+    RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int16Type>),
+    RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int32Type>),
+    RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder<'a, datatypes::Int64Type>),
+}
+
+impl<'a> ArrowToVariantRowBuilder<'a> {
+    /// Converts the row at `index` and appends the result to `builder`, delegating
+    /// to the builder held by the active variant.
+    pub fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        use ArrowToVariantRowBuilder::*;
+
+        // Every variant forwards to its inner builder in exactly the same way, so
+        // the arms are generated from a list of variant names. The match remains
+        // exhaustive: a variant missing from the list is a compile error.
+        macro_rules! forward_to_inner {
+            ($this:expr, $builder:expr, $index:expr; $($variant:ident),+ $(,)?) => {
+                match $this {
+                    $( $variant(inner) => inner.append_row($builder, $index), )+
+                }
+            };
+        }
+
+        forward_to_inner!(
+            self, builder, index;
+            Null,
+            Boolean,
+            PrimitiveInt8,
+            PrimitiveInt16,
+            PrimitiveInt32,
+            PrimitiveInt64,
+            PrimitiveUInt8,
+            PrimitiveUInt16,
+            PrimitiveUInt32,
+            PrimitiveUInt64,
+            PrimitiveFloat16,
+            PrimitiveFloat32,
+            PrimitiveFloat64,
+            Decimal32,
+            Decimal64,
+            Decimal128,
+            Decimal256,
+            TimestampSecond,
+            TimestampMillisecond,
+            TimestampMicrosecond,
+            TimestampNanosecond,
+            Date32,
+            Date64,
+            Time32Second,
+            Time32Millisecond,
+            Time64Microsecond,
+            Time64Nanosecond,
+            Binary,
+            LargeBinary,
+            BinaryView,
+            FixedSizeBinary,
+            Utf8,
+            LargeUtf8,
+            Utf8View,
+            List,
+            LargeList,
+            ListView,
+            LargeListView,
+            FixedSizeList,
+            Struct,
+            Map,
+            Union,
+            Dictionary,
+            RunEndEncodedInt16,
+            RunEndEncodedInt32,
+            RunEndEncodedInt64,
+        )
+    }
+}
+
+/// Creates the row builder that matches `data_type`.
+///
+/// `array` is downcast according to `data_type`, so the two are expected to agree.
+///
+/// # Errors
+///
+/// Returns an error for duration and interval types, for unsupported `Time32` /
+/// `Time64` units, and for unsupported run-end index types. Errors raised while
+/// constructing the builders for nested types are propagated.
+pub(crate) fn make_arrow_to_variant_row_builder<'a>(
+    data_type: &'a DataType,
+    array: &'a dyn Array,
+    options: &'a CastOptions,
+) -> Result<ArrowToVariantRowBuilder<'a>, ArrowError> {
+    use ArrowToVariantRowBuilder::*;
+
+    Ok(match data_type {
+        // Null, boolean, and plain numeric types
+        DataType::Null => Null(NullArrowToVariantBuilder),
+        DataType::Boolean => Boolean(BooleanArrowToVariantBuilder::new(array)),
+        DataType::Int8 => PrimitiveInt8(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Int16 => PrimitiveInt16(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Int32 => PrimitiveInt32(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Int64 => PrimitiveInt64(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::UInt8 => PrimitiveUInt8(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::UInt16 => PrimitiveUInt16(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::UInt32 => PrimitiveUInt32(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::UInt64 => PrimitiveUInt64(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Float16 => PrimitiveFloat16(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Float32 => PrimitiveFloat32(PrimitiveArrowToVariantBuilder::new(array)),
+        DataType::Float64 => PrimitiveFloat64(PrimitiveArrowToVariantBuilder::new(array)),
+
+        // Decimals carry their scale into the builder
+        DataType::Decimal32(_, scale) => {
+            Decimal32(DecimalArrowToVariantBuilder::new(array, options, *scale))
+        }
+        DataType::Decimal64(_, scale) => {
+            Decimal64(DecimalArrowToVariantBuilder::new(array, options, *scale))
+        }
+        DataType::Decimal128(_, scale) => {
+            Decimal128(DecimalArrowToVariantBuilder::new(array, options, *scale))
+        }
+        DataType::Decimal256(_, scale) => {
+            Decimal256(Decimal256ArrowToVariantBuilder::new(array, options, *scale))
+        }
+
+        // Timestamps only record whether a time zone is present
+        DataType::Timestamp(TimeUnit::Second, time_zone) => TimestampSecond(
+            TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
+        ),
+        DataType::Timestamp(TimeUnit::Millisecond, time_zone) => TimestampMillisecond(
+            TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
+        ),
+        DataType::Timestamp(TimeUnit::Microsecond, time_zone) => TimestampMicrosecond(
+            TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
+        ),
+        DataType::Timestamp(TimeUnit::Nanosecond, time_zone) => TimestampNanosecond(
+            TimestampArrowToVariantBuilder::new(array, options, time_zone.is_some()),
+        ),
+
+        // Dates and times; any other unit for Time32 / Time64 is rejected
+        DataType::Date32 => Date32(DateArrowToVariantBuilder::new(array, options)),
+        DataType::Date64 => Date64(DateArrowToVariantBuilder::new(array, options)),
+        DataType::Time32(TimeUnit::Second) => {
+            Time32Second(TimeArrowToVariantBuilder::new(array, options))
+        }
+        DataType::Time32(TimeUnit::Millisecond) => {
+            Time32Millisecond(TimeArrowToVariantBuilder::new(array, options))
+        }
+        DataType::Time32(time_unit) => {
+            return Err(ArrowError::CastError(format!(
+                "Unsupported Time32 unit: {time_unit:?}"
+            )));
+        }
+        DataType::Time64(TimeUnit::Microsecond) => {
+            Time64Microsecond(TimeArrowToVariantBuilder::new(array, options))
+        }
+        DataType::Time64(TimeUnit::Nanosecond) => {
+            Time64Nanosecond(TimeArrowToVariantBuilder::new(array, options))
+        }
+        DataType::Time64(time_unit) => {
+            return Err(ArrowError::CastError(format!(
+                "Unsupported Time64 unit: {time_unit:?}"
+            )));
+        }
+        DataType::Duration(_) | DataType::Interval(_) => {
+            return Err(ArrowError::InvalidArgumentError(
+                "Casting duration/interval types to Variant is not supported. \
+                The Variant format does not define duration/interval types."
+                    .to_string(),
+            ));
+        }
+
+        // Binary and string types
+        DataType::Binary => Binary(BinaryArrowToVariantBuilder::new(array)),
+        DataType::LargeBinary => LargeBinary(BinaryArrowToVariantBuilder::new(array)),
+        DataType::BinaryView => BinaryView(BinaryViewArrowToVariantBuilder::new(array)),
+        DataType::FixedSizeBinary(_) => {
+            FixedSizeBinary(FixedSizeBinaryArrowToVariantBuilder::new(array))
+        }
+        DataType::Utf8 => Utf8(StringArrowToVariantBuilder::new(array)),
+        DataType::LargeUtf8 => LargeUtf8(StringArrowToVariantBuilder::new(array)),
+        DataType::Utf8View => Utf8View(StringViewArrowToVariantBuilder::new(array)),
+
+        // Nested types; constructing their builders can fail
+        DataType::List(_) => List(ListArrowToVariantBuilder::new(array.as_list(), options)?),
+        DataType::LargeList(_) => {
+            LargeList(ListArrowToVariantBuilder::new(array.as_list(), options)?)
+        }
+        DataType::ListView(_) => {
+            ListView(ListArrowToVariantBuilder::new(array.as_list_view(), options)?)
+        }
+        DataType::LargeListView(_) => {
+            LargeListView(ListArrowToVariantBuilder::new(array.as_list_view(), options)?)
+        }
+        DataType::FixedSizeList(_, _) => FixedSizeList(ListArrowToVariantBuilder::new(
+            array.as_fixed_size_list(),
+            options,
+        )?),
+        DataType::Struct(_) => {
+            Struct(StructArrowToVariantBuilder::new(array.as_struct(), options)?)
+        }
+        DataType::Map(_, _) => Map(MapArrowToVariantBuilder::new(array, options)?),
+        DataType::Union(_, _) => Union(UnionArrowToVariantBuilder::new(array, options)?),
+        DataType::Dictionary(_, _) => {
+            Dictionary(DictionaryArrowToVariantBuilder::new(array, options)?)
+        }
+
+        // Run-end encoded arrays are dispatched on the run-end index type
+        DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
+            DataType::Int16 => {
+                RunEndEncodedInt16(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
+            }
+            DataType::Int32 => {
+                RunEndEncodedInt32(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
+            }
+            DataType::Int64 => {
+                RunEndEncodedInt64(RunEndEncodedArrowToVariantBuilder::new(array, options)?)
+            }
+            _ => {
+                return Err(ArrowError::CastError(format!(
+                    "Unsupported run ends type: {}",
+                    run_ends.data_type()
+                )));
+            }
+        },
+    })
+}
+
+/// Macro to define (possibly generic) row builders with consistent structure and behavior.
+///
+/// The macro optionally accepts a transform for the values read from the underlying
+/// array. Transforms of the form `|value| { ... }` are infallible (and should produce something
+/// that implements `Into<Variant>`), while transforms of the form `|value| -> Option<_> { ... }`
+/// are fallible (and should produce `Option<impl Into<Variant>>`); a failed transform will either
+/// append `Variant::Null` to the builder or return an error, depending on `CastOptions::safe`.
+///
+/// Also supports optional extra fields that are passed to the constructor and which are available
+/// by reference in the value transform. Providing a fallible value transform requires also
+/// providing the extra field `options: &'a CastOptions`.
+// TODO: If/when the macro_metavar_expr feature stabilizes, the `ignore` meta-function would allow
+// us to "use" captured tokens without emitting them:
+//
+// ```
+// $(
+//     ${ignore($value)}
+//     $(
+//         ${ignore($option_ty)}
+//         options: &$lifetime CastOptions,
+//     )?
+// )?
+// ```
+//
+// That, in turn, would allow us to inject the `options` field whenever the user specifies a
+// fallible value transform, instead of requiring them to manually define it. This might not be
+// worth the trouble, though, because it makes for some pretty bulky and unwieldy macro expansions.
+macro_rules! define_row_builder {
+    (
+        struct $name:ident<$lifetime:lifetime $(, $generic:ident $( : $bound:path )? )*>
+        $( where $where_path:path: $where_bound:path $(,)? )?
+        $({ $( $field:ident: $field_type:ty ),+ $(,)? })?,
+        |$array_param:ident| -> $array_type:ty { $init_expr:expr }
+        $(, |$value:ident| $(-> Option<$option_ty:ty>)? $value_transform:expr )?
+    ) => {
+        pub(crate) struct $name<$lifetime $(, $generic: $( $bound )? )*>
+        $( where $where_path: $where_bound )?
+        {
+            array: &$lifetime $array_type,
+            $( $( $field: $field_type, )+ )?
+            _phantom: std::marker::PhantomData<($( $generic, )*)>, // capture all type params
+        }
+
+        impl<$lifetime $(, $generic: $( $bound )? )*> $name<$lifetime $(, $generic)*>
+        $( where $where_path: $where_bound )?
+        {
+            pub(crate) fn new($array_param: &$lifetime dyn Array $( $(, $field: $field_type )+ )?) -> Self {
+                Self {
+                    array: $init_expr,
+                    $( $( $field, )+ )?
+                    _phantom: std::marker::PhantomData,
+                }
+            }
+
+            fn append_row(&self, builder: &mut impl VariantBuilderExt, index: usize) -> Result<(), ArrowError> {
+                if self.array.is_null(index) {
+                    builder.append_null();
+                } else {
+                    // Macro hygiene: Give any extra fields names the value transform can access.
+                    //
+                    // The value transform doesn't normally reference cast options, but the macro's
+                    // caller still has to declare the field because stable rust has no way to "use"
+                    // a captured token without emitting it. So, silence unused variable warnings,
+                    // assuming that's the `options` field. Unfortunately, that also silences
+                    // legitimate compiler warnings if an infallible value transform fails to use
+                    // its first extra field.
+                    $(
+                        #[allow(unused)]
+                        $( let $field = &self.$field; )+
+                    )?
+
+                    // Apply the value transform, if any (with name swapping for hygiene)
+                    let value = self.array.value(index);
+                    $(
+                        let $value = value;
+                        let value = $value_transform;
+                        $(
+                            // NOTE: The `?` macro expansion fails without the type annotation.
+                            let Some(value): Option<$option_ty> = value else {
+                                if !self.options.safe {
+                                    return Err(ArrowError::ComputeError(format!(
+                                        "Failed to convert value at index {index}: conversion failed",
+                                    )));
+                                } else {
+                                    // A failed conversion is encoded as Variant::Null,
+                                    // distinct from None indicating a missing value
+                                    builder.append_value(Variant::Null);
+                                    return Ok(());
+                                }
+                            };
+                        )?
+                    )?
+                    builder.append_value(value);
+                }
+                Ok(())
+            }
+        }
+    };
+}
+
+// Row builder for Arrow boolean columns. The accessor downcasts the input with
+// `as_boolean()`; no value transform is supplied, so the generated `append_row` appends
+// `array.value(index)` unchanged.
+define_row_builder!(
+    struct BooleanArrowToVariantBuilder<'a>,
+    |array| -> arrow::array::BooleanArray { array.as_boolean() }
+);
+
+// Row builder for any primitive Arrow array whose native value converts directly into a
+// `Variant` (see the `where` bound). The accessor downcasts with `as_primitive()`; no value
+// transform is supplied.
+define_row_builder!(
+    struct PrimitiveArrowToVariantBuilder<'a, T: ArrowPrimitiveType>
+    where T::Native: Into<Variant<'a, 'a>>,
+    |array| -> PrimitiveArray<T> { array.as_primitive() }
+);
+
+// Row builder for decimal arrays whose native type matches the target variant decimal `V`.
+// The value transform is fallible: `try_new_with_signed_scale(..).ok()` yields `None` when
+// the value cannot be represented, and the macro then either returns a `ComputeError`
+// (`options.safe == false`) or appends `Variant::Null` (`options.safe == true`).
+define_row_builder!(
+    struct DecimalArrowToVariantBuilder<'a, A: DecimalType, V>
+    where
+        V: VariantDecimalType<Native = A::Native>,
+    {
+        options: &'a CastOptions<'a>,
+        scale: i8,
+    },
+    |array| -> PrimitiveArray<A> { array.as_primitive() },
+    |value| -> Option<_> { V::try_new_with_signed_scale(value, *scale).ok() }
+);
+
+// Decimal256 is converted in two fallible stages: first narrow the 256-bit value to `i128`,
+// then build a `VariantDecimal16` with the column's scale. A failure in either stage yields
+// `None`, which the macro reports according to `options.safe`.
+define_row_builder!(
+    struct Decimal256ArrowToVariantBuilder<'a> {
+        options: &'a CastOptions<'a>,
+        scale: i8,
+    },
+    |array| -> arrow::array::Decimal256Array { array.as_primitive() },
+    |value| -> Option<_> {
+        value
+            .to_i128()
+            .and_then(|narrowed| VariantDecimal16::try_new_with_signed_scale(narrowed, *scale).ok())
+    }
+);
+
+// Row builder for timestamp arrays. The raw value is first turned into a naive datetime via
+// Arrow's temporal helpers; `None` from that step is treated as a failed conversion by the
+// macro. The `has_time_zone` field selects which variant flavour is produced.
+define_row_builder!(
+    struct TimestampArrowToVariantBuilder<'a, T: ArrowTimestampType> {
+        options: &'a CastOptions<'a>,
+        has_time_zone: bool,
+    },
+    |array| -> PrimitiveArray<T> { array.as_primitive() },
+    |value| -> Option<_> {
+        match as_datetime::<T>(value) {
+            // Zoned column: interpret the naive value as UTC (From<DateTime<Utc>> for Variant)
+            Some(naive) if *has_time_zone => {
+                let zoned: DateTime<Utc> = Utc.from_utc_datetime(&naive);
+                Some(Variant::from(zoned))
+            }
+            // Zone-less column: keep the naive value (From<NaiveDateTime> for Variant)
+            Some(naive) => Some(Variant::from(naive)),
+            None => None,
+        }
+    }
+);
+
+// Row builder for date arrays. The native value is widened to `i64` and handed to
+// `as_date`; a `None` result is reported by the macro according to `options.safe`.
+define_row_builder!(
+    struct DateArrowToVariantBuilder<'a, T: ArrowTemporalType>
+    where
+        i64: From<T::Native>,
+    {
+        options: &'a CastOptions<'a>,
+    },
+    |array| -> PrimitiveArray<T> { array.as_primitive() },
+    |value| -> Option<_> { as_date::<T>(i64::from(value)) }
+);
+
+// Row builder for time-of-day arrays. The native value is widened to `i64` and handed to
+// `as_time`; a `None` result is reported by the macro according to `options.safe`.
+define_row_builder!(
+    struct TimeArrowToVariantBuilder<'a, T: ArrowTemporalType>
+    where
+        i64: From<T::Native>,
+    {
+        options: &'a CastOptions<'a>,
+    },
+    |array| -> PrimitiveArray<T> { array.as_primitive() },
+    |value| -> Option<_> { as_time::<T>(i64::from(value)) }
+);
+
+// Row builder for offset-based binary arrays; `O` selects the offset width. No value
+// transform is supplied, so each value is appended as returned by the array.
+define_row_builder!(
+    struct BinaryArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
+    |array| -> GenericBinaryArray<O> { array.as_binary() }
+);
+
+// Row builder for `BinaryViewArray`, downcast through `as_byte_view()`; no value transform.
+define_row_builder!(
+    struct BinaryViewArrowToVariantBuilder<'a>,
+    |array| -> arrow::array::BinaryViewArray { array.as_byte_view() }
+);
+
+// Row builder for `FixedSizeBinaryArray`, downcast through `as_fixed_size_binary()`; no
+// value transform.
+define_row_builder!(
+    struct FixedSizeBinaryArrowToVariantBuilder<'a>,
+    |array| -> arrow::array::FixedSizeBinaryArray { array.as_fixed_size_binary() }
+);
+
+// Row builder for offset-based string arrays; `O` selects the offset width. No value
+// transform is supplied, so each value is appended as returned by the array.
+define_row_builder!(
+    struct StringArrowToVariantBuilder<'a, O: OffsetSizeTrait>,
+    |array| -> GenericStringArray<O> { array.as_string() }
+);
+
+// Row builder for `StringViewArray`, downcast through `as_string_view()`; no value transform.
+define_row_builder!(
+    struct StringViewArrowToVariantBuilder<'a>,
+    |array| -> arrow::array::StringViewArray { array.as_string_view() }
+);
+
+/// Stateless row builder that emits a null for every row it is asked to append.
+pub(crate) struct NullArrowToVariantBuilder;
+
+impl NullArrowToVariantBuilder {
+    /// Appends a null to `builder`; the row index is irrelevant and therefore ignored.
+    #[rustfmt::skip]
+    fn append_row(&mut self, builder: &mut impl VariantBuilderExt, _index: usize) -> Result<(), ArrowError> {
+        builder.append_null();
+        Ok(())
+    }
+}
+
+/// Row builder shared by every [`ListLikeArray`] implementation (List, LargeList, ListView,
+/// LargeListView and FixedSizeList). Each non-null row becomes a variant list whose elements
+/// are produced by a nested row builder over the child values array.
+pub(crate) struct ListArrowToVariantBuilder<'a, L: ListLikeArray> {
+    list_array: &'a L,
+    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
+}
+
+impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> {
+    /// Creates the builder together with the nested builder for the child values.
+    ///
+    /// Fails if no row builder can be created for the child data type.
+    pub(crate) fn new(array: &'a L, options: &'a CastOptions) -> Result<Self, ArrowError> {
+        let child = array.values();
+        let child_builder = make_arrow_to_variant_row_builder(child.data_type(), child, options)?;
+
+        Ok(Self {
+            list_array: array,
+            values_builder: Box::new(child_builder),
+        })
+    }
+
+    /// Appends row `index`: a null for a null list, otherwise a list holding every element in
+    /// the row's element range.
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        if self.list_array.is_null(index) {
+            builder.append_null();
+        } else {
+            // Resolve the child range before opening the nested list builder.
+            let elements = self.list_array.element_range(index);
+            let child_builder = &mut self.values_builder;
+
+            let mut list_builder = builder.try_new_list()?;
+            for element in elements {
+                child_builder.append_row(&mut list_builder, element)?;
+            }
+            list_builder.finish();
+        }
+        Ok(())
+    }
+}
+
+/// Trait for list-like arrays that can provide element ranges
+///
+/// Implemented in this file for `GenericListArray`, `GenericListViewArray` and
+/// `FixedSizeListArray`, which lets `ListArrowToVariantBuilder` treat them uniformly.
+pub(crate) trait ListLikeArray: Array {
+    /// Get the values array
+    ///
+    /// The ranges returned by [`Self::element_range`] index into this array.
+    fn values(&self) -> &ArrayRef;
+
+    /// Get the start and end indices for a list element
+    ///
+    /// The returned range is half-open (`start..end`) and addresses positions of the array
+    /// returned by [`Self::values`]. The implementations in this file index their offset
+    /// data directly, so `index` is expected to be a valid row of the list array.
+    fn element_range(&self, index: usize) -> Range<usize>;
+}
+
+impl<O: OffsetSizeTrait> ListLikeArray for GenericListArray<O> {
+    /// Forwards to the inherent `values()` accessor.
+    fn values(&self) -> &ArrayRef {
+        self.values()
+    }
+
+    /// Row `index` spans the child positions between two consecutive offsets.
+    fn element_range(&self, index: usize) -> Range<usize> {
+        let bounds = self.offsets();
+        bounds[index].as_usize()..bounds[index + 1].as_usize()
+    }
+}
+
+impl<O: OffsetSizeTrait> ListLikeArray for GenericListViewArray<O> {
+    /// Forwards to the inherent `values()` accessor.
+    fn values(&self) -> &ArrayRef {
+        self.values()
+    }
+
+    /// Row `index` is described by an explicit (offset, size) pair.
+    fn element_range(&self, index: usize) -> Range<usize> {
+        let start = self.value_offsets()[index].as_usize();
+        let length = self.value_sizes()[index].as_usize();
+        start..(start + length)
+    }
+}
+
+impl ListLikeArray for FixedSizeListArray {
+    /// Forwards to the inherent `values()` accessor.
+    fn values(&self) -> &ArrayRef {
+        self.values()
+    }
+
+    /// Every row has the same width, so row `index` starts at `index * width`.
+    fn element_range(&self, index: usize) -> Range<usize> {
+        let width = self.value_length().as_usize();
+        let start = index * width;
+        start..(start + width)
+    }
+}
+
+/// Row builder that renders each `StructArray` row as a variant object, delegating every
+/// column to its own nested row builder.
+pub(crate) struct StructArrowToVariantBuilder<'a> {
+    struct_array: &'a arrow::array::StructArray,
+    field_builders: Vec<(&'a str, ArrowToVariantRowBuilder<'a>)>,
+}
+
+impl<'a> StructArrowToVariantBuilder<'a> {
+    /// Pairs every column name with a row builder for that column.
+    ///
+    /// Fails with the first error reported while creating a column's row builder.
+    pub(crate) fn new(
+        struct_array: &'a arrow::array::StructArray,
+        options: &'a CastOptions,
+    ) -> Result<Self, ArrowError> {
+        let field_builders = struct_array
+            .column_names()
+            .into_iter()
+            .zip(struct_array.columns().iter())
+            .map(|(name, column)| {
+                make_arrow_to_variant_row_builder(column.data_type(), column.as_ref(), options)
+                    .map(|column_builder| (name, column_builder))
+            })
+            .collect::<Result<Vec<_>, ArrowError>>()?;
+
+        Ok(Self {
+            struct_array,
+            field_builders,
+        })
+    }
+
+    /// Appends row `index`: a null for a null struct, otherwise an object with one entry
+    /// attempt per column (each column builder decides what to write for its own value).
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        if self.struct_array.is_null(index) {
+            builder.append_null();
+            return Ok(());
+        }
+
+        let mut object = builder.try_new_object()?;
+        for (name, column_builder) in self.field_builders.iter_mut() {
+            // Route whatever the column builder appends into the object under `name`.
+            let mut slot = ObjectFieldBuilder::new(name, &mut object);
+            column_builder.append_row(&mut slot, index)?;
+        }
+        object.finish();
+        Ok(())
+    }
+}
+
+/// Row builder that renders each `MapArray` row as a variant object keyed by the
+/// string form of the map keys.
+pub(crate) struct MapArrowToVariantBuilder<'a> {
+    map_array: &'a arrow::array::MapArray,
+    key_strings: arrow::array::StringArray,
+    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
+}
+
+impl<'a> MapArrowToVariantBuilder<'a> {
+    /// Casts all keys to `Utf8` once up front and creates the nested builder for the values.
+    ///
+    /// Fails if the keys cannot be cast to strings or if no row builder exists for the
+    /// value type.
+    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
+        let map_array = array.as_map();
+
+        // One bulk cast here avoids converting keys row by row later.
+        let utf8_keys = cast(map_array.keys(), &DataType::Utf8)?;
+        let key_strings = utf8_keys.as_string::<i32>().clone();
+
+        let entry_values = map_array.values();
+        let values_builder = Box::new(make_arrow_to_variant_row_builder(
+            entry_values.data_type(),
+            entry_values.as_ref(),
+            options,
+        )?);
+
+        Ok(Self {
+            map_array,
+            key_strings,
+            values_builder,
+        })
+    }
+
+    /// Appends row `index`: a null for a null map, otherwise an object with one field per
+    /// key/value entry of the row (an empty map yields an empty object).
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        if self.map_array.is_null(index) {
+            builder.append_null();
+        } else {
+            // Entries of this row live between two consecutive offsets.
+            let bounds = self.map_array.offsets();
+            let entries = bounds[index].as_usize()..bounds[index + 1].as_usize();
+
+            let mut object_builder = builder.try_new_object()?;
+            for entry in entries {
+                let key = self.key_strings.value(entry);
+                let mut slot = ObjectFieldBuilder::new(key, &mut object_builder);
+                self.values_builder.append_row(&mut slot, entry)?;
+            }
+            object_builder.finish();
+        }
+        Ok(())
+    }
+}
+
+/// Union builder for both sparse and dense union arrays
+///
+/// NOTE: Union type ids are _not_ required to be dense, hence the hash map for child builders.
+pub(crate) struct UnionArrowToVariantBuilder<'a> {
+    union_array: &'a arrow::array::UnionArray,
+    child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>>,
+}
+
+impl<'a> UnionArrowToVariantBuilder<'a> {
+    /// Creates one child row builder per distinct type id that occurs in the array.
+    ///
+    /// Fails if a row builder cannot be created for one of those children.
+    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
+        let union_array = array.as_union();
+        let type_ids = union_array.type_ids();
+
+        // `type_ids()` is the per-row type id buffer, so the same id normally shows up many
+        // times. Build each child builder only once: constructing a builder can be costly
+        // (e.g. map builders cast all keys, dictionary builders normalize all keys), and
+        // rebuilding it per row would make construction scale with rows x child cost.
+        let mut child_builders: HashMap<i8, Box<ArrowToVariantRowBuilder<'a>>> = HashMap::new();
+        for &type_id in type_ids {
+            if child_builders.contains_key(&type_id) {
+                continue;
+            }
+            let child_array = union_array.child(type_id);
+            let child_builder = make_arrow_to_variant_row_builder(
+                child_array.data_type(),
+                child_array.as_ref(),
+                options,
+            )?;
+            child_builders.insert(type_id, Box::new(child_builder));
+        }
+
+        Ok(Self {
+            union_array,
+            child_builders,
+        })
+    }
+
+    /// Appends row `index` by delegating to the child builder selected by the row's type id,
+    /// using the row's value offset as the index into that child.
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        let type_id = self.union_array.type_id(index);
+        let value_offset = self.union_array.value_offset(index);
+
+        // Delegate to the appropriate child builder, or append null to handle an invalid type_id
+        match self.child_builders.get_mut(&type_id) {
+            Some(child_builder) => child_builder.append_row(builder, value_offset)?,
+            None => builder.append_null(),
+        }
+
+        Ok(())
+    }
+}
+
+/// Row builder for dictionary arrays: keys are normalized to `usize` once, after which each
+/// row is a direct lookup into the nested builder over the dictionary values.
+pub(crate) struct DictionaryArrowToVariantBuilder<'a> {
+    keys: &'a dyn Array, // only needed for null checks
+    normalized_keys: Vec<usize>,
+    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
+}
+
+impl<'a> DictionaryArrowToVariantBuilder<'a> {
+    /// Creates the nested values builder and precomputes the normalized keys.
+    ///
+    /// Fails if no row builder can be created for the dictionary's value type.
+    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
+        let dict_array = array.as_any_dictionary();
+        let dictionary = dict_array.values();
+        let values_builder = Box::new(make_arrow_to_variant_row_builder(
+            dictionary.data_type(),
+            dictionary.as_ref(),
+            options,
+        )?);
+
+        // WARNING: normalized_keys panics if values is empty
+        let normalized_keys = if dictionary.len() == 0 {
+            Vec::new()
+        } else {
+            dict_array.normalized_keys()
+        };
+
+        Ok(Self {
+            keys: dict_array.keys(),
+            normalized_keys,
+            values_builder,
+        })
+    }
+
+    /// Appends row `index`: a null when the key is null, otherwise the dictionary value the
+    /// normalized key points at.
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        if self.keys.is_null(index) {
+            builder.append_null();
+            return Ok(());
+        }
+
+        let value_index = self.normalized_keys[index];
+        self.values_builder.append_row(builder, value_index)
+    }
+}
+
+/// Run-end encoded array builder with efficient sequential access
+///
+/// The builder remembers the run that served the previous row, so walking the rows in
+/// order only ever steps to the next run; any other access pattern falls back to a binary
+/// search over the run ends.
+pub(crate) struct RunEndEncodedArrowToVariantBuilder<'a, R: RunEndIndexType> {
+    run_array: &'a arrow::array::RunArray<R>,
+    values_builder: Box<ArrowToVariantRowBuilder<'a>>,
+
+    run_ends: &'a [R::Native],
+    run_number: usize, // Physical index into run_ends and values
+    run_start: usize,  // Start of the current run, in un-offset run-end coordinates
+}
+
+impl<'a, R: RunEndIndexType> RunEndEncodedArrowToVariantBuilder<'a, R> {
+    /// Creates the builder and the nested row builder over the run values.
+    ///
+    /// Fails with a `CastError` if `array` is not a `RunArray<R>`, or with the error from
+    /// creating the nested builder.
+    pub(crate) fn new(array: &'a dyn Array, options: &'a CastOptions) -> Result<Self, ArrowError> {
+        let Some(run_array) = array.as_run_opt() else {
+            return Err(ArrowError::CastError("Expected RunArray".to_string()));
+        };
+
+        let values = run_array.values();
+        let values_builder =
+            make_arrow_to_variant_row_builder(values.data_type(), values.as_ref(), options)?;
+
+        Ok(Self {
+            run_array,
+            values_builder: Box::new(values_builder),
+            run_ends: run_array.run_ends().values(),
+            run_number: 0,
+            run_start: 0,
+        })
+    }
+
+    /// Positions `run_number` / `run_start` on the run that contains logical row `index`.
+    ///
+    /// Returns a `CastError` when `index` is not a valid row of the run array.
+    fn set_run_for_index(&mut self, index: usize) -> Result<(), ArrowError> {
+        let beyond_run_array =
+            || ArrowError::CastError(format!("Index {index} beyond run array"));
+
+        // Reject out-of-range rows up front so the cursor is never moved past the last run
+        // (which would make the caller index the values array out of bounds).
+        if index >= self.run_array.len() {
+            return Err(beyond_run_array());
+        }
+
+        // `run_ends` holds the un-offset run ends, so a sliced run array needs its slice
+        // offset added before the logical index can be compared against them.
+        let position = index + self.run_array.run_ends().offset();
+
+        if position >= self.run_start {
+            let Some(run_end) = self.run_ends.get(self.run_number) else {
+                return Err(beyond_run_array());
+            };
+            let run_end = run_end.as_usize();
+            if position < run_end {
+                // Still inside the current run.
+                return Ok(());
+            }
+            if position == run_end && self.run_number + 1 < self.run_ends.len() {
+                // First row of the next run: the common case for sequential access.
+                self.run_number += 1;
+                self.run_start = run_end;
+                return Ok(());
+            }
+        }
+
+        // Use partition_point for all non-sequential cases
+        let run_number = self
+            .run_ends
+            .partition_point(|&run_end| run_end.as_usize() <= position);
+        if run_number >= self.run_ends.len() {
+            return Err(beyond_run_array());
+        }
+        self.run_number = run_number;
+        self.run_start = match run_number {
+            0 => 0,
+            _ => self.run_ends[run_number - 1].as_usize(),
+        };
+        Ok(())
+    }
+
+    /// Appends logical row `index`: a null if the run's value is null, otherwise the run's
+    /// value re-encoded through the nested builder.
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<(), ArrowError> {
+        self.set_run_for_index(index)?;
+
+        // Handle null values
+        if self.run_array.values().is_null(self.run_number) {
+            builder.append_null();
+            return Ok(());
+        }
+
+        // Re-encode the value
+        self.values_builder.append_row(builder, self.run_number)?;
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{VariantArray, VariantArrayBuilder};
+    use arrow::array::{ArrayRef, BooleanArray, Int32Array, StringArray};
+    use arrow::datatypes::Int32Type;
+    use std::sync::Arc;
+
+    /// Converts `array` into a `VariantArray` via the row builder, using strict
+    /// (`safe: false`) cast options.
+    fn execute_row_builder_test(array: &dyn Array) -> VariantArray {
+        let strict_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        execute_row_builder_test_with_options(array, strict_options)
+    }
+
+    /// Same as `execute_row_builder_test`, but with caller-supplied cast options.
+    ///
+    /// Appends every row of `array` in order and checks that the resulting variant array
+    /// has the same length as the input.
+    fn execute_row_builder_test_with_options(
+        array: &dyn Array,
+        options: CastOptions,
+    ) -> VariantArray {
+        let row_count = array.len();
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(array.data_type(), array, &options).unwrap();
+        let mut array_builder = VariantArrayBuilder::new(row_count);
+
+        let mut row = 0;
+        while row < row_count {
+            row_builder.append_row(&mut array_builder, row).unwrap();
+            row += 1;
+        }
+
+        let variant_array = array_builder.build();
+        assert_eq!(variant_array.len(), array.len());
+        variant_array
+    }
+
+    /// Runs the row builder over `array` with strict (`safe: false`) options and compares
+    /// the result against `expected_values` (`None` meaning "row must be null").
+    fn test_row_builder_basic(array: &dyn Array, expected_values: Vec<Option<Variant>>) {
+        let strict_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        test_row_builder_basic_with_options(array, expected_values, strict_options);
+    }
+
+    /// Same as `test_row_builder_basic`, but with caller-supplied cast options.
+    ///
+    /// Only the rows listed in `expected_values` are checked.
+    fn test_row_builder_basic_with_options(
+        array: &dyn Array,
+        expected_values: Vec<Option<Variant>>,
+        options: CastOptions,
+    ) {
+        let variant_array = execute_row_builder_test_with_options(array, options);
+
+        for (i, expected) in expected_values.iter().enumerate() {
+            if let Some(variant) = expected {
+                assert_eq!(variant_array.value(i), *variant, "Mismatch at index {}", i);
+            } else {
+                assert!(variant_array.is_null(i), "Expected null at index {}", i);
+            }
+        }
+    }
+
+    #[test]
+    fn test_primitive_row_builder() {
+        // Int32 values map to Variant::Int32; the null row stays null.
+        let input = Int32Array::from(vec![Some(42), None, Some(100)]);
+        let expected = vec![Some(Variant::Int32(42)), None, Some(Variant::Int32(100))];
+        test_row_builder_basic(&input, expected);
+    }
+
+    #[test]
+    fn test_string_row_builder() {
+        // Utf8 values map to string variants; the null row stays null.
+        let input = StringArray::from(vec![Some("hello"), None, Some("world")]);
+        let expected = vec![
+            Some(Variant::from("hello")),
+            None,
+            Some(Variant::from("world")),
+        ];
+        test_row_builder_basic(&input, expected);
+    }
+
+    #[test]
+    fn test_boolean_row_builder() {
+        // Boolean values map to boolean variants; the null row stays null.
+        let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
+        let expected = vec![Some(Variant::from(true)), None, Some(Variant::from(false))];
+        test_row_builder_basic(&input, expected);
+    }
+
+    #[test]
+    fn test_struct_row_builder() {
+        use arrow::array::{ArrayRef, Int32Array, StringArray, StructArray};
+        use arrow_schema::{DataType, Field};
+        use std::sync::Arc;
+
+        // Two nullable columns: `id` (Int32) and `name` (Utf8); no struct-level nulls.
+        let fields = vec![
+            Field::new("id", DataType::Int32, true),
+            Field::new("name", DataType::Utf8, true),
+        ];
+        let columns = vec![
+            Arc::new(Int32Array::from(vec![Some(1), None, Some(3)])) as ArrayRef,
+            Arc::new(StringArray::from(vec![Some("Alice"), Some("Bob"), None])) as ArrayRef,
+        ];
+        let struct_array = StructArray::try_new(fields.into(), columns, None).unwrap();
+
+        let variant_array = execute_row_builder_test(&struct_array);
+
+        // Row 0: both fields present
+        let row0 = variant_array.value(0);
+        assert_eq!(row0.get_object_field("id"), Some(Variant::from(1)));
+        assert_eq!(row0.get_object_field("name"), Some(Variant::from("Alice")));
+
+        // Row 1: `id` is null, so that field is omitted from the object
+        let row1 = variant_array.value(1);
+        assert_eq!(row1.get_object_field("id"), None);
+        assert_eq!(row1.get_object_field("name"), Some(Variant::from("Bob")));
+
+        // Row 2: `name` is null, so that field is omitted from the object
+        let row2 = variant_array.value(2);
+        assert_eq!(row2.get_object_field("id"), Some(Variant::from(3)));
+        assert_eq!(row2.get_object_field("name"), None);
+    }
+
+    #[test]
+    fn test_run_end_encoded_row_builder() {
+        use arrow::array::{Int32Array, RunArray};
+        use arrow::datatypes::Int32Type;
+
+        // Logical content [A, A, B, B, B, C] encoded as
+        //   run_ends: [2, 5, 6]
+        //   values:   ["A", "B", "C"]
+        let run_values = StringArray::from(vec!["A", "B", "C"]);
+        let run_ends = Int32Array::from(vec![2, 5, 6]);
+        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &run_values).unwrap();
+
+        let variant_array = execute_row_builder_test(&run_array);
+
+        // Rows 0-1 come from run 0, rows 2-4 from run 1, row 5 from run 2.
+        let decoded = ["A", "A", "B", "B", "B", "C"];
+        for (row, want) in decoded.iter().enumerate() {
+            assert_eq!(variant_array.value(row), Variant::from(*want));
+        }
+    }
+
+    #[test]
+    fn test_run_end_encoded_random_access() {
+        use arrow::array::{Int32Array, RunArray};
+        use arrow::datatypes::Int32Type;
+
+        // Logical content [A, A, B, B, B, C]
+        let run_values = StringArray::from(vec!["A", "B", "C"]);
+        let run_ends = Int32Array::from(vec![2, 5, 6]);
+        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &run_values).unwrap();
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
+
+        // (row to read, expected value): a mix of forward and backward jumps, reusing the
+        // same row builder so its cached run position is exercised.
+        let probes = [(0, "A"), (5, "C"), (2, "B"), (4, "B"), (1, "A"), (3, "B")];
+
+        for &(index, want) in probes.iter() {
+            let mut array_builder = VariantArrayBuilder::new(1);
+            row_builder.append_row(&mut array_builder, index).unwrap();
+            let variant_array = array_builder.build();
+            assert_eq!(variant_array.value(0), Variant::from(want));
+        }
+    }
+
+    #[test]
+    fn test_run_end_encoded_with_nulls() {
+        use arrow::array::{Int32Array, RunArray};
+        use arrow::datatypes::Int32Type;
+
+        // Logical content [A, A, null, null, B]: the middle run carries a null value.
+        let run_values = StringArray::from(vec![Some("A"), None, Some("B")]);
+        let run_ends = Int32Array::from(vec![2, 4, 5]);
+        let run_array = RunArray::<Int32Type>::try_new(&run_ends, &run_values).unwrap();
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(run_array.data_type(), &run_array, &options).unwrap();
+        let mut array_builder = VariantArrayBuilder::new(5);
+
+        // Sequential pass over all five logical rows
+        (0..5).for_each(|row| row_builder.append_row(&mut array_builder, row).unwrap());
+
+        let variant_array = array_builder.build();
+        assert_eq!(variant_array.len(), 5);
+
+        // Run 0 -> "A", run 1 -> null, run 2 -> "B"
+        let decoded = [Some("A"), Some("A"), None, None, Some("B")];
+        for (row, want) in decoded.iter().enumerate() {
+            match want {
+                Some(text) => assert_eq!(variant_array.value(row), Variant::from(*text)),
+                None => assert!(variant_array.is_null(row)),
+            }
+        }
+    }
+
+    #[test]
+    fn test_dictionary_row_builder() {
+        use arrow::array::{DictionaryArray, Int32Array};
+        use arrow::datatypes::Int32Type;
+
+        // keys=[0, 1, 0, 2, 1] over values=["apple", "banana", "cherry"]
+        let dictionary = StringArray::from(vec!["apple", "banana", "cherry"]);
+        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
+        let dict_array =
+            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();
+
+        let variant_array = execute_row_builder_test(&dict_array);
+
+        // Each row must resolve to values[keys[row]].
+        let decoded = ["apple", "banana", "apple", "cherry", "banana"];
+        for (row, want) in decoded.iter().enumerate() {
+            assert_eq!(variant_array.value(row), Variant::from(*want));
+        }
+    }
+
+    #[test]
+    fn test_dictionary_with_nulls() {
+        use arrow::array::{DictionaryArray, Int32Array};
+        use arrow::datatypes::Int32Type;
+
+        // keys=[0, null, 1, null, 2] over values=["x", "y", "z"]
+        let dictionary = StringArray::from(vec!["x", "y", "z"]);
+        let keys = Int32Array::from(vec![Some(0), None, Some(1), None, Some(2)]);
+        let dict_array =
+            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
+                .unwrap();
+        let mut array_builder = VariantArrayBuilder::new(5);
+
+        // Sequential pass over all five rows
+        (0..5).for_each(|row| row_builder.append_row(&mut array_builder, row).unwrap());
+
+        let variant_array = array_builder.build();
+        assert_eq!(variant_array.len(), 5);
+
+        // A null key yields a null row; other rows resolve to values[keys[row]].
+        let decoded = [Some("x"), None, Some("y"), None, Some("z")];
+        for (row, want) in decoded.iter().enumerate() {
+            match want {
+                Some(text) => assert_eq!(variant_array.value(row), Variant::from(*text)),
+                None => assert!(variant_array.is_null(row)),
+            }
+        }
+    }
+
+    #[test]
+    fn test_dictionary_random_access() {
+        use arrow::array::{DictionaryArray, Int32Array};
+        use arrow::datatypes::Int32Type;
+
+        // keys=[0, 1, 2, 0, 1, 2] over values=["red", "green", "blue"]
+        let dictionary = StringArray::from(vec!["red", "green", "blue"]);
+        let keys = Int32Array::from(vec![0, 1, 2, 0, 1, 2]);
+        let dict_array =
+            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(dictionary)).unwrap();
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
+                .unwrap();
+
+        // (row to read, expected value), deliberately out of order
+        let probes = [
+            (5, "blue"),
+            (0, "red"),
+            (3, "red"),
+            (1, "green"),
+            (4, "green"),
+            (2, "blue"),
+        ];
+
+        for &(index, want) in probes.iter() {
+            let mut array_builder = VariantArrayBuilder::new(1);
+            row_builder.append_row(&mut array_builder, index).unwrap();
+            let variant_array = array_builder.build();
+            assert_eq!(variant_array.value(0), Variant::from(want));
+        }
+    }
+
+    #[test]
+    fn test_nested_dictionary() {
+        use arrow::array::{DictionaryArray, Int32Array, StructArray};
+        use arrow::datatypes::{Field, Int32Type};
+
+        // Dictionary whose values are structs {id, name}
+        let ids = Int32Array::from(vec![1, 2, 3]);
+        let names = StringArray::from(vec!["Alice", "Bob", "Charlie"]);
+        let struct_values = StructArray::from(vec![
+            (
+                Arc::new(Field::new("id", DataType::Int32, false)),
+                Arc::new(ids) as ArrayRef,
+            ),
+            (
+                Arc::new(Field::new("name", DataType::Utf8, false)),
+                Arc::new(names) as ArrayRef,
+            ),
+        ]);
+
+        let keys = Int32Array::from(vec![0, 1, 0, 2, 1]);
+        let dict_array =
+            DictionaryArray::<Int32Type>::try_new(keys, Arc::new(struct_values)).unwrap();
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(dict_array.data_type(), &dict_array, &options)
+                .unwrap();
+        let mut array_builder = VariantArrayBuilder::new(5);
+
+        // Sequential pass over all five rows
+        (0..5).for_each(|row| row_builder.append_row(&mut array_builder, row).unwrap());
+
+        let variant_array = array_builder.build();
+        assert_eq!(variant_array.len(), 5);
+
+        // Rows 0 and 1 resolve to distinct structs; row 2 repeats key 0 and must therefore
+        // match row 0.
+        let checked_rows = [(0, 1, "Alice"), (1, 2, "Bob"), (2, 1, "Alice")];
+        for &(row, id, name) in checked_rows.iter() {
+            let object = variant_array.value(row);
+            assert_eq!(object.get_object_field("id"), Some(Variant::from(id)));
+            assert_eq!(object.get_object_field("name"), Some(Variant::from(name)));
+        }
+    }
+
+    #[test]
+    fn test_list_row_builder() {
+        use arrow::array::ListArray;
+
+        // [[1, 2], [3, 4, 5], null, []]
+        let rows = vec![
+            Some(vec![Some(1), Some(2)]),
+            Some(vec![Some(3), Some(4), Some(5)]),
+            None,
+            Some(vec![]),
+        ];
+        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);
+
+        let variant_array = execute_row_builder_test(&list_array);
+
+        // Asserts that `row` is a variant list holding exactly `want`, in order.
+        let assert_list_row = |row: usize, want: &[i32]| {
+            let value = variant_array.value(row);
+            let list = value.as_list().unwrap();
+            assert_eq!(list.len(), want.len());
+            for (position, element) in want.iter().enumerate() {
+                assert_eq!(list.get(position), Some(Variant::from(*element)));
+            }
+        };
+
+        // Row 0: [1, 2]
+        assert_list_row(0, &[1, 2]);
+
+        // Row 1: [3, 4, 5]
+        assert_list_row(1, &[3, 4, 5]);
+
+        // Row 2: null
+        assert!(variant_array.is_null(2));
+
+        // Row 3: []
+        assert_list_row(3, &[]);
+    }
+
+    #[test]
+    fn test_sliced_list_row_builder() {
+        use arrow::array::ListArray;
+
+        // Create a list array: [[1, 2], [3, 4, 5], [6]]
+        let data = vec![
+            Some(vec![Some(1), Some(2)]),
+            Some(vec![Some(3), Some(4), Some(5)]),
+            Some(vec![Some(6)]),
+        ];
+        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
+
+        // Slice (offset 1, length 1) to keep only the middle row: [[3, 4, 5]]
+        let sliced_array = list_array.slice(1, 1);
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(sliced_array.data_type(), &sliced_array, &options)
+                .unwrap();
+        let mut variant_array_builder = VariantArrayBuilder::new(sliced_array.len());
+
+        // Index 0 is relative to the slice, so it must resolve to row 1 of the unsliced array
+        row_builder
+            .append_row(&mut variant_array_builder, 0)
+            .unwrap();
+        let variant_array = variant_array_builder.build();
+
+        // Exactly one row was appended
+        assert_eq!(variant_array.len(), 1);
+
+        // Row 0: [3, 4, 5] (not [1, 2], which would mean the slice offset was ignored)
+        let row0 = variant_array.value(0);
+        let list0 = row0.as_list().unwrap();
+        assert_eq!(list0.len(), 3);
+        assert_eq!(list0.get(0), Some(Variant::from(3)));
+        assert_eq!(list0.get(1), Some(Variant::from(4)));
+        assert_eq!(list0.get(2), Some(Variant::from(5)));
+    }
+
+    #[test]
+    fn test_nested_list_row_builder() {
+        use arrow::array::ListArray;
+        use arrow::datatypes::Field;
+
+        // Build a List<List<Int32>> by hand so the outer offsets and null buffer are explicit
+        let inner_field = Arc::new(Field::new("item", DataType::Int32, true));
+        let inner_list_field = Arc::new(Field::new("item", DataType::List(inner_field), true));
+
+        let values_data = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3)])];
+        let values_list = ListArray::from_iter_primitive::<Int32Type, _, _>(values_data);
+
+        let outer_offsets = arrow::buffer::OffsetBuffer::new(vec![0i32, 2, 2].into()); // row 0 = inner[0..2], row 1 = empty range
+        let outer_list = ListArray::new(
+            inner_list_field,
+            outer_offsets,
+            Arc::new(values_list),
+            Some(arrow::buffer::NullBuffer::from(vec![true, false])), // row 0 valid, row 1 null
+        );
+
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(outer_list.data_type(), &outer_list, &options)
+                .unwrap();
+        let mut variant_array_builder = VariantArrayBuilder::new(outer_list.len());
+
+        for i in 0..outer_list.len() {
+            row_builder
+                .append_row(&mut variant_array_builder, i)
+                .unwrap();
+        }
+
+        let variant_array = variant_array_builder.build();
+
+        // One output row per outer list row
+        assert_eq!(variant_array.len(), 2);
+
+        // Row 0: [[1, 2], [3]]
+        let row0 = variant_array.value(0);
+        let outer_list0 = row0.as_list().unwrap();
+        assert_eq!(outer_list0.len(), 2);
+
+        let inner_list0_0 = outer_list0.get(0).unwrap();
+        let inner_list0_0 = inner_list0_0.as_list().unwrap();
+        assert_eq!(inner_list0_0.len(), 2);
+        assert_eq!(inner_list0_0.get(0), Some(Variant::from(1)));
+        assert_eq!(inner_list0_0.get(1), Some(Variant::from(2)));
+
+        let inner_list0_1 = outer_list0.get(1).unwrap();
+        let inner_list0_1 = inner_list0_1.as_list().unwrap();
+        assert_eq!(inner_list0_1.len(), 1);
+        assert_eq!(inner_list0_1.get(0), Some(Variant::from(3)));
+
+        // Row 1: null (from the outer null buffer)
+        assert!(variant_array.is_null(1));
+    }
+
+    #[test]
+    fn test_map_row_builder() {
+        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
+        use arrow::buffer::{NullBuffer, OffsetBuffer};
+        use arrow::datatypes::{DataType, Field, Fields};
+        use std::sync::Arc;
+
+        // Create the entries struct array (key-value pairs)
+        let keys = StringArray::from(vec!["key1", "key2", "key3"]);
+        let values = Int32Array::from(vec![1, 2, 3]);
+        let entries_fields = Fields::from(vec![
+            Field::new("key", DataType::Utf8, false),
+            Field::new("value", DataType::Int32, true),
+        ]);
+        let entries = StructArray::new(
+            entries_fields.clone(),
+            vec![Arc::new(keys), Arc::new(values)],
+            None, // No nulls in the entries themselves
+        );
+
+        // Create offsets for 4 maps: [0..1], [1..1], [1..1], [1..3]
+        // Map 0: {"key1": 1}    (1 entry)
+        // Map 1: {}             (0 entries - empty)
+        // Map 2: null           (0 entries but NULL via null buffer)
+        // Map 3: {"key2": 2, "key3": 3}  (2 entries)
+        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
+
+        // Create null buffer - map at index 2 is NULL
+        let null_buffer = Some(NullBuffer::from(vec![true, true, false, true]));
+
+        // Field describing the map's "entries" struct (key nullability is set in entries_fields)
+        let map_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(entries_fields),
+            false, // the "entries" struct field itself is non-nullable
+        ));
+
+        // Create MapArray using try_new
+        let map_array = MapArray::try_new(
+            map_field,
+            offsets,
+            entries,
+            null_buffer,
+            false, // not ordered
+        )
+        .unwrap();
+
+        let variant_array = execute_row_builder_test(&map_array);
+
+        // Map 0: {"key1": 1}
+        let map0 = variant_array.value(0);
+        let obj0 = map0.as_object().unwrap();
+        assert_eq!(obj0.len(), 1);
+        assert_eq!(obj0.get("key1"), Some(Variant::from(1)));
+
+        // Map 1: {} (empty object, not null)
+        let map1 = variant_array.value(1);
+        let obj1 = map1.as_object().unwrap();
+        assert_eq!(obj1.len(), 0); // Empty object
+
+        // Map 2: null (actual NULL)
+        assert!(variant_array.is_null(2));
+
+        // Map 3: {"key2": 2, "key3": 3}
+        let map3 = variant_array.value(3);
+        let obj3 = map3.as_object().unwrap();
+        assert_eq!(obj3.len(), 2);
+        assert_eq!(obj3.get("key2"), Some(Variant::from(2)));
+        assert_eq!(obj3.get("key3"), Some(Variant::from(3)));
+    }
+
+    #[test]
+    fn test_union_sparse_row_builder() {
+        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
+        use arrow::buffer::ScalarBuffer;
+        use arrow::datatypes::{DataType, Field, UnionFields};
+        use std::sync::Arc;
+
+        // Sparse union (int, float, string): each child has all 6 slots; type_ids picks the child per row
+        let int_array = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
+        let float_array = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
+        let string_array = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
+        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
+
+        let union_fields = UnionFields::from_fields(vec![
+            Field::new("int_field", DataType::Int32, false),
+            Field::new("float_field", DataType::Float64, false),
+            Field::new("string_field", DataType::Utf8, false),
+        ]);
+
+        let children: Vec<Arc<dyn Array>> = vec![
+            Arc::new(int_array),
+            Arc::new(float_array),
+            Arc::new(string_array),
+        ];
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            None, // no offsets buffer => sparse union
+            children,
+        )
+        .unwrap();
+
+        let variant_array = execute_row_builder_test(&union_array);
+        assert_eq!(variant_array.value(0), Variant::Int32(1));
+        assert_eq!(variant_array.value(1), Variant::Double(3.2));
+        assert_eq!(variant_array.value(2), Variant::from("hello"));
+        assert_eq!(variant_array.value(3), Variant::Double(32.5));
+        assert_eq!(variant_array.value(4), Variant::Int32(34));
+        assert!(variant_array.is_null(5)); // type_id 0 => int_array, whose slot 5 is None
+    }
+
+    #[test]
+    fn test_union_dense_row_builder() {
+        use arrow::array::{Float64Array, Int32Array, StringArray, UnionArray};
+        use arrow::buffer::ScalarBuffer;
+        use arrow::datatypes::{DataType, Field, UnionFields};
+        use std::sync::Arc;
+
+        // Dense union (int, float, string): children hold only their own values; offsets index into them
+        let int_array = Int32Array::from(vec![Some(1), Some(34), None]);
+        let float_array = Float64Array::from(vec![3.2, 32.5]);
+        let string_array = StringArray::from(vec!["hello"]);
+        let type_ids = [0, 1, 2, 1, 0, 0].into_iter().collect::<ScalarBuffer<i8>>();
+        let offsets = [0, 0, 0, 1, 1, 2] // per-row position inside the child selected by type_ids
+            .into_iter()
+            .collect::<ScalarBuffer<i32>>();
+
+        let union_fields = UnionFields::from_fields(vec![
+            Field::new("int_field", DataType::Int32, false),
+            Field::new("float_field", DataType::Float64, false),
+            Field::new("string_field", DataType::Utf8, false),
+        ]);
+
+        let children: Vec<Arc<dyn Array>> = vec![
+            Arc::new(int_array),
+            Arc::new(float_array),
+            Arc::new(string_array),
+        ];
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            Some(offsets), // offsets buffer present => dense union
+            children,
+        )
+        .unwrap();
+
+        // Drive the row builder by hand, appending one variant per union row
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
+                .unwrap();
+
+        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
+        for i in 0..union_array.len() {
+            row_builder.append_row(&mut variant_builder, i).unwrap();
+        }
+        let variant_array = variant_builder.build();
+
+        assert_eq!(variant_array.len(), 6);
+        assert_eq!(variant_array.value(0), Variant::Int32(1));
+        assert_eq!(variant_array.value(1), Variant::Double(3.2));
+        assert_eq!(variant_array.value(2), Variant::from("hello"));
+        assert_eq!(variant_array.value(3), Variant::Double(32.5));
+        assert_eq!(variant_array.value(4), Variant::Int32(34));
+        assert!(variant_array.is_null(5)); // type_id 0, offset 2 => the None in int_array
+    }
+
+    #[test]
+    fn test_union_sparse_type_ids_row_builder() {
+        use arrow::array::{Int32Array, StringArray, UnionArray};
+        use arrow::buffer::ScalarBuffer;
+        use arrow::datatypes::{DataType, Field, UnionFields};
+        use std::sync::Arc;
+
+        // Sparse union whose type IDs (1, 3) are non-contiguous and do not equal the child positions (0, 1)
+        let int_array = Int32Array::from(vec![Some(42), None]);
+        let string_array = StringArray::from(vec![None, Some("test")]);
+        let type_ids = [1, 3].into_iter().collect::<ScalarBuffer<i8>>();
+
+        let union_fields = UnionFields::try_new(
+            vec![1, 3], // Non-contiguous type IDs
+            vec![
+                Field::new("int_field", DataType::Int32, false),
+                Field::new("string_field", DataType::Utf8, false),
+            ],
+        )
+        .unwrap();
+
+        let children: Vec<Arc<dyn Array>> = vec![Arc::new(int_array), Arc::new(string_array)];
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            None, // no offsets buffer => sparse union
+            children,
+        )
+        .unwrap();
+
+        // Drive the row builder by hand, appending one variant per union row
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut row_builder =
+            make_arrow_to_variant_row_builder(union_array.data_type(), &union_array, &options)
+                .unwrap();
+
+        let mut variant_builder = VariantArrayBuilder::new(union_array.len());
+        for i in 0..union_array.len() {
+            row_builder.append_row(&mut variant_builder, i).unwrap();
+        }
+        let variant_array = variant_builder.build();
+
+        // One variant per union row
+        assert_eq!(variant_array.len(), 2);
+
+        // Row 0: int 42 (type_id = 1)
+        assert_eq!(variant_array.value(0), Variant::Int32(42));
+
+        // Row 1: string "test" (type_id = 3)
+        assert_eq!(variant_array.value(1), Variant::from("test"));
+    }
+
+    #[test]
+    fn test_decimal32_row_builder() {
+        use arrow::array::Decimal32Array;
+        use parquet_variant::VariantDecimal4;
+
+        // Decimal32 with precision 9 / scale 2, so the raw value 1234 stands for 12.34
+        let decimal_array = Decimal32Array::from(vec![Some(1234), None, Some(-5678)])
+            .with_precision_and_scale(9, 2)
+            .unwrap();
+
+        test_row_builder_basic(
+            &decimal_array,
+            vec![
+                Some(Variant::from(VariantDecimal4::try_new(1234, 2).unwrap())), // same raw value and scale
+                None,
+                Some(Variant::from(VariantDecimal4::try_new(-5678, 2).unwrap())), // sign is kept
+            ],
+        );
+    }
+
+    #[test]
+    fn test_decimal128_row_builder() {
+        use arrow::array::Decimal128Array;
+        use parquet_variant::VariantDecimal16;
+
+        // Decimal128 with negative scale -2: the expected variants use scale 0 with the value x 10^2
+        let decimal_array = Decimal128Array::from(vec![Some(123), None, Some(456)])
+            .with_precision_and_scale(10, -2)
+            .unwrap();
+
+        test_row_builder_basic(
+            &decimal_array,
+            vec![
+                Some(Variant::from(VariantDecimal16::try_new(12300, 0).unwrap())), // 123 * 100
+                None,
+                Some(Variant::from(VariantDecimal16::try_new(45600, 0).unwrap())), // 456 * 100
+            ],
+        );
+    }
+
+    #[test]
+    fn test_decimal256_overflow_row_builder() {
+        use arrow::array::Decimal256Array;
+        use arrow::datatypes::i256;
+
+        // Decimal256 input: one value outside the i128 range, one small value that fits
+        let large_value = i256::from_i128(i128::MAX) + i256::from(1); // i128::MAX + 1
+        let decimal_array = Decimal256Array::from(vec![Some(large_value), Some(i256::from(123))])
+            .with_precision_and_scale(76, 3)
+            .unwrap();
+
+        test_row_builder_basic_with_options(
+            &decimal_array,
+            vec![
+                Some(Variant::Null), // Overflow value becomes Variant::Null (a value, not a null slot)
+                Some(Variant::from(VariantDecimal16::try_new(123, 3).unwrap())),
+            ],
+            CastOptions::default(), // default options, unlike the tests here that set safe: false
+        );
+    }
+
+    #[test]
+    fn test_binary_row_builder() {
+        use arrow::array::BinaryArray;
+
+        let binary_data = vec![
+            Some(b"hello".as_slice()),
+            None,
+            Some(b"\x00\x01\x02\xFF".as_slice()), // arbitrary bytes, including 0x00 and 0xFF
+            Some(b"".as_slice()), // Empty (zero-length but non-null) binary
+        ];
+        let binary_array = BinaryArray::from(binary_data);
+
+        test_row_builder_basic(
+            &binary_array,
+            vec![
+                Some(Variant::from(b"hello".as_slice())),
+                None, // mirrors the null input at index 1
+                Some(Variant::from([0x00, 0x01, 0x02, 0xFF].as_slice())),
+                Some(Variant::from([].as_slice())), // empty input stays an empty binary value
+            ],
+        );
+    }
+
+    #[test]
+    fn test_binary_view_row_builder() {
+        use arrow::array::BinaryViewArray;
+
+        let binary_data = vec![
+            Some(b"short".as_slice()),
+            None,
+            Some(b"this is a longer binary view that exceeds inline storage".as_slice()),
+        ];
+        let binary_view_array = BinaryViewArray::from(binary_data); // short value, null, long value
+
+        test_row_builder_basic(
+            &binary_view_array,
+            vec![
+                Some(Variant::from(b"short".as_slice())),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(
+                    b"this is a longer binary view that exceeds inline storage".as_slice(),
+                )),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_fixed_size_binary_row_builder() {
+        use arrow::array::FixedSizeBinaryArray;
+
+        let binary_data = vec![
+            Some([0x01, 0x02, 0x03, 0x04]),
+            None,
+            Some([0xFF, 0xFE, 0xFD, 0xFC]),
+        ];
+        let fixed_binary_array =
+            FixedSizeBinaryArray::try_from_sparse_iter_with_size(binary_data.into_iter(), 4)
+                .unwrap(); // declared width 4 matches the 4-byte inputs above
+
+        test_row_builder_basic(
+            &fixed_binary_array,
+            vec![
+                Some(Variant::from([0x01, 0x02, 0x03, 0x04].as_slice())),
+                None, // mirrors the null input at index 1
+                Some(Variant::from([0xFF, 0xFE, 0xFD, 0xFC].as_slice())),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_utf8_view_row_builder() {
+        use arrow::array::StringViewArray;
+
+        let string_data = vec![
+            Some("short"),
+            None,
+            Some("this is a much longer string that will be stored out-of-line in the buffer"),
+        ];
+        let string_view_array = StringViewArray::from(string_data); // short value, null, long value
+
+        test_row_builder_basic(
+            &string_view_array,
+            vec![
+                Some(Variant::from("short")),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(
+                    "this is a much longer string that will be stored out-of-line in the buffer",
+                )),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_timestamp_second_row_builder() {
+        use arrow::array::TimestampSecondArray;
+
+        let timestamp_data = vec![
+            Some(1609459200), // 2021-01-01 00:00:00 UTC
+            None,
+            Some(1640995200), // 2022-01-01 00:00:00 UTC
+        ];
+        let timestamp_array = TimestampSecondArray::from(timestamp_data); // no timezone attached
+
+        let expected_naive1 = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
+        let expected_naive2 = DateTime::from_timestamp(1640995200, 0).unwrap().naive_utc();
+
+        test_row_builder_basic(
+            &timestamp_array,
+            vec![
+                Some(Variant::from(expected_naive1)), // naive datetime expected for the zone-less input
+                None,
+                Some(Variant::from(expected_naive2)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_timestamp_with_timezone_row_builder() {
+        use arrow::array::TimestampMicrosecondArray;
+        use chrono::DateTime;
+
+        let timestamp_data = vec![
+            Some(1609459200000000), // 2021-01-01 00:00:00 UTC (in microseconds)
+            None,
+            Some(1640995200000000), // 2022-01-01 00:00:00 UTC (in microseconds)
+        ];
+        let timezone = "UTC".to_string();
+        let timestamp_array =
+            TimestampMicrosecondArray::from(timestamp_data).with_timezone(timezone);
+
+        let expected_utc1 = DateTime::from_timestamp(1609459200, 0).unwrap(); // same instants, in seconds
+        let expected_utc2 = DateTime::from_timestamp(1640995200, 0).unwrap();
+
+        test_row_builder_basic(
+            &timestamp_array,
+            vec![
+                Some(Variant::from(expected_utc1)), // kept timezone-aware: no naive_utc() conversion here
+                None,
+                Some(Variant::from(expected_utc2)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_timestamp_nanosecond_precision_row_builder() {
+        use arrow::array::TimestampNanosecondArray;
+
+        let timestamp_data = vec![
+            Some(1609459200123456789), // 2021-01-01 00:00:00.123456789 UTC
+            None,
+            Some(1609459200000000000), // 2021-01-01 00:00:00.000000000 UTC (no fractional seconds)
+        ];
+        let timestamp_array = TimestampNanosecondArray::from(timestamp_data); // no timezone attached
+
+        let expected_with_nanos = DateTime::from_timestamp(1609459200, 123456789)
+            .unwrap()
+            .naive_utc();
+        let expected_no_nanos = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
+
+        test_row_builder_basic(
+            &timestamp_array,
+            vec![
+                Some(Variant::from(expected_with_nanos)), // all nine fractional digits are expected
+                None,
+                Some(Variant::from(expected_no_nanos)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_timestamp_millisecond_row_builder() {
+        use arrow::array::TimestampMillisecondArray;
+
+        let timestamp_data = vec![
+            Some(1609459200123), // 2021-01-01 00:00:00.123 UTC
+            None,
+            Some(1609459200000), // 2021-01-01 00:00:00.000 UTC
+        ];
+        let timestamp_array = TimestampMillisecondArray::from(timestamp_data); // no timezone attached
+
+        let expected_with_millis = DateTime::from_timestamp(1609459200, 123000000) // 123 ms as nanoseconds
+            .unwrap()
+            .naive_utc();
+        let expected_no_millis = DateTime::from_timestamp(1609459200, 0).unwrap().naive_utc();
+
+        test_row_builder_basic(
+            &timestamp_array,
+            vec![
+                Some(Variant::from(expected_with_millis)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_no_millis)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_date32_row_builder() {
+        use arrow::array::Date32Array;
+        use chrono::NaiveDate;
+
+        let date_data = vec![
+            Some(0), // 1970-01-01 (the epoch)
+            None,
+            Some(19723),   // 2024-01-01 (days since epoch)
+            Some(-719162), // 0001-01-01 (719162 days before the epoch)
+        ];
+        let date_array = Date32Array::from(date_data);
+
+        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
+        let expected_min = NaiveDate::from_ymd_opt(1, 1, 1).unwrap();
+
+        test_row_builder_basic(
+            &date_array,
+            vec![
+                Some(Variant::from(expected_epoch)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_2024)),
+                Some(Variant::from(expected_min)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_date64_row_builder() {
+        use arrow::array::Date64Array;
+        use chrono::NaiveDate;
+
+        // Date64 input is in milliseconds since the epoch; the expected values are calendar dates
+        let date_data = vec![
+            Some(0), // 1970-01-01 (the epoch)
+            None,
+            Some(1704067200000), // 2024-01-01 (milliseconds since epoch)
+            Some(86400000),      // 1970-01-02 (exactly one day in milliseconds)
+        ];
+        let date_array = Date64Array::from(date_data);
+
+        let expected_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+        let expected_2024 = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
+        let expected_next_day = NaiveDate::from_ymd_opt(1970, 1, 2).unwrap();
+
+        test_row_builder_basic(
+            &date_array,
+            vec![
+                Some(Variant::from(expected_epoch)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_2024)),
+                Some(Variant::from(expected_next_day)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_time32_second_row_builder() {
+        use arrow::array::Time32SecondArray;
+        use chrono::NaiveTime;
+
+        // Time32 (seconds since midnight): first second of the day, a null, a mid value, last second
+        let time_data = vec![
+            Some(0), // 00:00:00
+            None,
+            Some(3661),  // 01:01:01 (3600 + 60 + 1)
+            Some(86399), // 23:59:59 (last second of the day)
+        ];
+        let time_array = Time32SecondArray::from(time_data);
+
+        let expected_midnight = NaiveTime::from_hms_opt(0, 0, 0).unwrap();
+        let expected_time = NaiveTime::from_hms_opt(1, 1, 1).unwrap();
+        let expected_last = NaiveTime::from_hms_opt(23, 59, 59).unwrap();
+
+        test_row_builder_basic(
+            &time_array,
+            vec![
+                Some(Variant::from(expected_midnight)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_time)),
+                Some(Variant::from(expected_last)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_time32_millisecond_row_builder() {
+        use arrow::array::Time32MillisecondArray;
+        use chrono::NaiveTime;
+
+        // Time32 (milliseconds since midnight): midnight, a null, a mid value, last millisecond
+        let time_data = vec![
+            Some(0), // 00:00:00.000
+            None,
+            Some(3661123),  // 01:01:01.123
+            Some(86399999), // 23:59:59.999 (last millisecond of the day)
+        ];
+        let time_array = Time32MillisecondArray::from(time_data);
+
+        let expected_midnight = NaiveTime::from_hms_milli_opt(0, 0, 0, 0).unwrap();
+        let expected_time = NaiveTime::from_hms_milli_opt(1, 1, 1, 123).unwrap();
+        let expected_last = NaiveTime::from_hms_milli_opt(23, 59, 59, 999).unwrap();
+
+        test_row_builder_basic(
+            &time_array,
+            vec![
+                Some(Variant::from(expected_midnight)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_time)),
+                Some(Variant::from(expected_last)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_time64_microsecond_row_builder() {
+        use arrow::array::Time64MicrosecondArray;
+        use chrono::NaiveTime;
+
+        // Time64 (microseconds since midnight): midnight, a null, a mid value, last microsecond
+        let time_data = vec![
+            Some(0), // 00:00:00.000000
+            None,
+            Some(3661123456),  // 01:01:01.123456
+            Some(86399999999), // 23:59:59.999999 (last microsecond of the day)
+        ];
+        let time_array = Time64MicrosecondArray::from(time_data);
+
+        let expected_midnight = NaiveTime::from_hms_micro_opt(0, 0, 0, 0).unwrap();
+        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
+        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
+
+        test_row_builder_basic(
+            &time_array,
+            vec![
+                Some(Variant::from(expected_midnight)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_time)),
+                Some(Variant::from(expected_last)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_time64_nanosecond_row_builder() {
+        use arrow::array::Time64NanosecondArray;
+        use chrono::NaiveTime;
+
+        // Time64 (nanoseconds since midnight); the inputs carry sub-microsecond digits on purpose
+        let time_data = vec![
+            Some(0), // 00:00:00.000000000
+            None,
+            Some(3661123456789),  // 01:01:01.123456789
+            Some(86399999999999), // 23:59:59.999999999
+        ];
+        let time_array = Time64NanosecondArray::from(time_data);
+
+        let expected_midnight = NaiveTime::from_hms_nano_opt(0, 0, 0, 0).unwrap();
+        // Expected values drop the trailing 789 ns / 999 ns: truncated to microseconds, not rounded
+        let expected_time = NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap();
+        let expected_last = NaiveTime::from_hms_micro_opt(23, 59, 59, 999999).unwrap();
+
+        test_row_builder_basic(
+            &time_array,
+            vec![
+                Some(Variant::from(expected_midnight)),
+                None, // mirrors the null input at index 1
+                Some(Variant::from(expected_time)),
+                Some(Variant::from(expected_last)),
+            ],
+        );
+    }
+}
diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs
new file mode 100644
index 000000000000..b6c968b0678d
--- /dev/null
+++ b/parquet-variant-compute/src/cast_to_variant.rs
@@ -0,0 +1,2288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::arrow_to_variant::make_arrow_to_variant_row_builder;
+use crate::{VariantArray, VariantArrayBuilder};
+use arrow::array::Array;
+use arrow::compute::CastOptions;
+use arrow_schema::ArrowError;
+
+/// Casts a typed arrow [`Array`] to a [`VariantArray`], converting the input one row
+/// at a time with the conversion behavior selected by `options`.
+///
+/// # Arguments
+/// * `input` - The array to convert to a [`VariantArray`]
+/// * `options` - Options controlling conversion behavior, e.g. `safe: false` for the
+///   strict mode used by [`cast_to_variant`]
+///
+/// # Errors
+/// Returns any [`ArrowError`] raised while creating the row builder for the input's
+/// data type or while appending a row.
+///
+/// # Notes
+/// If the input array element is null, the corresponding element in the
+/// output `VariantArray` will also be null (not `Variant::Null`).
+///
+/// For `DataType::Timestamp`s, this module's tests expect second, millisecond and
+/// microsecond arrays to produce microsecond variants, while nanosecond arrays keep
+/// nanosecond precision (no truncation to microseconds).
+///
+/// # Example
+/// ```
+/// # use arrow::array::{Array, ArrayRef, Int64Array};
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::cast_to_variant;
+/// // `cast_to_variant` forwards to this function with strict (`safe: false`) options
+/// let input = Int64Array::from(vec![Some(1), None, Some(3)]);
+/// let result = cast_to_variant(&input).unwrap();
+/// assert_eq!(result.len(), 3);
+/// assert_eq!(result.value(0), Variant::Int64(1));
+/// assert!(result.is_null(1)); // note null, not Variant::Null
+/// assert_eq!(result.value(2), Variant::Int64(3));
+/// ```
+pub fn cast_to_variant_with_options(
+    input: &dyn Array,
+    options: &CastOptions,
+) -> Result<VariantArray, ArrowError> {
+    // Pick the row builder that matches the input's data type
+    let mut row_builder = make_arrow_to_variant_row_builder(input.data_type(), input, options)?;
+
+    // Output builder, sized from the input length
+    let mut array_builder = VariantArrayBuilder::new(input.len());
+
+    // Convert row by row; the first error returned by `append_row` aborts the cast
+    for i in 0..input.len() {
+        row_builder.append_row(&mut array_builder, i)?;
+    }
+
+    Ok(array_builder.build())
+}
+
+/// Casts `input` to a [`VariantArray`] in strict mode, so conversion failures are
+/// returned as errors.
+///
+/// This is a thin wrapper that calls [`cast_to_variant_with_options`] with `safe: false`.
+/// For non-strict behavior, call [`cast_to_variant_with_options`] directly with
+/// `CastOptions { safe: true, ..Default::default() }`.
+pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
+    // Every option other than `safe` keeps its default value.
+    let strict_options = CastOptions {
+        safe: false,
+        ..Default::default()
+    };
+    cast_to_variant_with_options(input, &strict_options)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::{
+        ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal32Array,
+        Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray,
+        DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
+        DurationSecondArray, FixedSizeBinaryBuilder, FixedSizeListBuilder, Float16Array,
+        Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder, Int8Array,
+        Int16Array, Int32Array, Int64Array, IntervalDayTimeArray, IntervalMonthDayNanoArray,
+        IntervalYearMonthArray, LargeListArray, LargeListViewBuilder, LargeStringArray, ListArray,
+        ListViewBuilder, MapArray, NullArray, StringArray, StringRunBuilder, StringViewArray,
+        StructArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
+        Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
+        TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array,
+        UInt64Array, UnionArray,
+    };
+    use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
+    use arrow::datatypes::{
+        BinaryType, BinaryViewType, Date32Type, Date64Type, Int8Type, Int32Type, Int64Type,
+        IntervalDayTime, IntervalMonthDayNano, LargeBinaryType, i256,
+    };
+    use arrow::temporal_conversions::timestamp_s_to_datetime;
+    use arrow_schema::{
+        DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, DECIMAL128_MAX_PRECISION,
+    };
+    use arrow_schema::{DataType, Field, Fields, UnionFields};
+    use chrono::{DateTime, NaiveDate, NaiveTime};
+    use half::f16;
+    use parquet_variant::{
+        Variant, VariantBuilder, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
+        VariantDecimal16,
+    };
+    use std::{sync::Arc, vec};
+
+    macro_rules! max_unscaled_value {
+        (32, $precision:expr) => {
+            (10_u32.pow($precision as u32) - 1) as i32 // 10^precision - 1, as i32
+        };
+        (64, $precision:expr) => {
+            (10_u64.pow($precision as u32) - 1) as i64 // 10^precision - 1, as i64
+        };
+        (128, $precision:expr) => {
+            (10_u128.pow($precision as u32) - 1) as i128 // 10^precision - 1, as i128
+        };
+    }
+
+    #[test]
+    fn test_cast_to_variant_null() {
+        run_test(Arc::new(NullArray::new(2)), vec![None, None]) // both slots expected as `None`
+    }
+
+    /// Booleans are expected as `BooleanTrue` / `BooleanFalse`; the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_bool() {
+        let input = BooleanArray::from(vec![Some(true), None, Some(false)]);
+        let expected = vec![
+            Some(Variant::BooleanTrue),
+            None,
+            Some(Variant::BooleanFalse),
+        ];
+        run_test(Arc::new(input), expected);
+    }
+
+    /// Int8 inputs, extremes included, are expected as `Variant::Int8`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_int8() {
+        let input = Int8Array::from(vec![
+            Some(i8::MIN),
+            None,
+            Some(-1),
+            Some(1),
+            Some(i8::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int8(i8::MIN)),
+            None,
+            Some(Variant::Int8(-1)),
+            Some(Variant::Int8(1)),
+            Some(Variant::Int8(i8::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Int16 inputs, extremes included, are expected as `Variant::Int16`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_int16() {
+        let input = Int16Array::from(vec![
+            Some(i16::MIN),
+            None,
+            Some(-1),
+            Some(1),
+            Some(i16::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int16(i16::MIN)),
+            None,
+            Some(Variant::Int16(-1)),
+            Some(Variant::Int16(1)),
+            Some(Variant::Int16(i16::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Int32 inputs, extremes included, are expected as `Variant::Int32`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_int32() {
+        let input = Int32Array::from(vec![
+            Some(i32::MIN),
+            None,
+            Some(-1),
+            Some(1),
+            Some(i32::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int32(i32::MIN)),
+            None,
+            Some(Variant::Int32(-1)),
+            Some(Variant::Int32(1)),
+            Some(Variant::Int32(i32::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Int64 inputs, extremes included, are expected as `Variant::Int64`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_int64() {
+        let input = Int64Array::from(vec![
+            Some(i64::MIN),
+            None,
+            Some(-1),
+            Some(1),
+            Some(i64::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int64(i64::MIN)),
+            None,
+            Some(Variant::Int64(-1)),
+            Some(Variant::Int64(1)),
+            Some(Variant::Int64(i64::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// UInt8 inputs up to 127 are expected as `Int8`; `u8::MAX` is expected as `Int16`.
+    #[test]
+    fn test_cast_to_variant_uint8() {
+        let input = UInt8Array::from(vec![
+            Some(0),
+            None,
+            Some(1),
+            Some(127),
+            Some(u8::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int8(0)),
+            None,
+            Some(Variant::Int8(1)),
+            Some(Variant::Int8(127)),
+            Some(Variant::Int16(255)), // u8::MAX does not fit in Int8
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// UInt16 inputs up to 32767 are expected as `Int16`; `u16::MAX` is expected as `Int32`.
+    #[test]
+    fn test_cast_to_variant_uint16() {
+        let input = UInt16Array::from(vec![
+            Some(0),
+            None,
+            Some(1),
+            Some(32767),
+            Some(u16::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int16(0)),
+            None,
+            Some(Variant::Int16(1)),
+            Some(Variant::Int16(32767)),
+            Some(Variant::Int32(65535)), // u16::MAX does not fit in Int16
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// UInt32 inputs up to `i32::MAX` are expected as `Int32`; `u32::MAX` as `Int64`.
+    #[test]
+    fn test_cast_to_variant_uint32() {
+        let input = UInt32Array::from(vec![
+            Some(0),
+            None,
+            Some(1),
+            Some(2147483647),
+            Some(u32::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int32(0)),
+            None,
+            Some(Variant::Int32(1)),
+            Some(Variant::Int32(2147483647)),
+            Some(Variant::Int64(4294967295)), // u32::MAX does not fit in Int32
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// UInt64 inputs up to `i64::MAX` are expected as `Int64`; `u64::MAX` as `Decimal16`.
+    #[test]
+    fn test_cast_to_variant_uint64() {
+        let input = UInt64Array::from(vec![
+            Some(0),
+            None,
+            Some(1),
+            Some(9223372036854775807),
+            Some(u64::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Int64(0)),
+            None,
+            Some(Variant::Int64(1)),
+            Some(Variant::Int64(9223372036854775807)),
+            Some(Variant::Decimal16(
+                // u64::MAX does not fit in Int64
+                VariantDecimal16::try_from(18446744073709551615).unwrap(),
+            )),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Float16 inputs are expected as `Variant::Float` holding the widened value.
+    #[test]
+    fn test_cast_to_variant_float16() {
+        let input = Float16Array::from(vec![
+            Some(f16::MIN),
+            None,
+            Some(f16::from_f32(-1.5)),
+            Some(f16::from_f32(0.0)),
+            Some(f16::from_f32(1.5)),
+            Some(f16::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Float(f16::MIN.into())),
+            None,
+            Some(Variant::Float(-1.5)),
+            Some(Variant::Float(0.0)),
+            Some(Variant::Float(1.5)),
+            Some(Variant::Float(f16::MAX.into())),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Float32 inputs, extremes included, are expected as `Variant::Float`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_float32() {
+        let input = Float32Array::from(vec![
+            Some(f32::MIN),
+            None,
+            Some(-1.5),
+            Some(0.0),
+            Some(1.5),
+            Some(f32::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Float(f32::MIN)),
+            None,
+            Some(Variant::Float(-1.5)),
+            Some(Variant::Float(0.0)),
+            Some(Variant::Float(1.5)),
+            Some(Variant::Float(f32::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Float64 inputs, extremes included, are expected as `Variant::Double`; null as `None`.
+    #[test]
+    fn test_cast_to_variant_float64() {
+        let input = Float64Array::from(vec![
+            Some(f64::MIN),
+            None,
+            Some(-1.5),
+            Some(0.0),
+            Some(1.5),
+            Some(f64::MAX),
+        ]);
+        let expected = vec![
+            Some(Variant::Double(f64::MIN)),
+            None,
+            Some(Variant::Double(-1.5)),
+            Some(Variant::Double(0.0)),
+            Some(Variant::Double(1.5)),
+            Some(Variant::Double(f64::MAX)),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Decimal32 values with scale 3: values within `DECIMAL32_MAX_PRECISION` digits are
+    /// expected as `VariantDecimal4` with the same scale, out-of-range values as
+    /// `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal32() {
+        let input = Decimal32Array::from(vec![
+            Some(i32::MIN),
+            Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION)), // smallest value expected to fit VariantDecimal4
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION)), // largest value expected to fit VariantDecimal4
+            Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION) + 1), // one above the maximum: expected as Variant::Null
+            Some(i32::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL32_MAX_PRECISION, 3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal4::try_new(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 3)
+                    .unwrap()
+                    .into(),
+            ),
+            None,
+            Some(VariantDecimal4::try_new(-123, 3).unwrap().into()),
+            Some(VariantDecimal4::try_new(0, 3).unwrap().into()),
+            Some(VariantDecimal4::try_new(123, 3).unwrap().into()),
+            Some(
+                VariantDecimal4::try_new(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION), 3)
+                    .unwrap()
+                    .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Decimal32 values with scale -3: in-range values are expected as `VariantDecimal4`
+    /// with scale 0 and the unscaled value multiplied by 1000; out-of-range values are
+    /// expected as `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal32_negative_scale() {
+        let input = Decimal32Array::from(vec![
+            Some(i32::MIN),
+            Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3)), // smallest value with scale -3 expected to fit VariantDecimal4
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3)), // largest value with scale -3 expected to fit VariantDecimal4
+            Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) + 1), // one above the maximum: expected as Variant::Null
+            Some(i32::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL32_MAX_PRECISION, -3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal4::try_new(
+                    -max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal4::try_new(-123_000, 0).unwrap().into()),
+            Some(VariantDecimal4::try_new(0, 0).unwrap().into()),
+            Some(VariantDecimal4::try_new(123_000, 0).unwrap().into()),
+            Some(
+                VariantDecimal4::try_new(
+                    max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// In strict mode, an out-of-range Decimal32 value (`i32::MIN`) is expected to fail
+    /// with a `ComputeError`.
+    #[test]
+    fn test_cast_to_variant_decimal32_overflow_strict_mode() {
+        let input = Decimal32Array::from(vec![Some(i32::MIN)])
+            .with_precision_and_scale(DECIMAL32_MAX_PRECISION, 3)
+            .unwrap();
+        // Expected error for the first (and only) row.
+        let expected = Err(ArrowError::ComputeError(
+            "Failed to convert value at index 0: conversion failed".to_string(),
+        ));
+        run_test_in_strict_mode(Arc::new(input), expected);
+    }
+
+    /// Decimal64 values with scale 3: values within `DECIMAL64_MAX_PRECISION` digits are
+    /// expected as `VariantDecimal8` with the same scale, out-of-range values as
+    /// `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal64() {
+        let input = Decimal64Array::from(vec![
+            Some(i64::MIN),
+            Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION)), // smallest value expected to fit VariantDecimal8
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION)), // largest value expected to fit VariantDecimal8
+            Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION) + 1), // one above the maximum: expected as Variant::Null
+            Some(i64::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL64_MAX_PRECISION, 3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal8::try_new(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 3)
+                    .unwrap()
+                    .into(),
+            ),
+            None,
+            Some(VariantDecimal8::try_new(-123, 3).unwrap().into()),
+            Some(VariantDecimal8::try_new(0, 3).unwrap().into()),
+            Some(VariantDecimal8::try_new(123, 3).unwrap().into()),
+            Some(
+                VariantDecimal8::try_new(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION), 3)
+                    .unwrap()
+                    .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Decimal64 values with scale -3: in-range values are expected as `VariantDecimal8`
+    /// with scale 0 and the unscaled value multiplied by 1000; out-of-range values are
+    /// expected as `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal64_negative_scale() {
+        let input = Decimal64Array::from(vec![
+            Some(i64::MIN),
+            Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3)), // smallest value with scale -3 expected to fit VariantDecimal8
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3)), // largest value with scale -3 expected to fit VariantDecimal8
+            Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) + 1), // one above the maximum: expected as Variant::Null
+            Some(i64::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL64_MAX_PRECISION, -3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal8::try_new(
+                    -max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal8::try_new(-123_000, 0).unwrap().into()),
+            Some(VariantDecimal8::try_new(0, 0).unwrap().into()),
+            Some(VariantDecimal8::try_new(123_000, 0).unwrap().into()),
+            Some(
+                VariantDecimal8::try_new(
+                    max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// In strict mode, an out-of-range Decimal64 value (`i64::MAX`) is expected to fail
+    /// with a `ComputeError`.
+    #[test]
+    fn test_cast_to_variant_decimal64_overflow_strict_mode() {
+        let input = Decimal64Array::from(vec![Some(i64::MAX)])
+            .with_precision_and_scale(DECIMAL64_MAX_PRECISION, 3)
+            .unwrap();
+        // Expected error for the first (and only) row.
+        let expected = Err(ArrowError::ComputeError(
+            "Failed to convert value at index 0: conversion failed".to_string(),
+        ));
+        run_test_in_strict_mode(Arc::new(input), expected);
+    }
+
+    /// Decimal128 values with scale 3: values within `DECIMAL128_MAX_PRECISION` digits are
+    /// expected as `VariantDecimal16` with the same scale, out-of-range values as
+    /// `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal128() {
+        let input = Decimal128Array::from(vec![
+            Some(i128::MIN),
+            Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)), // smallest value expected to fit VariantDecimal16
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)), // largest value expected to fit VariantDecimal16
+            Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) + 1), // one above the maximum: expected as Variant::Null
+            Some(i128::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal16::try_new(
+                    -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                    3,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal16::try_new(-123, 3).unwrap().into()),
+            Some(VariantDecimal16::try_new(0, 3).unwrap().into()),
+            Some(VariantDecimal16::try_new(123, 3).unwrap().into()),
+            Some(
+                VariantDecimal16::try_new(
+                    max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                    3,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Decimal128 values with scale -3: in-range values are expected as `VariantDecimal16`
+    /// with scale 0 and the unscaled value multiplied by 1000; out-of-range values are
+    /// expected as `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal128_negative_scale() {
+        let input = Decimal128Array::from(vec![
+            Some(i128::MIN),
+            Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) - 1), // one below the minimum: expected as Variant::Null
+            Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3)), // smallest value with scale -3 expected to fit VariantDecimal16
+            None,
+            Some(-123),
+            Some(0),
+            Some(123),
+            Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3)), // largest value with scale -3 expected to fit VariantDecimal16
+            Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) + 1), // one above the maximum: expected as Variant::Null
+            Some(i128::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL128_MAX_PRECISION, -3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal16::try_new(
+                    -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal16::try_new(-123_000, 0).unwrap().into()),
+            Some(VariantDecimal16::try_new(0, 0).unwrap().into()),
+            Some(VariantDecimal16::try_new(123_000, 0).unwrap().into()),
+            Some(
+                VariantDecimal16::try_new(
+                    max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// In strict mode, a Decimal128 value one below the smallest unscaled value of
+    /// `DECIMAL128_MAX_PRECISION` digits is expected to fail with a `ComputeError`.
+    #[test]
+    fn test_cast_to_variant_decimal128_overflow_strict_mode() {
+        let too_small = -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) - 1;
+        let input = Decimal128Array::from(vec![Some(too_small)])
+            .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+            .unwrap();
+
+        // Expected error for the first (and only) row.
+        let expected = Err(ArrowError::ComputeError(
+            "Failed to convert value at index 0: conversion failed".to_string(),
+        ));
+        run_test_in_strict_mode(Arc::new(input), expected);
+    }
+
+    /// Decimal256 values with scale 3: values within `DECIMAL128_MAX_PRECISION` digits are
+    /// expected as `VariantDecimal16` with the same scale, out-of-range values as
+    /// `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal256() {
+        let input = Decimal256Array::from(vec![
+            Some(i256::MIN),
+            Some(i256::from_i128(
+                -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) - 1,
+            )), // one below the minimum: expected as Variant::Null
+            Some(i256::from_i128(-max_unscaled_value!(
+                128,
+                DECIMAL128_MAX_PRECISION
+            ))), // smallest value expected to fit VariantDecimal16
+            None,
+            Some(i256::from_i128(-123)),
+            Some(i256::from_i128(0)),
+            Some(i256::from_i128(123)),
+            Some(i256::from_i128(max_unscaled_value!(
+                128,
+                DECIMAL128_MAX_PRECISION
+            ))), // largest value expected to fit VariantDecimal16
+            Some(i256::from_i128(
+                max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) + 1,
+            )), // one above the maximum: expected as Variant::Null
+            Some(i256::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal16::try_new(
+                    -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                    3,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal16::try_new(-123, 3).unwrap().into()),
+            Some(VariantDecimal16::try_new(0, 3).unwrap().into()),
+            Some(VariantDecimal16::try_new(123, 3).unwrap().into()),
+            Some(
+                VariantDecimal16::try_new(
+                    max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                    3,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// Decimal256 values with scale -3: in-range values are expected as `VariantDecimal16`
+    /// with scale 0 and the unscaled value multiplied by 1000; out-of-range values are
+    /// expected as `Variant::Null`, and the null slot as `None`.
+    #[test]
+    fn test_cast_to_variant_decimal256_negative_scale() {
+        let input = Decimal256Array::from(vec![
+            Some(i256::MIN),
+            Some(i256::from_i128(
+                -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) - 1,
+            )), // one below the minimum: expected as Variant::Null
+            Some(i256::from_i128(-max_unscaled_value!(
+                128,
+                DECIMAL128_MAX_PRECISION - 3
+            ))), // smallest value with scale -3 expected to fit VariantDecimal16
+            None,
+            Some(i256::from_i128(-123)),
+            Some(i256::from_i128(0)),
+            Some(i256::from_i128(123)),
+            Some(i256::from_i128(max_unscaled_value!(
+                128,
+                DECIMAL128_MAX_PRECISION - 3
+            ))), // largest value with scale -3 expected to fit VariantDecimal16
+            Some(i256::from_i128(
+                max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) + 1,
+            )), // one above the maximum: expected as Variant::Null
+            Some(i256::MAX),
+        ])
+        .with_precision_and_scale(DECIMAL128_MAX_PRECISION, -3)
+        .unwrap();
+        let expected = vec![
+            Some(Variant::Null),
+            Some(Variant::Null),
+            Some(
+                VariantDecimal16::try_new(
+                    -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            None,
+            Some(VariantDecimal16::try_new(-123_000, 0).unwrap().into()),
+            Some(VariantDecimal16::try_new(0, 0).unwrap().into()),
+            Some(VariantDecimal16::try_new(123_000, 0).unwrap().into()),
+            Some(
+                VariantDecimal16::try_new(
+                    max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) * 1000,
+                    0,
+                )
+                .unwrap()
+                .into(),
+            ),
+            Some(Variant::Null),
+            Some(Variant::Null),
+        ];
+        run_test(Arc::new(input), expected)
+    }
+
+    /// In strict mode, a Decimal256 value one above the largest unscaled value of
+    /// `DECIMAL128_MAX_PRECISION` digits is expected to fail with a `ComputeError`.
+    #[test]
+    fn test_cast_to_variant_decimal256_overflow_strict_mode() {
+        let too_large = i256::from_i128(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) + 1);
+        let input = Decimal256Array::from(vec![Some(too_large)])
+            .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+            .unwrap();
+
+        // Expected error for the first (and only) row.
+        let expected = Err(ArrowError::ComputeError(
+            "Failed to convert value at index 0: conversion failed".to_string(),
+        ));
+        run_test_in_strict_mode(Arc::new(input), expected);
+    }
+
+    /// Timestamp arrays of every time unit, with and without a timezone, are expected to
+    /// map to the matching timestamp variants (`Ntz` variants when no timezone is set).
+    #[test]
+    fn test_cast_to_variant_timestamp() {
+        // Second, millisecond and microsecond arrays are all checked against
+        // microsecond variants: first the array with a timezone, then the one without.
+        fn check_micros(microseconds: i64, array_ntz: Arc<dyn Array>, array_tz: Arc<dyn Array>) {
+            let timestamp = DateTime::from_timestamp_nanos(microseconds * 1000);
+            let expected_tz = vec![Some(Variant::TimestampMicros(timestamp)), None];
+            let expected_ntz = vec![
+                Some(Variant::TimestampNtzMicros(timestamp.naive_utc())),
+                None,
+            ];
+            run_test(array_tz, expected_tz);
+            run_test(array_ntz, expected_ntz);
+        }
+
+        let nanosecond = 1234567890;
+        let microsecond = 1234567;
+        let millisecond = 1234;
+        let second = 1;
+
+        // Seconds, scaled up to microseconds for the expectation.
+        let seconds_ntz = TimestampSecondArray::from(vec![Some(second), None]);
+        let seconds_tz = seconds_ntz.clone().with_timezone("+01:00".to_string());
+        check_micros(
+            second * 1000 * 1000,
+            Arc::new(seconds_ntz),
+            Arc::new(seconds_tz),
+        );
+
+        // Milliseconds, scaled up to microseconds for the expectation.
+        let millis_ntz = TimestampMillisecondArray::from(vec![Some(millisecond), None]);
+        let millis_tz = millis_ntz.clone().with_timezone("+01:00".to_string());
+        check_micros(
+            millisecond * 1000,
+            Arc::new(millis_ntz),
+            Arc::new(millis_tz),
+        );
+
+        // Microseconds need no scaling.
+        let micros_ntz = TimestampMicrosecondArray::from(vec![Some(microsecond), None]);
+        let micros_tz = micros_ntz.clone().with_timezone("+01:00".to_string());
+        check_micros(microsecond, Arc::new(micros_ntz), Arc::new(micros_tz));
+
+        // Nanosecond arrays are expected to map to the nanosecond variants; here the
+        // array without a timezone is checked first.
+        let timestamp = DateTime::from_timestamp_nanos(nanosecond);
+        let nanos_ntz = TimestampNanosecondArray::from(vec![Some(nanosecond), None]);
+        let nanos_tz = nanos_ntz.clone().with_timezone("+01:00".to_string());
+        let expected_ntz = vec![
+            Some(Variant::TimestampNtzNanos(timestamp.naive_utc())),
+            None,
+        ];
+        let expected_tz = vec![Some(Variant::TimestampNanos(timestamp)), None];
+        run_test(Arc::new(nanos_ntz), expected_ntz);
+        run_test(Arc::new(nanos_tz), expected_tz);
+    }
+
+    #[test]
+    fn test_cast_to_variant_timestamp_overflow_strict_mode() {
+        let ts_array = TimestampSecondArray::from(vec![Some(i64::MAX), Some(0), Some(1609459200)])
+            .with_timezone_opt(None::<&str>); // explicitly no timezone
+
+        let values = Arc::new(ts_array);
+        run_test_in_strict_mode(
+            values,
+            Err(ArrowError::ComputeError(
+                "Failed to convert value at index 0: conversion failed".to_string(),
+            )), // index 0 holds i64::MAX seconds, the value expected to fail
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_timestamp_overflow_non_strict_mode() {
+        let ts_array = TimestampSecondArray::from(vec![Some(i64::MAX), Some(0), Some(1609459200)])
+            .with_timezone_opt(None::<&str>); // explicitly no timezone
+
+        let values = Arc::new(ts_array);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Null), // i64::MAX seconds is not convertible -> Variant::Null
+                Some(Variant::TimestampNtzMicros(
+                    timestamp_s_to_datetime(0).unwrap(),
+                )),
+                Some(Variant::TimestampNtzMicros(
+                    timestamp_s_to_datetime(1609459200).unwrap(),
+                )),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_date() {
+        // Date32Array: MIN, a null, an ordinary date and MAX map to the matching Variant::Date
+        run_test(
+            Arc::new(Date32Array::from(vec![
+                Some(Date32Type::from_naive_date(NaiveDate::MIN)),
+                None,
+                Some(Date32Type::from_naive_date(
+                    NaiveDate::from_ymd_opt(2025, 8, 1).unwrap(),
+                )),
+                Some(Date32Type::from_naive_date(NaiveDate::MAX)),
+            ])),
+            vec![
+                Some(Variant::Date(NaiveDate::MIN)),
+                None,
+                Some(Variant::Date(NaiveDate::from_ymd_opt(2025, 8, 1).unwrap())),
+                Some(Variant::Date(NaiveDate::MAX)),
+            ],
+        );
+
+        // Date64Array: same four inputs and the same expected Variant::Date values
+        run_test(
+            Arc::new(Date64Array::from(vec![
+                Some(Date64Type::from_naive_date(NaiveDate::MIN)),
+                None,
+                Some(Date64Type::from_naive_date(
+                    NaiveDate::from_ymd_opt(2025, 8, 1).unwrap(),
+                )),
+                Some(Date64Type::from_naive_date(NaiveDate::MAX)),
+            ])),
+            vec![
+                Some(Variant::Date(NaiveDate::MIN)),
+                None,
+                Some(Variant::Date(NaiveDate::from_ymd_opt(2025, 8, 1).unwrap())),
+                Some(Variant::Date(NaiveDate::MAX)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_date64_strict_mode() {
+        let date64_values = Date64Array::from(vec![Some(i64::MAX), Some(0), Some(i64::MIN)]);
+
+        let values = Arc::new(date64_values);
+        run_test_in_strict_mode(
+            values,
+            Err(ArrowError::ComputeError(
+                "Failed to convert value at index 0: conversion failed".to_string(),
+            )), // index 0 holds i64::MAX, the value expected to fail
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_date64_non_strict_mode() {
+        let date64_values = Date64Array::from(vec![Some(i64::MAX), Some(0), Some(i64::MIN)]);
+
+        let values = Arc::new(date64_values);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Null), // i64::MAX input
+                Some(Variant::Date(Date64Type::to_naive_date_opt(0).unwrap())),
+                Some(Variant::Null), // i64::MIN input
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_time32_second() {
+        let array: Time32SecondArray = vec![Some(1), Some(86_399), None].into();
+        let values = Arc::new(array);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Time(
+                    NaiveTime::from_num_seconds_from_midnight_opt(1, 0).unwrap(), // 00:00:01
+                )),
+                Some(Variant::Time(
+                    NaiveTime::from_num_seconds_from_midnight_opt(86_399, 0).unwrap(), // 23:59:59
+                )),
+                None,
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_time32_millisecond() {
+        let array: Time32MillisecondArray = vec![Some(123_456), Some(456_000), None].into();
+        let values = Arc::new(array);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Time( // 123_456 ms = 123 s + 456 ms
+                    NaiveTime::from_num_seconds_from_midnight_opt(123, 456_000_000).unwrap(),
+                )),
+                Some(Variant::Time( // 456_000 ms = 456 s
+                    NaiveTime::from_num_seconds_from_midnight_opt(456, 0).unwrap(),
+                )),
+                None,
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_time64_micro() {
+        let array: Time64MicrosecondArray = vec![Some(1), Some(123_456_789), None].into();
+        let values = Arc::new(array);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Time( // 1 us = 1_000 ns
+                    NaiveTime::from_num_seconds_from_midnight_opt(0, 1_000).unwrap(),
+                )),
+                Some(Variant::Time( // 123_456_789 us = 123 s + 456_789 us
+                    NaiveTime::from_num_seconds_from_midnight_opt(123, 456_789_000).unwrap(),
+                )),
+                None,
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_time64_nano() {
+        let array: Time64NanosecondArray =
+            vec![Some(1), Some(1001), Some(123_456_789_012), None].into();
+        run_test(
+            Arc::new(array),
+            // The expected times are whole microseconds, so sub-microsecond nanos round down
+            vec![
+                Some(Variant::Time( // 1 ns -> 0
+                    NaiveTime::from_num_seconds_from_midnight_opt(0, 0).unwrap(),
+                )),
+                Some(Variant::Time( // 1001 ns -> 1 us
+                    NaiveTime::from_num_seconds_from_midnight_opt(0, 1_000).unwrap(),
+                )),
+                Some(Variant::Time( // 123_456_789_012 ns -> 123 s + 456_789 us
+                    NaiveTime::from_num_seconds_from_midnight_opt(123, 456_789_000).unwrap(),
+                )),
+                None,
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_time32_strict_mode() {
+        let time32_array = Time32SecondArray::from(vec![Some(90000), Some(3600), Some(-1)]);
+
+        let values = Arc::new(time32_array);
+        run_test_in_strict_mode(
+            values,
+            Err(ArrowError::ComputeError(
+                "Failed to convert value at index 0: conversion failed".to_string(),
+            )), // index 0 holds 90000 s, more than the 86_400 s in a day
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_time32_non_strict_mode() {
+        let time32_array = Time32SecondArray::from(vec![Some(90000), Some(3600), Some(-1)]);
+
+        let values = Arc::new(time32_array);
+        run_test(
+            values,
+            vec![
+                Some(Variant::Null), // 90000 s exceeds one day (86_400 s)
+                Some(Variant::Time(
+                    NaiveTime::from_num_seconds_from_midnight_opt(3600, 0).unwrap(), // 01:00:00
+                )),
+                Some(Variant::Null), // negative seconds
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_duration_or_interval_errors() {
+        let arrays: Vec<Box<dyn Array>> = vec![
+            // Duration types, one array per time unit (s, ms, us, ns)
+            Box::new(DurationSecondArray::from(vec![Some(10), None, Some(-5)])),
+            Box::new(DurationMillisecondArray::from(vec![
+                Some(10),
+                None,
+                Some(-5),
+            ])),
+            Box::new(DurationMicrosecondArray::from(vec![
+                Some(10),
+                None,
+                Some(-5),
+            ])),
+            Box::new(DurationNanosecondArray::from(vec![
+                Some(10),
+                None,
+                Some(-5),
+            ])),
+            // Interval types (year-month, day-time, month-day-nano)
+            Box::new(IntervalYearMonthArray::from(vec![Some(12), None, Some(-6)])),
+            Box::new(IntervalDayTimeArray::from(vec![
+                Some(IntervalDayTime::new(12, 0)),
+                None,
+                Some(IntervalDayTime::new(-6, 0)),
+            ])),
+            Box::new(IntervalMonthDayNanoArray::from(vec![
+                Some(IntervalMonthDayNano::new(12, 0, 0)),
+                None,
+                Some(IntervalMonthDayNano::new(-6, 0, 0)),
+            ])),
+        ];
+
+        for array in arrays { // each array must be rejected with the same two-part message
+            let result = cast_to_variant(array.as_ref());
+            assert!(result.is_err());
+            match result.unwrap_err() {
+                ArrowError::InvalidArgumentError(msg) => {
+                    assert!(
+                        msg.contains("Casting duration/interval types to Variant is not supported")
+                    );
+                    assert!(
+                        msg.contains("The Variant format does not define duration/interval types")
+                    );
+                }
+                _ => panic!("Expected InvalidArgumentError"), // wrong error kind
+            }
+        }
+    }
+
+    #[test]
+    fn test_cast_to_variant_binary() {
+        // BinaryType: two non-empty values, an empty value and a null
+        let mut builder = GenericByteBuilder::<BinaryType>::new();
+        builder.append_value(b"hello");
+        builder.append_value(b"");
+        builder.append_null();
+        builder.append_value(b"world");
+        let binary_array = builder.finish();
+        run_test(
+            Arc::new(binary_array),
+            vec![
+                Some(Variant::Binary(b"hello")),
+                Some(Variant::Binary(b"")),
+                None,
+                Some(Variant::Binary(b"world")),
+            ],
+        );
+
+        // LargeBinaryType: same inputs and expectations as above
+        let mut builder = GenericByteBuilder::<LargeBinaryType>::new();
+        builder.append_value(b"hello");
+        builder.append_value(b"");
+        builder.append_null();
+        builder.append_value(b"world");
+        let large_binary_array = builder.finish();
+        run_test(
+            Arc::new(large_binary_array),
+            vec![
+                Some(Variant::Binary(b"hello")),
+                Some(Variant::Binary(b"")),
+                None,
+                Some(Variant::Binary(b"world")),
+            ],
+        );
+
+        // BinaryViewType: same inputs and expectations, built with the view builder
+        let mut builder = GenericByteViewBuilder::<BinaryViewType>::new();
+        builder.append_value(b"hello");
+        builder.append_value(b"");
+        builder.append_null();
+        builder.append_value(b"world");
+        let byte_view_array = builder.finish();
+        run_test(
+            Arc::new(byte_view_array),
+            vec![
+                Some(Variant::Binary(b"hello")),
+                Some(Variant::Binary(b"")),
+                None,
+                Some(Variant::Binary(b"world")),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_fixed_size_binary() {
+        let v1 = vec![1, 2];
+        let v2 = vec![3, 4];
+        let v3 = vec![5, 6];
+
+        let mut builder = FixedSizeBinaryBuilder::new(2); // 2 = length of each of v1..v3
+        builder.append_value(&v1).unwrap();
+        builder.append_value(&v2).unwrap();
+        builder.append_null();
+        builder.append_value(&v3).unwrap();
+        let array = builder.finish();
+
+        run_test(
+            Arc::new(array),
+            vec![
+                Some(Variant::Binary(&v1)),
+                Some(Variant::Binary(&v2)),
+                None, // the null appended between v2 and v3
+                Some(Variant::Binary(&v3)),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_utf8() {
+        // Test with short strings (should become ShortString variants), an empty one and a null
+        let short_strings = vec![Some("hello"), Some(""), None, Some("world"), Some("test")];
+        let string_array = StringArray::from(short_strings);
+
+        run_test(
+            Arc::new(string_array),
+            vec![
+                Some(Variant::from("hello")),
+                Some(Variant::from("")),
+                None,
+                Some(Variant::from("world")),
+                Some(Variant::from("test")),
+            ],
+        );
+
+        // Test with a long string (should become String variant)
+        let long_string = "a".repeat(100); // > 63 bytes, so will be Variant::String
+        let long_strings = vec![Some(long_string.clone()), None, Some("short".to_string())];
+        let string_array = StringArray::from(long_strings);
+
+        run_test(
+            Arc::new(string_array),
+            vec![
+                Some(Variant::from(long_string.as_str())), // borrows the original 100-byte string
+                None,
+                Some(Variant::from("short")),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_large_utf8() {
+        // Test with short strings (should become ShortString variants), an empty one and a null
+        let short_strings = vec![Some("hello"), Some(""), None, Some("world")];
+        let string_array = LargeStringArray::from(short_strings);
+
+        run_test(
+            Arc::new(string_array),
+            vec![
+                Some(Variant::from("hello")),
+                Some(Variant::from("")),
+                None,
+                Some(Variant::from("world")),
+            ],
+        );
+
+        // Test with a long string (should become String variant)
+        let long_string = "b".repeat(100); // > 63 bytes, so will be Variant::String
+        let long_strings = vec![Some(long_string.clone()), None, Some("short".to_string())];
+        let string_array = LargeStringArray::from(long_strings);
+
+        run_test(
+            Arc::new(string_array),
+            vec![
+                Some(Variant::from(long_string.as_str())), // borrows the original 100-byte string
+                None,
+                Some(Variant::from("short")),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_utf8_view() {
+        // Test with short strings (should become ShortString variants), an empty one and a null
+        let short_strings = vec![Some("hello"), Some(""), None, Some("world")];
+        let string_view_array = StringViewArray::from(short_strings);
+
+        run_test(
+            Arc::new(string_view_array),
+            vec![
+                Some(Variant::from("hello")),
+                Some(Variant::from("")),
+                None,
+                Some(Variant::from("world")),
+            ],
+        );
+
+        // Test with a long string (should become String variant)
+        let long_string = "c".repeat(100); // > 63 bytes, so will be Variant::String
+        let long_strings = vec![Some(long_string.clone()), None, Some("short".to_string())];
+        let string_view_array = StringViewArray::from(long_strings);
+
+        run_test(
+            Arc::new(string_view_array),
+            vec![
+                Some(Variant::from(long_string.as_str())), // borrows the original 100-byte string
+                None,
+                Some(Variant::from("short")),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_list() {
+        // List Array with two rows: [0, 1, 2] and a null list
+        let data = vec![Some(vec![Some(0), Some(1), Some(2)]), None];
+        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
+
+        // Expected value for row 0: a Variant list holding 0, 1, 2
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(0);
+            list.append_value(1);
+            list.append_value(2);
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(Arc::new(list_array), vec![Some(variant), None]);
+    }
+
+    #[test]
+    fn test_cast_to_variant_sliced_list() {
+        // List Array with three rows; slice(1, 2) below keeps rows 1 and 2
+        let data = vec![
+            Some(vec![Some(0), Some(1), Some(2)]),
+            Some(vec![Some(3), Some(4), Some(5)]),
+            None,
+        ];
+        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
+
+        // Expected value for row 1 ([3, 4, 5]); row 2 is the null list
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3);
+            list.append_value(4);
+            list.append_value(5);
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(Arc::new(list_array.slice(1, 2)), vec![Some(variant), None]);
+    }
+
+    #[test]
+    fn test_cast_to_variant_large_list() {
+        // Large List Array with two rows: [0, 1, 2] and a null list
+        let data = vec![Some(vec![Some(0), Some(1), Some(2)]), None];
+        let large_list_array = LargeListArray::from_iter_primitive::<Int64Type, _, _>(data);
+
+        // Expected value for row 0: a Variant list holding 0, 1, 2 as i64
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(0i64);
+            list.append_value(1i64);
+            list.append_value(2i64);
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(Arc::new(large_list_array), vec![Some(variant), None]);
+    }
+
+    #[test]
+    fn test_cast_to_variant_sliced_large_list() {
+        // Large List Array with three rows; slice(1, 2) below keeps rows 1 and 2
+        let data = vec![
+            Some(vec![Some(0), Some(1), Some(2)]),
+            Some(vec![Some(3), Some(4), Some(5)]),
+            None,
+        ];
+        let large_list_array = LargeListArray::from_iter_primitive::<Int64Type, _, _>(data);
+
+        // Expected value for row 1 ([3, 4, 5] as i64); row 2 is the null list
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3i64);
+            list.append_value(4i64);
+            list.append_value(5i64);
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(large_list_array.slice(1, 2)),
+            vec![Some(variant), None],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_list_view() {
+        // ListViewArray rows: [0, null, 2], [3, 4], null, [null, null]
+        let mut builder = ListViewBuilder::new(Int32Array::builder(0));
+        builder.append_value(&Int32Array::from(vec![Some(0), None, Some(2)]));
+        builder.append_value(&Int32Array::from(vec![Some(3), Some(4)]));
+        builder.append_null();
+        builder.append_value(&Int32Array::from(vec![None, None]));
+        let list_view_array = builder.finish();
+
+        // Expected values: one Variant list per non-null row (rows 0, 1 and 3)
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(0i32);
+            list.append_null();
+            list.append_value(2i32);
+            list.finish();
+            builder.finish()
+        };
+        let variant0 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3i32);
+            list.append_value(4i32);
+            list.finish();
+            builder.finish()
+        };
+        let variant1 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_null();
+            list.append_null();
+            list.finish();
+            builder.finish()
+        };
+        let variant3 = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(list_view_array),
+            vec![Some(variant0), Some(variant1), None, Some(variant3)],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_sliced_list_view() {
+        // ListViewArray rows: [0, 1, 2], [3, null], null
+        let mut builder = ListViewBuilder::new(Int32Array::builder(0));
+        builder.append_value(&Int32Array::from(vec![Some(0), Some(1), Some(2)]));
+        builder.append_value(&Int32Array::from(vec![Some(3), None]));
+        builder.append_null();
+        let list_view_array = builder.finish();
+
+        // Expected value for slice(1, 2): row 1 is [3, null]; row 2 is the null list
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3i32);
+            list.append_null();
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(list_view_array.slice(1, 2)),
+            vec![Some(variant), None],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_large_list_view() {
+        // LargeListViewArray rows: [0, null, 2], [3, 4], null, [null, null]
+        let mut builder = LargeListViewBuilder::new(Int64Array::builder(0));
+        builder.append_value(&Int64Array::from(vec![Some(0), None, Some(2)]));
+        builder.append_value(&Int64Array::from(vec![Some(3), Some(4)]));
+        builder.append_null();
+        builder.append_value(&Int64Array::from(vec![None, None]));
+        let large_list_view_array = builder.finish();
+
+        // Expected values: one Variant list per non-null row (rows 0, 1 and 3)
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(0i64);
+            list.append_null();
+            list.append_value(2i64);
+            list.finish();
+            builder.finish()
+        };
+        let variant0 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3i64);
+            list.append_value(4i64);
+            list.finish();
+            builder.finish()
+        };
+        let variant1 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_null();
+            list.append_null();
+            list.finish();
+            builder.finish()
+        };
+        let variant3 = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(large_list_view_array),
+            vec![Some(variant0), Some(variant1), None, Some(variant3)],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_sliced_large_list_view() {
+        // LargeListViewArray rows: [0, 1, 2], [3, null], null
+        let mut builder = LargeListViewBuilder::new(Int64Array::builder(0));
+        builder.append_value(&Int64Array::from(vec![Some(0), Some(1), Some(2)]));
+        builder.append_value(&Int64Array::from(vec![Some(3), None]));
+        builder.append_null();
+        let large_list_view_array = builder.finish();
+
+        // Expected value for slice(1, 2): row 1 is [3, null]; row 2 is the null list
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(3i64);
+            list.append_null();
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(large_list_view_array.slice(1, 2)),
+            vec![Some(variant), None],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_fixed_size_list() {
+        let mut builder = FixedSizeListBuilder::new(Int32Array::builder(0), 2);
+        builder.values().append_value(0);
+        builder.values().append_value(1);
+        builder.append(true); // First list: [0, 1]
+
+        builder.values().append_null();
+        builder.values().append_value(3);
+        builder.append(true); // Second list: [null, 3]
+
+        builder.values().append_value(4);
+        builder.values().append_null();
+        builder.append(false); // Third list: null, although two child values were appended
+
+        builder.values().append_nulls(2);
+        builder.append(true); // Last list: [null, null]
+
+        let fixed_size_list_array = builder.finish();
+
+        // Expected values for the three non-null rows (0, 1 and 3)
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_value(0i32);
+            list.append_value(1i32);
+            list.finish();
+            builder.finish()
+        };
+        let variant0 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_null();
+            list.append_value(3i32);
+            list.finish();
+            builder.finish()
+        };
+        let variant1 = Variant::new(&metadata, &value);
+
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_null();
+            list.append_null();
+            list.finish();
+            builder.finish()
+        };
+        let variant3 = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(fixed_size_list_array),
+            vec![Some(variant0), Some(variant1), None, Some(variant3)],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_sliced_fixed_size_list() {
+        // Create a FixedSizeListArray with size 2 and i64 child values
+        let mut builder = FixedSizeListBuilder::new(Int64Array::builder(0), 2);
+        builder.values().append_value(0);
+        builder.values().append_value(1);
+        builder.append(true); // First list: [0, 1]
+
+        builder.values().append_null();
+        builder.values().append_value(3);
+        builder.append(true); // Second list: [null, 3]
+
+        builder.values().append_value(4);
+        builder.values().append_null();
+        builder.append(false); // Third list: null, although two child values were appended
+
+        let fixed_size_list_array = builder.finish();
+
+        // Expected value for slice(1, 2): row 1 is [null, 3]; row 2 is the null list
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut list = builder.new_list();
+            list.append_null();
+            list.append_value(3i64);
+            list.finish();
+            builder.finish()
+        };
+        let variant = Variant::new(&metadata, &value);
+
+        run_test(
+            Arc::new(fixed_size_list_array.slice(1, 2)),
+            vec![Some(variant), None],
+        );
+    }
+
+    #[test]
+    fn test_cast_to_variant_struct() {
+        // Simple struct with two fields, id (int64) and age (int32), each holding one null
+        let id_array = Int64Array::from(vec![Some(1001), Some(1002), None, Some(1003)]);
+        let age_array = Int32Array::from(vec![Some(25), Some(30), Some(35), None]);
+
+        let fields = Fields::from(vec![
+            Field::new("id", DataType::Int64, true),
+            Field::new("age", DataType::Int32, true),
+        ]);
+
+        let struct_array = StructArray::new(
+            fields,
+            vec![Arc::new(id_array), Arc::new(age_array)],
+            None, // no nulls at the struct level
+        );
+
+        let result = cast_to_variant(&struct_array).unwrap();
+        assert_eq!(result.len(), 4);
+
+        // Check first row: {"id": 1001, "age": 25}
+        let variant1 = result.value(0);
+        let obj1 = variant1.as_object().unwrap();
+        assert_eq!(obj1.get("id"), Some(Variant::from(1001i64)));
+        assert_eq!(obj1.get("age"), Some(Variant::from(25i32)));
+
+        // Check second row: {"id": 1002, "age": 30}
+        let variant2 = result.value(1);
+        let obj2 = variant2.as_object().unwrap();
+        assert_eq!(obj2.get("id"), Some(Variant::from(1002i64)));
+        assert_eq!(obj2.get("age"), Some(Variant::from(30i32)));
+
+        // Check third row: {"age": 35} (id is null, so the key is expected to be absent)
+        let variant3 = result.value(2);
+        let obj3 = variant3.as_object().unwrap();
+        assert_eq!(obj3.get("id"), None);
+        assert_eq!(obj3.get("age"), Some(Variant::from(35i32)));
+
+        // Check fourth row: {"id": 1003} (age is null, so the key is expected to be absent)
+        let variant4 = result.value(3);
+        let obj4 = variant4.as_object().unwrap();
+        assert_eq!(obj4.get("id"), Some(Variant::from(1003i64)));
+        assert_eq!(obj4.get("age"), None);
+    }
+
+    #[test]
+    fn test_cast_to_variant_struct_with_nulls() {
+        // Struct whose nulls come from the struct-level null buffer, not from the fields
+        let id_array = Int64Array::from(vec![Some(1001), Some(1002)]);
+        let age_array = Int32Array::from(vec![Some(25), Some(30)]);
+
+        let fields = Fields::from(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("age", DataType::Int32, false),
+        ]);
+
+        // Validity [true, false]: row 0 is valid, row 1 is null
+        let null_buffer = NullBuffer::from(vec![true, false]);
+
+        let struct_array = StructArray::new(
+            fields,
+            vec![Arc::new(id_array), Arc::new(age_array)],
+            Some(null_buffer),
+        );
+
+        let result = cast_to_variant(&struct_array).unwrap();
+        assert_eq!(result.len(), 2);
+
+        // Check first row: {"id": 1001, "age": 25}
+        assert!(!result.is_null(0));
+        let variant1 = result.value(0);
+        let obj1 = variant1.as_object().unwrap();
+        assert_eq!(obj1.get("id"), Some(Variant::from(1001i64)));
+        assert_eq!(obj1.get("age"), Some(Variant::from(25i32)));
+
+        // Check second row: the struct-level null must surface as a null result row
+        assert!(result.is_null(1));
+    }
+
+    #[test]
+    fn test_cast_to_variant_struct_performance() {
+        // Casts a 1000-row struct with three non-nullable fields.
+        // Only result values are asserted (first and last row); no timing is measured here.
+        let size = 1000;
+
+        let id_array = Int64Array::from((0..size).map(|i| Some(i as i64)).collect::<Vec<_>>());
+        let age_array = Int32Array::from(
+            (0..size)
+                .map(|i| Some((i % 100) as i32))
+                .collect::<Vec<_>>(),
+        );
+        let score_array =
+            Float64Array::from((0..size).map(|i| Some(i as f64 * 0.1)).collect::<Vec<_>>());
+
+        let fields = Fields::from(vec![
+            Field::new("id", DataType::Int64, false),
+            Field::new("age", DataType::Int32, false),
+            Field::new("score", DataType::Float64, false),
+        ]);
+
+        let struct_array = StructArray::new(
+            fields,
+            vec![
+                Arc::new(id_array),
+                Arc::new(age_array),
+                Arc::new(score_array),
+            ],
+            None,
+        );
+
+        let result = cast_to_variant(&struct_array).unwrap();
+        assert_eq!(result.len(), size);
+
+        // Verify the first and the last row
+        let variant0 = result.value(0);
+        let obj0 = variant0.as_object().unwrap();
+        assert_eq!(obj0.get("id"), Some(Variant::from(0i64)));
+        assert_eq!(obj0.get("age"), Some(Variant::from(0i32)));
+        assert_eq!(obj0.get("score"), Some(Variant::from(0.0f64)));
+
+        let variant999 = result.value(999);
+        let obj999 = variant999.as_object().unwrap();
+        assert_eq!(obj999.get("id"), Some(Variant::from(999i64)));
+        assert_eq!(obj999.get("age"), Some(Variant::from(99i32))); // 999 % 100 = 99
+        assert_eq!(obj999.get("score"), Some(Variant::from(99.9f64)));
+    }
+
+    #[test]
+    fn test_cast_to_variant_struct_performance_large() {
+        // Wider (10 fields) and taller (10k rows) struct cycling through four column types
+        let size = 10000;
+        let num_fields = 10;
+
+        // Build one child array and one schema field per column, picked by `field_idx % 4`
+        let mut field_arrays: Vec<ArrayRef> = Vec::new();
+        let mut fields = Vec::new();
+
+        for field_idx in 0..num_fields {
+            match field_idx % 4 {
+                0 => {
+                    // Int64 fields: value is row index + field index
+                    let array = Int64Array::from(
+                        (0..size)
+                            .map(|i| Some(i as i64 + field_idx as i64))
+                            .collect::<Vec<_>>(),
+                    );
+                    field_arrays.push(Arc::new(array));
+                    fields.push(Field::new(
+                        format!("int_field_{field_idx}"),
+                        DataType::Int64,
+                        false,
+                    ));
+                }
+                1 => {
+                    // Int32 fields: value is (row index % 1000) + field index
+                    let array = Int32Array::from(
+                        (0..size)
+                            .map(|i| Some((i % 1000) as i32 + field_idx as i32))
+                            .collect::<Vec<_>>(),
+                    );
+                    field_arrays.push(Arc::new(array));
+                    fields.push(Field::new(
+                        format!("int32_field_{field_idx}"),
+                        DataType::Int32,
+                        false,
+                    ));
+                }
+                2 => {
+                    // Float64 fields: value is row index * 0.1 + field index
+                    let array = Float64Array::from(
+                        (0..size)
+                            .map(|i| Some(i as f64 * 0.1 + field_idx as f64))
+                            .collect::<Vec<_>>(),
+                    );
+                    field_arrays.push(Arc::new(array));
+                    fields.push(Field::new(
+                        format!("float_field_{field_idx}"),
+                        DataType::Float64,
+                        false,
+                    ));
+                }
+                _ => {
+                    // Binary fields: one of three fixed payloads, chosen by row index % 3
+                    let binary_data: Vec<Option<&[u8]>> = (0..size)
+                        .map(|i| {
+                            // Use static data to avoid lifetime issues in tests
+                            match i % 3 {
+                                0 => Some(b"test_data_0" as &[u8]),
+                                1 => Some(b"test_data_1" as &[u8]),
+                                _ => Some(b"test_data_2" as &[u8]),
+                            }
+                        })
+                        .collect();
+                    let array = BinaryArray::from(binary_data);
+                    field_arrays.push(Arc::new(array));
+                    fields.push(Field::new(
+                        format!("binary_field_{field_idx}"),
+                        DataType::Binary,
+                        false,
+                    ));
+                }
+            }
+        }
+
+        let struct_array = StructArray::new(Fields::from(fields), field_arrays, None);
+
+        let result = cast_to_variant(&struct_array).unwrap();
+        assert_eq!(result.len(), size);
+
+        // Verify a sample of rows
+        for sample_idx in [0, size / 4, size / 2, size - 1] {
+            let variant = result.value(sample_idx);
+            let obj = variant.as_object().unwrap();
+
+            // Should have all fields
+            assert_eq!(obj.len(), num_fields);
+
+            // Verify a few field values. Compare the `Option` returned by `get` so that
+            // a missing field fails the test instead of silently skipping the check.
+            let expected_int = Some(Variant::from(sample_idx as i64));
+            assert_eq!(obj.get("int_field_0"), expected_int);
+
+            let expected_float = Some(Variant::from(sample_idx as f64 * 0.1 + 2.0));
+            assert_eq!(obj.get("float_field_2"), expected_float);
+        }
+    }
+
+    #[test]
+    fn test_cast_to_variant_nested_struct() {
+        // A `person` struct whose `location` child is itself a struct of (x, y)
+        let location_schema = Fields::from(vec![
+            Field::new("x", DataType::Float64, true),
+            Field::new("y", DataType::Float64, true),
+        ]);
+
+        let xs = Float64Array::from(vec![Some(40.7), Some(37.8)]);
+        let ys = Float64Array::from(vec![Some(-74.0), Some(-122.4)]);
+        let locations = StructArray::new(
+            location_schema.clone(),
+            vec![Arc::new(xs), Arc::new(ys)],
+            None,
+        );
+
+        // Outer struct: an id column plus the nested location column
+        let person_schema = Fields::from(vec![
+            Field::new("id", DataType::Int64, true),
+            Field::new("location", DataType::Struct(location_schema), true),
+        ]);
+
+        let ids = Int64Array::from(vec![Some(1001), Some(1002)]);
+        let people = StructArray::new(
+            person_schema,
+            vec![Arc::new(ids), Arc::new(locations)],
+            None,
+        );
+
+        let variants = cast_to_variant(&people).unwrap();
+        assert_eq!(variants.len(), 2);
+
+        // Row 0: id 1001 at (40.7, -74.0)
+        let first = variants.value(0);
+        let first_object = first.as_object().unwrap();
+        assert_eq!(first_object.get("id"), Some(Variant::from(1001i64)));
+
+        let first_location = first_object.get("location").unwrap();
+        let first_location_object = first_location.as_object().unwrap();
+        assert_eq!(first_location_object.get("x"), Some(Variant::from(40.7f64)));
+        assert_eq!(first_location_object.get("y"), Some(Variant::from(-74.0f64)));
+
+        // Row 1: id 1002 at (37.8, -122.4)
+        let second = variants.value(1);
+        let second_object = second.as_object().unwrap();
+        assert_eq!(second_object.get("id"), Some(Variant::from(1002i64)));
+
+        let second_location = second_object.get("location").unwrap();
+        let second_location_object = second_location.as_object().unwrap();
+        assert_eq!(second_location_object.get("x"), Some(Variant::from(37.8f64)));
+        assert_eq!(second_location_object.get("y"), Some(Variant::from(-122.4f64)));
+    }
+
+    #[test]
+    fn test_cast_to_variant_map() {
+        // Three string-keyed entries split by the offsets into two maps:
+        // entries [0, 1) form map 0 and entries [1, 3) form map 1
+        let entry_offsets = vec![0, 1, 3];
+        let values_data = Int32Array::from(vec![1, 2, 3]);
+        let keys = vec!["key1", "key2", "key3"];
+
+        let map_array =
+            MapArray::new_from_strings(keys.into_iter(), &values_data, &entry_offsets).unwrap();
+
+        let variants = cast_to_variant(&map_array).unwrap();
+
+        // Map 0 holds the single entry {"key1": 1}
+        let first = variants.value(0);
+        let first_object = first.as_object().unwrap();
+        let key1_value = first_object.get("key1").unwrap();
+        assert_eq!(key1_value, Variant::from(1));
+
+        // Map 1 holds the two entries {"key2": 2, "key3": 3}
+        let second = variants.value(1);
+        let second_object = second.as_object().unwrap();
+        let key2_value = second_object.get("key2").unwrap();
+        assert_eq!(key2_value, Variant::from(2));
+
+        let key3_value = second_object.get("key3").unwrap();
+        assert_eq!(key3_value, Variant::from(3));
+    }
+
+    #[test]
+    fn test_cast_to_variant_map_with_nulls_and_empty() {
+        use arrow::array::{Int32Array, MapArray, StringArray, StructArray};
+        use arrow::buffer::{NullBuffer, OffsetBuffer};
+        use arrow::datatypes::{DataType, Field, Fields};
+        use std::sync::Arc;
+
+        // Schema of a single map entry: a required string key and a nullable int value
+        let entry_schema = Fields::from(vec![
+            Field::new("key", DataType::Utf8, false),
+            Field::new("value", DataType::Int32, true),
+        ]);
+
+        // Flattened entries shared by all maps; the offsets below slice them up
+        let entry_keys = StringArray::from(vec!["key1", "key2", "key3"]);
+        let entry_values = Int32Array::from(vec![1, 2, 3]);
+        let entries = StructArray::new(
+            entry_schema.clone(),
+            vec![Arc::new(entry_keys), Arc::new(entry_values)],
+            None,
+        );
+
+        let entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(entry_schema),
+            false,
+        ));
+
+        // Four maps covering entry ranges [0..1], [1..1], [1..1], [1..3]
+        let offsets = OffsetBuffer::new(vec![0, 1, 1, 1, 3].into());
+
+        // Validity mask: only the map at index 2 is NULL
+        let validity = Some(NullBuffer::from(vec![true, true, false, true]));
+
+        let map_array =
+            MapArray::try_new(entries_field, offsets, entries, validity, false).unwrap();
+
+        let variants = cast_to_variant(&map_array).unwrap();
+
+        // Map 0: {"key1": 1}
+        let first = variants.value(0);
+        let first_object = first.as_object().unwrap();
+        let key1_value = first_object.get("key1").unwrap();
+        assert_eq!(key1_value, Variant::from(1));
+
+        // Map 1: zero entries but still valid, so an empty object rather than a null
+        let second = variants.value(1);
+        let second_object = second.as_object().unwrap();
+        assert_eq!(second_object.len(), 0);
+
+        // Map 2: masked out by the validity buffer, so a null row
+        assert!(variants.is_null(2));
+
+        // Map 3: {"key2": 2, "key3": 3}
+        let fourth = variants.value(3);
+        let fourth_object = fourth.as_object().unwrap();
+        let key2_value = fourth_object.get("key2").unwrap();
+        assert_eq!(key2_value, Variant::from(2));
+
+        let key3_value = fourth_object.get("key3").unwrap();
+        assert_eq!(key3_value, Variant::from(3));
+    }
+
+    #[test]
+    fn test_cast_to_variant_map_with_non_string_keys() {
+        // Int32 keys: the assertions below look them up as "1", "2" and "3"
+        let entry_schema = Fields::from(vec![
+            Field::new("key", DataType::Int32, false),
+            Field::new("values", DataType::Int32, false),
+        ]);
+        let entry_columns: Vec<ArrayRef> = vec![
+            Arc::new(Int32Array::from(vec![1, 2, 3])),
+            Arc::new(Int32Array::from(vec![1, 2, 3])),
+        ];
+        let entries = StructArray::new(entry_schema.clone(), entry_columns, None);
+        let entries_field = Arc::new(Field::new("entries", DataType::Struct(entry_schema), false));
+
+        // Two maps covering entry ranges [0..1] and [1..3]
+        let offsets = OffsetBuffer::new(vec![0, 1, 3].into());
+        let map_array = MapArray::new(entries_field, offsets, entries, None, false);
+
+        let variants = cast_to_variant(&map_array).unwrap();
+
+        // Map 0: {"1": 1}
+        let first = variants.value(0);
+        let first_object = first.as_object().unwrap();
+        let value_for_1 = first_object.get("1").unwrap();
+        assert_eq!(value_for_1, Variant::from(1));
+
+        // Map 1: {"2": 2, "3": 3}
+        let second = variants.value(1);
+        let second_object = second.as_object().unwrap();
+        let value_for_2 = second_object.get("2").unwrap();
+        assert_eq!(value_for_2, Variant::from(2));
+
+        let value_for_3 = second_object.get("3").unwrap();
+        assert_eq!(value_for_3, Variant::from(3));
+    }
+
+    #[test]
+    fn test_cast_to_variant_union_sparse() {
+        // Sparse union over (int, float, string): every child array has one slot per
+        // row and `type_ids` selects which child supplies the value of each row
+        let union_fields = UnionFields::from_fields(vec![
+            Field::new("int_field", DataType::Int32, false),
+            Field::new("float_field", DataType::Float64, false),
+            Field::new("string_field", DataType::Utf8, false),
+        ]);
+
+        let type_ids: ScalarBuffer<i8> = [0, 1, 2, 1, 0, 0].into_iter().collect();
+
+        let ints = Int32Array::from(vec![Some(1), None, None, None, Some(34), None]);
+        let floats = Float64Array::from(vec![None, Some(3.2), None, Some(32.5), None, None]);
+        let strings = StringArray::from(vec![None, None, Some("hello"), None, None, None]);
+        let children: Vec<Arc<dyn Array>> = vec![
+            Arc::new(ints),
+            Arc::new(floats),
+            Arc::new(strings),
+        ];
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            None, // no offsets buffer: sparse layout
+            children,
+        )
+        .unwrap();
+
+        // Row 5 selects the int child, whose slot 5 is null
+        let expected = vec![
+            Some(Variant::Int32(1)),
+            Some(Variant::Double(3.2)),
+            Some(Variant::from("hello")),
+            Some(Variant::Double(32.5)),
+            Some(Variant::Int32(34)),
+            None,
+        ];
+        run_test(Arc::new(union_array), expected);
+    }
+
+    #[test]
+    fn test_cast_to_variant_union_dense() {
+        // Dense union over (int, float, string): each child stores only its own values
+        // and `offsets[i]` is the slot inside the child chosen by `type_ids[i]`
+        let union_fields = UnionFields::from_fields(vec![
+            Field::new("int_field", DataType::Int32, false),
+            Field::new("float_field", DataType::Float64, false),
+            Field::new("string_field", DataType::Utf8, false),
+        ]);
+
+        // Per-row child selector and per-row slot within that child
+        let type_ids: ScalarBuffer<i8> = [0, 1, 2, 1, 0, 0].into_iter().collect();
+        let offsets: ScalarBuffer<i32> = [0, 0, 0, 1, 1, 2].into_iter().collect();
+
+        // Child arrays, in the same order as `union_fields`
+        let ints = Int32Array::from(vec![Some(1), Some(34), None]);
+        let floats = Float64Array::from(vec![3.2, 32.5]);
+        let strings = StringArray::from(vec!["hello"]);
+        let children: Vec<Arc<dyn Array>> = vec![
+            Arc::new(ints),
+            Arc::new(floats),
+            Arc::new(strings),
+        ];
+
+        let union_array = UnionArray::try_new(
+            union_fields,
+            type_ids,
+            Some(offsets), // offsets buffer present: dense layout
+            children,
+        )
+        .unwrap();
+
+        // Row 5 maps to slot 2 of the int child, which is null
+        let expected = vec![
+            Some(Variant::Int32(1)),
+            Some(Variant::Double(3.2)),
+            Some(Variant::from("hello")),
+            Some(Variant::Double(32.5)),
+            Some(Variant::Int32(34)),
+            None,
+        ];
+        run_test(Arc::new(union_array), expected);
+    }
+
+    #[test]
+    fn test_cast_to_variant_dictionary() {
+        // Rows are keys into `dictionary`; row 2 has a null key
+        let dictionary = StringArray::from(vec!["apple", "banana", "cherry", "date"]);
+        let indices = Int32Array::from(vec![Some(0), Some(1), None, Some(2), Some(0), Some(3)]);
+        let dict_array =
+            DictionaryArray::<Int32Type>::try_new(indices, Arc::new(dictionary)).unwrap();
+
+        let expected = vec![
+            Some(Variant::from("apple")),
+            Some(Variant::from("banana")),
+            None,
+            Some(Variant::from("cherry")),
+            Some(Variant::from("apple")),
+            Some(Variant::from("date")),
+        ];
+        run_test(Arc::new(dict_array), expected);
+    }
+
+    #[test]
+    fn test_cast_to_variant_dictionary_with_nulls() {
+        // Here the null sits in the dictionary values rather than in the keys:
+        // key 1 references the null dictionary entry
+        let dictionary = StringArray::from(vec![Some("a"), None, Some("c")]);
+        let indices = Int8Array::from(vec![Some(0), Some(1), Some(2), Some(0)]);
+        let dict_array =
+            DictionaryArray::<Int8Type>::try_new(indices, Arc::new(dictionary)).unwrap();
+
+        let expected = vec![
+            Some(Variant::from("a")),
+            None, // key 1 points to null value
+            Some(Variant::from("c")),
+            Some(Variant::from("a")),
+        ];
+        run_test(Arc::new(dict_array), expected);
+    }
+
+    #[test]
+    fn test_cast_to_variant_run_end_encoded() {
+        // Input with consecutive repeats, appended in order:
+        // "apple" x2, "banana" x3, "cherry" x1
+        let rows = ["apple", "apple", "banana", "banana", "banana", "cherry"];
+
+        let mut builder = StringRunBuilder::<Int32Type>::new();
+        for row in rows {
+            builder.append_value(row);
+        }
+        let run_array = builder.finish();
+
+        // Each appended row is expected back as its own variant, in the same order
+        let expected = vec![
+            Some(Variant::from("apple")),
+            Some(Variant::from("apple")),
+            Some(Variant::from("banana")),
+            Some(Variant::from("banana")),
+            Some(Variant::from("banana")),
+            Some(Variant::from("cherry")),
+        ];
+        run_test(Arc::new(run_array), expected);
+    }
+
+    #[test]
+    fn test_cast_to_variant_run_end_encoded_with_nulls() {
+        use arrow::array::StringRunBuilder;
+        use arrow::datatypes::Int32Type;
+
+        // Null rows both between value runs and as a trailing pair
+        let rows = [Some("apple"), None, Some("banana"), Some("banana"), None, None];
+        let mut builder = StringRunBuilder::<Int32Type>::new();
+        for row in rows {
+            if let Some(text) = row {
+                builder.append_value(text);
+            } else {
+                builder.append_null();
+            }
+        }
+        let run_array = builder.finish();
+
+        let expected = vec![
+            Some(Variant::from("apple")),
+            None,
+            Some(Variant::from("banana")),
+            Some(Variant::from("banana")),
+            None,
+            None,
+        ];
+        run_test(Arc::new(run_array), expected);
+    }
+
+    /// Casts `values` with `cast_to_variant_with_options` and compares the resulting
+    /// array against `expected`, row by row:
+    /// - `Some(v)`: the row must be non-null and equal to `v`
+    /// - `None`: the row must be null
+    ///
+    /// Panics if the cast fails or if the lengths differ.
+    fn run_test_with_options(
+        values: ArrayRef,
+        expected: Vec<Option<Variant>>,
+        options: CastOptions,
+    ) {
+        let actual = cast_to_variant_with_options(&values, &options).unwrap();
+        assert_eq!(actual.len(), expected.len());
+        for (i, expected_value) in expected.iter().enumerate() {
+            if let Some(value) = expected_value {
+                assert!(!actual.is_null(i), "Expected non-null at index {i}");
+                assert_eq!(actual.value(i), *value, "mismatch at index {i}");
+            } else {
+                assert!(actual.is_null(i), "Expected null at index {i}");
+            }
+        }
+    }
+
+    fn run_test(values: ArrayRef, expected: Vec<Option<Variant>>) {
+        run_test_with_options(values, expected, Default::default()); // default `CastOptions`
+    }
+
+    fn run_test_in_strict_mode(
+        values: ArrayRef,
+        expected: Result<Vec<Option<Variant>>, ArrowError>,
+    ) {
+        let options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        match expected {
+            Ok(expected) => run_test_with_options(values, expected, options),
+            Err(_) => {
+                let result = cast_to_variant_with_options(values.as_ref(), &options);
+                assert!(result.is_err());
+                assert_eq!(
+                    result.unwrap_err().to_string(),
+                    expected.unwrap_err().to_string()
+                );
+            }
+        }
+    }
+}
diff --git a/parquet-variant-compute/src/from_json.rs b/parquet-variant-compute/src/from_json.rs
new file mode 100644
index 000000000000..0983147132a2
--- /dev/null
+++ b/parquet-variant-compute/src/from_json.rs
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for transforming a batch of JSON strings into a batch of Variants represented as
+//! STRUCT<metadata: BINARY, value: BINARY>
+
+use crate::{VariantArray, VariantArrayBuilder};
+use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
+use arrow_schema::ArrowError;
+use parquet_variant_json::JsonToVariant;
+
+/// Feeds each row of `$array` into `$builder`: null rows as nulls, all others parsed as JSON
+macro_rules! string_array_to_variant {
+    ($input:expr, $array:expr, $builder:expr) => {{
+        for row in 0..$input.len() {
+            if $input.is_null(row) {
+                $builder.append_null();
+                continue;
+            }
+            $builder.append_json($array.value(row))?;
+        }
+    }};
+}
+
+/// Converts an array of JSON strings into a [`VariantArray`], i.e. variants represented as
+/// STRUCT<metadata: BINARY, value: BINARY>. Null input rows become null variant rows.
+/// Every non-null string must be valid JSON; an error from parsing a row is propagated.
+///
+/// Returns [`ArrowError::CastError`] unless `input` is one of:
+/// - [`StringArray`]
+/// - [`LargeStringArray`]
+/// - [`StringViewArray`]
+pub fn json_to_variant(input: &ArrayRef) -> Result<VariantArray, ArrowError> {
+    let mut variant_array_builder = VariantArrayBuilder::new(input.len());
+    let any_array = input.as_any();
+
+    // Dispatch on the concrete string array type behind the `ArrayRef`
+    if let Some(strings) = any_array.downcast_ref::<StringArray>() {
+        string_array_to_variant!(input, strings, variant_array_builder);
+    } else if let Some(strings) = any_array.downcast_ref::<LargeStringArray>() {
+        string_array_to_variant!(input, strings, variant_array_builder);
+    } else if let Some(strings) = any_array.downcast_ref::<StringViewArray>() {
+        string_array_to_variant!(input, strings, variant_array_builder);
+    } else {
+        let message =
+            "Expected reference to StringArray, LargeStringArray, or StringViewArray as input";
+        return Err(ArrowError::CastError(message.into()));
+    }
+
+    Ok(variant_array_builder.build())
+}
+
+#[cfg(test)]
+mod test {
+    use crate::json_to_variant;
+    use arrow::array::{Array, ArrayRef, LargeStringArray, StringArray, StringViewArray};
+    use arrow_schema::ArrowError;
+    use parquet_variant::{Variant, VariantBuilder};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_json_to_variant() -> Result<(), ArrowError> {
+        let json_rows = vec![
+            Some("1"),
+            None,
+            Some("{\"a\": 32}"),
+            Some("null"),
+            None,
+        ];
+        let array_ref: ArrayRef = Arc::new(StringArray::from(json_rows));
+        let variants = json_to_variant(&array_ref).unwrap();
+
+        let metadata_column = variants.metadata_field();
+        let value_column = variants.value_field().expect("value field");
+
+        // Row 0: the JSON number `1` is stored as an Int8
+        assert!(!variants.is_null(0));
+        assert_eq!(variants.value(0), Variant::Int8(1));
+
+        // Row 1: a null input string yields a null row
+        assert!(variants.is_null(1));
+
+        // Row 2: JSON object, compared against a hand-built variant object
+        assert!(!variants.is_null(2));
+        {
+            let mut builder = VariantBuilder::new();
+            let mut object = builder.new_object();
+            object.insert("a", Variant::Int8(32));
+            object.finish();
+            let (metadata_bytes, value_bytes) = builder.finish();
+            let expected_object = Variant::new(&metadata_bytes, &value_bytes);
+            assert_eq!(variants.value(2), expected_object);
+        }
+
+        // Row 3: JSON `null` is a variant NULL value, not a null row
+        assert!(!variants.is_null(3));
+        assert_eq!(variants.value(3), Variant::Null);
+
+        // Row 4: a null input string yields a null row
+        assert!(variants.is_null(4));
+
+        // The metadata/value sub-fields stay non-null even for the null rows
+        for null_row in [1, 4] {
+            assert!(!metadata_column.is_null(null_row));
+            assert!(!value_column.is_null(null_row));
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_json_to_variant_large_string() -> Result<(), ArrowError> {
+        let json_rows = vec![
+            Some("1"),
+            None,
+            Some("{\"a\": 32}"),
+            Some("null"),
+            None,
+        ];
+        let array_ref: ArrayRef = Arc::new(LargeStringArray::from(json_rows));
+        let variants = json_to_variant(&array_ref).unwrap();
+
+        let metadata_column = variants.metadata_field();
+        let value_column = variants.value_field().expect("value field");
+
+        // Row 0: the JSON number `1` is stored as an Int8
+        assert!(!variants.is_null(0));
+        assert_eq!(variants.value(0), Variant::Int8(1));
+
+        // Row 1: a null input string yields a null row
+        assert!(variants.is_null(1));
+
+        // Row 2: JSON object, compared against a hand-built variant object
+        assert!(!variants.is_null(2));
+        {
+            let mut builder = VariantBuilder::new();
+            let mut object = builder.new_object();
+            object.insert("a", Variant::Int8(32));
+            object.finish();
+            let (metadata_bytes, value_bytes) = builder.finish();
+            let expected_object = Variant::new(&metadata_bytes, &value_bytes);
+            assert_eq!(variants.value(2), expected_object);
+        }
+
+        // Row 3: JSON `null` is a variant NULL value, not a null row
+        assert!(!variants.is_null(3));
+        assert_eq!(variants.value(3), Variant::Null);
+
+        // Row 4: a null input string yields a null row
+        assert!(variants.is_null(4));
+
+        // The metadata/value sub-fields stay non-null even for the null rows
+        for null_row in [1, 4] {
+            assert!(!metadata_column.is_null(null_row));
+            assert!(!value_column.is_null(null_row));
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn test_json_to_variant_string_view() -> Result<(), ArrowError> {
+        let json_rows = vec![
+            Some("1"),
+            None,
+            Some("{\"a\": 32}"),
+            Some("null"),
+            None,
+        ];
+        let array_ref: ArrayRef = Arc::new(StringViewArray::from(json_rows));
+        let variants = json_to_variant(&array_ref).unwrap();
+
+        let metadata_column = variants.metadata_field();
+        let value_column = variants.value_field().expect("value field");
+
+        // Row 0: the JSON number `1` is stored as an Int8
+        assert!(!variants.is_null(0));
+        assert_eq!(variants.value(0), Variant::Int8(1));
+
+        // Row 1: a null input string yields a null row
+        assert!(variants.is_null(1));
+
+        // Row 2: JSON object, compared against a hand-built variant object
+        assert!(!variants.is_null(2));
+        {
+            let mut builder = VariantBuilder::new();
+            let mut object = builder.new_object();
+            object.insert("a", Variant::Int8(32));
+            object.finish();
+            let (metadata_bytes, value_bytes) = builder.finish();
+            let expected_object = Variant::new(&metadata_bytes, &value_bytes);
+            assert_eq!(variants.value(2), expected_object);
+        }
+
+        // Row 3: JSON `null` is a variant NULL value, not a null row
+        assert!(!variants.is_null(3));
+        assert_eq!(variants.value(3), Variant::Null);
+
+        // Row 4: a null input string yields a null row
+        assert!(variants.is_null(4));
+
+        // The metadata/value sub-fields stay non-null even for the null rows
+        for null_row in [1, 4] {
+            assert!(!metadata_column.is_null(null_row));
+            assert!(!value_column.is_null(null_row));
+        }
+        Ok(())
+    }
+}
diff --git a/parquet-variant-compute/src/lib.rs b/parquet-variant-compute/src/lib.rs
new file mode 100644
index 000000000000..b05d0e023653
--- /dev/null
+++ b/parquet-variant-compute/src/lib.rs
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`VariantArray`] and compute kernels for the [Variant Binary Encoding] from [Apache Parquet].
+//!
+//! ## Main APIs
+//! - [`VariantArray`] : Represents an array of `Variant` values.
+//! - [`VariantArrayBuilder`]: For building [`VariantArray`]
+//!
+//! ## Compute Kernels
+//! - [`json_to_variant()`]: Function to convert Arrays of JSON strings to a `VariantArray`.
+//! - [`variant_to_json()`]: Function to convert a `VariantArray` to arrays of JSON strings.
+//! - [`cast_to_variant()`]: Cast Arrow arrays to `VariantArray`.
+//! - [`variant_get()`]: Convert `VariantArray` (or an inner path) to a strongly-typed Arrow array.
+//! - [`shred_variant()`]: Shred a `VariantArray` according to the provided shredding schema
+//! - [`unshred_variant()`]: Unshred a `VariantArray` to pure binary variant.
+//!
+//! ## 🚧 Work In Progress
+//!
+//! This crate is under active development and is not yet ready for production use.
+//! If you are interested in helping, you can find more information on the GitHub [Variant issue].
+//!
+//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
+//! [Apache Parquet]: https://parquet.apache.org/
+//! [`VariantPath`]: parquet_variant::VariantPath
+//! [Variant issue]: https://github.com/apache/arrow-rs/issues/6736
+
+mod arrow_to_variant;
+mod cast_to_variant;
+mod from_json;
+mod shred_variant;
+mod to_json;
+mod type_conversion;
+mod unshred_variant;
+mod variant_array;
+mod variant_array_builder;
+mod variant_get;
+mod variant_to_arrow;
+
+pub use variant_array::{BorrowedShreddingState, ShreddingState, VariantArray, VariantType};
+pub use variant_array_builder::{VariantArrayBuilder, VariantValueArrayBuilder};
+
+pub use cast_to_variant::{cast_to_variant, cast_to_variant_with_options};
+pub use from_json::json_to_variant;
+pub use shred_variant::{IntoShreddingField, ShreddedSchemaBuilder, shred_variant};
+pub use to_json::variant_to_json;
+pub use unshred_variant::unshred_variant;
+pub use variant_get::{GetOptions, variant_get};
diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs
new file mode 100644
index 000000000000..7f253d249dfb
--- /dev/null
+++ b/parquet-variant-compute/src/shred_variant.rs
@@ -0,0 +1,2467 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for shredding VariantArray with a given schema.
+
+use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
+use crate::variant_to_arrow::{
+    ArrayVariantToArrowRowBuilder, PrimitiveVariantToArrowRowBuilder,
+    make_primitive_variant_to_arrow_row_builder,
+};
+use crate::{VariantArray, VariantValueArrayBuilder};
+use arrow::array::{ArrayRef, BinaryViewArray, NullBufferBuilder};
+use arrow::buffer::NullBuffer;
+use arrow::compute::CastOptions;
+use arrow::datatypes::{DataType, Field, FieldRef, Fields, TimeUnit};
+use arrow::error::{ArrowError, Result};
+use indexmap::IndexMap;
+use parquet_variant::{Variant, VariantBuilderExt, VariantPath, VariantPathElement};
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+/// Shreds the input binary variant using a target shredding schema derived from the requested data type.
+///
+/// For example, requesting `DataType::Int64` would produce an output variant array with the schema:
+///
+/// ```text
+/// {
+///    metadata: BINARY,
+///    value: BINARY,
+///    typed_value: LONG,
+/// }
+/// ```
+///
+/// Similarly, requesting `DataType::Struct` with two integer fields `a` and `b` would produce an
+/// output variant array with the schema:
+///
+/// ```text
+/// {
+///   metadata: BINARY,
+///   value: BINARY,
+///   typed_value: {
+///     a: {
+///       value: BINARY,
+///       typed_value: INT,
+///     },
+///     b: {
+///       value: BINARY,
+///       typed_value: INT,
+///     },
+///   }
+/// }
+/// ```
+///
+/// See [`ShreddedSchemaBuilder`] for a convenient way to build the `as_type`
+/// value passed to this function.
+pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result<VariantArray> {
+    // An input that already carries a `typed_value` column is rejected, not re-shredded.
+    if array.typed_value_field().is_some() {
+        return Err(ArrowError::InvalidArgumentError(
+            "Input is already shredded".to_string(),
+        ));
+    }
+
+    // Without a `value` column this is the all-null case: there is nothing to shred, so the
+    // input is handed back unchanged.
+    if array.value_field().is_none() {
+        return Ok(array.clone());
+    }
+
+    let row_count = array.len();
+    let cast_options = CastOptions::default();
+    // `top_level = true`: only the struct that represents the variant itself may be nullable.
+    let mut shredder = make_variant_to_shredded_variant_arrow_row_builder(
+        as_type,
+        &cast_options,
+        row_count,
+        true,
+    )?;
+
+    for row in 0..row_count {
+        if !array.is_null(row) {
+            shredder.append_value(array.value(row))?;
+        } else {
+            shredder.append_null()?;
+        }
+    }
+
+    // The metadata column is reused as-is; `value`, `typed_value` and the nulls are rebuilt.
+    let (value, typed_value, nulls) = shredder.finish()?;
+    let metadata = array.metadata_field().clone();
+    Ok(VariantArray::from_parts(
+        metadata,
+        Some(value),
+        Some(typed_value),
+        nulls,
+    ))
+}
+
+/// Creates the row builder that shreds variant values into `data_type`.
+///
+/// Struct types yield an object shredder, list-like types an array shredder, and the
+/// primitive types listed below a primitive shredder. Every other type is rejected with
+/// [`ArrowError::InvalidArgumentError`].
+///
+/// `capacity` is passed to the underlying builders, and `top_level` is forwarded to the
+/// object and primitive shredders (the array shredder does not take it).
+pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>(
+    data_type: &'a DataType,
+    cast_options: &'a CastOptions,
+    capacity: usize,
+    top_level: bool,
+) -> Result<VariantToShreddedVariantRowBuilder<'a>> {
+    use VariantToShreddedVariantRowBuilder as RowBuilder;
+
+    match data_type {
+        DataType::Struct(fields) => {
+            VariantToShreddedObjectVariantRowBuilder::try_new(
+                fields,
+                cast_options,
+                capacity,
+                top_level,
+            )
+            .map(RowBuilder::Object)
+        }
+        DataType::List(_)
+        | DataType::LargeList(_)
+        | DataType::ListView(_)
+        | DataType::LargeListView(_)
+        | DataType::FixedSizeList(..) => {
+            VariantToShreddedArrayVariantRowBuilder::try_new(data_type, cast_options, capacity)
+                .map(RowBuilder::Array)
+        }
+        // Supported shredded primitive types, see Variant shredding spec:
+        // https://github.com/apache/parquet-format/blob/master/VariantShredding.md#shredded-value-types
+        DataType::Boolean
+        | DataType::Int8
+        | DataType::Int16
+        | DataType::Int32
+        | DataType::Int64
+        | DataType::Float32
+        | DataType::Float64
+        | DataType::Decimal32(..)
+        | DataType::Decimal64(..)
+        | DataType::Decimal128(..)
+        | DataType::Date32
+        | DataType::Time64(TimeUnit::Microsecond)
+        | DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _)
+        | DataType::Binary
+        | DataType::BinaryView
+        | DataType::Utf8
+        | DataType::Utf8View
+        | DataType::FixedSizeBinary(16) // UUID
+        => {
+            let primitive =
+                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
+            Ok(RowBuilder::Primitive(
+                VariantToShreddedPrimitiveVariantRowBuilder::new(primitive, capacity, top_level),
+            ))
+        }
+        // Any other fixed-size binary width gets a more specific error message.
+        DataType::FixedSizeBinary(_) => Err(ArrowError::InvalidArgumentError(format!(
+            "{data_type} is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is supported."
+        ))),
+        _ => Err(ArrowError::InvalidArgumentError(format!(
+            "{data_type} is not a valid variant shredding type"
+        ))),
+    }
+}
+
+/// Row builder that shreds variant values into one of the supported `typed_value` shapes.
+///
+/// The wrapped shredder is chosen by [`make_variant_to_shredded_variant_arrow_row_builder`]
+/// from the requested data type.
+pub(crate) enum VariantToShreddedVariantRowBuilder<'a> {
+    /// Shredder used for the supported primitive data types.
+    Primitive(VariantToShreddedPrimitiveVariantRowBuilder<'a>),
+    /// Shredder used for list-like data types.
+    Array(VariantToShreddedArrayVariantRowBuilder<'a>),
+    /// Shredder used for struct data types.
+    Object(VariantToShreddedObjectVariantRowBuilder<'a>),
+}
+
+impl<'a> VariantToShreddedVariantRowBuilder<'a> {
+    /// Appends a null row by forwarding to the wrapped shredder.
+    pub fn append_null(&mut self) -> Result<()> {
+        match self {
+            Self::Primitive(inner) => inner.append_null(),
+            Self::Array(inner) => inner.append_null(),
+            Self::Object(inner) => inner.append_null(),
+        }
+    }
+
+    /// Appends `value` by forwarding to the wrapped shredder and returns its boolean result.
+    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        match self {
+            Self::Primitive(inner) => inner.append_value(value),
+            Self::Array(inner) => inner.append_value(value),
+            Self::Object(inner) => inner.append_value(value),
+        }
+    }
+
+    /// Consumes the builder and returns `(value, typed_value, nulls)` from the wrapped shredder.
+    pub fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
+        match self {
+            Self::Primitive(inner) => inner.finish(),
+            Self::Array(inner) => inner.finish(),
+            Self::Object(inner) => inner.finish(),
+        }
+    }
+}
+
+/// Shreds variant values into a primitive `typed_value` column, keeping a binary `value`
+/// column as the fallback for values the typed builder reports it did not take.
+///
+/// `top_level` records whether this shredder represents the top-level variant struct; it
+/// decides how a null row is marked in the null buffer (see `append_null`).
+pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> {
+    value_builder: VariantValueArrayBuilder,
+    typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
+    nulls: NullBufferBuilder,
+    top_level: bool,
+}
+
+impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> {
+    /// Wraps `typed_value_builder`, sizing the fallback and null builders with `capacity`.
+    pub(crate) fn new(
+        typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>,
+        capacity: usize,
+        top_level: bool,
+    ) -> Self {
+        let value_builder = VariantValueArrayBuilder::new(capacity);
+        let nulls = NullBufferBuilder::new(capacity);
+        Self {
+            value_builder,
+            typed_value_builder,
+            nulls,
+            top_level,
+        }
+    }
+
+    /// Appends a row with neither a fallback value nor a typed value.
+    fn append_null(&mut self) -> Result<()> {
+        // Only the top-level struct that represents the variant can be nullable; object fields
+        // and array elements are non-nullable, so for them the row is still marked valid.
+        let row_is_valid = !self.top_level;
+        self.nulls.append(row_is_valid);
+        self.value_builder.append_null();
+        self.typed_value_builder.append_null()
+    }
+
+    /// Appends `value` to the typed column if the typed builder takes it, otherwise to the
+    /// fallback `value` column.
+    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        self.nulls.append_non_null();
+        let shredded = self.typed_value_builder.append_value(&value)?;
+        if !shredded {
+            // The typed builder did not take the value: keep it in the fallback column.
+            self.value_builder.append_value(value);
+        } else {
+            self.value_builder.append_null();
+        }
+        // NOTE(review): this reports `true` even when the value went to the fallback column,
+        // unlike the array and object shredders, which return `false` on fallback -- confirm
+        // this is intended.
+        Ok(true)
+    }
+
+    /// Consumes the builder and returns `(value, typed_value, nulls)`.
+    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
+        let value = self.value_builder.build()?;
+        let typed_value = self.typed_value_builder.finish()?;
+        let nulls = self.nulls.finish();
+        Ok((value, typed_value, nulls))
+    }
+}
+
+/// Shreds variant values into a list-like `typed_value` column, keeping a binary `value`
+/// column as the fallback for values that are not lists.
+pub(crate) struct VariantToShreddedArrayVariantRowBuilder<'a> {
+    value_builder: VariantValueArrayBuilder,
+    typed_value_builder: ArrayVariantToArrowRowBuilder<'a>,
+}
+
+impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> {
+    /// Builds the shredder for the list-like `data_type`; fails if the typed list builder
+    /// cannot be created.
+    fn try_new(
+        data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        let value_builder = VariantValueArrayBuilder::new(capacity);
+        let typed_value_builder =
+            ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?;
+        Ok(Self {
+            value_builder,
+            typed_value_builder,
+        })
+    }
+
+    /// Appends a null row: an explicit `Variant::Null` in the fallback column and a null in
+    /// the typed list column.
+    fn append_null(&mut self) -> Result<()> {
+        self.typed_value_builder.append_null();
+        self.value_builder.append_value(Variant::Null);
+        Ok(())
+    }
+
+    /// Appends `variant`, returning `true` when it was a list and therefore shredded.
+    fn append_value(&mut self, variant: Variant<'_, '_>) -> Result<bool> {
+        // Exactly one of the two columns is populated per row: lists go to `typed_value`,
+        // everything else goes to the fallback `value`.
+        let shredded = match variant {
+            Variant::List(list) => {
+                self.value_builder.append_null();
+                self.typed_value_builder.append_value(list)?;
+                true
+            }
+            non_list => {
+                self.value_builder.append_value(non_list);
+                self.typed_value_builder.append_null();
+                false
+            }
+        };
+        Ok(shredded)
+    }
+
+    /// Consumes the builder and returns `(value, typed_value, nulls)`; `nulls` is always `None`.
+    fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
+        let value = self.value_builder.build()?;
+        let typed_value = self.typed_value_builder.finish()?;
+        // All elements of an array must be present (not missing) because
+        // the array Variant encoding does not allow missing elements
+        Ok((value, typed_value, None))
+    }
+}
+
+/// Shreds variant objects into a struct `typed_value` with one child shredder per requested
+/// field, keeping a binary `value` column for non-object values and for object fields that
+/// are not part of the shredding schema.
+pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> {
+    // Fallback column: the whole value for non-objects, or the residual (unshredded) fields
+    // of a partially shredded object.
+    value_builder: VariantValueArrayBuilder,
+    // One child shredder per shredded field, keyed by field name.
+    typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>,
+    // Validity of the `typed_value` struct itself (null when the row is not an object).
+    typed_value_nulls: NullBufferBuilder,
+    // Validity of the row as a whole.
+    nulls: NullBufferBuilder,
+    // Whether this shredder represents the top-level variant struct.
+    top_level: bool,
+}
+
+impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
+    /// Creates the shredder with one child row builder per entry of `fields`.
+    ///
+    /// Returns an error if any field's data type is not a valid shredding type.
+    fn try_new(
+        fields: &'a Fields,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+        top_level: bool,
+    ) -> Result<Self> {
+        // Child shredders are never top-level, hence the hard-coded `false`.
+        let typed_value_builders = fields.iter().map(|field| {
+            let builder = make_variant_to_shredded_variant_arrow_row_builder(
+                field.data_type(),
+                cast_options,
+                capacity,
+                false,
+            )?;
+            Ok((field.name().as_str(), builder))
+        });
+        Ok(Self {
+            value_builder: VariantValueArrayBuilder::new(capacity),
+            typed_value_builders: typed_value_builders.collect::<Result<_>>()?,
+            typed_value_nulls: NullBufferBuilder::new(capacity),
+            nulls: NullBufferBuilder::new(capacity),
+            top_level,
+        })
+    }
+
+    /// Appends a null row: no fallback value, a null `typed_value` struct, and a null in
+    /// every child shredder so that all children stay aligned with the row count.
+    fn append_null(&mut self) -> Result<()> {
+        // Only the top-level struct that represents the variant can be nullable; object fields and
+        // array elements are non-nullable.
+        self.nulls.append(!self.top_level);
+        self.value_builder.append_null();
+        self.typed_value_nulls.append_null();
+        for (_, typed_value_builder) in &mut self.typed_value_builders {
+            typed_value_builder.append_null()?;
+        }
+        Ok(())
+    }
+
+    /// Appends `value`, shredding it field by field when it is an object.
+    ///
+    /// Returns `Ok(false)` when `value` is not an object (it is stored whole in the fallback
+    /// column) and `Ok(true)` when it is an object.
+    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        let Variant::Object(ref obj) = value else {
+            // Not an object => fall back
+            self.nulls.append_non_null();
+            self.value_builder.append_value(value);
+            self.typed_value_nulls.append_null();
+            for (_, typed_value_builder) in &mut self.typed_value_builders {
+                typed_value_builder.append_null()?;
+            }
+            return Ok(false);
+        };
+
+        // Route the object's fields by name as either shredded or unshredded
+        let mut builder = self.value_builder.builder_ext(value.metadata());
+        let mut object_builder = builder.try_new_object()?;
+        // Names of the shredded fields this row actually contained.
+        let mut seen = std::collections::HashSet::new();
+        // Set once at least one field had no matching child shredder.
+        let mut partially_shredded = false;
+        for (field_name, value) in obj.iter() {
+            match self.typed_value_builders.get_mut(field_name) {
+                Some(typed_value_builder) => {
+                    typed_value_builder.append_value(value)?;
+                    seen.insert(field_name);
+                }
+                None => {
+                    // Field is not in the shredding schema: keep it in the residual object.
+                    object_builder.insert_bytes(field_name, value);
+                    partially_shredded = true;
+                }
+            }
+        }
+
+        // Handle missing fields
+        for (field_name, typed_value_builder) in &mut self.typed_value_builders {
+            if !seen.contains(field_name) {
+                typed_value_builder.append_null()?;
+            }
+        }
+
+        // Only emit the value if it captured any unshredded object fields
+        if partially_shredded {
+            object_builder.finish();
+        } else {
+            // The residual object is discarded unfinished before a null is appended to the
+            // fallback column in its place.
+            drop(object_builder);
+            self.value_builder.append_null();
+        }
+
+        self.typed_value_nulls.append_non_null();
+        self.nulls.append_non_null();
+        Ok(true)
+    }
+
+    /// Consumes the builder and returns `(value, typed_value, nulls)`, where `typed_value` is
+    /// a struct array with one shredded field array per child shredder.
+    fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> {
+        let mut builder = StructArrayBuilder::new();
+        for (field_name, typed_value_builder) in self.typed_value_builders {
+            let (value, typed_value, nulls) = typed_value_builder.finish()?;
+            let array =
+                ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), nulls);
+            // NOTE(review): the trailing `false` is presumably the field's nullability flag --
+            // confirm against `StructArrayBuilder::with_field`.
+            builder = builder.with_field(field_name, ArrayRef::from(array), false);
+        }
+        if let Some(nulls) = self.typed_value_nulls.finish() {
+            builder = builder.with_nulls(nulls);
+        }
+        Ok((
+            self.value_builder.build()?,
+            Arc::new(builder.build()),
+            self.nulls.finish(),
+        ))
+    }
+}
+
+/// Shredding configuration for a single path: the target data type and its nullability.
+#[derive(Clone)]
+pub struct ShreddingField {
+    data_type: DataType,
+    nullable: bool,
+}
+
+impl ShreddingField {
+    /// Pairs `data_type` with its `nullable` flag.
+    fn new(data_type: DataType, nullable: bool) -> Self {
+        ShreddingField {
+            data_type,
+            nullable,
+        }
+    }
+
+    /// A nullable field of type [`DataType::Null`].
+    fn null() -> Self {
+        ShreddingField {
+            data_type: DataType::Null,
+            nullable: true,
+        }
+    }
+}
+
+/// Conversion into a [`ShreddingField`], implemented for `FieldRef`, `DataType`,
+/// `&DataType`, `(DataType, bool)` and `(&DataType, bool)`.
+///
+/// A bare data type is treated as nullable; the tuple forms carry the nullability explicitly.
+pub trait IntoShreddingField {
+    /// Consumes `self` and produces the equivalent [`ShreddingField`].
+    fn into_shredding_field(self) -> ShreddingField;
+}
+
+impl IntoShreddingField for FieldRef {
+    fn into_shredding_field(self) -> ShreddingField {
+        // Only the type and nullability of the Arrow field are used.
+        let data_type = self.data_type().clone();
+        let nullable = self.is_nullable();
+        ShreddingField::new(data_type, nullable)
+    }
+}
+
+impl IntoShreddingField for &DataType {
+    fn into_shredding_field(self) -> ShreddingField {
+        // Nullable by default.
+        (self, true).into_shredding_field()
+    }
+}
+
+impl IntoShreddingField for DataType {
+    fn into_shredding_field(self) -> ShreddingField {
+        // Nullable by default.
+        (self, true).into_shredding_field()
+    }
+}
+
+impl IntoShreddingField for (&DataType, bool) {
+    fn into_shredding_field(self) -> ShreddingField {
+        let (data_type, nullable) = self;
+        ShreddingField::new(data_type.clone(), nullable)
+    }
+}
+
+impl IntoShreddingField for (DataType, bool) {
+    fn into_shredding_field(self) -> ShreddingField {
+        let (data_type, nullable) = self;
+        ShreddingField::new(data_type, nullable)
+    }
+}
+
+/// Builder for constructing a variant shredding schema.
+///
+/// The builder pattern makes it easy to incrementally define which fields
+/// should be shredded and with what types. Fields are nullable by default; pass
+/// a `(data_type, nullable)` pair or a `FieldRef` to control nullability.
+///
+/// Note: this builder currently only supports struct fields. List support
+/// will be added in the future.
+///
+/// # Example
+///
+/// ```
+/// use std::sync::Arc;
+/// use arrow::datatypes::{DataType, Field, TimeUnit};
+/// use parquet_variant::{VariantPath, VariantPathElement};
+/// use parquet_variant_compute::ShreddedSchemaBuilder;
+///
+/// // Define the shredding schema using the builder
+/// let shredding_type = ShreddedSchemaBuilder::default()
+///     // store the "time" field as a separate UTC timestamp
+///     .with_path("time", (&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), true))
+///     // store hostname as non-nullable Utf8
+///     .with_path("hostname", (&DataType::Utf8, false))
+///     // pass a FieldRef directly
+///     .with_path(
+///         "metadata.trace_id",
+///         Arc::new(Field::new("trace_id", DataType::FixedSizeBinary(16), false)),
+///     )
+///     // field name with a dot: use VariantPath to avoid splitting
+///     .with_path(
+///         VariantPath::from_iter([VariantPathElement::from("metrics.cpu")]),
+///         &DataType::Float64,
+///     )
+///     .build();
+///
+/// // The shredding_type can now be passed to shred_variant:
+/// // let shredded = shred_variant(&input, &shredding_type)?;
+/// ```
+#[derive(Default, Clone)]
+pub struct ShreddedSchemaBuilder {
+    root: VariantSchemaNode,
+}
+
+impl ShreddedSchemaBuilder {
+    /// Returns a builder with no paths registered.
+    pub fn new() -> Self {
+        Self {
+            root: VariantSchemaNode::default(),
+        }
+    }
+
+    /// Registers `field` as the shredding target for `path` and returns the builder.
+    ///
+    /// A `&str` path is written in dot notation, so `"a.b.c"` creates nested struct levels
+    /// `a` -> `b` -> `c`. Pass a [`VariantPath`] directly when a field name itself contains
+    /// a dot.
+    ///
+    /// # Arguments
+    ///
+    /// * `path` - Anything convertible to [`VariantPath`] (e.g., a `&str`)
+    /// * `field` - Anything convertible via [`IntoShreddingField`] (e.g. `FieldRef`,
+    ///   `&DataType`, or `(&DataType, bool)` to control nullability)
+    pub fn with_path<'a, P, F>(mut self, path: P, field: F) -> Self
+    where
+        P: Into<VariantPath<'a>>,
+        F: IntoShreddingField,
+    {
+        let variant_path: VariantPath<'a> = path.into();
+        let shredding_field = field.into_shredding_field();
+        self.root.insert_path(&variant_path, shredding_field);
+        self
+    }
+
+    /// Consumes the builder and returns the shredding [`DataType`].
+    ///
+    /// Falls back to [`DataType::Null`] when the schema tree yields no type.
+    pub fn build(self) -> DataType {
+        self.root.to_shredding_type().unwrap_or(DataType::Null)
+    }
+}
+
+/// Internal tree node structure for building variant schemas.
+///
+/// The tree is grown by `insert_path`: each field name in a path becomes a level of
+/// `Struct` nodes and the final path element becomes a `Leaf`.
+#[derive(Clone)]
+enum VariantSchemaNode {
+    /// A leaf node with a primitive/scalar type (and nullability)
+    Leaf(ShreddingField),
+    /// An inner struct node with nested fields
+    Struct(BTreeMap<String, VariantSchemaNode>),
+}
+
+/// The default node is a nullable `DataType::Null` leaf. It serves as the root of an empty
+/// builder and as the placeholder created for a new child before a path is inserted into it.
+impl Default for VariantSchemaNode {
+    fn default() -> Self {
+        Self::Leaf(ShreddingField::null())
+    }
+}
+
+impl VariantSchemaNode {
+    /// Insert `field` at `path`, creating intermediate struct nodes as needed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `path` contains a list index element; list paths are not supported yet.
+    fn insert_path(&mut self, path: &VariantPath<'_>, field: ShreddingField) {
+        self.insert_path_elements(path, field);
+    }
+
+    /// Recursive worker for [`Self::insert_path`].
+    ///
+    /// With no segments left, this node is overwritten by a leaf holding `field`. Otherwise
+    /// the node is turned into a struct (replacing any leaf that was there) and the remaining
+    /// segments are inserted into the child named by the first segment.
+    fn insert_path_elements(&mut self, segments: &[VariantPathElement<'_>], field: ShreddingField) {
+        let Some((head, tail)) = segments.split_first() else {
+            *self = Self::Leaf(field);
+            return;
+        };
+
+        match head {
+            VariantPathElement::Field { name } => {
+                // Ensure this node is a Struct node; a leaf (including the default null
+                // leaf) is replaced by an empty struct.
+                if !matches!(self, Self::Struct(_)) {
+                    *self = Self::Struct(BTreeMap::new());
+                }
+                let Self::Struct(children) = self else {
+                    unreachable!("node was converted to a struct above");
+                };
+
+                children
+                    .entry(name.to_string())
+                    .or_default()
+                    .insert_path_elements(tail, field);
+            }
+            VariantPathElement::Index { .. } => {
+                // This branch is reachable through the public builder API, so it is reported
+                // as a missing feature rather than as "unreachable" code.
+                unimplemented!("List paths are not supported yet");
+            }
+        }
+    }
+
+    /// Convert this node to a shredding type.
+    ///
+    /// Returns the [`DataType`] for passing to [`shred_variant`], or `None` for a struct
+    /// node none of whose children produces a field.
+    fn to_shredding_type(&self) -> Option<DataType> {
+        match self {
+            Self::Leaf(field) => Some(field.data_type.clone()),
+            Self::Struct(children) => {
+                let child_fields: Vec<_> = children
+                    .iter()
+                    .filter_map(|(name, child)| child.to_shredding_field(name))
+                    .collect();
+                if child_fields.is_empty() {
+                    None
+                } else {
+                    Some(DataType::Struct(Fields::from(child_fields)))
+                }
+            }
+        }
+    }
+
+    /// Convert this node to an Arrow field called `name`.
+    ///
+    /// Leaves keep their configured nullability; struct nodes are always nullable. Returns
+    /// `None` when a struct node has no resulting type.
+    fn to_shredding_field(&self, name: &str) -> Option<FieldRef> {
+        match self {
+            Self::Leaf(field) => Some(Arc::new(Field::new(
+                name,
+                field.data_type.clone(),
+                field.nullable,
+            ))),
+            Self::Struct(_) => self
+                .to_shredding_type()
+                .map(|data_type| Arc::new(Field::new(name, data_type, true))),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::VariantArrayBuilder;
+    use crate::arrow_to_variant::ListLikeArray;
+    use arrow::array::{
+        Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, GenericListArray,
+        GenericListViewArray, Int64Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray,
+    };
+    use arrow::datatypes::{
+        ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, UnionFields, UnionMode,
+    };
+    use parquet_variant::{
+        BuilderSpecificState, EMPTY_VARIANT_METADATA_BYTES, ObjectBuilder, ReadOnlyMetadataBuilder,
+        Variant, VariantBuilder, VariantPath, VariantPathElement,
+    };
+    use std::sync::Arc;
+    use uuid::Uuid;
+
+    /// Declarative description of a (possibly nested) variant value used to build test input.
+    #[derive(Clone)]
+    enum VariantValue<'a> {
+        /// An already-constructed variant value.
+        Value(Variant<'a, 'a>),
+        /// A list whose elements are described recursively.
+        List(Vec<VariantValue<'a>>),
+        /// An object given as `(field name, value)` pairs.
+        Object(Vec<(&'a str, VariantValue<'a>)>),
+        /// A null nested inside a list or object.
+        Null,
+    }
+
+    /// Lets anything convertible to a `Variant` be used directly as a `VariantValue::Value`.
+    impl<'a, T> From<T> for VariantValue<'a>
+    where
+        T: Into<Variant<'a, 'a>>,
+    {
+        fn from(value: T) -> Self {
+            Self::Value(value.into())
+        }
+    }
+
+    /// Declarative description of one top-level row of a test `VariantArray`.
+    #[derive(Clone)]
+    enum VariantRow<'a> {
+        /// A row holding a single described value.
+        Value(VariantValue<'a>),
+        /// A row holding a list.
+        List(Vec<VariantValue<'a>>),
+        /// A row holding an object given as `(field name, value)` pairs.
+        Object(Vec<(&'a str, VariantValue<'a>)>),
+        /// A null row.
+        Null,
+    }
+
+    /// Builds an unshredded `VariantArray` with one row per entry of `rows`.
+    fn build_variant_array(rows: Vec<VariantRow<'static>>) -> VariantArray {
+        let mut builder = VariantArrayBuilder::new(rows.len());
+
+        /// Appends `value` to any variant builder (the array builder or a list builder),
+        /// recursing into nested lists and objects.
+        fn append_variant_value<B: VariantBuilderExt>(builder: &mut B, value: VariantValue) {
+            match value {
+                VariantValue::Value(v) => builder.append_value(v),
+                VariantValue::List(values) => {
+                    let mut list = builder.new_list();
+                    for v in values {
+                        append_variant_value(&mut list, v);
+                    }
+                    list.finish();
+                }
+                VariantValue::Object(fields) => {
+                    let mut object = builder.new_object();
+                    for (name, value) in fields {
+                        append_variant_field(&mut object, name, value);
+                    }
+                    object.finish();
+                }
+                VariantValue::Null => builder.append_null(),
+            }
+        }
+
+        /// Inserts `value` into `object` under `name`, recursing into nested lists and objects.
+        fn append_variant_field<'a, S: BuilderSpecificState>(
+            object: &mut ObjectBuilder<'_, S>,
+            name: &'a str,
+            value: VariantValue<'a>,
+        ) {
+            match value {
+                VariantValue::Value(v) => {
+                    object.insert(name, v);
+                }
+                VariantValue::List(values) => {
+                    let mut list = object.new_list(name);
+                    for v in values {
+                        append_variant_value(&mut list, v);
+                    }
+                    list.finish();
+                }
+                VariantValue::Object(fields) => {
+                    let mut nested = object.new_object(name);
+                    for (field_name, v) in fields {
+                        append_variant_field(&mut nested, field_name, v);
+                    }
+                    nested.finish();
+                }
+                // A null object field is stored as an explicit `Variant::Null` value.
+                VariantValue::Null => {
+                    object.insert(name, Variant::Null);
+                }
+            }
+        }
+
+        // Each row description is appended to the array builder in order.
+        rows.into_iter().for_each(|row| match row {
+            VariantRow::Value(value) => append_variant_value(&mut builder, value),
+            VariantRow::List(values) => {
+                let mut list = builder.new_list();
+                for value in values {
+                    append_variant_value(&mut list, value);
+                }
+                list.finish();
+            }
+            VariantRow::Object(fields) => {
+                let mut object = builder.new_object();
+                for (name, value) in fields {
+                    append_variant_field(&mut object, name, value);
+                }
+                object.finish();
+            }
+            VariantRow::Null => builder.append_null(),
+        });
+        builder.build()
+    }
+
+    /// Test-only extension of `ListLikeArray` that exposes offsets and per-row sizes through
+    /// one interface, so assertions can be shared between list and list-view arrays.
+    trait TestListLikeArray: ListLikeArray {
+        /// Offset type of the underlying array.
+        type OffsetSize: OffsetSizeTrait;
+        /// The array's value offsets; both implementations below return `Some`.
+        fn value_offsets(&self) -> Option<&[Self::OffsetSize]>;
+        /// Number of elements in the list at `index`.
+        fn value_size(&self, index: usize) -> Self::OffsetSize;
+    }
+
+    impl<O: OffsetSizeTrait> TestListLikeArray for GenericListArray<O> {
+        type OffsetSize = O;
+
+        fn value_offsets(&self) -> Option<&[Self::OffsetSize]> {
+            // Calls the array's own `value_offsets` by its full path.
+            Some(GenericListArray::value_offsets(self))
+        }
+
+        fn value_size(&self, index: usize) -> Self::OffsetSize {
+            // For list arrays the size of a row is its `value_length`.
+            GenericListArray::value_length(self, index)
+        }
+    }
+
+    impl<O: OffsetSizeTrait> TestListLikeArray for GenericListViewArray<O> {
+        type OffsetSize = O;
+
+        fn value_offsets(&self) -> Option<&[Self::OffsetSize]> {
+            // Calls the array's own `value_offsets` by its full path.
+            Some(GenericListViewArray::value_offsets(self))
+        }
+
+        fn value_size(&self, index: usize) -> Self::OffsetSize {
+            // Calls the array's own `value_size` by its full path.
+            GenericListViewArray::value_size(self, index)
+        }
+    }
+
+    /// Returns the `typed_value` column of `array` as a list-like test array with offset
+    /// type `O`.
+    ///
+    /// Panics if `array` has no `typed_value` column, or if that column is neither a list
+    /// nor a list-view array with offset type `O`.
+    fn downcast_list_like_array<O: OffsetSizeTrait>(
+        array: &VariantArray,
+    ) -> &dyn TestListLikeArray<OffsetSize = O> {
+        let typed_value = array.typed_value_field().unwrap();
+
+        // Try the plain list layout first, then the list-view layout.
+        if let Some(list) = typed_value.as_any().downcast_ref::<GenericListArray<O>>() {
+            return list;
+        }
+        if let Some(list_view) = typed_value
+            .as_any()
+            .downcast_ref::<GenericListViewArray<O>>()
+        {
+            return list_view;
+        }
+
+        panic!(
+            "Expected list-like typed_value with matching offset type, got {}",
+            typed_value.data_type()
+        )
+    }
+
+    /// Asserts the list-level shredding layout of `array`.
+    ///
+    /// Checks the row count, the list offsets, and for every row either the shredded list
+    /// size (`expected_sizes[i]` is `Some`) or the fallback variant (`expected_sizes[i]` is
+    /// `None`, in which case `expected_fallbacks[i]` must be `Some`).
+    fn assert_list_structure<O: OffsetSizeTrait>(
+        array: &VariantArray,
+        expected_len: usize,
+        expected_offsets: &[O],
+        expected_sizes: &[Option<O>],
+        expected_fallbacks: &[Option<Variant<'static, 'static>>],
+    ) {
+        assert_eq!(array.len(), expected_len);
+
+        // (fallback `value` column, metadata column) of the top-level variant array.
+        let fallbacks = (array.value_field().unwrap(), Some(array.metadata_field()));
+        // From here on `array` refers to the list-like `typed_value` column.
+        let array = downcast_list_like_array::<O>(array);
+
+        assert_eq!(
+            array.value_offsets().unwrap(),
+            expected_offsets,
+            "list offsets mismatch"
+        );
+        assert_eq!(
+            array.len(),
+            expected_sizes.len(),
+            "expected_sizes should match array length"
+        );
+        assert_eq!(
+            array.len(),
+            expected_fallbacks.len(),
+            "expected_fallbacks should match array length"
+        );
+        assert_eq!(
+            array.len(),
+            fallbacks.0.len(),
+            "fallbacks value field should match array length"
+        );
+
+        // Validate per-row shredding outcomes for the list array
+        for (idx, (expected_size, expected_fallback)) in expected_sizes
+            .iter()
+            .zip(expected_fallbacks.iter())
+            .enumerate()
+        {
+            match expected_size {
+                Some(len) => {
+                    // Successfully shredded: typed list value present, no fallback value
+                    assert!(array.is_valid(idx));
+                    assert_eq!(array.value_size(idx), *len);
+                    assert!(fallbacks.0.is_null(idx));
+                }
+                None => {
+                    // Unable to shred: typed list value absent, fallback should carry the variant
+                    assert!(array.is_null(idx));
+                    assert_eq!(array.value_size(idx), O::zero());
+                    match expected_fallback {
+                        Some(expected_variant) => {
+                            assert!(fallbacks.0.is_valid(idx));
+                            // Use the row's metadata when it is valid and non-empty,
+                            // otherwise the empty-metadata constant.
+                            let metadata_bytes = fallbacks
+                                .1
+                                .filter(|m| m.is_valid(idx))
+                                .map(|m| m.value(idx))
+                                .filter(|bytes| !bytes.is_empty())
+                                .unwrap_or(EMPTY_VARIANT_METADATA_BYTES);
+                            assert_eq!(
+                                Variant::new(metadata_bytes, fallbacks.0.value(idx)),
+                                expected_variant.clone()
+                            );
+                        }
+                        // Callers must supply a fallback for every row without a size.
+                        None => unreachable!(),
+                    }
+                }
+            }
+        }
+    }
+
+    /// Asserts the list-level layout via `assert_list_structure` and then the shredded state
+    /// of the flattened list elements.
+    ///
+    /// `expected_shredded_elements` is `(typed values, fallback variants)`; both slices have
+    /// one entry per element of the list's values array and must have equal length.
+    fn assert_list_structure_and_elements<T: ArrowPrimitiveType, O: OffsetSizeTrait>(
+        array: &VariantArray,
+        expected_len: usize,
+        expected_offsets: &[O],
+        expected_sizes: &[Option<O>],
+        expected_fallbacks: &[Option<Variant<'static, 'static>>],
+        expected_shredded_elements: (&[Option<T::Native>], &[Option<Variant<'static, 'static>>]),
+    ) {
+        assert_list_structure(
+            array,
+            expected_len,
+            expected_offsets,
+            expected_sizes,
+            expected_fallbacks,
+        );
+        let array = downcast_list_like_array::<O>(array);
+
+        // Validate the shredded state of list elements (typed values and fallbacks)
+        // Note: `expected_fallbacks` is rebound here to the element-level expectations.
+        let (expected_values, expected_fallbacks) = expected_shredded_elements;
+        assert_eq!(
+            expected_values.len(),
+            expected_fallbacks.len(),
+            "expected_values and expected_fallbacks should be aligned"
+        );
+
+        // Validate the shredded primitive values for list elements
+        let element_array = ShreddedVariantFieldArray::try_new(array.values().as_ref()).unwrap();
+        let element_values = element_array
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<PrimitiveArray<T>>()
+            .unwrap();
+        assert_eq!(element_values.len(), expected_values.len());
+        for (idx, expected_value) in expected_values.iter().enumerate() {
+            match expected_value {
+                Some(value) => {
+                    assert!(element_values.is_valid(idx));
+                    assert_eq!(element_values.value(idx), *value);
+                }
+                None => assert!(element_values.is_null(idx)),
+            }
+        }
+
+        // Validate fallback variants for list elements that could not be shredded
+        let element_fallbacks = element_array.value_field().unwrap();
+        assert_eq!(element_fallbacks.len(), expected_fallbacks.len());
+        for (idx, expected_fallback) in expected_fallbacks.iter().enumerate() {
+            match expected_fallback {
+                Some(expected_variant) => {
+                    assert!(element_fallbacks.is_valid(idx));
+                    // Element fallbacks are decoded with the empty-metadata constant.
+                    assert_eq!(
+                        Variant::new(EMPTY_VARIANT_METADATA_BYTES, element_fallbacks.value(idx)),
+                        expected_variant.clone()
+                    );
+                }
+                None => assert!(element_fallbacks.is_null(idx)),
+            }
+        }
+    }
+
+    #[test]
+    fn test_already_shredded_input_error() {
+        // Build a VariantArray that already has a typed_value field and check that shred_variant rejects it.
+        // First create a valid VariantArray, then extract its parts to construct a shredded one
+        let temp_array = VariantArray::from_iter(vec![Some(Variant::from("test"))]);
+        let metadata = temp_array.metadata_field().clone();
+        let value = temp_array.value_field().unwrap().clone();
+        let typed_value = Arc::new(Int64Array::from(vec![42])) as ArrayRef; // one row, matching temp_array's length
+
+        let shredded_array =
+            VariantArray::from_parts(metadata, Some(value), Some(typed_value), None);
+
+        let result = shred_variant(&shredded_array, &DataType::Int64); // must fail: input already carries typed_value
+        assert!(matches!(
+            result.unwrap_err(),
+            ArrowError::InvalidArgumentError(_)
+        ));
+    }
+
+    #[test]
+    fn test_all_null_input() {
+        // Create VariantArray from metadata only: neither value nor typed_value is supplied (all null case)
+        let metadata = BinaryViewArray::from_iter_values([&[1u8, 0u8]]); // minimal valid metadata
+        let all_null_array = VariantArray::from_parts(metadata, None, None, None);
+        let result = shred_variant(&all_null_array, &DataType::Int64).unwrap(); // shredding must still succeed
+
+        // The result should likewise carry neither a value nor a typed_value field
+        assert!(result.value_field().is_none());
+        assert!(result.typed_value_field().is_none());
+    }
+
+    #[test]
+    fn test_invalid_fixed_size_binary_shredding() {
+        let mock_uuid_1 = Uuid::new_v4();
+
+        let input = VariantArray::from_iter([Some(Variant::from(mock_uuid_1)), None]); // one UUID row and one null row
+
+        // shred_variant only supports FixedSizeBinary(16). Any other length will err, so request width 17.
+        let err = shred_variant(&input, &DataType::FixedSizeBinary(17)).unwrap_err();
+
+        assert_eq!( // pin the exact user-facing message
+            err.to_string(),
+            "Invalid argument error: FixedSizeBinary(17) is not a valid variant shredding type. Only FixedSizeBinary(16) for UUID is supported."
+        );
+    }
+
+    #[test]
+    fn test_uuid_shredding() {
+        let mock_uuid_1 = Uuid::new_v4();
+        let mock_uuid_2 = Uuid::new_v4();
+
+        let input = VariantArray::from_iter([
+            Some(Variant::from(mock_uuid_1)), // row 0: UUID
+            None,                             // row 1: null
+            Some(Variant::from(false)),       // row 2: boolean, not a UUID
+            Some(Variant::from(mock_uuid_2)), // row 3: UUID
+        ]);
+
+        let variant_array = shred_variant(&input, &DataType::FixedSizeBinary(16)).unwrap();
+
+        // NOTE(review): disabled check: inspect the typed_value Field and make sure it contains the canonical Uuid extension type
+        // let typed_value_field = variant_array
+        //     .inner()
+        //     .fields()
+        //     .into_iter()
+        //     .find(|f| f.name() == "typed_value")
+        //     .unwrap();
+
+        // assert!(
+        //     typed_value_field
+        //         .try_extension_type::<extension::Uuid>()
+        //         .is_ok()
+        // );
+
+        // probe the downcasted typed_value array to make sure uuids are shredded correctly
+        let uuids = variant_array
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+
+        assert_eq!(uuids.len(), 4); // one typed_value slot per input row
+
+        assert!(!uuids.is_null(0));
+
+        let got_uuid_1: &[u8] = uuids.value(0);
+        assert_eq!(got_uuid_1, mock_uuid_1.as_bytes());
+
+        assert!(uuids.is_null(1)); // null input row
+        assert!(uuids.is_null(2)); // boolean row has no UUID typed value
+
+        assert!(!uuids.is_null(3));
+
+        let got_uuid_2: &[u8] = uuids.value(3);
+        assert_eq!(got_uuid_2, mock_uuid_2.as_bytes());
+    }
+
+    #[test]
+    fn test_primitive_shredding_comprehensive() {
+        // Test mixed scenarios in a single array, shredded against an Int64 target
+        let input = VariantArray::from_iter(vec![
+            Some(Variant::from(42i64)),   // successful shred
+            Some(Variant::from("hello")), // failed shred (string)
+            Some(Variant::from(100i64)),  // successful shred
+            None,                         // array-level null
+            Some(Variant::Null),          // variant null
+            Some(Variant::from(3i8)),     // successful shred (int8->int64 conversion)
+        ]);
+
+        let result = shred_variant(&input, &DataType::Int64).unwrap();
+
+        // Verify structure: value and typed_value must both be present, and typed_value must be an Int64Array
+        let metadata_field = result.metadata_field();
+        let value_field = result.value_field().unwrap();
+        let typed_value_field = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+
+        // Check specific outcomes for each row
+        assert_eq!(result.len(), 6);
+
+        // Row 0: 42 -> should shred successfully
+        assert!(!result.is_null(0));
+        assert!(value_field.is_null(0)); // value should be null when shredded
+        assert!(!typed_value_field.is_null(0));
+        assert_eq!(typed_value_field.value(0), 42);
+
+        // Row 1: "hello" -> should fail to shred
+        assert!(!result.is_null(1));
+        assert!(!value_field.is_null(1)); // value should contain original
+        assert!(typed_value_field.is_null(1)); // typed_value should be null
+        assert_eq!(
+            Variant::new(metadata_field.value(1), value_field.value(1)),
+            Variant::from("hello")
+        );
+
+        // Row 2: 100 -> should shred successfully
+        assert!(!result.is_null(2));
+        assert!(value_field.is_null(2)); // value should be null when shredded
+        assert_eq!(typed_value_field.value(2), 100);
+
+        // Row 3: array null -> should be null in result
+        assert!(result.is_null(3));
+
+        // Row 4: Variant::Null -> should not shred (it's a null variant, not an integer)
+        assert!(!result.is_null(4));
+        assert!(!value_field.is_null(4)); // should contain Variant::Null
+        assert_eq!(
+            Variant::new(metadata_field.value(4), value_field.value(4)),
+            Variant::Null
+        );
+        assert!(typed_value_field.is_null(4)); // typed_value should be null
+
+        // Row 5: 3i8 -> should shred successfully (int8->int64 conversion)
+        assert!(!result.is_null(5));
+        assert!(value_field.is_null(5)); // value should be null when shredded
+        assert!(!typed_value_field.is_null(5));
+        assert_eq!(typed_value_field.value(5), 3);
+    }
+
+    #[test]
+    fn test_primitive_different_target_types() {
+        let input = VariantArray::from_iter(vec![ // the same input is shredded against two different targets
+            Variant::from(42i32),          // row 0: integer
+            Variant::from(3.15f64),        // row 1: float
+            Variant::from("not_a_number"), // row 2: string
+        ]);
+
+        // Test Int32 target: only the integer row is expected to land in typed_value
+        let result_int32 = shred_variant(&input, &DataType::Int32).unwrap();
+        let typed_value_int32 = result_int32
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::Int32Array>()
+            .unwrap();
+        assert_eq!(typed_value_int32.value(0), 42);
+        assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32
+        assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32
+
+        // Test Float64 target: both numeric rows are expected to land in typed_value
+        let result_float64 = shred_variant(&input, &DataType::Float64).unwrap();
+        let typed_value_float64 = result_float64
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Float64Array>()
+            .unwrap();
+        assert_eq!(typed_value_float64.value(0), 42.0); // int converts to float
+        assert_eq!(typed_value_float64.value(1), 3.15); // float is kept as-is
+        assert!(typed_value_float64.is_null(2)); // string doesn't convert
+    }
+
+    #[test]
+    fn test_invalid_shredded_types_rejected() {
+        let input = VariantArray::from_iter([Variant::from(42)]); // single-row input; only the target type varies
+
+        let invalid_types = vec![ // target types that shred_variant is expected to reject
+            DataType::UInt8,
+            DataType::Float16,
+            DataType::Decimal256(38, 10),
+            DataType::Date64,
+            DataType::Time32(TimeUnit::Second),
+            DataType::Time64(TimeUnit::Nanosecond),
+            DataType::Timestamp(TimeUnit::Millisecond, None),
+            DataType::LargeBinary,
+            DataType::LargeUtf8,
+            DataType::FixedSizeBinary(17), // a width other than 16
+            DataType::Union(
+                UnionFields::from_fields(vec![
+                    Field::new("int_field", DataType::Int32, false),
+                    Field::new("str_field", DataType::Utf8, true),
+                ]),
+                UnionMode::Dense,
+            ),
+            DataType::Map(
+                Arc::new(Field::new(
+                    "entries",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new("key", DataType::Utf8, false),
+                        Field::new("value", DataType::Int32, true),
+                    ])),
+                    false,
+                )),
+                false,
+            ),
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            DataType::RunEndEncoded(
+                Arc::new(Field::new("run_ends", DataType::Int32, false)),
+                Arc::new(Field::new("values", DataType::Utf8, true)),
+            ),
+        ];
+
+        for data_type in invalid_types { // each type must fail with InvalidArgumentError specifically
+            let err = shred_variant(&input, &data_type).unwrap_err();
+            assert!(
+                matches!(err, ArrowError::InvalidArgumentError(_)),
+                "expected InvalidArgumentError for {:?}, got {:?}",
+                data_type,
+                err
+            );
+        }
+    }
+
+    #[test]
+    fn test_array_shredding_as_list() {
+        let input = build_variant_array(vec![
+            // Row 0: List of ints should shred entirely into typed_value
+            VariantRow::List(vec![
+                VariantValue::from(1i64),
+                VariantValue::from(2i64),
+                VariantValue::from(3i64),
+            ]),
+            // Row 1: Contains incompatible types, so the string and null elements fall back
+            VariantRow::List(vec![
+                VariantValue::from(1i64),
+                VariantValue::from("two"),
+                VariantValue::from(Variant::Null),
+            ]),
+            // Row 2: Not a list -> entire row falls back
+            VariantRow::Value(VariantValue::from("not a list")),
+            // Row 3: Array-level null propagates
+            VariantRow::Null,
+            // Row 4: Empty list exercises zero-length offsets
+            VariantRow::List(vec![]),
+        ]);
+        let list_schema = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 5);
+
+        assert_list_structure_and_elements::<Int64Type, i32>(
+            &result,
+            5, // expected_len
+            &[0, 3, 6, 6, 6, 6], // expected_offsets (6 entries for 5 rows)
+            &[Some(3), Some(3), None, None, Some(0)], // expected_sizes
+            &[ // expected_fallbacks, one per row
+                None,
+                None,
+                Some(Variant::from("not a list")),
+                Some(Variant::Null),
+                None,
+            ],
+            ( // expected_shredded_elements: (typed values, fallbacks) for the 6 elements of rows 0 and 1
+                &[Some(1), Some(2), Some(3), Some(1), None, None],
+                &[
+                    None,
+                    None,
+                    None,
+                    None,
+                    Some(Variant::from("two")),
+                    Some(Variant::Null),
+                ],
+            ),
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_as_large_list() {
+        let input = build_variant_array(vec![
+            // Row 0: List of ints shreds to typed_value
+            VariantRow::List(vec![VariantValue::from(1i64), VariantValue::from(2i64)]),
+            // Row 1: Not a list -> entire row falls back
+            VariantRow::Value(VariantValue::from("not a list")),
+            // Row 2: Empty list
+            VariantRow::List(vec![]),
+        ]);
+        let list_schema = DataType::LargeList(Arc::new(Field::new("item", DataType::Int64, true)));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 3);
+
+        assert_list_structure_and_elements::<Int64Type, i64>( // i64 offsets for LargeList
+            &result,
+            3, // expected_len
+            &[0, 2, 2, 2], // expected_offsets (4 entries for 3 rows)
+            &[Some(2), None, Some(0)], // expected_sizes
+            &[None, Some(Variant::from("not a list")), None], // expected_fallbacks, one per row
+            (&[Some(1), Some(2)], &[None, None]), // expected_shredded_elements: (typed values, fallbacks)
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_as_list_view() {
+        let input = build_variant_array(vec![
+            // Row 0: Standard list of ints
+            VariantRow::List(vec![
+                VariantValue::from(1i64),
+                VariantValue::from(2i64),
+                VariantValue::from(3i64),
+            ]),
+            // Row 1: List with incompatible types -> element fallback
+            VariantRow::List(vec![
+                VariantValue::from(1i64),
+                VariantValue::from("two"),
+                VariantValue::from(Variant::Null),
+            ]),
+            // Row 2: Not a list -> top-level fallback
+            VariantRow::Value(VariantValue::from("not a list")),
+            // Row 3: Top-level Null
+            VariantRow::Null,
+            // Row 4: Empty list
+            VariantRow::List(vec![]),
+        ]);
+        let list_schema = DataType::ListView(Arc::new(Field::new("item", DataType::Int64, true)));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 5);
+
+        assert_list_structure_and_elements::<Int64Type, i32>(
+            &result,
+            5, // expected_len
+            &[0, 3, 6, 6, 6], // expected_offsets (one entry per row)
+            &[Some(3), Some(3), None, None, Some(0)], // expected_sizes
+            &[ // expected_fallbacks, one per row
+                None,
+                None,
+                Some(Variant::from("not a list")),
+                Some(Variant::Null),
+                None,
+            ],
+            ( // expected_shredded_elements: (typed values, fallbacks) for the 6 elements of rows 0 and 1
+                &[Some(1), Some(2), Some(3), Some(1), None, None],
+                &[
+                    None,
+                    None,
+                    None,
+                    None,
+                    Some(Variant::from("two")),
+                    Some(Variant::Null),
+                ],
+            ),
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_as_large_list_view() {
+        let input = build_variant_array(vec![
+            // Row 0: List of ints shreds to typed_value
+            VariantRow::List(vec![VariantValue::from(1i64), VariantValue::from(2i64)]),
+            // Row 1: Not a list -> entire row falls back
+            VariantRow::Value(VariantValue::from("fallback")),
+            // Row 2: Empty list
+            VariantRow::List(vec![]),
+        ]);
+        let list_schema =
+            DataType::LargeListView(Arc::new(Field::new("item", DataType::Int64, true)));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 3);
+
+        assert_list_structure_and_elements::<Int64Type, i64>( // i64 offsets for LargeListView
+            &result,
+            3, // expected_len
+            &[0, 2, 2], // expected_offsets (one entry per row)
+            &[Some(2), None, Some(0)], // expected_sizes
+            &[None, Some(Variant::from("fallback")), None], // expected_fallbacks, one per row
+            (&[Some(1), Some(2)], &[None, None]), // expected_shredded_elements: (typed values, fallbacks)
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_as_fixed_size_list() {
+        let input = build_variant_array(vec![VariantRow::List(vec![ // a single row holding a three-element list
+            VariantValue::from(1i64),
+            VariantValue::from(2i64),
+            VariantValue::from(3i64),
+        ])]);
+        let list_schema =
+            DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 2);
+        let err = shred_variant(&input, &list_schema).unwrap_err(); // a FixedSizeList target must produce an error
+        assert_eq!( // pin the exact "not yet implemented" message
+            err.to_string(),
+            "Not yet implemented: Converting unshredded variant arrays to arrow fixed-size lists"
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_with_array_elements() {
+        let input = build_variant_array(vec![
+            // Row 0: [[1, 2], [3, 4], []] - clean nested lists
+            VariantRow::List(vec![
+                VariantValue::List(vec![VariantValue::from(1i64), VariantValue::from(2i64)]),
+                VariantValue::List(vec![VariantValue::from(3i64), VariantValue::from(4i64)]),
+                VariantValue::List(vec![]),
+            ]),
+            // Row 1: [[5, "bad", null], "not a list inner", null] - inner fallbacks
+            VariantRow::List(vec![
+                VariantValue::List(vec![
+                    VariantValue::from(5i64),
+                    VariantValue::from("bad"),
+                    VariantValue::from(Variant::Null),
+                ]),
+                VariantValue::from("not a list inner"),
+                VariantValue::Null,
+            ]),
+            // Row 2: "not a list" - top-level fallback
+            VariantRow::Value(VariantValue::from("not a list")),
+            // Row 3: null row
+            VariantRow::Null,
+        ]);
+        let inner_field = Arc::new(Field::new("item", DataType::Int64, true));
+        let inner_list_schema = DataType::List(inner_field);
+        let list_schema = DataType::List(Arc::new(Field::new( // target schema: List<List<Int64>>
+            "item",
+            inner_list_schema.clone(),
+            true,
+        )));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 4);
+
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+
+        assert_list_structure::<i32>( // outer list level
+            &result,
+            4, // expected_len
+            &[0, 3, 6, 6, 6], // expected_offsets (5 entries for 4 rows)
+            &[Some(3), Some(3), None, None], // expected_sizes
+            &[ // expected_fallbacks, one per row
+                None,
+                None,
+                Some(Variant::from("not a list")),
+                Some(Variant::Null),
+            ],
+        );
+
+        let outer_elements =
+            ShreddedVariantFieldArray::try_new(typed_value.values().as_ref()).unwrap();
+        assert_eq!(outer_elements.len(), 6); // 3 elements from row 0 plus 3 from row 1
+        let outer_values = outer_elements
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+        let outer_fallbacks = outer_elements.value_field().unwrap();
+
+        let outer_metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( // one empty-metadata entry per outer element
+            EMPTY_VARIANT_METADATA_BYTES,
+            outer_elements.len(),
+        ));
+        let outer_variant = VariantArray::from_parts( // re-wrap the outer elements so the list helper can check the inner lists
+            outer_metadata,
+            Some(outer_fallbacks.clone()),
+            Some(Arc::new(outer_values.clone())),
+            None,
+        );
+
+        assert_list_structure_and_elements::<Int64Type, i32>( // inner list level
+            &outer_variant,
+            outer_elements.len(),
+            &[0, 2, 4, 4, 7, 7, 7], // expected_offsets (7 entries for 6 outer elements)
+            &[Some(2), Some(2), Some(0), Some(3), None, None], // expected_sizes
+            &[ // expected_fallbacks, one per outer element
+                None,
+                None,
+                None,
+                None,
+                Some(Variant::from("not a list inner")),
+                Some(Variant::Null),
+            ],
+            ( // expected_shredded_elements: (typed values, fallbacks) for the 7 innermost elements
+                &[Some(1), Some(2), Some(3), Some(4), Some(5), None, None],
+                &[
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    Some(Variant::from("bad")),
+                    Some(Variant::Null),
+                ],
+            ),
+        );
+    }
+
+    #[test]
+    fn test_array_shredding_with_object_elements() {
+        let input = build_variant_array(vec![
+            // Row 0: [{"id": 1, "name": "Alice"}, {"id": null}] fully shreds
+            VariantRow::List(vec![
+                VariantValue::Object(vec![
+                    ("id", VariantValue::from(1i64)),
+                    ("name", VariantValue::from("Alice")),
+                ]),
+                VariantValue::Object(vec![("id", VariantValue::from(Variant::Null))]),
+            ]),
+            // Row 1: "not a list" -> fallback
+            VariantRow::Value(VariantValue::from("not a list")),
+            // Row 2: Null row
+            VariantRow::Null,
+        ]);
+
+        // Target schema is List<Struct<id:int64,name:utf8>>
+        let object_fields = Fields::from(vec![
+            Field::new("id", DataType::Int64, true),
+            Field::new("name", DataType::Utf8, true),
+        ]);
+        let list_schema = DataType::List(Arc::new(Field::new(
+            "item",
+            DataType::Struct(object_fields),
+            true,
+        )));
+        let result = shred_variant(&input, &list_schema).unwrap();
+        assert_eq!(result.len(), 3);
+
+        assert_list_structure::<i32>( // list level
+            &result,
+            3, // expected_len
+            &[0, 2, 2, 2], // expected_offsets (4 entries for 3 rows)
+            &[Some(2), None, None], // expected_sizes
+            &[None, Some(Variant::from("not a list")), Some(Variant::Null)], // expected_fallbacks, one per row
+        );
+
+        // Validate nested struct fields for each element of the list's child values array
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap();
+        let element_array =
+            ShreddedVariantFieldArray::try_new(typed_value.values().as_ref()).unwrap();
+        assert_eq!(element_array.len(), 2); // the two objects from row 0
+        let element_objects = element_array
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StructArray>()
+            .unwrap();
+
+        // Id field [1, Variant::Null]
+        let id_field =
+            ShreddedVariantFieldArray::try_new(element_objects.column_by_name("id").unwrap())
+                .unwrap();
+        let id_values = id_field.value_field().unwrap();
+        let id_typed_values = id_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        assert!(id_values.is_null(0)); // element 0: id shredded, so value is null
+        assert_eq!(id_typed_values.value(0), 1);
+        // element 1: null is stored as Variant::Null in values
+        assert!(id_values.is_valid(1));
+        assert_eq!(
+            Variant::new(EMPTY_VARIANT_METADATA_BYTES, id_values.value(1)),
+            Variant::Null
+        );
+        assert!(id_typed_values.is_null(1));
+
+        // Name field ["Alice", null]
+        let name_field =
+            ShreddedVariantFieldArray::try_new(element_objects.column_by_name("name").unwrap())
+                .unwrap();
+        let name_values = name_field.value_field().unwrap();
+        let name_typed_values = name_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<StringArray>()
+            .unwrap();
+        assert!(name_values.is_null(0)); // element 0: name shredded, so value is null
+        assert_eq!(name_typed_values.value(0), "Alice");
+        // element 1: no name provided, so both value and typed_value are null
+        assert!(name_values.is_null(1));
+        assert!(name_typed_values.is_null(1));
+    }
+
+    #[test]
+    fn test_object_shredding_comprehensive() {
+        let input = build_variant_array(vec![
+            // Row 0: Fully shredded object
+            VariantRow::Object(vec![
+                ("score", VariantValue::from(95.5f64)),
+                ("age", VariantValue::from(30i64)),
+            ]),
+            // Row 1: Partially shredded object (extra email field)
+            VariantRow::Object(vec![
+                ("score", VariantValue::from(87.2f64)),
+                ("age", VariantValue::from(25i64)),
+                ("email", VariantValue::from("bob@example.com")),
+            ]),
+            // Row 2: Missing field (no score)
+            VariantRow::Object(vec![("age", VariantValue::from(35i64))]),
+            // Row 3: Type mismatch (score is string, age is string)
+            VariantRow::Object(vec![
+                ("score", VariantValue::from("ninety-five")),
+                ("age", VariantValue::from("thirty")),
+            ]),
+            // Row 4: Non-object
+            VariantRow::Value(VariantValue::from("not an object")),
+            // Row 5: Empty object
+            VariantRow::Object(vec![]),
+            // Row 6: Null
+            VariantRow::Null,
+            // Row 7: Object with only "wrong" fields
+            VariantRow::Object(vec![("foo", VariantValue::from(10))]),
+            // Row 8: Object with one "right" and one "wrong" field
+            VariantRow::Object(vec![
+                ("score", VariantValue::from(66.67f64)),
+                ("foo", VariantValue::from(10)),
+            ]),
+        ]);
+
+        // Create target schema: struct<score: float64, age: int64>
+        // Both types are supported for shredding
+        let target_schema = ShreddedSchemaBuilder::default()
+            .with_path("score", &DataType::Float64)
+            .with_path("age", &DataType::Int64)
+            .build();
+
+        let result = shred_variant(&input, &target_schema).unwrap();
+
+        // Verify structure
+        assert!(result.value_field().is_some());
+        assert!(result.typed_value_field().is_some());
+        assert_eq!(result.len(), 9);
+
+        let metadata = result.metadata_field();
+
+        let value = result.value_field().unwrap();
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StructArray>()
+            .unwrap();
+
+        // Extract score and age fields from typed_value struct
+        let score_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("score").unwrap())
+                .unwrap();
+        let age_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("age").unwrap()).unwrap();
+
+        let score_value = score_field
+            .value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryViewArray>()
+            .unwrap();
+        let score_typed_value = score_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Float64Array>()
+            .unwrap();
+        let age_value = age_field
+            .value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryViewArray>()
+            .unwrap();
+        let age_typed_value = age_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+
+        // Set up exhaustive checking of all shredded columns and their nulls/values
+        struct ShreddedValue<'m, 'v, T> {
+            value: Option<Variant<'m, 'v>>,
+            typed_value: Option<T>,
+        }
+        struct ShreddedStruct<'m, 'v> {
+            score: ShreddedValue<'m, 'v, f64>,
+            age: ShreddedValue<'m, 'v, i64>,
+        }
+        fn get_value<'m, 'v>(
+            i: usize,
+            metadata: &'m BinaryViewArray,
+            value: &'v BinaryViewArray,
+        ) -> Variant<'m, 'v> {
+            Variant::new(metadata.value(i), value.value(i))
+        }
+        let expect = |i, expected_result: Option<ShreddedValue<ShreddedStruct>>| {
+            match expected_result {
+                Some(ShreddedValue {
+                    value: expected_value,
+                    typed_value: expected_typed_value,
+                }) => {
+                    assert!(result.is_valid(i));
+                    match expected_value {
+                        Some(expected_value) => {
+                            assert!(value.is_valid(i));
+                            assert_eq!(expected_value, get_value(i, metadata, value));
+                        }
+                        None => {
+                            assert!(value.is_null(i));
+                        }
+                    }
+                    match expected_typed_value {
+                        Some(ShreddedStruct {
+                            score: expected_score,
+                            age: expected_age,
+                        }) => {
+                            assert!(typed_value.is_valid(i));
+                            assert!(score_field.is_valid(i)); // non-nullable
+                            assert!(age_field.is_valid(i)); // non-nullable
+                            match expected_score.value {
+                                Some(expected_score_value) => {
+                                    assert!(score_value.is_valid(i));
+                                    assert_eq!(
+                                        expected_score_value,
+                                        get_value(i, metadata, score_value)
+                                    );
+                                }
+                                None => {
+                                    assert!(score_value.is_null(i));
+                                }
+                            }
+                            match expected_score.typed_value {
+                                Some(expected_score) => {
+                                    assert!(score_typed_value.is_valid(i));
+                                    assert_eq!(expected_score, score_typed_value.value(i));
+                                }
+                                None => {
+                                    assert!(score_typed_value.is_null(i));
+                                }
+                            }
+                            match expected_age.value {
+                                Some(expected_age_value) => {
+                                    assert!(age_value.is_valid(i));
+                                    assert_eq!(
+                                        expected_age_value,
+                                        get_value(i, metadata, age_value)
+                                    );
+                                }
+                                None => {
+                                    assert!(age_value.is_null(i));
+                                }
+                            }
+                            match expected_age.typed_value {
+                                Some(expected_age) => {
+                                    assert!(age_typed_value.is_valid(i));
+                                    assert_eq!(expected_age, age_typed_value.value(i));
+                                }
+                                None => {
+                                    assert!(age_typed_value.is_null(i));
+                                }
+                            }
+                        }
+                        None => {
+                            assert!(typed_value.is_null(i));
+                        }
+                    }
+                }
+                None => {
+                    assert!(result.is_null(i));
+                }
+            };
+        };
+
+        // Row 0: Fully shredded - both fields shred successfully
+        expect(
+            0,
+            Some(ShreddedValue {
+                value: None,
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: Some(95.5),
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: Some(30),
+                    },
+                }),
+            }),
+        );
+
+        // Row 1: Partially shredded - value contains extra email field
+        let mut builder = VariantBuilder::new();
+        builder
+            .new_object()
+            .with_field("email", "bob@example.com")
+            .finish();
+        let (m, v) = builder.finish();
+        let expected_value = Variant::new(&m, &v);
+
+        expect(
+            1,
+            Some(ShreddedValue {
+                value: Some(expected_value),
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: Some(87.2),
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: Some(25),
+                    },
+                }),
+            }),
+        );
+
+        // Row 2: Fully shredded -- missing score field
+        expect(
+            2,
+            Some(ShreddedValue {
+                value: None,
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: Some(35),
+                    },
+                }),
+            }),
+        );
+
+        // Row 3: Type mismatches - both score and age are strings
+        expect(
+            3,
+            Some(ShreddedValue {
+                value: None,
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: Some(Variant::from("ninety-five")),
+                        typed_value: None,
+                    },
+                    age: ShreddedValue {
+                        value: Some(Variant::from("thirty")),
+                        typed_value: None,
+                    },
+                }),
+            }),
+        );
+
+        // Row 4: Non-object - falls back to value field
+        expect(
+            4,
+            Some(ShreddedValue {
+                value: Some(Variant::from("not an object")),
+                typed_value: None,
+            }),
+        );
+
+        // Row 5: Empty object
+        expect(
+            5,
+            Some(ShreddedValue {
+                value: None,
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                }),
+            }),
+        );
+
+        // Row 6: Null
+        expect(6, None);
+
+        // Helper to correctly create a variant object using a row's existing metadata
+        let object_with_foo_field = |i| {
+            use parquet_variant::{ParentState, ValueBuilder, VariantMetadata};
+            let metadata = VariantMetadata::new(metadata.value(i));
+            let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
+            let mut value_builder = ValueBuilder::new();
+            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
+            ObjectBuilder::new(state, false)
+                .with_field("foo", 10)
+                .finish();
+            (metadata, value_builder.into_inner())
+        };
+
+        // Row 7: Object with only a "wrong" field
+        let (m, v) = object_with_foo_field(7);
+        expect(
+            7,
+            Some(ShreddedValue {
+                value: Some(Variant::new_with_metadata(m, &v)),
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                }),
+            }),
+        );
+
+        // Row 8: Object with one "wrong" and one "right" field
+        let (m, v) = object_with_foo_field(8);
+        expect(
+            8,
+            Some(ShreddedValue {
+                value: Some(Variant::new_with_metadata(m, &v)),
+                typed_value: Some(ShreddedStruct {
+                    score: ShreddedValue {
+                        value: None,
+                        typed_value: Some(66.67),
+                    },
+                    age: ShreddedValue {
+                        value: None,
+                        typed_value: None,
+                    },
+                }),
+            }),
+        );
+    }
+
+    /// Shreds objects against a struct schema whose only field, `scores`, is a `List<Int64>`,
+    /// then checks the top-level `value` / `typed_value` columns and the shredded `scores` column.
+    #[test]
+    fn test_object_shredding_with_array_field() {
+        let input = build_variant_array(vec![
+            // Row 0: Object with well-typed scores list
+            VariantRow::Object(vec![(
+                "scores",
+                VariantValue::List(vec![VariantValue::from(10i64), VariantValue::from(20i64)]),
+            )]),
+            // Row 1: Object whose scores list contains incompatible type
+            VariantRow::Object(vec![(
+                "scores",
+                VariantValue::List(vec![
+                    VariantValue::from("oops"),
+                    VariantValue::from(Variant::Null),
+                ]),
+            )]),
+            // Row 2: Object missing the scores field entirely
+            VariantRow::Object(vec![]),
+            // Row 3: Non-object fallback
+            VariantRow::Value(VariantValue::from("not an object")),
+            // Row 4: Top-level Null
+            VariantRow::Null,
+        ]);
+        // Target schema: Struct { scores: List<Int64> }, with nullable list items
+        let list_field = Arc::new(Field::new("item", DataType::Int64, true));
+        let inner_list_schema = DataType::List(list_field);
+        let schema = DataType::Struct(Fields::from(vec![Field::new(
+            "scores",
+            inner_list_schema.clone(),
+            true,
+        )]));
+
+        let result = shred_variant(&input, &schema).unwrap();
+        assert_eq!(result.len(), 5);
+
+        // Access base value/typed_value columns
+        let value_field = result.value_field().unwrap();
+        let typed_struct = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StructArray>()
+            .unwrap();
+
+        // Validate base value fallbacks for non-object rows: only row 3 (the non-object) keeps
+        // a residual value; every other row has a null `value`
+        assert!(value_field.is_null(0));
+        assert!(value_field.is_null(1));
+        assert!(value_field.is_null(2));
+        assert!(value_field.is_valid(3));
+        assert_eq!(
+            Variant::new(result.metadata_field().value(3), value_field.value(3)),
+            Variant::from("not an object")
+        );
+        assert!(value_field.is_null(4));
+
+        // Typed struct is valid for the three object rows and null for both the non-object
+        // fallback row and the top-level null row
+        assert!(typed_struct.is_valid(0));
+        assert!(typed_struct.is_valid(1));
+        assert!(typed_struct.is_valid(2));
+        assert!(typed_struct.is_null(3));
+        assert!(typed_struct.is_null(4));
+
+        // Drill into the scores field on the typed struct
+        let scores_field =
+            ShreddedVariantFieldArray::try_new(typed_struct.column_by_name("scores").unwrap())
+                .unwrap();
+        // Re-wrap the `scores` value/typed_value columns in a `VariantArray`, pairing every row
+        // with `EMPTY_VARIANT_METADATA_BYTES`, so the shared list assertion helper can be used.
+        // NOTE(review): the meaning of the slice arguments below (offsets, per-row lengths,
+        // per-row fallback values, per-element typed/fallback values) is inferred from their
+        // contents -- confirm against `assert_list_structure_and_elements`.
+        assert_list_structure_and_elements::<Int64Type, i32>(
+            &VariantArray::from_parts(
+                BinaryViewArray::from_iter_values(std::iter::repeat_n(
+                    EMPTY_VARIANT_METADATA_BYTES,
+                    scores_field.len(),
+                )),
+                Some(scores_field.value_field().unwrap().clone()),
+                Some(scores_field.typed_value_field().unwrap().clone()),
+                None,
+            ),
+            scores_field.len(),
+            &[0i32, 2, 4, 4, 4, 4],
+            &[Some(2), Some(2), None, None, None],
+            &[
+                None,
+                None,
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+            (
+                &[Some(10), Some(20), None, None],
+                &[None, None, Some(Variant::from("oops")), Some(Variant::Null)],
+            ),
+        );
+    }
+
+    /// Shreds the same three-field object with progressively wider schemas and checks whether
+    /// a residual `value` entry is left behind each time.
+    #[test]
+    fn test_object_different_schemas() {
+        // One object row carrying `id`, `age` and `score`
+        let input = build_variant_array(vec![VariantRow::Object(vec![
+            ("id", VariantValue::from(123i32)),
+            ("age", VariantValue::from(25i64)),
+            ("score", VariantValue::from(95.5f64)),
+        ])]);
+
+        // Schema with `id` only: the residual value should hold {"age": 25, "score": 95.5}
+        let id_only = ShreddedSchemaBuilder::default()
+            .with_path("id", &DataType::Int32)
+            .build();
+        let shredded_id = shred_variant(&input, &id_only).unwrap();
+        assert!(shredded_id.value_field().unwrap().is_valid(0));
+
+        // Schema with `id` and `age`: the residual value should hold {"score": 95.5}
+        let id_and_age = ShreddedSchemaBuilder::default()
+            .with_path("id", &DataType::Int32)
+            .with_path("age", &DataType::Int64)
+            .build();
+        let shredded_id_age = shred_variant(&input, &id_and_age).unwrap();
+        assert!(shredded_id_age.value_field().unwrap().is_valid(0));
+
+        // Schema covering every field: fully shredded, so nothing remains in `value`
+        let all_fields = ShreddedSchemaBuilder::default()
+            .with_path("id", &DataType::Int32)
+            .with_path("age", &DataType::Int64)
+            .with_path("score", &DataType::Float64)
+            .build();
+        let shredded_all = shred_variant(&input, &all_fields).unwrap();
+        assert!(shredded_all.value_field().unwrap().is_null(0));
+    }
+
+    /// Shreds objects whose `id` and `session_id` fields are targeted at `FixedSizeBinary(16)`
+    /// and checks, row by row, where each field ends up: in the field's `typed_value` when the
+    /// input is a UUID, in the field's `value` when it is some other type, and in neither when
+    /// the field is absent.
+    #[test]
+    fn test_uuid_shredding_in_objects() {
+        // Random UUIDs; the assertions below only compare against these same values
+        let mock_uuid_1 = Uuid::new_v4();
+        let mock_uuid_2 = Uuid::new_v4();
+        let mock_uuid_3 = Uuid::new_v4();
+
+        let input = build_variant_array(vec![
+            // Row 0: Fully shredded object with both UUID fields
+            VariantRow::Object(vec![
+                ("id", VariantValue::from(mock_uuid_1)),
+                ("session_id", VariantValue::from(mock_uuid_2)),
+            ]),
+            // Row 1: Partially shredded object - UUID fields plus extra field
+            VariantRow::Object(vec![
+                ("id", VariantValue::from(mock_uuid_2)),
+                ("session_id", VariantValue::from(mock_uuid_3)),
+                ("name", VariantValue::from("test_user")),
+            ]),
+            // Row 2: Missing UUID field (no session_id)
+            VariantRow::Object(vec![("id", VariantValue::from(mock_uuid_1))]),
+            // Row 3: Type mismatch - id is UUID but session_id is a string
+            VariantRow::Object(vec![
+                ("id", VariantValue::from(mock_uuid_3)),
+                ("session_id", VariantValue::from("not-a-uuid")),
+            ]),
+            // Row 4: Object with non-UUID value in id field
+            VariantRow::Object(vec![
+                ("id", VariantValue::from(12345i64)),
+                ("session_id", VariantValue::from(mock_uuid_1)),
+            ]),
+            // Row 5: Null
+            VariantRow::Null,
+        ]);
+
+        // Both fields are shredded as 16-byte fixed-size binary
+        let target_schema = ShreddedSchemaBuilder::default()
+            .with_path("id", DataType::FixedSizeBinary(16))
+            .with_path("session_id", DataType::FixedSizeBinary(16))
+            .build();
+
+        let result = shred_variant(&input, &target_schema).unwrap();
+
+        assert!(result.value_field().is_some());
+        assert!(result.typed_value_field().is_some());
+        assert_eq!(result.len(), 6);
+
+        // Top-level columns: shared metadata, residual value, and the typed struct
+        let metadata = result.metadata_field();
+        let value = result.value_field().unwrap();
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StructArray>()
+            .unwrap();
+
+        // Extract id and session_id fields from typed_value struct
+        let id_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("id").unwrap()).unwrap();
+        let session_id_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("session_id").unwrap())
+                .unwrap();
+
+        // Per-field `value` (binary view) and `typed_value` (fixed-size binary) columns
+        let id_value = id_field
+            .value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryViewArray>()
+            .unwrap();
+        let id_typed_value = id_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        let session_id_value = session_id_field
+            .value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryViewArray>()
+            .unwrap();
+        let session_id_typed_value = session_id_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+
+        // Row 0: Fully shredded - both UUID fields shred successfully
+        assert!(result.is_valid(0));
+
+        assert!(value.is_null(0)); // fully shredded, no remaining fields
+        assert!(id_value.is_null(0));
+        assert!(session_id_value.is_null(0));
+
+        assert!(typed_value.is_valid(0));
+        assert!(id_typed_value.is_valid(0));
+        assert!(session_id_typed_value.is_valid(0));
+
+        assert_eq!(id_typed_value.value(0), mock_uuid_1.as_bytes());
+        assert_eq!(session_id_typed_value.value(0), mock_uuid_2.as_bytes());
+
+        // Row 1: Partially shredded - value contains extra name field
+        assert!(result.is_valid(1));
+
+        assert!(value.is_valid(1)); // contains unshredded "name" field
+        assert!(typed_value.is_valid(1));
+
+        assert!(id_value.is_null(1));
+        assert!(id_typed_value.is_valid(1));
+        assert_eq!(id_typed_value.value(1), mock_uuid_2.as_bytes());
+
+        assert!(session_id_value.is_null(1));
+        assert!(session_id_typed_value.is_valid(1));
+        assert_eq!(session_id_typed_value.value(1), mock_uuid_3.as_bytes());
+
+        // Verify the value field contains the name field
+        let row_1_variant = Variant::new(metadata.value(1), value.value(1));
+        let Variant::Object(obj) = row_1_variant else {
+            panic!("Expected object");
+        };
+
+        assert_eq!(obj.get("name"), Some(Variant::from("test_user")));
+
+        // Row 2: Missing session_id field
+        assert!(result.is_valid(2));
+
+        assert!(value.is_null(2)); // fully shredded, no extra fields
+        assert!(typed_value.is_valid(2));
+
+        assert!(id_value.is_null(2));
+        assert!(id_typed_value.is_valid(2));
+        assert_eq!(id_typed_value.value(2), mock_uuid_1.as_bytes());
+
+        // An absent field is null in both of its columns
+        assert!(session_id_value.is_null(2));
+        assert!(session_id_typed_value.is_null(2)); // missing field
+
+        // Row 3: Type mismatch - session_id is a string, not UUID
+        assert!(result.is_valid(3));
+
+        assert!(value.is_null(3)); // no extra fields
+        assert!(typed_value.is_valid(3));
+
+        assert!(id_value.is_null(3));
+        assert!(id_typed_value.is_valid(3));
+        assert_eq!(id_typed_value.value(3), mock_uuid_3.as_bytes());
+
+        assert!(session_id_value.is_valid(3)); // type mismatch, stored in value
+        assert!(session_id_typed_value.is_null(3));
+        // The field-level fallback value is decoded with the row's top-level metadata
+        let session_id_variant = Variant::new(metadata.value(3), session_id_value.value(3));
+        assert_eq!(session_id_variant, Variant::from("not-a-uuid"));
+
+        // Row 4: Type mismatch - id is int64, not UUID
+        assert!(result.is_valid(4));
+
+        assert!(value.is_null(4)); // no extra fields
+        assert!(typed_value.is_valid(4));
+
+        assert!(id_value.is_valid(4)); // type mismatch, stored in value
+        assert!(id_typed_value.is_null(4));
+        let id_variant = Variant::new(metadata.value(4), id_value.value(4));
+        assert_eq!(id_variant, Variant::from(12345i64));
+
+        assert!(session_id_value.is_null(4));
+        assert!(session_id_typed_value.is_valid(4));
+        assert_eq!(session_id_typed_value.value(4), mock_uuid_1.as_bytes());
+
+        // Row 5: Null
+        assert!(result.is_null(5));
+    }
+
+    /// Checks structural properties of a primitive (`Int64`) shredding result: the three
+    /// columns are reachable by name, the metadata column is carried over unchanged, and no
+    /// valid row has both `value` and `typed_value` populated.
+    #[test]
+    fn test_spec_compliance() {
+        let input = VariantArray::from_iter(vec![Variant::from(42i64), Variant::from("hello")]);
+
+        let result = shred_variant(&input, &DataType::Int64).unwrap();
+
+        // Test field access by name (not position)
+        let inner_struct = result.inner();
+        assert!(inner_struct.column_by_name("metadata").is_some());
+        assert!(inner_struct.column_by_name("value").is_some());
+        assert!(inner_struct.column_by_name("typed_value").is_some());
+
+        // Test metadata preservation
+        assert_eq!(result.metadata_field().len(), input.metadata_field().len());
+        // BinaryViewArray doesn't have a .values() method, so compare the arrays directly:
+        // the output metadata must hold exactly the same entries as the input metadata
+        assert_eq!(result.metadata_field(), input.metadata_field());
+
+        // Test output structure correctness
+        assert_eq!(result.len(), input.len());
+        assert!(result.value_field().is_some());
+        assert!(result.typed_value_field().is_some());
+
+        // For primitive shredding, verify that value and typed_value are never both non-null
+        // (This rule applies to primitives; for objects, both can be non-null for partial shredding)
+        let value_field = result.value_field().unwrap();
+        let typed_value_field = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+
+        for i in 0..result.len() {
+            if !result.is_null(i) {
+                let value_is_null = value_field.is_null(i);
+                let typed_value_is_null = typed_value_field.is_null(i);
+                // For primitive shredding, at least one should be null
+                assert!(
+                    value_is_null || typed_value_is_null,
+                    "Row {}: both value and typed_value are non-null for primitive shredding",
+                    i
+                );
+            }
+        }
+    }
+
+    /// Two top-level paths produce a struct with one nullable field per path.
+    #[test]
+    fn test_variant_schema_builder_simple() {
+        let built = ShreddedSchemaBuilder::default()
+            .with_path("a", &DataType::Int64)
+            .with_path("b", &DataType::Float64)
+            .build();
+
+        let expected_fields = vec![
+            Field::new("a", DataType::Int64, true),
+            Field::new("b", DataType::Float64, true),
+        ];
+        assert_eq!(built, DataType::Struct(Fields::from(expected_fields)));
+    }
+
+    /// Dotted string paths (`b.c`, `b.d`) are grouped under a shared nested struct `b`.
+    #[test]
+    fn test_variant_schema_builder_nested() {
+        let built = ShreddedSchemaBuilder::default()
+            .with_path("a", &DataType::Int64)
+            .with_path("b.c", &DataType::Utf8)
+            .with_path("b.d", &DataType::Float64)
+            .build();
+
+        // Expected layout of the nested `b` struct
+        let nested_b = DataType::Struct(Fields::from(vec![
+            Field::new("c", DataType::Utf8, true),
+            Field::new("d", DataType::Float64, true),
+        ]));
+        let expected = DataType::Struct(Fields::from(vec![
+            Field::new("a", DataType::Int64, true),
+            Field::new("b", nested_b, true),
+        ]));
+
+        assert_eq!(built, expected);
+    }
+
+    /// A `VariantPath` built from a single element named "a.b" yields one field literally
+    /// named "a.b" rather than a nested struct.
+    #[test]
+    fn test_variant_schema_builder_with_path_variant_path_arg() {
+        let dotted_element = VariantPath::from_iter([VariantPathElement::from("a.b")]);
+        let built = ShreddedSchemaBuilder::default()
+            .with_path(dotted_element, &DataType::Int64)
+            .build();
+
+        let DataType::Struct(fields) = built else {
+            panic!("expected struct data type");
+        };
+
+        assert_eq!(fields.len(), 1);
+        let only_field = &fields[0];
+        assert_eq!(only_field.name(), "a.b");
+        assert_eq!(only_field.data_type(), &DataType::Int64);
+    }
+
+    /// Nullability can be supplied through a `Field` (whose own name is replaced by the path
+    /// name) or through a `(&DataType, bool)` tuple; both fields must come out non-nullable.
+    #[test]
+    fn test_variant_schema_builder_custom_nullability() {
+        let renamed_source = Arc::new(Field::new("should_be_renamed", DataType::Utf8, false));
+        let built = ShreddedSchemaBuilder::default()
+            .with_path("foo", renamed_source)
+            .with_path("bar", (&DataType::Int64, false))
+            .build();
+
+        let fields = match built {
+            DataType::Struct(fields) => fields,
+            _ => panic!("expected struct data type"),
+        };
+
+        // Each expected field is looked up by name, then checked for type and nullability
+        for (name, data_type) in [("foo", DataType::Utf8), ("bar", DataType::Int64)] {
+            let field = fields.iter().find(|f| f.name() == name).unwrap();
+            assert_eq!(field.data_type(), &data_type);
+            assert!(!field.is_nullable());
+        }
+    }
+
+    /// End-to-end check: a schema produced by `ShreddedSchemaBuilder` is passed to
+    /// `shred_variant`, and both the resulting Arrow data type and the shredded `time` /
+    /// `hostname` values are verified.
+    #[test]
+    fn test_variant_schema_builder_with_shred_variant() {
+        let input = build_variant_array(vec![
+            // Row 0: both shredded fields plus an `extra` field that is not in the schema
+            VariantRow::Object(vec![
+                ("time", VariantValue::from(1234567890i64)),
+                ("hostname", VariantValue::from("server1")),
+                ("extra", VariantValue::from(42)),
+            ]),
+            // Row 1: exactly the two shredded fields
+            VariantRow::Object(vec![
+                ("time", VariantValue::from(9876543210i64)),
+                ("hostname", VariantValue::from("server2")),
+            ]),
+            // Row 2: top-level null
+            VariantRow::Null,
+        ]);
+
+        let shredding_type = ShreddedSchemaBuilder::default()
+            .with_path("time", &DataType::Int64)
+            .with_path("hostname", &DataType::Utf8)
+            .build();
+
+        let result = shred_variant(&input, &shredding_type).unwrap();
+
+        // Expected layout: non-nullable `metadata`, nullable `value`, and a nullable
+        // `typed_value` struct whose fields are each a non-nullable {value, typed_value} pair.
+        // The expected field order is `hostname` then `time`, although the paths were added as
+        // `time` then `hostname`.
+        // NOTE(review): presumably the builder orders fields by name -- confirm.
+        assert_eq!(
+            result.data_type(),
+            &DataType::Struct(Fields::from(vec![
+                Field::new("metadata", DataType::BinaryView, false),
+                Field::new("value", DataType::BinaryView, true),
+                Field::new(
+                    "typed_value",
+                    DataType::Struct(Fields::from(vec![
+                        Field::new(
+                            "hostname",
+                            DataType::Struct(Fields::from(vec![
+                                Field::new("value", DataType::BinaryView, true),
+                                Field::new("typed_value", DataType::Utf8, true),
+                            ])),
+                            false,
+                        ),
+                        Field::new(
+                            "time",
+                            DataType::Struct(Fields::from(vec![
+                                Field::new("value", DataType::BinaryView, true),
+                                Field::new("typed_value", DataType::Int64, true),
+                            ])),
+                            false,
+                        ),
+                    ])),
+                    true,
+                ),
+            ]))
+        );
+
+        assert_eq!(result.len(), 3);
+        assert!(result.typed_value_field().is_some());
+
+        let typed_value = result
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StructArray>()
+            .unwrap();
+
+        // Shredded columns are looked up by name, so the checks below do not depend on order
+        let time_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("time").unwrap())
+                .unwrap();
+        let hostname_field =
+            ShreddedVariantFieldArray::try_new(typed_value.column_by_name("hostname").unwrap())
+                .unwrap();
+
+        let time_typed = time_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+        let hostname_typed = hostname_field
+            .typed_value_field()
+            .unwrap()
+            .as_any()
+            .downcast_ref::<arrow::array::StringArray>()
+            .unwrap();
+
+        // Row 0
+        assert!(!result.is_null(0));
+        assert_eq!(time_typed.value(0), 1234567890);
+        assert_eq!(hostname_typed.value(0), "server1");
+
+        // Row 1
+        assert!(!result.is_null(1));
+        assert_eq!(time_typed.value(1), 9876543210);
+        assert_eq!(hostname_typed.value(1), "server2");
+
+        // Row 2
+        assert!(result.is_null(2));
+    }
+
+    /// Registering the same path twice keeps the type given last.
+    #[test]
+    fn test_variant_schema_builder_conflicting_path() {
+        let built = ShreddedSchemaBuilder::default()
+            .with_path("a", &DataType::Int64)
+            .with_path("a", &DataType::Float64)
+            .build();
+
+        // Only the second registration (Float64) survives
+        let surviving_field = Field::new("a", DataType::Float64, true);
+        assert_eq!(built, DataType::Struct(Fields::from(vec![surviving_field])));
+    }
+
+    /// A path with no elements targets the root, so the built type is the given type itself.
+    #[test]
+    fn test_variant_schema_builder_root_path() {
+        let built = ShreddedSchemaBuilder::default()
+            .with_path(VariantPath::new(vec![]), &DataType::Int64)
+            .build();
+
+        assert_eq!(built, DataType::Int64);
+    }
+
+    /// An empty string path builds to the given type itself, with no wrapping struct.
+    #[test]
+    fn test_variant_schema_builder_empty_path() {
+        let builder = ShreddedSchemaBuilder::default().with_path("", &DataType::Int64);
+        let built = builder.build();
+
+        assert_eq!(built, DataType::Int64);
+    }
+
+    /// A builder with no paths registered builds `DataType::Null`.
+    #[test]
+    fn test_variant_schema_builder_default() {
+        let untouched_builder = ShreddedSchemaBuilder::default();
+        assert_eq!(untouched_builder.build(), DataType::Null);
+    }
+}
diff --git a/parquet-variant-compute/src/to_json.rs b/parquet-variant-compute/src/to_json.rs
new file mode 100644
index 000000000000..efe50c2a0987
--- /dev/null
+++ b/parquet-variant-compute/src/to_json.rs
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for transforming a batch of Variants represented as
+//! STRUCT<metadata: BINARY, value: BINARY> into a batch of JSON strings.
+
+use arrow::array::{Array, ArrayRef, BinaryArray, BooleanBufferBuilder, StringArray, StructArray};
+use arrow::buffer::{Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
+use arrow::datatypes::DataType;
+use arrow_schema::ArrowError;
+use parquet_variant::Variant;
+use parquet_variant_json::VariantToJson;
+
+/// Transform a batch of Variant represented as STRUCT<metadata: BINARY, value: BINARY> to a batch
+/// of JSON strings where nulls are preserved: a null struct row produces a null output string.
+/// The variant bytes in every non-null input row must be valid.
+///
+/// # Errors
+///
+/// Returns [`ArrowError::CastError`] if `input` is not a [`StructArray`] with exactly two
+/// `Binary` fields, propagates any error reported while writing a variant as JSON, and returns
+/// [`ArrowError::ComputeError`] if the combined JSON output is too large to be addressed by the
+/// `i32` offsets of a [`StringArray`].
+pub fn variant_to_json(input: &ArrayRef) -> Result<StringArray, ArrowError> {
+    let struct_array = input
+        .as_any()
+        .downcast_ref::<StructArray>()
+        .ok_or_else(|| ArrowError::CastError("Expected StructArray as input".into()))?;
+
+    // Validate field types
+    match struct_array.data_type() {
+        DataType::Struct(inner_fields) => {
+            if inner_fields.len() != 2
+                || inner_fields[0].data_type() != &DataType::Binary
+                || inner_fields[1].data_type() != &DataType::Binary
+            {
+                return Err(ArrowError::CastError(
+                    "Expected struct with two binary fields".into(),
+                ));
+            }
+        }
+        _ => {
+            return Err(ArrowError::CastError(
+                "Expected StructArray with known fields".into(),
+            ));
+        }
+    }
+
+    // Fields are taken by position: column 0 is the metadata, column 1 is the value
+    let metadata_array = struct_array
+        .column(0)
+        .as_any()
+        .downcast_ref::<BinaryArray>()
+        .ok_or_else(|| ArrowError::CastError("Expected BinaryArray for 'metadata'".into()))?;
+
+    let value_array = struct_array
+        .column(1)
+        .as_any()
+        .downcast_ref::<BinaryArray>()
+        .ok_or_else(|| ArrowError::CastError("Expected BinaryArray for 'value'".into()))?;
+
+    // The output StringArray is assembled by hand from a data buffer, offsets and validity.
+    // The size per JSON string is assumed to be 128 bytes. If this holds true, resizing could be
+    // minimized for performance.
+    let num_rows = struct_array.len();
+    let mut json_buffer: Vec<u8> = Vec::with_capacity(num_rows * 128);
+    let mut offsets: Vec<i32> = Vec::with_capacity(num_rows + 1);
+    let mut validity = BooleanBufferBuilder::new(num_rows);
+    let mut current_offset: i32 = 0;
+    offsets.push(current_offset);
+
+    for i in 0..num_rows {
+        if struct_array.is_null(i) {
+            // Null rows contribute no bytes, so the previous end offset is repeated below
+            validity.append(false);
+        } else {
+            let variant = Variant::new(metadata_array.value(i), value_array.value(i));
+            variant.to_json(&mut json_buffer)?;
+            // The end offset of this row is the total number of bytes written so far. Use a
+            // checked conversion so that more than i32::MAX bytes of output is reported as an
+            // error instead of silently wrapping into a corrupt offset.
+            current_offset = i32::try_from(json_buffer.len()).map_err(|_| {
+                ArrowError::ComputeError(format!(
+                    "JSON output of {} bytes exceeds the maximum StringArray offset",
+                    json_buffer.len()
+                ))
+            })?;
+            validity.append(true);
+        }
+        offsets.push(current_offset);
+    }
+
+    let offsets_buffer = OffsetBuffer::new(ScalarBuffer::from(offsets));
+    let value_buffer = Buffer::from_vec(json_buffer);
+    let null_buffer = NullBuffer::new(validity.finish());
+
+    StringArray::try_new(offsets_buffer, value_buffer, Some(null_buffer))
+}
+
+#[cfg(test)]
+mod test {
+    use crate::variant_to_json;
+    use arrow::array::{Array, ArrayRef, BinaryBuilder, BooleanBufferBuilder, StructArray};
+    use arrow::buffer::NullBuffer;
+    use arrow::datatypes::DataType;
+    use arrow::datatypes::Field;
+    use arrow_schema::Fields;
+    use std::sync::Arc;
+
+    /// Converts a five-row variant struct (three valid rows, two null rows) and checks both the
+    /// JSON text and the null positions of the result.
+    #[test]
+    fn test_variant_to_json() {
+        let mut metadata_bytes = BinaryBuilder::new();
+        let mut value_bytes = BinaryBuilder::new();
+
+        // Row 0: metadata [1, 0, 0], value [12, 0] -> "0"
+        metadata_bytes.append_value([1, 0, 0]);
+        value_bytes.append_value([12, 0]);
+
+        // Row 1: null
+        metadata_bytes.append_null();
+        value_bytes.append_null();
+
+        // Row 2: metadata [1, 1, 0, 1, 97], value [2, 1, 0, 0, 2, 12, 32] -> "{\"a\":32}"
+        metadata_bytes.append_value([1, 1, 0, 1, 97]);
+        value_bytes.append_value([2, 1, 0, 0, 2, 12, 32]);
+
+        // Row 3: metadata [1, 0, 0], value [0] -> "null"
+        metadata_bytes.append_value([1, 0, 0]);
+        value_bytes.append_value([0]);
+
+        // Row 4: null
+        metadata_bytes.append_null();
+        value_bytes.append_null();
+
+        let metadata_column = Arc::new(metadata_bytes.finish()) as ArrayRef;
+        let value_column = Arc::new(value_bytes.finish()) as ArrayRef;
+
+        // The struct's null bitmap is built explicitly: a row is valid only when both of its
+        // children are valid
+        let row_count = value_column.len();
+        let mut struct_validity = BooleanBufferBuilder::new(row_count);
+        for row in 0..row_count {
+            struct_validity.append(metadata_column.is_valid(row) && value_column.is_valid(row));
+        }
+
+        let input: ArrayRef = Arc::new(StructArray::new(
+            Fields::from(vec![
+                Field::new("metadata", DataType::Binary, true),
+                Field::new("value", DataType::Binary, true),
+            ]),
+            vec![metadata_column, value_column],
+            Some(NullBuffer::new(struct_validity.finish())),
+        ));
+
+        let json = variant_to_json(&input).unwrap();
+
+        // Null rows come back as `None`, valid rows as their JSON text
+        let actual: Vec<Option<&str>> = json.iter().collect();
+        assert_eq!(
+            actual,
+            vec![Some("0"), None, Some("{\"a\":32}"), Some("null"), None]
+        );
+    }
+}
diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs
new file mode 100644
index 000000000000..6a0a743c9029
--- /dev/null
+++ b/parquet-variant-compute/src/type_conversion.rs
@@ -0,0 +1,309 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Helpers for converting between typed Arrow values and `Variant` values, in both directions.
+
+use arrow::compute::{DecimalCast, rescale_decimal};
+use arrow::datatypes::{
+    self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
+    DecimalType,
+};
+use chrono::Timelike;
+use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
+
+/// Extension trait for Arrow primitive types whose native value can be extracted from a `Variant`.
+pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
+    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>; // `None` if nothing could be extracted
+}
+
+/// Extension trait for Arrow timestamp types whose native value can be extracted from a `Variant`.
+/// [`PrimitiveFromVariant`] cannot be reused because each timestamp type needs _two_ impls, selected
+/// by the `NTZ` const parameter (in this file, `NTZ = true` impls read the `as_timestamp_ntz_*` accessors).
+pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
+    fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
+}
+
+/// Generates a `PrimitiveFromVariant` impl for `$arrow_type` that reads the value through the
+/// `Variant` accessor `$variant_method`, then optionally maps it with `$cast_fn` via `and_then`.
+/// A `None` from either step becomes the `None` returned by `from_variant`.
+macro_rules! impl_primitive_from_variant {
+    ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
+        impl PrimitiveFromVariant for $arrow_type {
+            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
+                variant.$variant_method()$(.and_then($cast_fn))?
+            }
+        }
+    };
+}
+
+macro_rules! impl_timestamp_from_variant { // generates one `TimestampFromVariant<NTZ>` impl per invocation
+    ($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => {
+        impl TimestampFromVariant<{ $ntz }> for $timestamp_type {
+            fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
+                variant.$variant_method().and_then($cast_fn) // `$cast_fn` may itself yield `None`
+            }
+        }
+    };
+}
+
+impl_primitive_from_variant!(datatypes::Int32Type, as_int32); // numeric types: the accessor result is returned as-is
+impl_primitive_from_variant!(datatypes::Int16Type, as_int16);
+impl_primitive_from_variant!(datatypes::Int8Type, as_int8);
+impl_primitive_from_variant!(datatypes::Int64Type, as_int64);
+impl_primitive_from_variant!(datatypes::UInt8Type, as_u8);
+impl_primitive_from_variant!(datatypes::UInt16Type, as_u16);
+impl_primitive_from_variant!(datatypes::UInt32Type, as_u32);
+impl_primitive_from_variant!(datatypes::UInt64Type, as_u64);
+impl_primitive_from_variant!(datatypes::Float16Type, as_f16);
+impl_primitive_from_variant!(datatypes::Float32Type, as_f32);
+impl_primitive_from_variant!(datatypes::Float64Type, as_f64);
+impl_primitive_from_variant!(datatypes::Date32Type, as_naive_date, |v| {
+    Some(datatypes::Date32Type::from_naive_date(v)) // date accessor result -> Date32 native value
+});
+impl_primitive_from_variant!(datatypes::Date64Type, as_naive_date, |v| {
+    Some(datatypes::Date64Type::from_naive_date(v)) // same date accessor -> Date64 native value
+});
+impl_primitive_from_variant!(datatypes::Time32SecondType, as_time_utc, |v| {
+    // Second resolution: a sub-second component cannot be represented,
+    // so such values are rejected with None instead of being truncated.
+    match v.nanosecond() {
+        0 => Some(v.num_seconds_from_midnight() as i32),
+        _ => None,
+    }
+});
+impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| {
+    // Millisecond resolution: only values without a sub-millisecond remainder convert
+    if v.nanosecond() % 1_000_000 == 0 {
+        Some((v.num_seconds_from_midnight() * 1_000) as i32 + (v.nanosecond() / 1_000_000) as i32)
+    } else {
+        None
+    }
+});
+impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
+    // Widen to i64 *before* scaling: in u32, seconds * 1_000_000 overflows past about 01:11:34
+    Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + (v.nanosecond() / 1_000) as i64)
+});
+impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| {
+    Some(v.num_seconds_from_midnight() as i64 * 1_000_000_000 + v.nanosecond() as i64)
+});
+impl_timestamp_from_variant!(
+    datatypes::TimestampSecondType,
+    as_timestamp_ntz_nanos,
+    ntz = true,
+    |timestamp| {
+        // Second resolution: a non-zero sub-second part cannot be stored,
+        // so the conversion yields None rather than truncating.
+        match timestamp.nanosecond() {
+            0 => Self::make_value(timestamp),
+            _ => None,
+        }
+    }
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampSecondType,
+    as_timestamp_nanos,
+    ntz = false,
+    |timestamp| {
+        // Same sub-second check as the NTZ impl; the timezone-aware value is
+        // handed to make_value as its naive UTC datetime.
+        match timestamp.nanosecond() {
+            0 => Self::make_value(timestamp.naive_utc()),
+            _ => None,
+        }
+    }
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampMillisecondType,
+    as_timestamp_ntz_nanos,
+    ntz = true,
+    |timestamp| {
+        // Millisecond resolution: only values without a sub-millisecond remainder convert
+        if timestamp.nanosecond() % 1_000_000 == 0 {
+            Self::make_value(timestamp)
+        } else {
+            None
+        }
+    }
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampMillisecondType,
+    as_timestamp_nanos,
+    ntz = false,
+    |timestamp| {
+        // Same remainder check; the value is converted through its naive UTC datetime
+        if timestamp.nanosecond() % 1_000_000 == 0 {
+            Self::make_value(timestamp.naive_utc())
+        } else {
+            None
+        }
+    }
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampMicrosecondType,
+    as_timestamp_ntz_micros,
+    ntz = true,
+    Self::make_value, // no remainder check here, unlike the second/millisecond impls
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampMicrosecondType,
+    as_timestamp_micros,
+    ntz = false,
+    |timestamp| Self::make_value(timestamp.naive_utc()) // converted through its naive UTC datetime
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampNanosecondType,
+    as_timestamp_ntz_nanos,
+    ntz = true,
+    Self::make_value // no remainder check here either
+);
+impl_timestamp_from_variant!(
+    datatypes::TimestampNanosecondType,
+    as_timestamp_nanos,
+    ntz = false,
+    |timestamp| Self::make_value(timestamp.naive_utc()) // converted through its naive UTC datetime
+);
+
+/// Returns the unscaled integer representation for Arrow decimal type `O` from a `Variant`,
+/// rescaled to the target (`precision`, `scale`) with `rescale_decimal`.
+///
+/// - `Int8/16/32` are rescaled as `Decimal32Type` and `Int64` as `Decimal64Type`, all with scale 0.
+/// - `Decimal4/8/16` are rescaled with their own scale (`d.scale()`).
+/// - The input precision passed is always the matching variant decimal's `MAX_PRECISION`.
+/// - Returns `None` for every other variant, or whenever `rescale_decimal` returns `None`.
+/// - NOTE(review): `i32`/`i64` may hold more digits than that `MAX_PRECISION` (presumably 9 / 18);
+///   confirm `rescale_decimal` still rejects such values when they exceed `precision`.
+pub(crate) fn variant_to_unscaled_decimal<O>(
+    variant: &Variant<'_, '_>,
+    precision: u8,
+    scale: i8,
+) -> Option<O::Native>
+where
+    O: DecimalType,
+    O::Native: DecimalCast,
+{
+    match variant {
+        Variant::Int8(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int16(i) => rescale_decimal::<Decimal32Type, O>(
+            *i as i32,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int32(i) => rescale_decimal::<Decimal32Type, O>(
+            *i,
+            VariantDecimal4::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Int64(i) => rescale_decimal::<Decimal64Type, O>(
+            *i,
+            VariantDecimal8::MAX_PRECISION,
+            0,
+            precision,
+            scale,
+        ),
+        Variant::Decimal4(d) => rescale_decimal::<Decimal32Type, O>(
+            d.integer(),
+            VariantDecimal4::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal8(d) => rescale_decimal::<Decimal64Type, O>(
+            d.integer(),
+            VariantDecimal8::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        Variant::Decimal16(d) => rescale_decimal::<Decimal128Type, O>(
+            d.integer(),
+            VariantDecimal16::MAX_PRECISION,
+            d.scale() as i8,
+            precision,
+            scale,
+        ),
+        _ => None,
+    }
+}
+
+/// Convert the value of `$array` at `$index` into a `Variant` (`Variant::Null` for null slots).
+macro_rules! non_generic_conversion_single_value {
+    ($array:expr, $cast_fn:expr, $index:expr) => {{
+        let source = $array;
+        if !source.is_null($index) {
+            let converted = $cast_fn(source.value($index));
+            Ok(Variant::from(converted))
+        } else {
+            Ok(Variant::Null)
+        }
+    }};
+}
+pub(crate) use non_generic_conversion_single_value;
+
+/// Convert the value at `$index` of `$input` into a `Variant`. `$input.$method::<$t>()` performs
+/// the downcast to a concrete array type and `$cast_fn` transforms the element before wrapping;
+/// the work is delegated to `non_generic_conversion_single_value!` (null slot -> `Variant::Null`).
+macro_rules! generic_conversion_single_value {
+    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
+        $crate::type_conversion::non_generic_conversion_single_value!(
+            $input.$method::<$t>(),
+            $cast_fn,
+            $index
+        )
+    }};
+}
+pub(crate) use generic_conversion_single_value;
+
+/// Like `generic_conversion_single_value!`, but `$cast_fn` is fallible: its error is wrapped in
+/// `ArrowError::CastError`. NOTE: the slot at `$index` is read without a null check.
+macro_rules! generic_conversion_single_value_with_result {
+    ($t:ty, $method:ident, $cast_fn:expr, $input:expr, $index:expr) => {{
+        let typed = $input.$method::<$t>();
+        match ($cast_fn)(typed.value($index)) {
+            Ok(converted) => Ok(Variant::from(converted)),
+            Err(e) => Err(ArrowError::CastError(format!(
+                "Cast failed at index {idx} (array type: {ty}): {e}",
+                idx = $index,
+                ty = <$t as ::arrow::datatypes::ArrowPrimitiveType>::DATA_TYPE
+            ))),
+        }
+    }};
+}
+pub(crate) use generic_conversion_single_value_with_result;
+
+/// Convert the value at `$index` of the primitive array `$input` into a `Variant`, unchanged.
+macro_rules! primitive_conversion_single_value {
+    ($t:ty, $input:expr, $index:expr) => {{
+        $crate::type_conversion::generic_conversion_single_value!(
+            $t,
+            as_primitive,
+            |v| v,
+            $input,
+            $index
+        )
+    }};
+}
+pub(crate) use primitive_conversion_single_value;
diff --git a/parquet-variant-compute/src/unshred_variant.rs b/parquet-variant-compute/src/unshred_variant.rs
new file mode 100644
index 000000000000..c20bb697903c
--- /dev/null
+++ b/parquet-variant-compute/src/unshred_variant.rs
@@ -0,0 +1,666 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for unshredding VariantArray by folding typed_value columns back into the value column.
+
+use crate::arrow_to_variant::ListLikeArray;
+use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder};
+use arrow::array::{
+    Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray,
+    GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray,
+};
+use arrow::buffer::NullBuffer;
+use arrow::datatypes::{
+    ArrowPrimitiveType, DataType, Date32Type, Decimal32Type, Decimal64Type, Decimal128Type,
+    DecimalType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
+    Time64MicrosecondType, TimeUnit, TimestampMicrosecondType, TimestampNanosecondType,
+};
+use arrow::error::{ArrowError, Result};
+use arrow::temporal_conversions::time64us_to_time;
+use chrono::{DateTime, Utc};
+use indexmap::IndexMap;
+use parquet_variant::{
+    ObjectFieldBuilder, Variant, VariantBuilderExt, VariantDecimal4, VariantDecimal8,
+    VariantDecimal16, VariantDecimalType, VariantMetadata,
+};
+use std::marker::PhantomData;
+use uuid::Uuid;
+
+/// Folds every (nested) typed_value column of a VariantArray back into binary variant form,
+/// producing an array whose data lives entirely in the value column.
+///
+/// An array that already has a value column and no typed_value column is returned as a clone.
+/// Rows that are null in `array` remain null in the result.
+///
+/// # Arguments
+/// * `array` - The VariantArray to unshred
+///
+/// # Returns
+/// A new VariantArray with all data in the value column and no typed_value column
+///
+/// # Errors
+/// - If the shredded data contains spec violations (e.g., field name conflicts, or a value
+///   and a typed_value that are both non-null where partial shredding is not allowed)
+/// - If unsupported data types are encountered in typed_value columns
+pub fn unshred_variant(array: &VariantArray) -> Result<VariantArray> {
+    // Fast path: a value column without a typed_value column is already unshredded.
+    let already_unshredded = array.typed_value_field().is_none() && array.value_field().is_some();
+    if already_unshredded {
+        return Ok(array.clone());
+    }
+
+    // A missing value *and* typed_value at top level is technically invalid, but the shredding
+    // spec requires emitting `Variant::Null` for a missing required value, hence the fallback.
+    let nulls = array.nulls();
+    let mut unshredder = UnshredVariantRowBuilder::try_new_opt(array.shredding_state().borrow())?
+        .unwrap_or_else(|| UnshredVariantRowBuilder::null(nulls));
+
+    let metadata_column = array.metadata_field();
+    let mut values = VariantValueArrayBuilder::new(array.len());
+    for row in 0..array.len() {
+        // Top-level null rows stay null in the rebuilt value column.
+        if array.is_null(row) {
+            values.append_null();
+            continue;
+        }
+        // Each row is re-encoded against its own metadata entry.
+        let row_metadata = VariantMetadata::new(metadata_column.value(row));
+        let mut row_sink = values.builder_ext(&row_metadata);
+        unshredder.append_row(&mut row_sink, &row_metadata, row)?;
+    }
+
+    // Reassemble: same metadata and null buffer, merged values, and no typed_value column.
+    let merged = values.build()?;
+    Ok(VariantArray::from_parts(metadata_column.clone(), Some(merged), None, nulls.cloned()))
+}
+
+/// Per-row unshredder for a shredded VariantArray: one variant per supported typed_value type.
+enum UnshredVariantRowBuilder<'a> {
+    PrimitiveInt8(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int8Type>>),
+    PrimitiveInt16(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int16Type>>),
+    PrimitiveInt32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int32Type>>),
+    PrimitiveInt64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Int64Type>>),
+    PrimitiveFloat32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float32Type>>),
+    PrimitiveFloat64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Float64Type>>),
+    Decimal32(DecimalUnshredRowBuilder<'a, Decimal32Type, VariantDecimal4>),
+    Decimal64(DecimalUnshredRowBuilder<'a, Decimal64Type, VariantDecimal8>),
+    Decimal128(DecimalUnshredRowBuilder<'a, Decimal128Type, VariantDecimal16>),
+    PrimitiveDate32(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Date32Type>>),
+    PrimitiveTime64(UnshredPrimitiveRowBuilder<'a, PrimitiveArray<Time64MicrosecondType>>),
+    TimestampMicrosecond(TimestampUnshredRowBuilder<'a, TimestampMicrosecondType>),
+    TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>),
+    PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>),
+    PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>),
+    PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>),
+    PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>), // built for FixedSizeBinary(16) only
+    List(ListUnshredVariantBuilder<'a, GenericListArray<i32>>),
+    LargeList(ListUnshredVariantBuilder<'a, GenericListArray<i64>>),
+    ListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i32>>),
+    LargeListView(ListUnshredVariantBuilder<'a, GenericListViewArray<i64>>),
+    FixedSizeList(ListUnshredVariantBuilder<'a, FixedSizeListArray>),
+    Struct(StructUnshredVariantBuilder<'a>),
+    ValueOnly(ValueOnlyUnshredVariantBuilder<'a>), // value column present, no typed_value column
+    Null(NullUnshredVariantBuilder<'a>), // neither value nor typed_value column
+}
+
+impl<'a> UnshredVariantRowBuilder<'a> {
+    /// Creates the builder for the None/None case: each row becomes a null or `Variant::Null`.
+    fn null(nulls: Option<&'a NullBuffer>) -> Self {
+        Self::Null(NullUnshredVariantBuilder::new(nulls))
+    }
+
+    /// Appends the row at `index` to `builder` by dispatching to the wrapped type-specific builder.
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        match self {
+            Self::PrimitiveInt8(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveInt16(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveInt32(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveInt64(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveFloat32(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveFloat64(b) => b.append_row(builder, metadata, index),
+            Self::Decimal32(b) => b.append_row(builder, metadata, index),
+            Self::Decimal64(b) => b.append_row(builder, metadata, index),
+            Self::Decimal128(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveDate32(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveTime64(b) => b.append_row(builder, metadata, index),
+            Self::TimestampMicrosecond(b) => b.append_row(builder, metadata, index),
+            Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveString(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index),
+            Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index),
+            Self::List(b) => b.append_row(builder, metadata, index),
+            Self::LargeList(b) => b.append_row(builder, metadata, index),
+            Self::ListView(b) => b.append_row(builder, metadata, index),
+            Self::LargeListView(b) => b.append_row(builder, metadata, index),
+            Self::FixedSizeList(b) => b.append_row(builder, metadata, index),
+            Self::Struct(b) => b.append_row(builder, metadata, index),
+            Self::ValueOnly(b) => b.append_row(builder, metadata, index),
+            Self::Null(b) => b.append_row(builder, metadata, index),
+        }
+    }
+
+    /// Creates the row builder matching the typed_value column's data type, or a value-only builder.
+    /// Returns `Ok(None)` for the None/None case - caller decides how to handle based on context
+    fn try_new_opt(shredding_state: BorrowedShreddingState<'a>) -> Result<Option<Self>> {
+        let value = shredding_state.value_field();
+        let typed_value = shredding_state.typed_value_field();
+        let Some(typed_value) = typed_value else {
+            // No typed_value: copy the value across directly, if present. Else caller decides what to do.
+            return Ok(value.map(|v| Self::ValueOnly(ValueOnlyUnshredVariantBuilder::new(v))));
+        };
+
+        // Has typed_value -> determine type and create appropriate builder.
+        macro_rules! primitive_builder {
+            ($enum_variant:ident, $cast_fn:ident) => {
+                Self::$enum_variant(UnshredPrimitiveRowBuilder::new(
+                    value,
+                    typed_value.$cast_fn(),
+                ))
+            };
+        }
+
+        let builder = match typed_value.data_type() {
+            DataType::Int8 => primitive_builder!(PrimitiveInt8, as_primitive),
+            DataType::Int16 => primitive_builder!(PrimitiveInt16, as_primitive),
+            DataType::Int32 => primitive_builder!(PrimitiveInt32, as_primitive),
+            DataType::Int64 => primitive_builder!(PrimitiveInt64, as_primitive),
+            DataType::Float32 => primitive_builder!(PrimitiveFloat32, as_primitive),
+            DataType::Float64 => primitive_builder!(PrimitiveFloat64, as_primitive),
+            DataType::Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => {
+                Self::Decimal32(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
+            }
+            DataType::Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
+                Self::Decimal64(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
+            }
+            DataType::Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => {
+                Self::Decimal128(DecimalUnshredRowBuilder::new(value, typed_value, *s as _))
+            }
+            DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
+            | DataType::Decimal128(_, _)
+            | DataType::Decimal256(_, _) => { // the guards above did not match, or Decimal256
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "{} is not a valid variant shredding type",
+                    typed_value.data_type()
+                )));
+            }
+            DataType::Date32 => primitive_builder!(PrimitiveDate32, as_primitive),
+            DataType::Time64(TimeUnit::Microsecond) => {
+                primitive_builder!(PrimitiveTime64, as_primitive)
+            }
+            DataType::Time64(time_unit) => { // any unit other than microsecond
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Time64({time_unit}) is not a valid variant shredding type",
+                )));
+            }
+            DataType::Timestamp(TimeUnit::Microsecond, timezone) => Self::TimestampMicrosecond(
+                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, timezone) => Self::TimestampNanosecond(
+                TimestampUnshredRowBuilder::new(value, typed_value, timezone.is_some()),
+            ),
+            DataType::Timestamp(time_unit, _) => { // any other time unit
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Timestamp({time_unit}) is not a valid variant shredding type",
+                )));
+            }
+            DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean),
+            DataType::Utf8 => primitive_builder!(PrimitiveString, as_string),
+            DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view),
+            DataType::FixedSizeBinary(16) => {
+                primitive_builder!(PrimitiveUuid, as_fixed_size_binary)
+            }
+            DataType::FixedSizeBinary(size) => { // only width 16 (UUID) is accepted
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "FixedSizeBinary({size}) is not a valid variant shredding type",
+                )));
+            }
+            DataType::Struct(_) => Self::Struct(StructUnshredVariantBuilder::try_new(
+                value,
+                typed_value.as_struct(),
+            )?),
+            DataType::List(_) => Self::List(ListUnshredVariantBuilder::try_new(
+                value,
+                typed_value.as_list(),
+            )?),
+            DataType::LargeList(_) => Self::LargeList(ListUnshredVariantBuilder::try_new(
+                value,
+                typed_value.as_list(),
+            )?),
+            DataType::ListView(_) => Self::ListView(ListUnshredVariantBuilder::try_new(
+                value,
+                typed_value.as_list_view(),
+            )?),
+            DataType::LargeListView(_) => Self::LargeListView(ListUnshredVariantBuilder::try_new(
+                value,
+                typed_value.as_list_view(),
+            )?),
+            DataType::FixedSizeList(_, _) => Self::FixedSizeList(
+                ListUnshredVariantBuilder::try_new(value, typed_value.as_fixed_size_list())?,
+            ),
+            _ => { // every remaining data type is reported as not yet supported
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "Unshredding not yet supported for type: {}",
+                    typed_value.data_type()
+                )));
+            }
+        };
+        Ok(Some(builder))
+    }
+}
+
+/// Builder for arrays with neither typed_value nor value (every row is NULL or Variant::Null)
+struct NullUnshredVariantBuilder<'a> {
+    nulls: Option<&'a NullBuffer>,
+}
+
+impl<'a> NullUnshredVariantBuilder<'a> {
+    fn new(nulls: Option<&'a NullBuffer>) -> Self {
+        Self { nulls }
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        _metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        // A row masked out by the null buffer is a null; any other row is an explicit Variant::Null.
+        match self.nulls {
+            Some(mask) if mask.is_null(index) => builder.append_null(),
+            _ => builder.append_value(Variant::Null),
+        }
+        Ok(())
+    }
+}
+
+/// Builder for arrays that only have a value column (already unshredded): rows are copied across
+struct ValueOnlyUnshredVariantBuilder<'a> {
+    value: &'a BinaryViewArray,
+}
+
+impl<'a> ValueOnlyUnshredVariantBuilder<'a> {
+    fn new(value: &'a BinaryViewArray) -> Self {
+        Self { value }
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        if self.value.is_valid(index) {
+            let bytes = self.value.value(index);
+            builder.append_value(Variant::new_with_metadata(metadata.clone(), bytes));
+        } else {
+            builder.append_null();
+        }
+        Ok(())
+    }
+}
+
+/// Extension trait giving arrays that correspond to primitive variant types direct row-builder
+/// support: `append_to_variant_builder` appends the element at `index` to `builder`.
+trait AppendToVariantBuilder: Array {
+    fn append_to_variant_builder(
+        &self,
+        builder: &mut impl VariantBuilderExt,
+        index: usize,
+    ) -> Result<()>;
+}
+
+/// Handles rows whose typed_value is NULL: appends the value-column entry (or a null) to the builder
+/// and `return`s `Ok(())` from the *enclosing function*. Otherwise evaluates to an `Option<Variant>`.
+macro_rules! handle_unshredded_case {
+    ($self:expr, $builder:expr, $metadata:expr, $index:expr, $partial_shredding:expr) => {{
+        let value = $self.value.as_ref().filter(|v| v.is_valid($index)); // value column, if present and valid here
+        let value = value.map(|v| Variant::new_with_metadata($metadata.clone(), v.value($index)));
+
+        // typed_value is NULL at this row: emit value if present, else a null, and return early
+        if $self.typed_value.is_null($index) {
+            match value {
+                Some(value) => $builder.append_value(value),
+                None => $builder.append_null(),
+            }
+            return Ok(());
+        }
+
+        // Only partial shredding allows value and typed_value to both be non-NULL
+        if !$partial_shredding && value.is_some() {
+            return Err(ArrowError::InvalidArgumentError(
+                "Invalid shredded variant: both value and typed_value are non-null".to_string(),
+            ));
+        }
+
+        // typed_value is valid: yield the value-column variant, if any (the partial shredding case)
+        value
+    }};
+}
+
+/// Generic unshred builder pairing an optional value column with a typed_value array `T` implementing AppendToVariantBuilder
+struct UnshredPrimitiveRowBuilder<'a, T> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a T,
+}
+
+impl<'a, T: AppendToVariantBuilder> UnshredPrimitiveRowBuilder<'a, T> {
+    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a T) -> Self {
+        Self { value, typed_value }
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        handle_unshredded_case!(self, builder, metadata, index, false); // may return early; `false` = no partial shredding
+
+        // If we get here, typed_value is valid and value is NULL (the macro rejects both being set)
+        self.typed_value.append_to_variant_builder(builder, index)
+    }
+}
+
+// Generates an AppendToVariantBuilder impl; the optional `|v| expr` transform maps `value(index)` before it is appended
+macro_rules! impl_append_to_variant_builder {
+    ($array_type:ty $(, |$v:ident| $transform:expr)? ) => {
+        impl AppendToVariantBuilder for $array_type {
+            fn append_to_variant_builder(
+                &self,
+                builder: &mut impl VariantBuilderExt,
+                index: usize,
+            ) -> Result<()> {
+                let value = self.value(index);
+                $(
+                    let $v = value; // bind the raw element to the caller-chosen name
+                    let value = $transform; // a `?` inside the transform returns from this method
+                )?
+                builder.append_value(value);
+                Ok(())
+            }
+        }
+    };
+}
+
+impl_append_to_variant_builder!(BooleanArray); // no transform: `value(index)` is appended directly
+impl_append_to_variant_builder!(StringArray);
+impl_append_to_variant_builder!(BinaryViewArray);
+impl_append_to_variant_builder!(PrimitiveArray<Int8Type>);
+impl_append_to_variant_builder!(PrimitiveArray<Int16Type>);
+impl_append_to_variant_builder!(PrimitiveArray<Int32Type>);
+impl_append_to_variant_builder!(PrimitiveArray<Int64Type>);
+impl_append_to_variant_builder!(PrimitiveArray<Float32Type>);
+impl_append_to_variant_builder!(PrimitiveArray<Float64Type>);
+
+impl_append_to_variant_builder!(PrimitiveArray<Date32Type>, |days_since_epoch| {
+    Date32Type::to_naive_date(days_since_epoch) // converted to a date before appending
+});
+
+impl_append_to_variant_builder!(
+    PrimitiveArray<Time64MicrosecondType>,
+    |micros_since_midnight| {
+        time64us_to_time(micros_since_midnight).ok_or_else(|| { // no time for this value -> error
+            ArrowError::InvalidArgumentError(format!(
+                "Invalid Time64 microsecond value: {micros_since_midnight}"
+            ))
+        })? // `?` returns the error from the generated `append_to_variant_builder`
+    }
+);
+
+// UUID from FixedSizeBinary(16)
+// NOTE: `try_new_opt` only builds this for FixedSizeBinary(16), so every slice is 16 bytes and the unwrap is presumably safe
+impl_append_to_variant_builder!(FixedSizeBinaryArray, |bytes| {
+    Uuid::from_slice(bytes).unwrap()
+});
+
+/// Timestamp types that can turn their native `i64` value into a `DateTime<Utc>`
+trait TimestampType: ArrowPrimitiveType<Native = i64> {
+    fn to_datetime_utc(value: i64) -> Result<DateTime<Utc>>;
+}
+
+impl TimestampType for TimestampMicrosecondType {
+    fn to_datetime_utc(micros: i64) -> Result<DateTime<Utc>> {
+        let Some(datetime) = DateTime::from_timestamp_micros(micros) else {
+            let message = format!("Invalid timestamp microsecond value: {micros}");
+            return Err(ArrowError::InvalidArgumentError(message));
+        };
+        Ok(datetime)
+    }
+}
+
+impl TimestampType for TimestampNanosecondType {
+    fn to_datetime_utc(nanos: i64) -> Result<DateTime<Utc>> {
+        Ok(DateTime::from_timestamp_nanos(nanos)) // constructor returns no Option, hence the bare `Ok`
+    }
+}
+
+/// Unshred builder for timestamp columns; `has_timezone` selects the timezone-aware or naive output
+struct TimestampUnshredRowBuilder<'a, T: TimestampType> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a PrimitiveArray<T>,
+    has_timezone: bool,
+}
+
+impl<'a, T: TimestampType> TimestampUnshredRowBuilder<'a, T> {
+    fn new(
+        value: Option<&'a BinaryViewArray>,
+        typed_value: &'a dyn Array,
+        has_timezone: bool,
+    ) -> Self {
+        let typed_value = typed_value.as_primitive();
+        Self {
+            value,
+            typed_value,
+            has_timezone,
+        }
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        handle_unshredded_case!(self, builder, metadata, index, false);
+
+        // Reaching this point means typed_value is valid and value is NULL
+        let utc = T::to_datetime_utc(self.typed_value.value(index))?;
+        if self.has_timezone {
+            builder.append_value(utc);
+        } else {
+            builder.append_value(utc.naive_utc());
+        }
+        Ok(())
+    }
+}
+
+/// Row builder for decimal columns; `V` is the variant decimal type built from each raw value
+struct DecimalUnshredRowBuilder<'a, A: DecimalType, V>
+where
+    V: VariantDecimalType<Native = A::Native>,
+{
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a PrimitiveArray<A>,
+    scale: i8,
+    _phantom: PhantomData<V>,
+}
+
+impl<'a, A: DecimalType, V> DecimalUnshredRowBuilder<'a, A, V>
+where
+    V: VariantDecimalType<Native = A::Native>,
+{
+    fn new(value: Option<&'a BinaryViewArray>, typed_value: &'a dyn Array, scale: i8) -> Self {
+        let typed_value = typed_value.as_primitive();
+        Self {
+            value,
+            typed_value,
+            scale,
+            _phantom: PhantomData,
+        }
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        handle_unshredded_case!(self, builder, metadata, index, false);
+        // Pair the raw native value with the column's scale; `?` propagates a rejected pair
+        let unscaled = self.typed_value.value(index);
+        builder.append_value(V::try_new_with_signed_scale(unscaled, self.scale)?);
+        Ok(())
+    }
+}
+
+/// Builder for unshredding struct/object types; a `None` field unshredder marks a skipped field
+struct StructUnshredVariantBuilder<'a> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a StructArray,
+    field_unshredders: IndexMap<&'a str, Option<UnshredVariantRowBuilder<'a>>>,
+}
+
+impl<'a> StructUnshredVariantBuilder<'a> {
+    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a StructArray) -> Result<Self> {
+        // Build one unshredder per shredded field up front, keyed by field name
+        let mut field_unshredders = IndexMap::new();
+        for (field, field_array) in typed_value.fields().iter().zip(typed_value.columns()) {
+            // Fields must be structs; a None/None field yields a `None` unshredder (skipped later)
+            let Some(field_array) = field_array.as_struct_opt() else {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Invalid shredded variant object field: expected Struct, got {}",
+                    field_array.data_type()
+                )));
+            };
+            let field_unshredder = UnshredVariantRowBuilder::try_new_opt(field_array.try_into()?)?;
+            field_unshredders.insert(field.name().as_ref(), field_unshredder);
+        }
+
+        Ok(Self {
+            value,
+            typed_value,
+            field_unshredders,
+        })
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        let value = handle_unshredded_case!(self, builder, metadata, index, true);
+
+        // If we get here, typed_value is valid and value may or may not be valid
+        let mut object_builder = builder.try_new_object()?;
+
+        // Process typed fields (skip empty builders that indicate missing fields)
+        for (field_name, field_unshredder_opt) in &mut self.field_unshredders {
+            if let Some(field_unshredder) = field_unshredder_opt {
+                let mut field_builder = ObjectFieldBuilder::new(field_name, &mut object_builder);
+                field_unshredder.append_row(&mut field_builder, metadata, index)?;
+            }
+        }
+
+        // Copy over any unshredded fields (partial shredding); names must not clash with typed ones
+        if let Some(value) = value {
+            let Variant::Object(object) = value else {
+                return Err(ArrowError::InvalidArgumentError(
+                    "Expected object in value field for partially shredded struct".to_string(),
+                ));
+            };
+
+            for (field_name, field_value) in object.iter() {
+                if self.field_unshredders.contains_key(field_name) {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Field '{field_name}' appears in both typed_value and value",
+                    )));
+                }
+                object_builder.insert_bytes(field_name, field_value);
+            }
+        }
+
+        object_builder.finish();
+        Ok(())
+    }
+}
+
+/// Builder for unshredding list-like arrays; one shared `element_unshredder` handles every element
+struct ListUnshredVariantBuilder<'a, L: ListLikeArray> {
+    value: Option<&'a BinaryViewArray>,
+    typed_value: &'a L,
+    element_unshredder: Box<UnshredVariantRowBuilder<'a>>,
+}
+
+impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> {
+    fn try_new(value: Option<&'a BinaryViewArray>, typed_value: &'a L) -> Result<Self> {
+        // A single recursive unshredder serves the elements of every row; it is built from
+        // the values array of the list, which also determines the element type
+        let element_values = typed_value.values();
+
+        // For shredded lists, each element would be a ShreddedVariantFieldArray (struct),
+        // so a values array of any other type is rejected
+        let Some(element_values) = element_values.as_struct_opt() else {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Invalid shredded variant array element: expected Struct, got {}",
+                element_values.data_type()
+            )));
+        };
+
+        // Create the recursive unshredder for elements
+        //
+        // NOTE: A None/None array element is technically invalid, but the shredding spec
+        // requires us to emit `Variant::Null` when a required value is missing.
+        let element_unshredder = UnshredVariantRowBuilder::try_new_opt(element_values.try_into()?)?
+            .unwrap_or_else(|| UnshredVariantRowBuilder::null(None));
+
+        Ok(Self {
+            value,
+            typed_value,
+            element_unshredder: Box::new(element_unshredder),
+        })
+    }
+
+    fn append_row(
+        &mut self,
+        builder: &mut impl VariantBuilderExt,
+        metadata: &VariantMetadata,
+        index: usize,
+    ) -> Result<()> {
+        handle_unshredded_case!(self, builder, metadata, index, false);
+
+        // If we get here, typed_value is valid and value is NULL -- process the list elements
+        let mut list_builder = builder.try_new_list()?;
+        for element_index in self.typed_value.element_range(index) {
+            self.element_unshredder
+                .append_row(&mut list_builder, metadata, element_index)?;
+        }
+
+        list_builder.finish();
+        Ok(())
+    }
+}
+
+// TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice?
+// Or do we also need targeted stand-alone unit tests for full coverage?
diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs
new file mode 100644
index 000000000000..fb2a08d64193
--- /dev/null
+++ b/parquet-variant-compute/src/variant_array.rs
@@ -0,0 +1,1635 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`VariantArray`] implementation
+
+use crate::VariantArrayBuilder;
+use crate::type_conversion::{
+    generic_conversion_single_value, generic_conversion_single_value_with_result,
+    primitive_conversion_single_value,
+};
+use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
+use arrow::buffer::NullBuffer;
+use arrow::compute::cast;
+use arrow::datatypes::{
+    Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, Float16Type, Float32Type,
+    Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Time64MicrosecondType,
+    TimestampMicrosecondType, TimestampNanosecondType,
+};
+use arrow::error::Result;
+use arrow_schema::extension::ExtensionType;
+use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
+use chrono::{DateTime, NaiveTime};
+use parquet_variant::{
+    Uuid, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType as _,
+};
+
+use std::borrow::Cow;
+use std::sync::Arc;
+
+/// The Arrow [`ExtensionType`] for Variant.
+///
+/// This is the canonical Arrow extension type under which variants are stored.
+/// [`VariantArray`] has further examples showing how the extension type is used.
+pub struct VariantType;
+
+impl ExtensionType for VariantType {
+    const NAME: &'static str = "arrow.parquet.variant";
+
+    // The Variant extension type always carries the empty string as its metadata
+    // <https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/docs/source/format/CanonicalExtensions.rst?plain=1#L473>
+    type Metadata = &'static str;
+
+    fn metadata(&self) -> &Self::Metadata {
+        &""
+    }
+
+    fn serialize_metadata(&self) -> Option<String> {
+        Some(String::from(""))
+    }
+
+    fn deserialize_metadata(_metadata: Option<&str>) -> Result<Self::Metadata> {
+        Ok("")
+    }
+
+    /// Accepts any `DataType::Struct`; every other data type is an invalid-argument error
+    fn supports_data_type(&self, data_type: &DataType) -> Result<()> {
+        match data_type {
+            DataType::Struct(_) => Ok(()),
+            _ => Err(ArrowError::InvalidArgumentError(format!(
+                "VariantType only supports StructArray, got {data_type}"
+            ))),
+        }
+    }
+
+    /// Validates `data_type` and ignores the supplied metadata
+    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self> {
+        Self.supports_data_type(data_type).map(|()| Self)
+    }
+}
+
+/// An array of Parquet [`Variant`] values
+///
+/// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
+/// `metadata` and `value` fields, and adds convenience methods to access
+/// the [`Variant`]s.
+///
+/// See [`VariantArrayBuilder`] for constructing `VariantArray` row by row.
+///
+/// See the examples below for converting between `VariantArray` and
+/// `StructArray`.
+///
+/// [`VariantArrayBuilder`]: crate::VariantArrayBuilder
+///
+/// # Documentation
+///
+/// At the time of this writing, Variant has been accepted as an official
+/// extension type but has not yet been published to the [official list of
+/// extension types] on the Apache Arrow website. See the [Extension Type for
+/// Parquet Variant arrow] ticket for more details.
+///
+/// [Extension Type for Parquet Variant arrow]: https://github.com/apache/arrow/issues/46908
+/// [official list of extension types]: https://arrow.apache.org/docs/format/CanonicalExtensions.html
+///
+/// # Example: Check if a [`StructArray`] has the [`VariantType`] extension
+///
+/// Arrow Arrays only provide [`DataType`], but the extension type information
+/// is stored on a [`Field`]. Thus, you must have access to the [`Schema`] or
+/// [`Field`] to check for the extension type.
+///
+/// [`Schema`]: arrow_schema::Schema
+/// ```
+/// # use arrow::array::StructArray;
+/// # use arrow_schema::{Schema, Field, DataType};
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
+/// # fn get_variant_array() -> VariantArray {
+/// #   let mut builder = VariantArrayBuilder::new(10);
+/// #   builder.append_variant(Variant::from("such wow"));
+/// #   builder.build()
+/// # }
+/// # fn get_schema() -> Schema {
+/// #   Schema::new(vec![
+/// #     Field::new("id", DataType::Int32, false),
+/// #     get_variant_array().field("var"),
+/// #   ])
+/// # }
+/// let schema = get_schema();
+/// assert_eq!(schema.fields().len(), 2);
+/// // first field is not a Variant
+/// assert!(schema.field(0).try_extension_type::<VariantType>().is_err());
+/// // second field is a Variant
+/// assert!(schema.field(1).try_extension_type::<VariantType>().is_ok());
+/// ```
+///
+/// # Example: Constructing the correct [`Field`] for a [`VariantArray`]
+///
+/// You can construct the correct [`Field`] for a [`VariantArray`] using the
+/// [`VariantArray::field`] method.
+///
+/// ```
+/// # use arrow_schema::{Schema, Field, DataType};
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
+/// # fn get_variant_array() -> VariantArray {
+/// #   let mut builder = VariantArrayBuilder::new(10);
+/// #   builder.append_variant(Variant::from("such wow"));
+/// #   builder.build()
+/// # }
+/// let variant_array = get_variant_array();
+/// // First field is an integer id, second field is a variant
+/// let schema = Schema::new(vec![
+///   Field::new("id", DataType::Int32, false),
+///   // call VariantArray::field to get the correct Field
+///   variant_array.field("var"),
+/// ]);
+/// ```
+///
+/// You can also construct the [`Field`] using [`VariantType`] directly
+///
+/// ```
+/// # use arrow_schema::{Schema, Field, DataType};
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray, VariantType};
+/// # fn get_variant_array() -> VariantArray {
+/// #   let mut builder = VariantArrayBuilder::new(10);
+/// #   builder.append_variant(Variant::from("such wow"));
+/// #   builder.build()
+/// # }
+/// # let variant_array = get_variant_array();
+/// // The DataType of a VariantArray varies depending on how it is shredded
+/// let data_type = variant_array.data_type().clone();
+/// // First field is an integer id, second field is a variant
+/// let schema = Schema::new(vec![
+///   Field::new("id", DataType::Int32, false),
+///   Field::new("var", data_type, false)
+///     // Add extension metadata to the field using `VariantType`
+///     .with_extension_type(VariantType),
+/// ]);
+/// ```
+///
+/// # Example: Converting a [`VariantArray`] to a [`StructArray`]
+///
+/// ```
+/// # use arrow::array::StructArray;
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::VariantArrayBuilder;
+/// // Create Variant Array
+/// let mut builder = VariantArrayBuilder::new(10);
+/// builder.append_variant(Variant::from("such wow"));
+/// let variant_array = builder.build();
+/// // convert to StructArray
+/// let struct_array: StructArray = variant_array.into();
+/// ```
+///
+/// # Example: Converting a [`StructArray`] to a [`VariantArray`]
+///
+/// ```
+/// # use arrow::array::StructArray;
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::{VariantArrayBuilder, VariantArray};
+/// # fn get_struct_array() -> StructArray {
+/// #   let mut builder = VariantArrayBuilder::new(10);
+/// #   builder.append_variant(Variant::from("such wow"));
+/// #   builder.build().into()
+/// # }
+/// let struct_array: StructArray = get_struct_array();
+/// // try and create a VariantArray from it
+/// let variant_array = VariantArray::try_new(&struct_array).unwrap();
+/// assert_eq!(variant_array.value(0), Variant::from("such wow"));
+/// ```
+///
+#[derive(Debug, Clone, PartialEq)]
+pub struct VariantArray {
+    /// The underlying [`StructArray`] holding the variant columns
+    inner: StructArray,
+
+    /// The metadata column of this variant
+    metadata: BinaryViewArray,
+
+    /// How this variant array is shredded (which of `value` / `typed_value` are present)
+    shredding_state: ShreddingState,
+}
+
+impl VariantArray {
+    /// Creates a new `VariantArray` from a [`StructArray`].
+    ///
+    /// # Arguments
+    /// - `inner` - The underlying [`StructArray`] that contains the variant data.
+    ///
+    /// # Returns
+    /// - A new instance of `VariantArray`.
+    ///
+    /// # Errors:
+    /// - If the input is not a `StructArray` or does not contain the required fields
+    ///
+    /// # Requirements of the `StructArray`
+    ///
+    /// 1. A required field named `metadata` which is binary, large_binary, or
+    ///    binary_view
+    ///
+    /// 2. An optional field named `value` that is binary, large_binary, or
+    ///    binary_view
+    ///
+    /// 3. An optional field named `typed_value` which can be any primitive type
+    ///    or be a list, large_list, list_view or struct
+    ///
+    /// NOTE: It is also permissible for the metadata field to be
+    /// Dictionary-Encoded, preferably (but not required) with an index type of
+    /// int8.
+    ///
+    /// Currently, only [`BinaryViewArray`] are supported.
+    pub fn try_new(inner: &dyn Array) -> Result<Self> {
+        // Workaround lack of support for Binary
+        // https://github.com/apache/arrow-rs/issues/8387
+        let inner = cast_to_binary_view_arrays(inner)?;
+
+        let Some(inner) = inner.as_struct_opt() else {
+            return Err(ArrowError::InvalidArgumentError(
+                "Invalid VariantArray: requires StructArray as input".to_string(),
+            ));
+        };
+
+        // Note the specification allows for any order so we must search by name
+
+        // Ensure the StructArray has a metadata field of BinaryView
+        let Some(metadata_field) = inner.column_by_name("metadata") else {
+            return Err(ArrowError::InvalidArgumentError(
+                "Invalid VariantArray: StructArray must contain a 'metadata' field".to_string(),
+            ));
+        };
+        let Some(metadata) = metadata_field.as_binary_view_opt() else {
+            return Err(ArrowError::NotYetImplemented(format!(
+                "VariantArray 'metadata' field must be BinaryView, got {}",
+                metadata_field.data_type()
+            )));
+        };
+
+        // Note these clones are cheap, they just bump the ref count
+        Ok(Self {
+            inner: inner.clone(),
+            metadata: metadata.clone(),
+            shredding_state: ShreddingState::try_from(inner)?,
+        })
+    }
+
+    /// Assembles a `VariantArray` from already-separated columns and an optional null buffer
+    pub(crate) fn from_parts(
+        metadata: BinaryViewArray,
+        value: Option<BinaryViewArray>,
+        typed_value: Option<ArrayRef>,
+        nulls: Option<NullBuffer>,
+    ) -> Self {
+        let mut builder =
+            StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false);
+        if let Some(value) = value.clone() {
+            builder = builder.with_field("value", Arc::new(value), true);
+        }
+        if let Some(typed_value) = typed_value.clone() {
+            builder = builder.with_field("typed_value", typed_value, true);
+        }
+        if let Some(nulls) = nulls {
+            builder = builder.with_nulls(nulls);
+        }
+
+        Self {
+            inner: builder.build(),
+            metadata,
+            shredding_state: ShreddingState::new(value, typed_value),
+        }
+    }
+
+    /// Returns a reference to the underlying [`StructArray`].
+    pub fn inner(&self) -> &StructArray {
+        &self.inner
+    }
+
+    /// Returns the inner [`StructArray`], consuming self
+    pub fn into_inner(self) -> StructArray {
+        self.inner
+    }
+
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
+    /// Return the [`Variant`] instance stored at the given row
+    ///
+    /// This is a convenience wrapper that calls [`VariantArray::try_value`] and unwraps the `Result`.
+    /// Use `try_value` if you need to handle conversion errors gracefully.
+    ///
+    /// # Panics
+    /// * if the index is out of bounds
+    /// * if `try_value` returns an error.
+    pub fn value(&self, index: usize) -> Variant<'_, '_> {
+        self.try_value(index).unwrap()
+    }
+
+    /// Return the [`Variant`] instance stored at the given row
+    ///
+    /// Note: This method does not check for nulls and the value is arbitrary
+    /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the index is out of bounds
+    ///
+    /// # Errors
+    ///
+    /// Errors if the data in `typed_value` cannot be interpreted as a valid `Variant`
+    ///
+    /// If neither `typed_value` nor `value` is valid at this row, it
+    /// will return [`Variant::Null`].
+    ///
+    /// # Performance Note
+    ///
+    /// This is certainly not the most efficient way to access values in a
+    /// `VariantArray`, but it is useful for testing and debugging.
+    ///
+    /// Note: Does not do deep validation of the [`Variant`], so it is up to the
+    /// caller to ensure that the metadata and value were constructed correctly.
+    pub fn try_value(&self, index: usize) -> Result<Variant<'_, '_>> {
+        match (self.typed_value_field(), self.value_field()) {
+            // Always prefer typed_value, if available
+            (Some(typed_value), value) if typed_value.is_valid(index) => {
+                typed_value_to_variant(typed_value, value, index)
+            }
+            // Otherwise fall back to value, if available
+            (_, Some(value)) if value.is_valid(index) => {
+                Ok(Variant::new(self.metadata.value(index), value.value(index)))
+            }
+            // It is technically invalid for neither value nor typed_value fields to be available,
+            // but the spec specifically requires readers to return Variant::Null in this case.
+            _ => Ok(Variant::Null),
+        }
+    }
+
+    /// Return a reference to the metadata field of the [`StructArray`]
+    pub fn metadata_field(&self) -> &BinaryViewArray {
+        &self.metadata
+    }
+
+    /// Return a reference to the value field of the `StructArray`, if present
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
+    }
+
+    /// Return a reference to the typed_value field of the `StructArray`, if present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
+    }
+
+    /// Return a field to represent this VariantArray in a `Schema` with
+    /// a particular name
+    pub fn field(&self, name: impl Into<String>) -> Field {
+        Field::new(
+            name.into(),
+            self.data_type().clone(),
+            self.inner.is_nullable(),
+        )
+        .with_extension_type(VariantType)
+    }
+
+    /// Returns the [`DataType`] of this VariantArray's inner [`StructArray`]
+    pub fn data_type(&self) -> &DataType {
+        self.inner.data_type()
+    }
+
+    /// Returns a slice of this array: `length` rows starting at `offset`
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        let inner = self.inner.slice(offset, length);
+        let metadata = self.metadata.slice(offset, length);
+        let shredding_state = self.shredding_state.slice(offset, length);
+        Self {
+            inner,
+            metadata,
+            shredding_state,
+        }
+    }
+
+    /// Returns the length of the inner [`StructArray`]
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// Returns true if the inner [`StructArray`] is empty
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// Returns the null buffer of the inner [`StructArray`], if any
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        self.inner.nulls()
+    }
+
+    /// Is the element at index null?
+    pub fn is_null(&self, index: usize) -> bool {
+        self.nulls().is_some_and(|n| n.is_null(index))
+    }
+
+    /// Is the element at index valid (not null)?
+    pub fn is_valid(&self, index: usize) -> bool {
+        !self.is_null(index)
+    }
+
+    /// Returns an iterator over the values in this array
+    pub fn iter(&self) -> VariantArrayIter<'_> {
+        VariantArrayIter::new(self)
+    }
+}
+
+impl From<VariantArray> for StructArray {
+    fn from(array: VariantArray) -> Self {
+        VariantArray::into_inner(array) // unwraps to the wrapped struct array
+    }
+}
+
+impl From<VariantArray> for ArrayRef {
+    fn from(array: VariantArray) -> Self {
+        Arc::new(StructArray::from(array)) // share the wrapped struct array behind an `Arc`
+    }
+}
+
+impl<'m, 'v> FromIterator<Option<Variant<'m, 'v>>> for VariantArray {
+    fn from_iter<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(iter: T) -> Self {
+        let variants = iter.into_iter();
+        // Hand the iterator's lower size bound to the builder, then feed it every item
+        let mut builder = VariantArrayBuilder::new(variants.size_hint().0);
+        builder.extend(variants);
+        builder.build()
+    }
+}
+
+impl<'m, 'v> FromIterator<Variant<'m, 'v>> for VariantArray {
+    fn from_iter<T: IntoIterator<Item = Variant<'m, 'v>>>(iter: T) -> Self {
+        iter.into_iter().map(Some).collect() // reuses the `Option<Variant>` impl
+    }
+}
+
+/// An iterator over [`VariantArray`]
+///
+/// This iterator returns `Option<Option<Variant<'a, 'a>>>` where:
+/// - `None` indicates the end of iteration
+/// - `Some(None)` indicates a null value at this position
+/// - `Some(Some(variant))` indicates a valid variant value
+///
+/// # Example
+///
+/// ```
+/// # use parquet_variant::Variant;
+/// # use parquet_variant_compute::VariantArrayBuilder;
+/// let mut builder = VariantArrayBuilder::new(10);
+/// builder.append_variant(Variant::from(42));
+/// builder.append_null();
+/// builder.append_variant(Variant::from("hello"));
+/// let array = builder.build();
+///
+/// let values = array.iter().collect::<Vec<_>>();
+/// assert_eq!(values.len(), 3);
+/// assert_eq!(values[0], Some(Variant::from(42)));
+/// assert_eq!(values[1], None);
+/// assert_eq!(values[2], Some(Variant::from("hello")));
+/// ```
+#[derive(Debug)]
+pub struct VariantArrayIter<'a> {
+    array: &'a VariantArray,
+    head_i: usize, // index of the next row yielded from the front
+    tail_i: usize, // one past the index of the next row yielded from the back
+}
+
+impl<'a> VariantArrayIter<'a> {
+    /// Builds an iterator that spans every row of the given [`VariantArray`]
+    pub fn new(array: &'a VariantArray) -> Self {
+        Self {
+            array,
+            head_i: 0,
+            tail_i: array.len(),
+        }
+    }
+
+    fn value_opt(&self, i: usize) -> Option<Variant<'a, 'a>> {
+        (!self.array.is_null(i)).then(|| self.array.value(i))
+    }
+}
+
+impl<'a> Iterator for VariantArrayIter<'a> {
+    type Item = Option<Variant<'a, 'a>>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // Front and back cursors meeting means every row has been yielded
+        if self.head_i == self.tail_i {
+            return None;
+        }
+        // Read the front row before stepping the front cursor past it
+        let current = self.head_i;
+        let item = self.value_opt(current);
+        self.head_i = current + 1;
+        Some(item)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // Exact count of the rows not yet yielded from either end
+        let remaining = self.tail_i - self.head_i;
+        (remaining, Some(remaining))
+    }
+}
+
+impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.tail_i == self.head_i {
+            return None;
+        }
+        // `tail_i` sits one past the last unread row: step back first, then read that row
+        let last = self.tail_i - 1;
+        self.tail_i = last;
+        Some(self.value_opt(last))
+    }
+}
+
+impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
+
+/// One shredded field of a partially or perfectly shredded variant. For example, suppose the
+/// shredding schema for variant `v` treats it as an object with a single field `a`, where `a` is
+/// itself a struct with the single field `b` of type INT. Then the physical layout of the column
+/// is:
+///
+/// ```text
+/// v: VARIANT {
+///     metadata: BINARY,
+///     value: BINARY,
+///     typed_value: STRUCT {
+///         a: SHREDDED_VARIANT_FIELD {
+///             value: BINARY,
+///             typed_value: STRUCT {
+///                 b: SHREDDED_VARIANT_FIELD {
+///                     value: BINARY,
+///                     typed_value: INT,
+///                 },
+///             },
+///         },
+///     },
+/// }
+/// ```
+///
+/// In the above, each row of `v.value` is either a variant value (shredding failed, `v` was not an
+/// object at all) or a variant object (partial shredding, `v` was an object but included unexpected
+/// fields other than `a`), or is NULL (perfect shredding, `v` was an object containing only the
+/// single expected field `a`).
+///
+/// A similar story unfolds for each `v.typed_value.a.value` -- a variant value if shredding failed
+/// (`v:a` was not an object at all), or a variant object (`v:a` was an object with unexpected
+/// additional fields), or NULL (`v:a` was an object containing only the single expected field `b`).
+///
+/// Finally, `v.typed_value.a.typed_value.b.value` is either NULL (`v:a.b` was an integer) or else a
+/// variant value (which could be `Variant::Null`).
+#[derive(Debug)]
+pub struct ShreddedVariantFieldArray {
+    /// The underlying [`StructArray`] holding the `value` / `typed_value` columns
+    inner: StructArray,
+    shredding_state: ShreddingState,
+}
+
+#[allow(unused)]
+impl ShreddedVariantFieldArray {
+    /// Creates a new `ShreddedVariantFieldArray` from a [`StructArray`].
+    ///
+    /// # Arguments
+    /// - `inner` - The underlying [`StructArray`] that contains the variant data.
+    ///
+    /// # Returns
+    /// - A new instance of `ShreddedVariantFieldArray`.
+    ///
+    /// # Errors:
+    /// - If the `StructArray` does not contain the required fields
+    ///
+    /// # Requirements of the `StructArray`
+    ///
+    /// 1. An optional field named `value` that is binary, large_binary, or
+    ///    binary_view
+    ///
+    /// 2. An optional field named `typed_value` which can be any primitive type
+    ///    or be a list, large_list, list_view or struct
+    ///
+    /// Currently, only `value` columns of type [`BinaryViewArray`] are supported.
+    pub fn try_new(inner: &dyn Array) -> Result<Self> {
+        let Some(inner_struct) = inner.as_struct_opt() else {
+            return Err(ArrowError::InvalidArgumentError(
+                "Invalid ShreddedVariantFieldArray: requires StructArray as input".to_string(),
+            ));
+        };
+
+        // Note this clone is cheap, it just bumps the ref count
+        Ok(Self {
+            inner: inner_struct.clone(),
+            shredding_state: ShreddingState::try_from(inner_struct)?,
+        })
+    }
+
+    /// Return the shredding state of this `VariantArray`
+    pub fn shredding_state(&self) -> &ShreddingState {
+        &self.shredding_state
+    }
+
+    /// Return a reference to the value field of the `StructArray`
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        self.shredding_state.value_field()
+    }
+
+    /// Return a reference to the typed_value field of the `StructArray`, if present
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        self.shredding_state.typed_value_field()
+    }
+
+    /// Returns a reference to the underlying [`StructArray`].
+    pub fn inner(&self) -> &StructArray {
+        &self.inner
+    }
+
+    /// Assemble a `ShreddedVariantFieldArray` from its individual columns.
+    ///
+    /// Each column that is `Some` is added to the backing [`StructArray`] as a
+    /// nullable field (`value` first, then `typed_value`). `nulls`, when given,
+    /// becomes the null buffer of that struct array.
+    pub(crate) fn from_parts(
+        value: Option<BinaryViewArray>,
+        typed_value: Option<ArrayRef>,
+        nulls: Option<NullBuffer>,
+    ) -> Self {
+        let mut struct_builder = StructArrayBuilder::new();
+
+        // The columns are shared between the struct array and the shredding
+        // state, so the struct array receives a clone of each one.
+        if let Some(value_column) = &value {
+            struct_builder =
+                struct_builder.with_field("value", Arc::new(value_column.clone()), true);
+        }
+        if let Some(typed_column) = &typed_value {
+            struct_builder =
+                struct_builder.with_field("typed_value", Arc::clone(typed_column), true);
+        }
+        if let Some(null_buffer) = nulls {
+            struct_builder = struct_builder.with_nulls(null_buffer);
+        }
+
+        let inner = struct_builder.build();
+        let shredding_state = ShreddingState::new(value, typed_value);
+        Self {
+            inner,
+            shredding_state,
+        }
+    }
+
+    /// Returns the inner [`StructArray`], consuming self
+    ///
+    /// The cached shredding state is dropped along with `self`.
+    pub fn into_inner(self) -> StructArray {
+        self.inner
+    }
+
+    /// Returns the [`DataType`] of the underlying [`StructArray`]
+    pub fn data_type(&self) -> &DataType {
+        self.inner.data_type()
+    }
+
+    /// Returns the length of the underlying [`StructArray`]
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// Returns whether the underlying [`StructArray`] is empty
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// Returns the offset of the underlying [`StructArray`]
+    pub fn offset(&self) -> usize {
+        self.inner.offset()
+    }
+
+    /// Returns the null buffer exposed by this array, which is always `None`.
+    ///
+    /// The null buffer of the underlying [`StructArray`] is not consulted here.
+    pub fn nulls(&self) -> Option<&NullBuffer> {
+        // According to the shredding spec, ShreddedVariantFieldArray should be
+        // physically non-nullable - SQL NULL is inferred by both value and
+        // typed_value being physically NULL
+        None
+    }
+    /// Is the element at index null?
+    ///
+    /// This consults [`Self::nulls`]. Because that method currently always
+    /// returns `None`, this returns `false` for every index.
+    pub fn is_null(&self, index: usize) -> bool {
+        self.nulls().is_some_and(|n| n.is_null(index))
+    }
+
+    /// Is the element at index valid (not null)?
+    ///
+    /// This is the negation of [`Self::is_null`].
+    pub fn is_valid(&self, index: usize) -> bool {
+        !self.is_null(index)
+    }
+}
+
+/// Converts a [`ShreddedVariantFieldArray`] into an [`ArrayRef`] by wrapping
+/// its underlying [`StructArray`] in an `Arc`.
+impl From<ShreddedVariantFieldArray> for ArrayRef {
+    fn from(array: ShreddedVariantFieldArray) -> Self {
+        Arc::new(array.into_inner())
+    }
+}
+
+/// Unwraps a [`ShreddedVariantFieldArray`] into its underlying [`StructArray`].
+impl From<ShreddedVariantFieldArray> for StructArray {
+    fn from(array: ShreddedVariantFieldArray) -> Self {
+        array.into_inner()
+    }
+}
+
+/// Represents the shredding state of a [`VariantArray`]
+///
+/// [`VariantArray`]s can be shredded according to the [Parquet Variant
+/// Shredding Spec]. Shredding means that the actual value is stored in the
+/// typed `typed_value` field instead of the generic `value` field.
+///
+/// Both value and typed_value are optional fields used together to encode a
+/// single value. Values in the two fields must be interpreted according to the
+/// following table (see [Parquet Variant Shredding Spec] for more details):
+///
+/// | value    | typed_value  | Meaning |
+/// |----------|--------------|---------|
+/// | NULL     | NULL         | The value is missing; only valid for shredded object fields |
+/// | non-NULL | NULL         | The value is present and may be any type, including [`Variant::Null`] |
+/// | NULL     | non-NULL     | The value is present and is the shredded type |
+/// | non-NULL | non-NULL     | The value is present and is a partially shredded object |
+///
+///
+/// Applying the above rules to entire columns, we obtain the following:
+///
+/// | value  | typed_value  | Meaning |
+/// |--------|-------------|---------|
+/// | --     | --          | **Missing**: The value is always missing; only valid for shredded object fields |
+/// | exists | --          | **Unshredded**: If present, the value may be any type, including [`Variant::Null`] |
+/// | --     | exists      | **Perfectly shredded**: If present, the value is always the shredded type |
+/// | exists | exists      | **Imperfectly shredded**: The value might (not) be present and might (not) be the shredded type |
+///
+/// NOTE: Partial shredding is a row-wise situation that can arise under imperfect shredding (a
+/// column-wise situation): When both columns exist (imperfect shredding) and the typed_value column
+/// is a struct, then both columns can be non-NULL for the same row if value is a variant object
+/// (partial shredding).
+///
+/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
+#[derive(Debug, Clone, PartialEq)]
+pub struct ShreddingState {
+    // The `value` column, if it exists
+    value: Option<BinaryViewArray>,
+    // The `typed_value` column, if it exists
+    typed_value: Option<ArrayRef>,
+}
+
+impl ShreddingState {
+    /// Build a `ShreddingState` directly from optional `value` and `typed_value` columns
+    ///
+    /// A `ShreddingState` can also be derived from a &[`StructArray`] using
+    /// `ShreddingState::try_from(&struct_array)`, for example:
+    ///
+    /// ```no_run
+    /// # use arrow::array::StructArray;
+    /// # use parquet_variant_compute::ShreddingState;
+    /// # fn get_struct_array() -> StructArray {
+    /// #   unimplemented!()
+    /// # }
+    /// let struct_array: StructArray = get_struct_array();
+    /// let shredding_state = ShreddingState::try_from(&struct_array).unwrap();
+    /// ```
+    pub fn new(value: Option<BinaryViewArray>, typed_value: Option<ArrayRef>) -> Self {
+        ShreddingState { value, typed_value }
+    }
+
+    /// Borrow the `value` column, or `None` if there is no such column
+    pub fn value_field(&self) -> Option<&BinaryViewArray> {
+        Option::as_ref(&self.value)
+    }
+
+    /// Borrow the `typed_value` column, or `None` if there is no such column
+    pub fn typed_value_field(&self) -> Option<&ArrayRef> {
+        Option::as_ref(&self.typed_value)
+    }
+
+    /// Produce a [`BorrowedShreddingState`] that references this state's columns
+    pub fn borrow(&self) -> BorrowedShreddingState<'_> {
+        BorrowedShreddingState::new(self.value_field(), self.typed_value_field())
+    }
+
+    /// Produce a new state in which every existing column is sliced to
+    /// `length` rows starting at `offset`
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        let value = self
+            .value_field()
+            .map(|column| column.slice(offset, length));
+        let typed_value = self
+            .typed_value_field()
+            .map(|column| column.slice(offset, length));
+        Self::new(value, typed_value)
+    }
+}
+
+/// Similar to [`ShreddingState`] except it holds borrowed references of the target arrays. Useful
+/// for avoiding clone operations when the caller does not need a self-standing shredding state.
+///
+/// It can be converted into an owned [`ShreddingState`] via `From`/`Into`, which clones the
+/// referenced arrays.
+#[derive(Clone, Debug)]
+pub struct BorrowedShreddingState<'a> {
+    // Borrowed `value` column, if it exists
+    value: Option<&'a BinaryViewArray>,
+    // Borrowed `typed_value` column, if it exists
+    typed_value: Option<&'a ArrayRef>,
+}
+
+impl<'a> BorrowedShreddingState<'a> {
+    /// Create a new `BorrowedShreddingState` from the given `value` and `typed_value` fields
+    ///
+    /// Note you can create a `BorrowedShreddingState` from a &[`StructArray`] using
+    /// `BorrowedShreddingState::try_from(&struct_array)`, for example:
+    ///
+    /// ```no_run
+    /// # use arrow::array::StructArray;
+    /// # use parquet_variant_compute::BorrowedShreddingState;
+    /// # fn get_struct_array() -> StructArray {
+    /// #   unimplemented!()
+    /// # }
+    /// let struct_array: StructArray = get_struct_array();
+    /// let shredding_state = BorrowedShreddingState::try_from(&struct_array).unwrap();
+    /// ```
+    pub fn new(value: Option<&'a BinaryViewArray>, typed_value: Option<&'a ArrayRef>) -> Self {
+        Self { value, typed_value }
+    }
+
+    /// Return a reference to the value field, if present
+    ///
+    /// The returned reference carries the lifetime `'a` of the borrowed array,
+    /// not the lifetime of `&self`.
+    pub fn value_field(&self) -> Option<&'a BinaryViewArray> {
+        self.value
+    }
+
+    /// Return a reference to the typed_value field, if present
+    ///
+    /// The returned reference carries the lifetime `'a` of the borrowed array,
+    /// not the lifetime of `&self`.
+    pub fn typed_value_field(&self) -> Option<&'a ArrayRef> {
+        self.typed_value
+    }
+}
+
+/// Extracts the `value` and `typed_value` columns (by name) from a struct array.
+///
+/// Both columns are optional. A `value` column that is not a binary view
+/// produces [`ArrowError::NotYetImplemented`]; the `typed_value` column is
+/// accepted as-is.
+impl<'a> TryFrom<&'a StructArray> for BorrowedShreddingState<'a> {
+    type Error = ArrowError;
+
+    fn try_from(inner_struct: &'a StructArray) -> Result<Self> {
+        // Absent `value` column => `None`; present but of the wrong type => error.
+        let value = inner_struct
+            .column_by_name("value")
+            .map(|column| {
+                column.as_binary_view_opt().ok_or_else(|| {
+                    ArrowError::NotYetImplemented(format!(
+                        "VariantArray 'value' field must be BinaryView, got {}",
+                        column.data_type()
+                    ))
+                })
+            })
+            .transpose()?;
+
+        Ok(Self {
+            value,
+            typed_value: inner_struct.column_by_name("typed_value"),
+        })
+    }
+}
+
+/// Builds an owned [`ShreddingState`] by first borrowing the columns of the
+/// struct array and then cloning them.
+impl TryFrom<&StructArray> for ShreddingState {
+    type Error = ArrowError;
+
+    fn try_from(inner_struct: &StructArray) -> Result<Self> {
+        let borrowed = BorrowedShreddingState::try_from(inner_struct)?;
+        Ok(ShreddingState::from(borrowed))
+    }
+}
+
+/// Turns a borrowed shredding state into an owned one by cloning each
+/// referenced column.
+impl From<BorrowedShreddingState<'_>> for ShreddingState {
+    fn from(state: BorrowedShreddingState<'_>) -> Self {
+        let value = state.value_field().cloned();
+        let typed_value = state.typed_value_field().cloned();
+        ShreddingState::new(value, typed_value)
+    }
+}
+
+/// Builds struct arrays from component fields
+///
+/// Fields are appended with [`Self::with_field`], an optional null buffer is
+/// set with [`Self::with_nulls`], and [`Self::build`] produces the final
+/// [`StructArray`].
+///
+/// TODO: move to arrow crate
+#[derive(Debug, Default, Clone)]
+pub(crate) struct StructArrayBuilder {
+    // Field definitions, in insertion order; kept parallel to `arrays`
+    fields: Vec<FieldRef>,
+    // Child arrays, in insertion order; kept parallel to `fields`
+    arrays: Vec<ArrayRef>,
+    // Optional null buffer for the struct array itself
+    nulls: Option<NullBuffer>,
+}
+
+impl StructArrayBuilder {
+    /// Create an empty builder with no fields and no null buffer.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Append `array` as a new field called `field_name`.
+    ///
+    /// The field's data type is taken from the array itself.
+    pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self {
+        let data_type = array.data_type().clone();
+        self.fields
+            .push(Arc::new(Field::new(field_name, data_type, nullable)));
+        self.arrays.push(array);
+        self
+    }
+
+    /// Use `nulls` as the null buffer of the resulting struct array.
+    pub fn with_nulls(self, nulls: NullBuffer) -> Self {
+        Self {
+            nulls: Some(nulls),
+            ..self
+        }
+    }
+
+    /// Consume the builder and construct the [`StructArray`] from the
+    /// accumulated fields, arrays and null buffer.
+    pub fn build(self) -> StructArray {
+        StructArray::new(Fields::from(self.fields), self.arrays, self.nulls)
+    }
+}
+
+/// Returns the non-null element of `typed_value` at `index` as a [`Variant`].
+///
+/// `value` is the companion `value` column, if any. It is only used to detect
+/// rows where both columns hold a value, which is legal only when
+/// `typed_value` is a struct (a partially shredded object).
+///
+/// # Errors
+///
+/// * [`ArrowError::InvalidArgumentError`] if `value` is also valid at `index`
+///   and `typed_value` is not a struct.
+/// * Whatever error the conversion macros produce when the stored value cannot
+///   be represented as a variant (e.g. an out-of-range decimal, time of day or
+///   microsecond timestamp).
+///
+/// Unsupported `typed_value` types trip a `debug_assert!` and otherwise yield
+/// [`Variant::Null`].
+fn typed_value_to_variant<'a>(
+    typed_value: &'a ArrayRef,
+    value: Option<&BinaryViewArray>,
+    index: usize,
+) -> Result<Variant<'a, 'a>> {
+    let data_type = typed_value.data_type();
+    if !matches!(data_type, DataType::Struct(_)) && value.is_some_and(|v| v.is_valid(index)) {
+        // Only a partially shredded struct is allowed to have values for both columns.
+        // This is a data error, so report it instead of panicking.
+        return Err(ArrowError::InvalidArgumentError(
+            "Invalid variant, conflicting value and typed_value".to_string(),
+        ));
+    }
+    match data_type {
+        DataType::Null => Ok(Variant::Null),
+        DataType::Boolean => {
+            let boolean_array = typed_value.as_boolean();
+            let value = boolean_array.value(index);
+            Ok(Variant::from(value))
+        }
+        // 16-byte FixedSizeBinary always corresponds to a UUID; all other sizes are illegal.
+        DataType::FixedSizeBinary(16) => {
+            let array = typed_value.as_fixed_size_binary();
+            let value = array.value(index);
+            Ok(Uuid::from_slice(value).unwrap().into()) // unwrap is safe: slice is always 16 bytes
+        }
+        DataType::BinaryView => {
+            let array = typed_value.as_binary_view();
+            let value = array.value(index);
+            Ok(Variant::from(value))
+        }
+        DataType::Utf8 => {
+            let array = typed_value.as_string::<i32>();
+            let value = array.value(index);
+            Ok(Variant::from(value))
+        }
+        DataType::LargeUtf8 => {
+            let array = typed_value.as_string::<i64>();
+            let value = array.value(index);
+            Ok(Variant::from(value))
+        }
+        DataType::Utf8View => {
+            let array = typed_value.as_string_view();
+            let value = array.value(index);
+            Ok(Variant::from(value))
+        }
+        DataType::Int8 => {
+            primitive_conversion_single_value!(Int8Type, typed_value, index)
+        }
+        DataType::Int16 => {
+            primitive_conversion_single_value!(Int16Type, typed_value, index)
+        }
+        DataType::Int32 => {
+            primitive_conversion_single_value!(Int32Type, typed_value, index)
+        }
+        DataType::Int64 => {
+            primitive_conversion_single_value!(Int64Type, typed_value, index)
+        }
+        DataType::Float16 => {
+            primitive_conversion_single_value!(Float16Type, typed_value, index)
+        }
+        DataType::Float32 => {
+            primitive_conversion_single_value!(Float32Type, typed_value, index)
+        }
+        DataType::Float64 => {
+            primitive_conversion_single_value!(Float64Type, typed_value, index)
+        }
+        DataType::Decimal32(_, s) => {
+            generic_conversion_single_value_with_result!(
+                Decimal32Type,
+                as_primitive,
+                |v| VariantDecimal4::try_new(v, *s as u8),
+                typed_value,
+                index
+            )
+        }
+        DataType::Decimal64(_, s) => {
+            generic_conversion_single_value_with_result!(
+                Decimal64Type,
+                as_primitive,
+                |v| VariantDecimal8::try_new(v, *s as u8),
+                typed_value,
+                index
+            )
+        }
+        DataType::Decimal128(_, s) => {
+            generic_conversion_single_value_with_result!(
+                Decimal128Type,
+                as_primitive,
+                |v| VariantDecimal16::try_new(v, *s as u8),
+                typed_value,
+                index
+            )
+        }
+        DataType::Date32 => {
+            generic_conversion_single_value!(
+                Date32Type,
+                as_primitive,
+                Date32Type::to_naive_date,
+                typed_value,
+                index
+            )
+        }
+        DataType::Time64(TimeUnit::Microsecond) => {
+            // Use checked conversions: a plain `as u32` cast would wrap negative or very
+            // large inputs, either overflowing the multiplication below or silently
+            // producing a valid-looking time of day.
+            generic_conversion_single_value_with_result!(
+                Time64MicrosecondType,
+                as_primitive,
+                |v: i64| u32::try_from(v / 1_000_000)
+                    .ok()
+                    .zip(u32::try_from(v % 1_000_000).ok())
+                    .and_then(|(secs, micros)| {
+                        // micros < 1_000_000, so the nanosecond count fits in a u32
+                        NaiveTime::from_num_seconds_from_midnight_opt(secs, micros * 1000)
+                    })
+                    .ok_or_else(|| format!("Invalid microsecond from midnight: {}", v)),
+                typed_value,
+                index
+            )
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => {
+            // `from_timestamp_micros` yields `None` for out-of-range input; surface that
+            // as an error rather than unwrapping.
+            generic_conversion_single_value_with_result!(
+                TimestampMicrosecondType,
+                as_primitive,
+                |v: i64| DateTime::from_timestamp_micros(v)
+                    .ok_or_else(|| format!("Invalid timestamp microseconds: {}", v)),
+                typed_value,
+                index
+            )
+        }
+        DataType::Timestamp(TimeUnit::Microsecond, None) => {
+            generic_conversion_single_value_with_result!(
+                TimestampMicrosecondType,
+                as_primitive,
+                |v: i64| DateTime::from_timestamp_micros(v)
+                    .map(|dt| dt.naive_utc())
+                    .ok_or_else(|| format!("Invalid timestamp microseconds: {}", v)),
+                typed_value,
+                index
+            )
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, Some(_)) => {
+            generic_conversion_single_value!(
+                TimestampNanosecondType,
+                as_primitive,
+                DateTime::from_timestamp_nanos,
+                typed_value,
+                index
+            )
+        }
+        DataType::Timestamp(TimeUnit::Nanosecond, None) => {
+            generic_conversion_single_value!(
+                TimestampNanosecondType,
+                as_primitive,
+                |v| DateTime::from_timestamp_nanos(v).naive_utc(),
+                typed_value,
+                index
+            )
+        }
+        // todo other types here (note this is very similar to cast_to_variant.rs)
+        // so it would be great to figure out how to share this code
+        _ => {
+            // We shouldn't panic in production code, but this is a
+            // placeholder until we implement more types
+            // https://github.com/apache/arrow-rs/issues/8091
+            debug_assert!(
+                false,
+                "Unsupported typed_value type: {}",
+                typed_value.data_type()
+            );
+            Ok(Variant::Null)
+        }
+    }
+}
+
+/// Workaround for the lack of direct support for BinaryArray
+/// <https://github.com/apache/arrow-rs/issues/8387>
+///
+/// The values are read as
+/// * `StructArray<metadata: Binary, value: Binary>`
+///
+/// whereas VariantArray needs them as
+/// * `StructArray<metadata: BinaryView, value: BinaryView>`
+///
+/// The input's data type is canonicalized (and verified) first. If that leaves
+/// the type unchanged and the input is a struct array, the input is returned
+/// as-is; otherwise it is cast to the canonical type.
+fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef> {
+    let target_type = canonicalize_and_verify_data_type(array.data_type())?;
+    match (&target_type, array.as_struct_opt()) {
+        // Nothing to change: skip the unnecessary cast
+        (Cow::Borrowed(_), Some(struct_array)) => Ok(Arc::new(struct_array.clone())),
+        _ => cast(array, target_type.as_ref()),
+    }
+}
+
+/// Recursively visits a data type, ensuring that it only contains data types that can legally
+/// appear in a (possibly shredded) variant array. It also replaces Binary fields with BinaryView,
+/// since that's what comes back from the parquet reader and what the variant code expects to find.
+///
+/// Returns `Cow::Borrowed` when the type is already canonical, `Cow::Owned` with the rewritten
+/// type when something had to change, and an [`ArrowError::InvalidArgumentError`] when the type
+/// is not allowed.
+fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result<Cow<'_, DataType>> {
+    use DataType::*;
+
+    // helper macros
+    // `fail!()` returns early from the enclosing function with an "illegal type" error
+    macro_rules! fail {
+        () => {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Illegal shredded value type: {data_type}"
+            )))
+        };
+    }
+    // `borrow!()` signals that the input type is already canonical
+    macro_rules! borrow {
+        () => {
+            Cow::Borrowed(data_type)
+        };
+    }
+
+    let new_data_type = match data_type {
+        // Primitive arrow types that have a direct variant counterpart are allowed
+        Null | Boolean => borrow!(),
+        Int8 | Int16 | Int32 | Int64 | Float32 | Float64 => borrow!(),
+
+        // Unsigned integers and half-float are not allowed
+        UInt8 | UInt16 | UInt32 | UInt64 | Float16 => fail!(),
+
+        // Most decimal types are allowed, with restrictions on precision and scale
+        //
+        // NOTE: arrow-parquet widens 32- and 64-bit decimals to 128-bit when reading, but the
+        // variant spec requires using the narrowest decimal type for a given precision. Fix
+        // those up first.
+        //
+        // The arms below are order-sensitive: the narrowing arms must come before the
+        // pass-through arms, and the catch-all failure arm must come last.
+        Decimal64(p, s) | Decimal128(p, s)
+            if VariantDecimal4::is_valid_precision_and_scale(p, s) =>
+        {
+            Cow::Owned(Decimal32(*p, *s))
+        }
+        Decimal128(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => {
+            Cow::Owned(Decimal64(*p, *s))
+        }
+        Decimal32(p, s) if VariantDecimal4::is_valid_precision_and_scale(p, s) => borrow!(),
+        Decimal64(p, s) if VariantDecimal8::is_valid_precision_and_scale(p, s) => borrow!(),
+        Decimal128(p, s) if VariantDecimal16::is_valid_precision_and_scale(p, s) => borrow!(),
+        Decimal32(..) | Decimal64(..) | Decimal128(..) | Decimal256(..) => fail!(),
+
+        // Only micro and nano timestamps are allowed
+        Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) => borrow!(),
+        Timestamp(TimeUnit::Millisecond | TimeUnit::Second, _) => fail!(),
+
+        // Only 32-bit dates and 64-bit microsecond time are allowed.
+        Date32 | Time64(TimeUnit::Microsecond) => borrow!(),
+        Date64 | Time32(_) | Time64(_) | Duration(_) | Interval(_) => fail!(),
+
+        // Binary and string are allowed. Force Binary/LargeBinary to BinaryView because that's what the parquet
+        // reader returns and what the rest of the variant code expects.
+        Binary | LargeBinary => Cow::Owned(BinaryView),
+        BinaryView | Utf8 | LargeUtf8 | Utf8View => borrow!(),
+
+        // UUID maps to 16-byte fixed-size binary; no other width is allowed
+        FixedSizeBinary(16) => borrow!(),
+        FixedSizeBinary(_) | FixedSizeList(..) => fail!(),
+
+        // We can _possibly_ allow (some of) these some day?
+        ListView(_) | LargeList(_) | LargeListView(_) => {
+            fail!()
+        }
+
+        // Lists and struct are allowed, maps and unions are not
+        // A list is rewritten only if its element field had to be rewritten.
+        List(field) => match canonicalize_and_verify_field(field)? {
+            Cow::Borrowed(_) => borrow!(),
+            Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)),
+        },
+        // Struct is used by the internal layout, and can also represent a shredded variant object.
+        Struct(fields) => {
+            // Avoid allocation unless at least one field changes, to avoid unnecessary deep cloning
+            // of the data type. Even if some fields change, the others are shallow arc clones.
+            // Maps the position of each changed field to its replacement.
+            let mut new_fields = std::collections::HashMap::new();
+            for (i, field) in fields.iter().enumerate() {
+                if let Cow::Owned(new_field) = canonicalize_and_verify_field(field)? {
+                    new_fields.insert(i, new_field);
+                }
+            }
+
+            if new_fields.is_empty() {
+                borrow!()
+            } else {
+                // Rebuild the field list in the original order, substituting the changed fields
+                let new_fields = fields
+                    .iter()
+                    .enumerate()
+                    .map(|(i, field)| new_fields.remove(&i).unwrap_or_else(|| field.clone()));
+                Cow::Owned(DataType::Struct(new_fields.collect()))
+            }
+        }
+        Map(..) | Union(..) => fail!(),
+
+        // We can _possibly_ support (some of) these some day?
+        Dictionary(..) | RunEndEncoded(..) => fail!(),
+    };
+    Ok(new_data_type)
+}
+
+/// Field-level counterpart of `canonicalize_and_verify_data_type`.
+///
+/// Returns the field borrowed when its data type is already canonical;
+/// otherwise returns a copy of the field carrying the canonical data type.
+fn canonicalize_and_verify_field(field: &Arc<Field>) -> Result<Cow<'_, Arc<Field>>> {
+    let canonical = match canonicalize_and_verify_data_type(field.data_type())? {
+        Cow::Borrowed(_) => Cow::Borrowed(field),
+        Cow::Owned(data_type) => {
+            let rewritten = (**field).clone().with_data_type(data_type);
+            Cow::Owned(Arc::new(rewritten))
+        }
+    };
+    Ok(canonical)
+}
+
+#[cfg(test)]
+mod test {
+    use crate::VariantArrayBuilder;
+    use std::str::FromStr;
+
+    use super::*;
+    use arrow::array::{
+        BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array,
+        Time64MicrosecondArray,
+    };
+    use arrow_schema::{Field, Fields};
+    use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString};
+
+    #[test]
+    fn invalid_not_a_struct_array() {
+        let array = make_binary_view_array();
+        // Should fail because the input is not a StructArray
+        let err = VariantArray::try_new(&array);
+        assert_eq!(
+            err.unwrap_err().to_string(),
+            "Invalid argument error: Invalid VariantArray: requires StructArray as input"
+        );
+    }
+
+    #[test]
+    fn invalid_missing_metadata() {
+        let fields = Fields::from(vec![Field::new("value", DataType::BinaryView, true)]);
+        let array = StructArray::new(fields, vec![make_binary_view_array()], None);
+        // Should fail because the StructArray does not contain a 'metadata' field
+        let err = VariantArray::try_new(&array);
+        assert_eq!(
+            err.unwrap_err().to_string(),
+            "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field"
+        );
+    }
+
+    #[test]
+    fn all_null_missing_value_and_typed_value() {
+        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
+        let array = StructArray::new(fields, vec![make_binary_view_array()], None);
+
+        // NOTE: By strict spec interpretation, this case (top-level variant with null/null)
+        // should be invalid, but we currently allow it and treat it as Variant::Null.
+        // This is a pragmatic decision to handle missing data gracefully.
+        let variant_array = VariantArray::try_new(&array).unwrap();
+
+        // Verify the shredding state is AllNull
+        assert!(matches!(
+            variant_array.shredding_state(),
+            ShreddingState {
+                value: None,
+                typed_value: None
+            }
+        ));
+
+        // Verify that value() returns Variant::Null (compensating for spec violation)
+        for i in 0..variant_array.len() {
+            if variant_array.is_valid(i) {
+                assert_eq!(variant_array.value(i), parquet_variant::Variant::Null);
+            }
+        }
+    }
+
+    #[test]
+    fn invalid_metadata_field_type() {
+        let fields = Fields::from(vec![
+            Field::new("metadata", DataType::Int32, true), // not supported
+            Field::new("value", DataType::BinaryView, true),
+        ]);
+        let array = StructArray::new(
+            fields,
+            vec![make_int32_array(), make_binary_view_array()],
+            None,
+        );
+        let err = VariantArray::try_new(&array);
+        assert_eq!(
+            err.unwrap_err().to_string(),
+            "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Int32"
+        );
+    }
+
+    #[test]
+    fn invalid_value_field_type() {
+        let fields = Fields::from(vec![
+            Field::new("metadata", DataType::BinaryView, true),
+            Field::new("value", DataType::Int32, true), // Not yet supported
+        ]);
+        let array = StructArray::new(
+            fields,
+            vec![make_binary_view_array(), make_int32_array()],
+            None,
+        );
+        let err = VariantArray::try_new(&array);
+        assert_eq!(
+            err.unwrap_err().to_string(),
+            "Not yet implemented: VariantArray 'value' field must be BinaryView, got Int32"
+        );
+    }
+
+    fn make_binary_view_array() -> ArrayRef {
+        Arc::new(BinaryViewArray::from(vec![b"test" as &[u8]]))
+    }
+
+    fn make_int32_array() -> ArrayRef {
+        Arc::new(Int32Array::from(vec![1]))
+    }
+
+    #[test]
+    fn all_null_shredding_state() {
+        // Verify the shredding state is AllNull
+        assert!(matches!(
+            ShreddingState::new(None, None),
+            ShreddingState {
+                value: None,
+                typed_value: None
+            }
+        ));
+    }
+
+    // A struct array with only a `metadata` column and an all-null null buffer
+    // yields a VariantArray with no `value`/`typed_value` and every row null.
+    #[test]
+    fn all_null_variant_array_construction() {
+        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
+        let nulls = NullBuffer::from(vec![false, false, false]); // all null
+
+        let fields = Fields::from(vec![Field::new("metadata", DataType::BinaryView, false)]);
+        let struct_array = StructArray::new(fields, vec![Arc::new(metadata)], Some(nulls));
+
+        let variant_array = VariantArray::try_new(&struct_array).unwrap();
+
+        // Verify that neither `value` nor `typed_value` is present
+        assert!(matches!(
+            variant_array.shredding_state(),
+            ShreddingState {
+                value: None,
+                typed_value: None
+            }
+        ));
+
+        // Verify all values are null
+        assert_eq!(variant_array.len(), 3);
+        assert!(!variant_array.is_valid(0));
+        assert!(!variant_array.is_valid(1));
+        assert!(!variant_array.is_valid(2));
+
+        // Verify again, index by index, that every row is reported as null
+        // (note that `value()` is not called on these null rows)
+        for i in 0..variant_array.len() {
+            assert!(
+                !variant_array.is_valid(i),
+                "Expected value at index {i} to be null"
+            );
+        }
+    }
+
+    #[test]
+    fn value_field_present_but_all_null_should_be_unshredded() {
+        // This test demonstrates the issue: when a value field exists in schema
+        // but all its values are null, it should remain Unshredded, not AllNull
+        let metadata = BinaryViewArray::from(vec![b"test" as &[u8]; 3]);
+
+        // Create a value field with all null values
+        let value_nulls = NullBuffer::from(vec![false, false, false]); // all null
+        let value_array = BinaryViewArray::from_iter_values(vec![""; 3]);
+        let value_data = value_array
+            .to_data()
+            .into_builder()
+            .nulls(Some(value_nulls))
+            .build()
+            .unwrap();
+        let value = BinaryViewArray::from(value_data);
+
+        let fields = Fields::from(vec![
+            Field::new("metadata", DataType::BinaryView, false),
+            Field::new("value", DataType::BinaryView, true), // Field exists in schema
+        ]);
+        let struct_array = StructArray::new(
+            fields,
+            vec![Arc::new(metadata), Arc::new(value)],
+            None, // struct itself is not null, just the value field is all null
+        );
+
+        let variant_array = VariantArray::try_new(&struct_array).unwrap();
+
+        // This should be Unshredded, not AllNull, because value field exists in schema
+        assert!(matches!(
+            variant_array.shredding_state(),
+            ShreddingState {
+                value: Some(_),
+                typed_value: None
+            }
+        ));
+    }
+
+    #[test]
+    fn test_variant_array_iterable() {
+        let mut b = VariantArrayBuilder::new(6);
+
+        b.append_null();
+        b.append_variant(Variant::from(1_i8));
+        b.append_variant(Variant::Null);
+        b.append_variant(Variant::from(2_i32));
+        b.append_variant(Variant::from(3_i64));
+        b.append_null();
+
+        let v = b.build();
+
+        let variants = v.iter().collect::<Vec<_>>();
+
+        assert_eq!(
+            variants,
+            vec![
+                None,
+                Some(Variant::Int8(1)),
+                Some(Variant::Null),
+                Some(Variant::Int32(2)),
+                Some(Variant::Int64(3)),
+                None,
+            ]
+        );
+    }
+
+    #[test]
+    fn test_variant_array_iter_double_ended() {
+        let mut b = VariantArrayBuilder::new(5);
+
+        b.append_variant(Variant::from(0_i32));
+        b.append_null();
+        b.append_variant(Variant::from(2_i32));
+        b.append_null();
+        b.append_variant(Variant::from(4_i32));
+
+        let array = b.build();
+        let mut iter = array.iter();
+
+        assert_eq!(iter.next(), Some(Some(Variant::from(0_i32))));
+        assert_eq!(iter.next(), Some(None));
+
+        assert_eq!(iter.next_back(), Some(Some(Variant::from(4_i32))));
+        assert_eq!(iter.next_back(), Some(None));
+        assert_eq!(iter.next_back(), Some(Some(Variant::from(2_i32))));
+
+        assert_eq!(iter.next_back(), None);
+        assert_eq!(iter.next(), None);
+    }
+
+    #[test]
+    fn test_variant_array_iter_reverse() {
+        let mut b = VariantArrayBuilder::new(5);
+
+        b.append_variant(Variant::from("a"));
+        b.append_null();
+        b.append_variant(Variant::from("aaa"));
+        b.append_null();
+        b.append_variant(Variant::from("aaaaa"));
+
+        let array = b.build();
+
+        let result: Vec<_> = array.iter().rev().collect();
+        assert_eq!(
+            result,
+            vec![
+                Some(Variant::from("aaaaa")),
+                None,
+                Some(Variant::from("aaa")),
+                None,
+                Some(Variant::from("a")),
+            ]
+        );
+    }
+
+    #[test]
+    fn test_variant_array_iter_empty() {
+        let v = VariantArrayBuilder::new(0).build();
+        let mut i = v.iter();
+        assert!(i.next().is_none());
+        assert!(i.next_back().is_none());
+    }
+
+    #[test]
+    fn test_from_variant_opts_into_variant_array() {
+        let v = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse), None];
+
+        let variant_array = VariantArray::from_iter(v);
+
+        assert_eq!(variant_array.len(), 4);
+
+        assert!(variant_array.is_null(0));
+
+        assert!(!variant_array.is_null(1));
+        assert_eq!(variant_array.value(1), Variant::Null);
+
+        assert!(!variant_array.is_null(2));
+        assert_eq!(variant_array.value(2), Variant::BooleanFalse);
+
+        assert!(variant_array.is_null(3));
+    }
+
+    #[test]
+    fn test_from_variants_into_variant_array() {
+        let v = vec![
+            Variant::Null,
+            Variant::BooleanFalse,
+            Variant::ShortString(ShortString::try_new("norm").unwrap()),
+        ];
+
+        let variant_array = VariantArray::from_iter(v);
+
+        assert_eq!(variant_array.len(), 3);
+
+        assert!(!variant_array.is_null(0));
+        assert_eq!(variant_array.value(0), Variant::Null);
+
+        assert!(!variant_array.is_null(1));
+        assert_eq!(variant_array.value(1), Variant::BooleanFalse);
+
+        assert!(!variant_array.is_null(2));
+        assert_eq!(
+            variant_array.value(2),
+            Variant::ShortString(ShortString::try_new("norm").unwrap())
+        );
+    }
+
+    #[test]
+    fn test_variant_equality() {
+        let v_iter = [None, Some(Variant::BooleanFalse), Some(Variant::Null), None];
+        let v = VariantArray::from_iter(v_iter.clone());
+
+        {
+            let v_copy = v.clone();
+            assert_eq!(v, v_copy);
+        }
+
+        {
+            let v_iter_reversed = v_iter.iter().cloned().rev();
+            let v_reversed = VariantArray::from_iter(v_iter_reversed);
+
+            assert_ne!(v, v_reversed);
+        }
+
+        {
+            let v_sliced = v.slice(0, 1);
+            assert_ne!(v, v_sliced);
+        }
+    }
+
+    // Generates a `#[test]` named `$fn_name` that:
+    //   1. builds a one-row struct array with an empty-metadata `metadata` column and
+    //      `$invalid_typed_value` as the `typed_value` column,
+    //   2. wraps it in a `VariantArray`, and
+    //   3. asserts that `try_value(0)` fails with an `ArrowError::CastError` whose
+    //      message contains `$error_msg`.
+    macro_rules! invalid_variant_array_test {
+        ($fn_name: ident, $invalid_typed_value: expr, $error_msg: literal) => {
+            #[test]
+            fn $fn_name() {
+                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
+                    EMPTY_VARIANT_METADATA_BYTES,
+                    1,
+                ));
+                let invalid_typed_value = $invalid_typed_value;
+
+                let struct_array = StructArrayBuilder::new()
+                    .with_field("metadata", Arc::new(metadata), false)
+                    .with_field("typed_value", Arc::new(invalid_typed_value), true)
+                    .build();
+
+                // Construction succeeds; the invalid data is only detected on access
+                let array: VariantArray = VariantArray::try_new(&struct_array)
+                    .expect("should create variant array")
+                    .into();
+
+                let result = array.try_value(0);
+                assert!(result.is_err());
+                let error = result.unwrap_err();
+                assert!(matches!(error, ArrowError::CastError(_)));
+
+                let expected: &str = $error_msg;
+                assert!(
+                    error.to_string().contains($error_msg),
+                    "error `{}` did not contain `{}`",
+                    error,
+                    expected
+                )
+            }
+        };
+    }
+
+    invalid_variant_array_test!(
+        test_variant_array_invalide_time,
+        Time64MicrosecondArray::from(vec![Some(86401000000)]),
+        "Cast error: Cast failed at index 0 (array type: Time64(µs)): Invalid microsecond from midnight: 86401000000"
+    );
+
+    invalid_variant_array_test!(
+        test_variant_array_invalid_decimal32,
+        Decimal32Array::from(vec![Some(1234567890)]),
+        "Cast error: Cast failed at index 0 (array type: Decimal32(9, 2)): Invalid argument error: 1234567890 is wider than max precision 9"
+    );
+
+    invalid_variant_array_test!(
+        test_variant_array_invalid_decimal64,
+        Decimal64Array::from(vec![Some(1234567890123456789)]),
+        "Cast error: Cast failed at index 0 (array type: Decimal64(18, 6)): Invalid argument error: 1234567890123456789 is wider than max precision 18"
+    );
+
+    invalid_variant_array_test!(
+        test_variant_array_invalid_decimal128,
+        Decimal128Array::from(vec![Some(
+            i128::from_str("123456789012345678901234567890123456789").unwrap()
+        ),]),
+        "Cast error: Cast failed at index 0 (array type: Decimal128(38, 10)): Invalid argument error: 123456789012345678901234567890123456789 is wider than max precision 38"
+    );
+}
diff --git a/parquet-variant-compute/src/variant_array_builder.rs b/parquet-variant-compute/src/variant_array_builder.rs
new file mode 100644
index 000000000000..86ece0010042
--- /dev/null
+++ b/parquet-variant-compute/src/variant_array_builder.rs
@@ -0,0 +1,632 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`VariantArrayBuilder`] implementation
+
+use crate::VariantArray;
+use arrow::array::{ArrayRef, BinaryViewArray, BinaryViewBuilder, NullBufferBuilder, StructArray};
+use arrow_schema::{ArrowError, DataType, Field, Fields};
+use parquet_variant::{
+    BuilderSpecificState, ListBuilder, MetadataBuilder, ObjectBuilder, Variant, VariantBuilderExt,
+    VariantMetadata,
+};
+use parquet_variant::{
+    ParentState, ReadOnlyMetadataBuilder, ValueBuilder, WritableMetadataBuilder,
+};
+use std::sync::Arc;
+
+/// A builder for [`VariantArray`]
+///
+/// This builder constructs a `VariantArray` one row at a time, writing both the
+/// metadata and the value of every appended row.
+///
+/// This builder always creates a `VariantArray` using [`BinaryViewArray`] for both
+/// the metadata and value fields.
+///
+/// # TODO
+/// 1. Support shredding: <https://github.com/apache/arrow-rs/issues/7895>
+///
+/// ## Example:
+/// ```
+/// # use arrow::array::Array;
+/// # use parquet_variant::{Variant, VariantBuilder, VariantBuilderExt};
+/// # use parquet_variant_compute::VariantArrayBuilder;
+/// # use parquet_variant::ShortString;
+/// // Create a new VariantArrayBuilder with a capacity of 100 rows
+/// let mut builder = VariantArrayBuilder::new(100);
+/// // append variant values
+/// builder.append_variant(Variant::from(42));
+/// // append a null row (note: this is not a `Variant::Null`)
+/// builder.append_null();
+/// // append an object to the builder using VariantBuilderExt methods directly
+/// builder.new_object()
+///   .with_field("foo", "bar")
+///   .finish();
+///
+/// // bulk insert a list of values
+/// // `Option::None` is a null value
+/// builder.extend([None, Some(Variant::from("norm"))]);
+///
+/// // create the final VariantArray
+/// let variant_array = builder.build();
+/// assert_eq!(variant_array.len(), 5);
+/// // Access the values
+/// // row 0 is not null and is an integer
+/// assert!(!variant_array.is_null(0));
+/// assert_eq!(variant_array.value(0), Variant::from(42i32));
+/// // row 1 is null
+/// assert!(variant_array.is_null(1));
+/// // row 2 is not null and is an object
+/// assert!(!variant_array.is_null(2));
+/// let value = variant_array.value(2);
+/// let obj = value.as_object().expect("expected object");
+/// assert_eq!(obj.get("foo"), Some(Variant::from("bar")));
+/// // row 3 is null
+/// assert!(variant_array.is_null(3));
+/// // row 4 is not null and is a short string
+/// assert!(!variant_array.is_null(4));
+/// let value = variant_array.value(4);
+/// assert_eq!(value, Variant::ShortString(ShortString::try_new("norm").unwrap()));
+/// ```
+#[derive(Debug)]
+pub struct VariantArrayBuilder {
+    /// Row-level null mask; becomes the null buffer of the final `StructArray`
+    nulls: NullBufferBuilder,
+    /// builder for all the metadata
+    metadata_builder: WritableMetadataBuilder,
+    /// ending offset for each serialized metadata dictionary in the buffer
+    metadata_offsets: Vec<usize>,
+    /// builder for values
+    value_builder: ValueBuilder,
+    /// ending offset for each serialized variant value in the buffer
+    value_offsets: Vec<usize>,
+    /// The fields of the final `StructArray`
+    ///
+    /// TODO: 1) Add extension type metadata
+    /// TODO: 2) Add support for shredding
+    fields: Fields,
+}
+
+impl VariantArrayBuilder {
+    /// Creates an empty builder with per-row bookkeeping preallocated for `row_capacity` rows
+    pub fn new(row_capacity: usize) -> Self {
+        // The subfields are expected to be non-nullable according to the parquet variant spec.
+        let metadata_field = Field::new("metadata", DataType::BinaryView, false);
+        let value_field = Field::new("value", DataType::BinaryView, false);
+        Self {
+            nulls: NullBufferBuilder::new(row_capacity),
+            metadata_builder: WritableMetadataBuilder::default(),
+            metadata_offsets: Vec::with_capacity(row_capacity),
+            value_builder: ValueBuilder::new(),
+            value_offsets: Vec::with_capacity(row_capacity),
+            fields: Fields::from(vec![metadata_field, value_field]),
+        }
+    }
+
+    /// Consumes the builder and assembles the final [`VariantArray`]
+    pub fn build(self) -> VariantArray {
+        let Self {
+            mut nulls,
+            metadata_builder,
+            metadata_offsets,
+            value_builder,
+            value_offsets,
+            fields,
+        } = self;
+
+        let metadata_buffer = metadata_builder.into_inner();
+        let metadata_array = binary_view_array_from_buffers(metadata_buffer, metadata_offsets);
+
+        let value_buffer = value_builder.into_inner();
+        let value_array = binary_view_array_from_buffers(value_buffer, value_offsets);
+
+        // Then build the final struct array from the two subfield arrays and the row-level nulls
+        let inner = StructArray::new(
+            fields,
+            vec![
+                Arc::new(metadata_array) as ArrayRef,
+                Arc::new(value_array) as ArrayRef,
+            ],
+            nulls.finish(),
+        );
+        // TODO add arrow extension type metadata
+
+        VariantArray::try_new(&inner).expect("valid VariantArray by construction")
+    }
+
+    /// Appends a null row to the builder.
+    pub fn append_null(&mut self) {
+        self.nulls.append_null();
+        // Subfields are non-nullable per the parquet variant spec, so still record both offsets.
+        self.metadata_offsets.push(self.metadata_builder.offset());
+        self.value_offsets.push(self.value_builder.offset());
+    }
+
+    /// Append the [`Variant`] to the builder as the next row
+    pub fn append_variant(&mut self, variant: Variant) {
+        ValueBuilder::append_variant(self.parent_state(), variant);
+    }
+
+    /// Creates a builder-specific parent state that borrows this builder's offsets and nulls
+    fn parent_state(&mut self) -> ParentState<'_, ArrayBuilderState<'_>> {
+        let state = ArrayBuilderState {
+            metadata_offsets: &mut self.metadata_offsets,
+            value_offsets: &mut self.value_offsets,
+            nulls: &mut self.nulls,
+        };
+
+        ParentState::new(&mut self.value_builder, &mut self.metadata_builder, state)
+    }
+}
+
+impl<'m, 'v> Extend<Option<Variant<'m, 'v>>> for VariantArrayBuilder {
+    /// Appends one row per item, in order: `Some` becomes a variant row, `None` a null row.
+    fn extend<T: IntoIterator<Item = Option<Variant<'m, 'v>>>>(&mut self, iter: T) {
+        iter.into_iter().for_each(|row| match row {
+            // A present variant is written as the next non-null row.
+            Some(variant) => self.append_variant(variant),
+            None => self.append_null(),
+        });
+    }
+}
+
+/// Builder-specific state for array building that manages array-level offsets and nulls. See
+/// [`VariantBuilderExt`] for details.
+#[derive(Debug)]
+pub struct ArrayBuilderState<'a> {
+    metadata_offsets: &'a mut Vec<usize>,
+    value_offsets: &'a mut Vec<usize>,
+    nulls: &'a mut NullBufferBuilder,
+}
+
+// All changes are pending until finalized
+impl BuilderSpecificState for ArrayBuilderState<'_> {
+    fn finish(
+        &mut self,
+        metadata_builder: &mut dyn MetadataBuilder,
+        value_builder: &mut ValueBuilder,
+    ) {
+        self.metadata_offsets.push(metadata_builder.finish());
+        self.value_offsets.push(value_builder.offset());
+        self.nulls.append_non_null();
+    }
+}
+
+impl VariantBuilderExt for VariantArrayBuilder {
+    type State<'a>
+        = ArrayBuilderState<'a>
+    where
+        Self: 'a;
+
+    /// Appending NULL to a variant array produces an actual NULL value
+    fn append_null(&mut self) {
+        self.append_null(); // resolves to the inherent method, not this trait method
+    }
+
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.append_variant(value.into());
+    }
+
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(ListBuilder::new(self.parent_state(), false))
+    }
+
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(ObjectBuilder::new(self.parent_state(), false))
+    }
+}
+
+/// A builder for creating only the value column of a [`VariantArray`]
+///
+/// This builder is used when you have existing metadata and only need to build
+/// the value column. It's useful for scenarios like variant unshredding, data
+/// transformation, or filtering where you want to reuse existing metadata.
+///
+/// The builder produces a [`BinaryViewArray`] that can be combined with existing
+/// metadata to create a complete [`VariantArray`].
+///
+/// # Example:
+/// ```
+/// # use arrow::array::Array;
+/// # use parquet_variant::{Variant};
+/// # use parquet_variant_compute::VariantValueArrayBuilder;
+/// // Create a variant value builder for 10 rows
+/// let mut builder = VariantValueArrayBuilder::new(10);
+///
+/// // Append some values. Each variant carries its own metadata, which the
+/// // builder reuses instead of creating new metadata.
+/// builder.append_value(Variant::from(42));
+/// builder.append_null();
+/// builder.append_value(Variant::from("hello"));
+///
+/// // Build the final value array
+/// let value_array = builder.build().unwrap();
+/// assert_eq!(value_array.len(), 3);
+/// ```
+#[derive(Debug)]
+pub struct VariantValueArrayBuilder {
+    value_builder: ValueBuilder,
+    value_offsets: Vec<usize>,
+    nulls: NullBufferBuilder,
+}
+
+impl VariantValueArrayBuilder {
+    /// Create a new `VariantValueArrayBuilder` with the specified row capacity
+    pub fn new(row_capacity: usize) -> Self {
+        Self {
+            value_builder: ValueBuilder::new(),
+            value_offsets: Vec::with_capacity(row_capacity),
+            nulls: NullBufferBuilder::new(row_capacity),
+        }
+    }
+
+    /// Build the final value array
+    ///
+    /// Returns a [`BinaryViewArray`] containing the serialized variant values.
+    /// This can be combined with existing metadata to create a complete [`VariantArray`].
+    pub fn build(mut self) -> Result<BinaryViewArray, ArrowError> {
+        let value_buffer = self.value_builder.into_inner();
+        let mut array = binary_view_array_from_buffers(value_buffer, self.value_offsets);
+        if let Some(nulls) = self.nulls.finish() {
+            let (views, buffers, _) = array.into_parts();
+            array = BinaryViewArray::try_new(views, buffers, Some(nulls))?;
+        }
+        Ok(array)
+    }
+
+    /// Append a null row to the builder
+    ///
+    /// WARNING: It is only valid to call this method when building the `value` field of a shredded
+    /// variant column (which is nullable). The `value` field of a binary (unshredded) variant
+    /// column is non-nullable, and callers should instead invoke [`Self::append_value`] with
+    /// `Variant::Null`.
+    pub fn append_null(&mut self) {
+        self.value_offsets.push(self.value_builder.offset());
+        self.nulls.append_null();
+    }
+
+    /// Append a variant value as the next row, reusing the metadata the value already carries
+    ///
+    /// # Arguments
+    /// * `value` - The variant value to append; its own metadata (`value.metadata()`) is cloned
+    ///   and handed to [`Self::builder_ext`], so no separate metadata argument is needed
+    ///
+    /// # Returns
+    /// Nothing (`()`). NOTE(review): how field names that are missing from the metadata are
+    /// handled is decided by the wrapped builder and is not visible here -- confirm.
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::Variant;
+    /// # use parquet_variant_compute::VariantValueArrayBuilder;
+    /// let mut builder = VariantValueArrayBuilder::new(10);
+    /// builder.append_value(Variant::from(42));
+    /// ```
+    pub fn append_value(&mut self, value: Variant<'_, '_>) {
+        // NOTE: Have to clone because the builder consumes `value`
+        self.builder_ext(&value.metadata().clone())
+            .append_value(value);
+    }
+
+    /// Creates a builder-specific parent state.
+    ///
+    /// For example, this can be useful for code that wants to copy a subset of fields from an
+    /// object `value` as a new row of `value_array_builder`:
+    ///
+    /// ```no_run
+    /// # use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant};
+    /// # use parquet_variant_compute::VariantValueArrayBuilder;
+    /// # let value = Variant::Null;
+    /// # let mut value_array_builder = VariantValueArrayBuilder::new(0);
+    /// # fn should_keep(field_name: &str) -> bool { todo!() };
+    /// let Variant::Object(obj) = value else {
+    ///     panic!("Not a variant object");
+    /// };
+    /// let mut metadata_builder = ReadOnlyMetadataBuilder::new(&obj.metadata);
+    /// let state = value_array_builder.parent_state(&mut metadata_builder);
+    /// let mut object_builder = ObjectBuilder::new(state, false);
+    /// for (field_name, field_value) in obj.iter() {
+    ///     if should_keep(field_name) {
+    ///         object_builder.insert_bytes(field_name, field_value);
+    ///     }
+    /// }
+    /// object_builder.finish(); // appends the filtered object
+    /// ```
+    pub fn parent_state<'a>(
+        &'a mut self,
+        metadata_builder: &'a mut dyn MetadataBuilder,
+    ) -> ParentState<'a, ValueArrayBuilderState<'a>> {
+        let state = ValueArrayBuilderState {
+            value_offsets: &mut self.value_offsets,
+            nulls: &mut self.nulls,
+        };
+
+        ParentState::new(&mut self.value_builder, metadata_builder, state)
+    }
+
+    /// Creates a thin [`VariantBuilderExt`] wrapper for this builder, which hides the `metadata`
+    /// parameter (similar to the way [`parquet_variant::ObjectFieldBuilder`] hides field names).
+    pub fn builder_ext<'a>(
+        &'a mut self,
+        metadata: &'a VariantMetadata<'a>,
+    ) -> VariantValueArrayBuilderExt<'a> {
+        VariantValueArrayBuilderExt {
+            metadata_builder: ReadOnlyMetadataBuilder::new(metadata),
+            value_builder: self,
+        }
+    }
+}
+
+/// Builder-specific state for array building that manages array-level offsets and nulls. See
+/// [`VariantBuilderExt`] for details.
+#[derive(Debug)]
+pub struct ValueArrayBuilderState<'a> {
+    value_offsets: &'a mut Vec<usize>,
+    nulls: &'a mut NullBufferBuilder,
+}
+
+// All changes are pending until finalized
+impl BuilderSpecificState for ValueArrayBuilderState<'_> {
+    fn finish(
+        &mut self,
+        _metadata_builder: &mut dyn MetadataBuilder,
+        value_builder: &mut ValueBuilder,
+    ) {
+        self.value_offsets.push(value_builder.offset());
+        self.nulls.append_non_null();
+    }
+}
+
+/// A thin [`VariantBuilderExt`] wrapper that hides the short-lived (per-row)
+/// [`ReadOnlyMetadataBuilder`] instances that [`VariantValueArrayBuilder`] requires.
+pub struct VariantValueArrayBuilderExt<'a> {
+    metadata_builder: ReadOnlyMetadataBuilder<'a>,
+    value_builder: &'a mut VariantValueArrayBuilder,
+}
+
+impl<'a> VariantValueArrayBuilderExt<'a> {
+    /// Creates a new instance from a metadata builder and a reference to a variant value builder.
+    pub fn new(
+        metadata_builder: ReadOnlyMetadataBuilder<'a>,
+        value_builder: &'a mut VariantValueArrayBuilder,
+    ) -> Self {
+        Self {
+            metadata_builder,
+            value_builder,
+        }
+    }
+}
+
+impl<'a> VariantBuilderExt for VariantValueArrayBuilderExt<'a> {
+    type State<'b>
+        = ValueArrayBuilderState<'b>
+    where
+        Self: 'b;
+
+    fn append_null(&mut self) {
+        self.value_builder.append_null()
+    }
+
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        let state = self.value_builder.parent_state(&mut self.metadata_builder);
+        ValueBuilder::append_variant_bytes(state, value.into());
+    }
+
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
+        let state = self.value_builder.parent_state(&mut self.metadata_builder);
+        Ok(ListBuilder::new(state, false))
+    }
+
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
+        let state = self.value_builder.parent_state(&mut self.metadata_builder);
+        Ok(ObjectBuilder::new(state, false))
+    }
+}
+
+fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> BinaryViewArray {
+    // No offset exceeds the buffer length, so once the buffer length is known to fit in u32,
+    // every per-row cast performed while iterating below is lossless.
+    u32::try_from(buffer.len()).expect("buffer length should fit in u32");
+
+    let mut view_builder = BinaryViewBuilder::with_capacity(offsets.len());
+    let block_id = view_builder.append_block(buffer.into());
+    // TODO this can be much faster if it creates the views directly during append
+    let mut row_start = 0u32;
+    // Safe cast: validated max offset fits in u32 above. Each row ends where the next begins.
+    for row_end in offsets.into_iter().map(|offset| offset as u32) {
+        view_builder
+            .try_append_view(block_id, row_start, row_end - row_start)
+            .expect("Failed to append view");
+        row_start = row_end;
+    }
+    view_builder.finish()
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use arrow::array::Array;
+    use parquet_variant::{ShortString, Variant};
+
+    /// Test that both the metadata and value buffers are non nullable
+    #[test]
+    fn test_variant_array_builder_non_nullable() {
+        let mut builder = VariantArrayBuilder::new(10);
+
+        builder.extend([
+            None, // should not panic
+            Some(Variant::from(42_i32)),
+        ]);
+
+        let variant_array = builder.build();
+
+        assert_eq!(variant_array.len(), 2);
+        assert!(variant_array.is_null(0));
+        assert!(!variant_array.is_null(1));
+        assert_eq!(variant_array.value(1), Variant::from(42i32));
+
+        // the metadata and value fields of non shredded variants should not be null
+        assert!(variant_array.metadata_field().nulls().is_none());
+        assert!(variant_array.value_field().unwrap().nulls().is_none());
+        let DataType::Struct(fields) = variant_array.data_type() else {
+            panic!("Expected VariantArray to have Struct data type");
+        };
+        for field in fields {
+            assert!(
+                !field.is_nullable(),
+                "Field {} should be non-nullable",
+                field.name()
+            );
+        }
+    }
+
+    /// Test appending variants to the array builder
+    #[test]
+    fn test_variant_array_builder() {
+        let mut builder = VariantArrayBuilder::new(10);
+        builder.append_null(); // should not panic
+        builder.append_variant(Variant::from(42i32));
+
+        // make an object in the next row
+        builder.new_object().with_field("foo", "bar").finish();
+
+        // append a new list
+        builder
+            .new_list()
+            .with_value(Variant::from(1i32))
+            .with_value(Variant::from(2i32))
+            .finish();
+        let variant_array = builder.build();
+
+        assert_eq!(variant_array.len(), 4);
+        assert!(variant_array.is_null(0));
+        assert!(!variant_array.is_null(1));
+        assert_eq!(variant_array.value(1), Variant::from(42i32));
+        assert!(!variant_array.is_null(2));
+        let variant = variant_array.value(2);
+        let variant = variant.as_object().expect("variant to be an object");
+        assert_eq!(variant.get("foo").unwrap(), Variant::from("bar"));
+        assert!(!variant_array.is_null(3));
+        let variant = variant_array.value(3);
+        let list = variant.as_list().expect("variant to be a list");
+        assert_eq!(list.len(), 2);
+    }
+
+    #[test]
+    fn test_extend_variant_array_builder() {
+        let mut b = VariantArrayBuilder::new(3);
+        b.extend([None, Some(Variant::Null), Some(Variant::from("norm"))]);
+
+        let variant_array = b.build();
+
+        assert_eq!(variant_array.len(), 3);
+        assert!(variant_array.is_null(0));
+        assert_eq!(variant_array.value(1), Variant::Null);
+        assert_eq!(
+            variant_array.value(2),
+            Variant::ShortString(ShortString::try_new("norm").unwrap())
+        );
+    }
+
+    #[test]
+    fn test_variant_value_array_builder_basic() {
+        let mut builder = VariantValueArrayBuilder::new(10);
+
+        // Append some values
+        builder.append_value(Variant::from(42i32));
+        builder.append_null();
+        builder.append_value(Variant::from("hello"));
+
+        let value_array = builder.build().unwrap();
+        assert_eq!(value_array.len(), 3);
+    }
+
+    #[test]
+    fn test_variant_value_array_builder_with_objects() {
+        // Populate a variant array with objects
+        let mut builder = VariantArrayBuilder::new(3);
+        builder
+            .new_object()
+            .with_field("name", "Alice")
+            .with_field("age", 30i32)
+            .finish();
+
+        builder
+            .new_object()
+            .with_field("name", "Bob")
+            .with_field("age", 42i32)
+            .with_field("city", "Wonderland")
+            .finish();
+
+        builder
+            .new_object()
+            .with_field("name", "Charlie")
+            .with_field("age", 1i32)
+            .finish();
+
+        let array = builder.build();
+
+        // Copy (some of) the objects over to the value array builder
+        //
+        // NOTE: Because we will reuse the metadata column, we cannot reorder rows. We can only
+        // filter or manipulate values within a row.
+        let mut value_builder = VariantValueArrayBuilder::new(3);
+
+        // straight copy
+        value_builder.append_value(array.value(0));
+
+        // filtering fields takes more work because we need to manually create an object builder
+        let value = array.value(1);
+        let mut builder = value_builder.builder_ext(value.metadata());
+        builder
+            .new_object()
+            .with_field("name", value.get_object_field("name").unwrap())
+            .with_field("age", value.get_object_field("age").unwrap())
+            .finish();
+
+        // same bytes, but now nested and duplicated inside a list
+        let value = array.value(2);
+        let mut builder = value_builder.builder_ext(value.metadata());
+        builder
+            .new_list()
+            .with_value(value.clone())
+            .with_value(value.clone())
+            .finish();
+
+        let array2 = VariantArray::from_parts(
+            array.metadata_field().clone(),
+            Some(value_builder.build().unwrap()),
+            None,
+            None,
+        );
+
+        assert_eq!(array2.len(), 3);
+        assert_eq!(array.value(0), array2.value(0));
+
+        assert_eq!(
+            array.value(1).get_object_field("name"),
+            array2.value(1).get_object_field("name")
+        );
+        assert_eq!(
+            array.value(1).get_object_field("age"),
+            array2.value(1).get_object_field("age")
+        );
+
+        assert_eq!(array.value(2), array2.value(2).get_list_element(0).unwrap());
+        assert_eq!(array.value(2), array2.value(2).get_list_element(1).unwrap());
+    }
+}
diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs
new file mode 100644
index 000000000000..624c8ae128dc
--- /dev/null
+++ b/parquet-variant-compute/src/variant_get.rs
@@ -0,0 +1,4161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow::{
+    array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
+    compute::CastOptions,
+    datatypes::Field,
+    error::Result,
+};
+use arrow_schema::{ArrowError, DataType, FieldRef};
+use parquet_variant::{VariantPath, VariantPathElement};
+
+use crate::VariantArray;
+use crate::variant_array::BorrowedShreddingState;
+use crate::variant_to_arrow::make_variant_to_arrow_row_builder;
+
+use arrow::array::AsArray;
+use std::sync::Arc;
+
+pub(crate) enum ShreddedPathStep<'a> {
+    /// Path step succeeded, return the new shredding state
+    Success(BorrowedShreddingState<'a>),
+    /// The path element is not present in the `typed_value` column and there is no `value` column,
+    /// so we know it does not exist. It, and all paths under it, are all-NULL.
+    Missing,
+    /// The path element is not present in the `typed_value` column and must be retrieved from the `value`
+    /// column instead. The caller should be prepared to handle any value, including the requested
+    /// type, an arbitrary "wrong" type, or `Variant::Null`.
+    NotShredded,
+}
+
+/// Given a shredded variant field -- a `(value?, typed_value?)` pair -- try to take one path step
+/// deeper. A `Field` step with no `typed_value` or no such field yields `NotShredded` (`value`
+/// exists) or `Missing`; so does a non-struct `typed_value`, unless `!cast_options.safe` (error).
+///
+/// TODO: Support `VariantPathElement::Index`? It wouldn't be easy, and maybe not even possible.
+pub(crate) fn follow_shredded_path_element<'a>(
+    shredding_state: &BorrowedShreddingState<'a>,
+    path_element: &VariantPathElement<'_>,
+    cast_options: &CastOptions,
+) -> Result<ShreddedPathStep<'a>> {
+    // If the requested path element is not present in `typed_value`, and `value` is missing, then
+    // we know it does not exist; it, and all paths under it, are all-NULL.
+    let missing_path_step = || match shredding_state.value_field() {
+        Some(_) => ShreddedPathStep::NotShredded,
+        None => ShreddedPathStep::Missing,
+    };
+
+    let Some(typed_value) = shredding_state.typed_value_field() else {
+        return Ok(missing_path_step());
+    };
+
+    match path_element {
+        VariantPathElement::Field { name } => {
+            // Try to step into the requested field name of a struct.
+            // First, try to downcast to StructArray
+            let Some(struct_array) = typed_value.as_any().downcast_ref::<StructArray>() else {
+                // Downcast failure - if strict cast options are enabled, this should be an error
+                if !cast_options.safe {
+                    return Err(ArrowError::CastError(format!(
+                        "Cannot access field '{}' on non-struct type: {}",
+                        name,
+                        typed_value.data_type()
+                    )));
+                }
+                // With safe cast options, fall back to missing_path_step (NotShredded or Missing)
+                return Ok(missing_path_step());
+            };
+
+            // Now try to find the column - missing column in a present struct is just missing data
+            let Some(field) = struct_array.column_by_name(name) else {
+                // Missing column in a present struct is just missing, not wrong - return Ok
+                return Ok(missing_path_step());
+            };
+
+            let struct_array = field.as_struct_opt().ok_or_else(|| {
+                // TODO: Should we blow up? Or just end the traversal and let the normal
+                // variant pathing code sort out the mess that it must anyway be
+                // prepared to handle?
+                ArrowError::InvalidArgumentError(format!(
+                    "Expected Struct array while following path, got {}",
+                    field.data_type(),
+                ))
+            })?;
+
+            let state = BorrowedShreddingState::try_from(struct_array)?;
+            Ok(ShreddedPathStep::Success(state))
+        }
+        VariantPathElement::Index { .. } => {
+            // TODO: Support array indexing. Among other things, it will require slicing not
+            // only the array we have here, but also the corresponding metadata and null masks.
+            Err(ArrowError::NotYetImplemented(
+                "Pathing into shredded variant array index".into(),
+            ))
+        }
+    }
+}
+
+/// Follows the given path as far as possible through shredded variant fields. If the path ends on a
+/// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path
+/// and extract the requested value on a per-row basis.
+fn shredded_get_path(
+    input: &VariantArray,
+    path: &[VariantPathElement<'_>],
+    as_field: Option<&Field>,
+    cast_options: &CastOptions,
+) -> Result<ArrayRef> {
+    // Helper that creates a new VariantArray from the given nested value and typed_value columns,
+    // properly accounting for accumulated nulls from path traversal
+    let make_target_variant =
+        |value: Option<BinaryViewArray>,
+         typed_value: Option<ArrayRef>,
+         accumulated_nulls: Option<arrow::buffer::NullBuffer>| {
+            let metadata = input.metadata_field().clone();
+            VariantArray::from_parts(metadata, value, typed_value, accumulated_nulls)
+        };
+
+    // Helper that shreds a VariantArray to a specific type.
+    let shred_basic_variant =
+        |target: VariantArray, path: VariantPath<'_>, as_field: Option<&Field>| {
+            let as_type = as_field.map(|f| f.data_type());
+            let mut builder = make_variant_to_arrow_row_builder(
+                target.metadata_field(),
+                path,
+                as_type,
+                cast_options,
+                target.len(),
+            )?;
+            for i in 0..target.len() {
+                if target.is_null(i) {
+                    builder.append_null()?;
+                } else if !cast_options.safe {
+                    let value = target.try_value(i)?;
+                    builder.append_value(value)?;
+                } else {
+                    let _ = match target.try_value(i) {
+                        Ok(v) => builder.append_value(v)?,
+                        Err(_) => {
+                            builder.append_null()?;
+                            false // add this to make match arms have the same return type
+                        }
+                    };
+                }
+            }
+            builder.finish()
+        };
+
+    // Peel away the prefix of path elements that traverses the shredded parts of this variant
+    // column. Shredding will traverse the rest of the path on a per-row basis.
+    let mut shredding_state = input.shredding_state().borrow();
+    let mut accumulated_nulls = input.inner().nulls().cloned();
+    let mut path_index = 0;
+    for path_element in path {
+        match follow_shredded_path_element(&shredding_state, path_element, cast_options)? {
+            ShreddedPathStep::Success(state) => {
+                // Union nulls from the typed_value we just accessed
+                if let Some(typed_value) = shredding_state.typed_value_field() {
+                    accumulated_nulls = arrow::buffer::NullBuffer::union(
+                        accumulated_nulls.as_ref(),
+                        typed_value.nulls(),
+                    );
+                }
+                shredding_state = state;
+                path_index += 1;
+                continue;
+            }
+            ShreddedPathStep::Missing => {
+                let num_rows = input.len();
+                let arr = match as_field.map(|f| f.data_type()) {
+                    Some(data_type) => Arc::new(array::new_null_array(data_type, num_rows)) as _,
+                    None => Arc::new(array::NullArray::new(num_rows)) as _,
+                };
+                return Ok(arr);
+            }
+            ShreddedPathStep::NotShredded => {
+                let target = make_target_variant(
+                    shredding_state.value_field().cloned(),
+                    None,
+                    accumulated_nulls,
+                );
+                return shred_basic_variant(target, path[path_index..].into(), as_field);
+            }
+        };
+    }
+
+    // Path exhausted! Create a new `VariantArray` for the location we landed on.
+    let target = make_target_variant(
+        shredding_state.value_field().cloned(),
+        shredding_state.typed_value_field().cloned(),
+        accumulated_nulls,
+    );
+
+    // If our caller did not request any specific type, we can just return whatever we landed on.
+    let Some(as_field) = as_field else {
+        return Ok(ArrayRef::from(target));
+    };
+
+    // Try to return the typed value directly when we have a perfect shredding match.
+    if let Some(shredded) = try_perfect_shredding(&target, as_field) {
+        return Ok(shredded);
+    }
+
+    // Structs are special. Recurse into each field separately, hoping to follow the shredding even
+    // further, and build up the final struct from those individually shredded results.
+    if let DataType::Struct(fields) = as_field.data_type() {
+        let children = fields
+            .iter()
+            .map(|field| {
+                shredded_get_path(
+                    &target,
+                    &[VariantPathElement::from(field.name().as_str())],
+                    Some(field),
+                    cast_options,
+                )
+            })
+            .collect::<Result<Vec<_>>>()?;
+
+        let struct_nulls = target.nulls().cloned();
+
+        return Ok(Arc::new(StructArray::try_new(
+            fields.clone(),
+            children,
+            struct_nulls,
+        )?));
+    }
+
+    // Not a struct, so directly shred the variant as the requested type
+    shred_basic_variant(target, VariantPath::default(), Some(as_field))
+}
+
+/// Attempts to satisfy a typed request straight from the shredded `typed_value` column.
+///
+/// Returns `Some(typed_value)` only when all of the following hold:
+/// - the requested type is not a struct,
+/// - `variant_array` has a `typed_value` column whose data type equals the requested type, and
+/// - the `value` column is either absent or null in every row, so no row can be holding its
+///   data in `value` while `typed_value` is null.
+///
+/// Returns `None` in every other case.
+fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Option<ArrayRef> {
+    let requested_type = as_field.data_type();
+    if let DataType::Struct(_) = requested_type {
+        return None;
+    }
+
+    let shredded = variant_array.typed_value_field()?;
+    if shredded.data_type() != requested_type {
+        return None;
+    }
+
+    // Guard against rows whose data lives in the `value` column instead of `typed_value`.
+    let nothing_in_value_column = match variant_array.value_field() {
+        Some(values) => values.null_count() == values.len(),
+        None => true,
+    };
+
+    if nothing_in_value_column {
+        // Perfect shredding: the value is entirely shredded out, so the typed column is the
+        // answer as-is.
+        Some(shredded.clone())
+    } else {
+        None
+    }
+}
+
+/// Extracts `options.path` from the variant values in `input` and returns the result as an array.
+///
+/// The type of the returned array is selected by `options.as_type`:
+/// 1. `as_type: None`: a VariantArray is returned. The values in this new VariantArray will point
+///    to the specified path.
+/// 2. `as_type: Some(<specific field>)`: an array of the specified type is returned.
+///
+/// `options.cast_options` is passed through to the conversion into the requested type.
+///
+/// # Errors
+///
+/// Returns an error if `input` is not accepted by `VariantArray::try_new`, or if extracting the
+/// path fails.
+///
+/// TODO: How would a caller request a struct or list type where the fields/elements can be any
+/// variant? Caller can pass None as the requested type to fetch a specific path, but it would
+/// quickly become annoying (and inefficient) to call `variant_get` for each leaf value in a struct or
+/// list and then try to assemble the results.
+pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {
+    let variant_array = VariantArray::try_new(input)?;
+
+    shredded_get_path(
+        &variant_array,
+        &options.path,
+        options.as_type.as_deref(),
+        &options.cast_options,
+    )
+}
+
+/// Controls the action of the variant_get kernel.
+///
+/// Build one with [`GetOptions::new`] or [`GetOptions::new_with_path`] and refine it with the
+/// `with_*` methods.
+#[derive(Debug, Clone, Default)]
+pub struct GetOptions<'a> {
+    /// The path to extract from each variant value.
+    pub path: VariantPath<'a>,
+    /// If `as_type` is `None`, the returned array will itself be a VariantArray.
+    ///
+    /// If `as_type` is `Some(type)`, the field is returned as the specified type.
+    pub as_type: Option<FieldRef>,
+    /// Controls the casting behavior (e.g. error vs substituting null on cast error).
+    pub cast_options: CastOptions<'a>,
+}
+
+impl<'a> GetOptions<'a> {
+    /// Creates options with every field at its default: default path, no requested type
+    /// (the result is a variant), and default cast options.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates options that fetch `path` as a variant, using default cast options.
+    pub fn new_with_path(path: VariantPath<'a>) -> Self {
+        Self {
+            path,
+            ..Self::new()
+        }
+    }
+
+    /// Sets the type to return; `None` requests a variant result.
+    pub fn with_as_type(self, as_type: Option<FieldRef>) -> Self {
+        Self { as_type, ..self }
+    }
+
+    /// Sets the cast options used when converting to the requested type.
+    pub fn with_cast_options(self, cast_options: CastOptions<'a>) -> Self {
+        Self {
+            cast_options,
+            ..self
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::str::FromStr;
+    use std::sync::Arc;
+
+    use super::{GetOptions, variant_get};
+    use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
+    use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
+    use arrow::array::{
+        Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
+        Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
+        Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
+        LargeBinaryArray, LargeStringArray, NullBuilder, StringArray, StringViewArray, StructArray,
+        Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
+    };
+    use arrow::buffer::NullBuffer;
+    use arrow::compute::CastOptions;
+    use arrow::datatypes::DataType::{Int16, Int32, Int64};
+    use arrow::datatypes::i256;
+    use arrow::util::display::FormatOptions;
+    use arrow_schema::DataType::{Boolean, Float32, Float64, Int8};
+    use arrow_schema::{DataType, Field, FieldRef, Fields, IntervalUnit, TimeUnit};
+    use chrono::DateTime;
+    use parquet_variant::{
+        EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
+        VariantDecimalType, VariantPath,
+    };
+
+    /// Parses `input_json` into a one-row variant array, extracts `path` from it with
+    /// `variant_get` (no requested type), and asserts that the single, non-null result equals
+    /// the variant parsed from `expected_json`.
+    fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
+        // Turns one JSON document into a one-row variant array
+        let parse = |json: &str| {
+            let strings: ArrayRef = Arc::new(StringArray::from(vec![Some(json)]));
+            json_to_variant(&strings).unwrap()
+        };
+
+        let input = ArrayRef::from(parse(input_json));
+        let extracted = variant_get(&input, GetOptions::new_with_path(path)).unwrap();
+        let expected = parse(expected_json);
+
+        let actual = VariantArray::try_new(&extracted).unwrap();
+        assert_eq!(actual.len(), 1, "Expected result array to have length 1");
+        assert!(
+            actual.nulls().is_none(),
+            "Expected no nulls in result array"
+        );
+        assert_eq!(
+            actual.value(0),
+            expected.value(0),
+            "Result variant does not match expected variant"
+        );
+    }
+
+    /// A primitive can be extracted from a top-level object field.
+    #[test]
+    fn get_primitive_variant_field() {
+        let json = r#"{"some_field": 1234}"#;
+        let path = VariantPath::from("some_field");
+        single_variant_get_test(json, path, "1234");
+    }
+
+    /// A primitive can be extracted from a top-level list by index.
+    #[test]
+    fn get_primitive_variant_list_index() {
+        let path = VariantPath::from(0);
+        single_variant_get_test("[1234, 5678]", path, "1234");
+    }
+
+    /// A primitive can be extracted through two levels of objects.
+    #[test]
+    fn get_primitive_variant_inside_object_of_object() {
+        let json = r#"{"top_level_field": {"inner_field": 1234}}"#;
+        let path = VariantPath::from("top_level_field").join("inner_field");
+        single_variant_get_test(json, path, "1234");
+    }
+
+    /// A primitive can be extracted from an object nested in a list.
+    #[test]
+    fn get_primitive_variant_inside_list_of_object() {
+        let json = r#"[{"some_field": 1234}]"#;
+        let path = VariantPath::from(0).join("some_field");
+        single_variant_get_test(json, path, "1234");
+    }
+
+    /// A primitive can be extracted from a list nested in an object.
+    #[test]
+    fn get_primitive_variant_inside_object_of_list() {
+        let json = r#"{"some_field": [1234]}"#;
+        let path = VariantPath::from("some_field").join(0);
+        single_variant_get_test(json, path, "1234");
+    }
+
+    /// Extracting an object-valued field returns the whole nested object.
+    #[test]
+    fn get_complex_variant() {
+        let json = r#"{"top_level_field": {"inner_field": 1234}}"#;
+        let path = VariantPath::from("top_level_field");
+        single_variant_get_test(json, path, r#"{"inner_field": 1234}"#);
+    }
+
+    /// Partial Shredding: extract a value as a VariantArray
+    ///
+    /// Expands to the body of a test that calls `$data_fn()` to obtain a four-row input, runs
+    /// `variant_get` with default options, and checks the rows: 34 (as `$primitive_type`),
+    /// null, the string "n/a", and 100 (as `$primitive_type`).
+    macro_rules! numeric_partially_shredded_test {
+        ($primitive_type:ty, $data_fn:ident) => {
+            let input = $data_fn();
+            let output = variant_get(&input, GetOptions::new()).unwrap();
+
+            // With no requested type the output must be readable as a VariantArray
+            let variants = VariantArray::try_new(&output).unwrap();
+            assert_eq!(variants.len(), 4);
+
+            // Every row must come back unchanged
+            assert_eq!(
+                variants.value(0),
+                Variant::from(<$primitive_type>::try_from(34u8).unwrap())
+            );
+            assert!(!variants.is_valid(1));
+            assert_eq!(variants.value(2), Variant::from("n/a"));
+            assert_eq!(
+                variants.value(3),
+                Variant::from(<$primitive_type>::try_from(100u8).unwrap())
+            );
+        };
+    }
+
+    /// Generates `fn $func_name() -> ArrayRef`, which builds a four-row variant array with
+    /// `metadata`, `typed_value` and `value` columns. The `typed_value` column is whatever
+    /// `$typed_value_array_gen()` returns.
+    ///
+    /// The `value` column and the struct-level null buffer are laid out as:
+    /// - row 0: no `value`, row is valid
+    /// - row 1: empty `value`, row is null
+    /// - row 2: `value` holds the variant string "n/a", row is valid
+    /// - row 3: no `value`, row is valid
+    macro_rules! partially_shredded_variant_array_gen {
+        ($func_name:ident,  $typed_value_array_gen: expr) => {
+            fn $func_name() -> ArrayRef {
+                // Encode the string "n/a" once; row 2 stores it in the `value` column
+                let mut variant_builder = parquet_variant::VariantBuilder::new();
+                variant_builder.append_value("n/a");
+                let (metadata_bytes, string_value) = variant_builder.finish();
+
+                // Only row 1 is null at the struct level
+                let row_validity = NullBuffer::from(vec![true, false, true, true]);
+
+                // Every row shares the same metadata
+                let metadata_column =
+                    BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata_bytes, 4));
+
+                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
+                // about why row 1 is an empty, but non-null, value.
+                let value_column = BinaryViewArray::from(vec![
+                    None,                // row 0 is shredded, so no value
+                    Some(b"" as &[u8]),  // row 1 is null, so empty value
+                    Some(&string_value), // row 2 carries the encoded string "n/a"
+                    None,                // row 3 is shredded, so no value
+                ]);
+
+                let typed_value_column = $typed_value_array_gen();
+
+                let struct_array = StructArrayBuilder::new()
+                    .with_field("metadata", Arc::new(metadata_column), false)
+                    .with_field("typed_value", Arc::new(typed_value_column), true)
+                    .with_field("value", Arc::new(value_column), true)
+                    .with_nulls(row_validity)
+                    .build();
+
+                let variant_array =
+                    VariantArray::try_new(&struct_array).expect("should create variant array");
+                ArrayRef::from(variant_array)
+            }
+        };
+    }
+
+    // Partially shredded numeric columns read back as variants, one test per numeric type.
+    // Each test passes the primitive type and the matching fixture function to
+    // `numeric_partially_shredded_test!`, which performs the actual assertions.
+    #[test]
+    fn get_variant_partially_shredded_int8_as_variant() {
+        numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array);
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_int16_as_variant() {
+        numeric_partially_shredded_test!(i16, partially_shredded_int16_variant_array);
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_int32_as_variant() {
+        numeric_partially_shredded_test!(i32, partially_shredded_int32_variant_array);
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_int64_as_variant() {
+        numeric_partially_shredded_test!(i64, partially_shredded_int64_variant_array);
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_float32_as_variant() {
+        numeric_partially_shredded_test!(f32, partially_shredded_float32_variant_array);
+    }
+
+    #[test]
+    fn get_variant_partially_shredded_float64_as_variant() {
+        numeric_partially_shredded_test!(f64, partially_shredded_float64_variant_array);
+    }
+
+    /// Partial Shredding: a boolean fixture read back as a VariantArray.
+    #[test]
+    fn get_variant_partially_shredded_bool_as_variant() {
+        let input = partially_shredded_bool_variant_array();
+        let output = variant_get(&input, GetOptions::new()).unwrap();
+
+        // With no requested type the output must be readable as a VariantArray
+        let variants = VariantArray::try_new(&output).unwrap();
+        assert_eq!(variants.len(), 4);
+
+        // Every row must come back unchanged: row 1 is null, row 2 is the string "n/a"
+        assert_eq!(variants.value(0), Variant::from(true));
+        assert!(!variants.is_valid(1));
+        assert_eq!(variants.value(2), Variant::from("n/a"));
+        assert_eq!(variants.value(3), Variant::from(false));
+    }
+
+    /// Partial Shredding: a string fixture read back as a VariantArray.
+    #[test]
+    fn get_variant_partially_shredded_utf8_as_variant() {
+        let input = partially_shredded_utf8_variant_array();
+        let output = variant_get(&input, GetOptions::new()).unwrap();
+
+        // With no requested type the output must be readable as a VariantArray
+        let variants = VariantArray::try_new(&output).unwrap();
+        assert_eq!(variants.len(), 4);
+
+        // Every row must come back unchanged: row 1 is null, row 2 is the string "n/a"
+        assert_eq!(variants.value(0), Variant::from("hello"));
+        assert!(!variants.is_valid(1));
+        assert_eq!(variants.value(2), Variant::from("n/a"));
+        assert_eq!(variants.value(3), Variant::from("world"));
+    }
+
+    // Fixture: partially shredded variant array whose `typed_value` column is a BinaryViewArray.
+    // Rows 1 and 2 have no typed value; the generator macro supplies the `value` column and the
+    // null buffer for them.
+    partially_shredded_variant_array_gen!(partially_shredded_binary_view_variant_array, || {
+        BinaryViewArray::from(vec![
+            Some(&[1u8, 2u8, 3u8][..]), // row 0 is shredded
+            None,                       // row 1 is null
+            None,                       // row 2 is a string
+            Some(&[4u8, 5u8, 6u8][..]), // row 3 is shredded
+        ])
+    });
+
+    /// Partial Shredding: a date fixture read back as a VariantArray.
+    #[test]
+    fn get_variant_partially_shredded_date32_as_variant() {
+        use chrono::NaiveDate;
+
+        let input = partially_shredded_date32_variant_array();
+        let output = variant_get(&input, GetOptions::new()).unwrap();
+
+        // With no requested type the output must be readable as a VariantArray
+        let variants = VariantArray::try_new(&output).unwrap();
+        assert_eq!(variants.len(), 4);
+
+        // Every row must come back unchanged: row 1 is null, row 2 is the string "n/a"
+        let first_date = NaiveDate::from_ymd_opt(2025, 9, 17).unwrap();
+        let last_date = NaiveDate::from_ymd_opt(2025, 9, 9).unwrap();
+        assert_eq!(variants.value(0), Variant::from(first_date));
+        assert!(!variants.is_valid(1));
+        assert_eq!(variants.value(2), Variant::from("n/a"));
+        assert_eq!(variants.value(3), Variant::from(last_date));
+    }
+
+    /// Partial Shredding: a binary-view fixture read back as a VariantArray.
+    #[test]
+    fn get_variant_partially_shredded_binary_view_as_variant() {
+        let input = partially_shredded_binary_view_variant_array();
+        let output = variant_get(&input, GetOptions::new()).unwrap();
+
+        // With no requested type the output must be readable as a VariantArray
+        let variants = VariantArray::try_new(&output).unwrap();
+        assert_eq!(variants.len(), 4);
+
+        // Every row must come back unchanged: row 1 is null, row 2 is the string "n/a"
+        assert_eq!(variants.value(0), Variant::from(&[1u8, 2u8, 3u8][..]));
+        assert!(!variants.is_valid(1));
+        assert_eq!(variants.value(2), Variant::from("n/a"));
+        assert_eq!(variants.value(3), Variant::from(&[4u8, 5u8, 6u8][..]));
+    }
+
+    /// Shredding: extract a value as an Int32Array using the default cast options.
+    ///
+    /// The row holding the string "n/a" cannot be represented as Int32 and comes back as null.
+    #[test]
+    fn get_variant_shredded_int32_as_int32_safe_cast() {
+        let input = partially_shredded_int32_variant_array();
+
+        // Request the result as Int32
+        let int32_field = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
+        let options = GetOptions::new().with_as_type(Some(int32_field));
+        let actual = variant_get(&input, options).unwrap();
+
+        let expected: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(34),
+            None,
+            None, // "n/a" is not an Int32 so converted to null
+            Some(100),
+        ]));
+        assert_eq!(&actual, &expected)
+    }
+
+    /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a")
+    #[test]
+    fn get_variant_shredded_int32_as_int32_unsafe_cast() {
+        let input = partially_shredded_int32_variant_array();
+        let int32_field = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
+
+        // `safe: false` makes a failed conversion an error instead of a null
+        let strict = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(int32_field))
+            .with_cast_options(strict);
+
+        let message = variant_get(&input, options).unwrap_err().to_string();
+        // TODO make this error message nicer (not Debug format)
+        assert_eq!(
+            message,
+            "Cast error: Failed to extract primitive of type Int32 from variant ShortString(ShortString(\"n/a\")) at path VariantPath([])"
+        );
+    }
+
+    /// Perfect Shredding: extract the typed value as a VariantArray
+    ///
+    /// Expands to the body of a test that calls `$data_fn()` to obtain a three-row input, runs
+    /// `variant_get` with default options, and checks that the rows are 1, 2 and 3 (as
+    /// `$primitive_type`).
+    macro_rules! numeric_perfectly_shredded_test {
+        ($primitive_type:ty, $data_fn:ident) => {
+            let input = $data_fn();
+            let output = variant_get(&input, GetOptions::new()).unwrap();
+
+            // With no requested type the output must be readable as a VariantArray
+            let variants = VariantArray::try_new(&output).unwrap();
+            assert_eq!(variants.len(), 3);
+
+            // Row `i` must hold the value `i + 1`
+            for (row, expected) in (1u8..=3).enumerate() {
+                assert_eq!(
+                    variants.value(row),
+                    Variant::from(<$primitive_type>::try_from(expected).unwrap())
+                );
+            }
+        };
+    }
+
+    // Perfectly shredded numeric columns read back as variants, one test per numeric type.
+    // Each test passes the primitive type and the matching fixture function to
+    // `numeric_perfectly_shredded_test!`, which performs the actual assertions.
+    #[test]
+    fn get_variant_perfectly_shredded_int8_as_variant() {
+        numeric_perfectly_shredded_test!(i8, perfectly_shredded_int8_variant_array);
+    }
+
+    #[test]
+    fn get_variant_perfectly_shredded_int16_as_variant() {
+        numeric_perfectly_shredded_test!(i16, perfectly_shredded_int16_variant_array);
+    }
+
+    #[test]
+    fn get_variant_perfectly_shredded_int32_as_variant() {
+        numeric_perfectly_shredded_test!(i32, perfectly_shredded_int32_variant_array);
+    }
+
+    #[test]
+    fn get_variant_perfectly_shredded_int64_as_variant() {
+        numeric_perfectly_shredded_test!(i64, perfectly_shredded_int64_variant_array);
+    }
+
+    #[test]
+    fn get_variant_perfectly_shredded_float32_as_variant() {
+        numeric_perfectly_shredded_test!(f32, perfectly_shredded_float32_variant_array);
+    }
+
+    #[test]
+    fn get_variant_perfectly_shredded_float64_as_variant() {
+        numeric_perfectly_shredded_test!(f64, perfectly_shredded_float64_variant_array);
+    }
+
+    /// AllNull: extract a value as a VariantArray
+    #[test]
+    fn get_variant_all_null_as_variant() {
+        let input = all_null_variant_array();
+        let output = variant_get(&input, GetOptions::new()).unwrap();
+
+        // With no requested type the output must be readable as a VariantArray
+        let variants = VariantArray::try_new(&output).unwrap();
+        assert_eq!(variants.len(), 3);
+
+        // No row may be valid
+        for row in 0..3 {
+            assert!(!variants.is_valid(row));
+        }
+    }
+
+    /// AllNull: extract a value as an Int32Array
+    #[test]
+    fn get_variant_all_null_as_int32() {
+        let input = all_null_variant_array();
+
+        // Request the result as Int32
+        let int32_field = FieldRef::from(Field::new("typed_value", DataType::Int32, true));
+        let options = GetOptions::new().with_as_type(Some(int32_field));
+        let actual = variant_get(&input, options).unwrap();
+
+        // Three rows, all null
+        let expected: ArrayRef = Arc::new(Int32Array::from(vec![None::<i32>; 3]));
+        assert_eq!(&actual, &expected)
+    }
+
+    /// Generates a `#[test]` named `$name` that builds its input with
+    /// `$perfectly_shredded_array_gen_fun()`, requests the result as `$primitive_type` through
+    /// `variant_get`, and asserts that the output equals `$expected_array`.
+    macro_rules! perfectly_shredded_to_arrow_primitive_test {
+        ($name:ident, $primitive_type:expr, $perfectly_shredded_array_gen_fun:ident, $expected_array:expr) => {
+            #[test]
+            fn $name() {
+                let input = $perfectly_shredded_array_gen_fun();
+                let requested = FieldRef::from(Field::new("typed_value", $primitive_type, true));
+                let actual =
+                    variant_get(&input, GetOptions::new().with_as_type(Some(requested))).unwrap();
+                let expected: ArrayRef = Arc::new($expected_array);
+                assert_eq!(&actual, &expected);
+            }
+        };
+    }
+
+    // Perfectly shredded Int8 column requested as Int8: expects the rows 1, 2, 3 back.
+    // The generated test was previously named `..._int18_as_int8`; the name now matches the
+    // `<source>_as_<target>` pattern used by the sibling tests.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_int8_as_int8,
+        Int8,
+        perfectly_shredded_int8_variant_array,
+        Int8Array::from(vec![Some(1), Some(2), Some(3)])
+    );
+
+    // Perfectly shredded columns requested as a concrete Arrow type. Each invocation names the
+    // generated test, the requested data type, the fixture function, and the expected output.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_int16_as_int16,
+        Int16,
+        perfectly_shredded_int16_variant_array,
+        Int16Array::from(vec![Some(1), Some(2), Some(3)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_int32_as_int32,
+        Int32,
+        perfectly_shredded_int32_variant_array,
+        Int32Array::from(vec![Some(1), Some(2), Some(3)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_int64_as_int64,
+        Int64,
+        perfectly_shredded_int64_variant_array,
+        Int64Array::from(vec![Some(1), Some(2), Some(3)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_float32_as_float32,
+        Float32,
+        perfectly_shredded_float32_variant_array,
+        Float32Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_float64_as_float64,
+        Float64,
+        perfectly_shredded_float64_variant_array,
+        Float64Array::from(vec![Some(1.0), Some(2.0), Some(3.0)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_boolean_as_boolean,
+        Boolean,
+        perfectly_shredded_bool_variant_array,
+        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
+    );
+
+    // The three string fixtures (Utf8, LargeUtf8, Utf8View) are all requested as Utf8 and are
+    // all expected to produce the same StringArray.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_utf8_as_utf8,
+        DataType::Utf8,
+        perfectly_shredded_utf8_variant_array,
+        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_large_utf8_as_utf8,
+        DataType::Utf8,
+        perfectly_shredded_large_utf8_variant_array,
+        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_utf8_view_as_utf8,
+        DataType::Utf8,
+        perfectly_shredded_utf8_view_variant_array,
+        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    );
+
+    /// Generates `fn $func() -> ArrayRef`, which builds a three-row variant array that has only
+    /// `metadata` and `typed_value` columns (no `value` column). The `typed_value` column is
+    /// whatever `$typed_value_gen()` returns.
+    macro_rules! perfectly_shredded_variant_array_fn {
+        ($func:ident, $typed_value_gen:expr) => {
+            fn $func() -> ArrayRef {
+                // At the time of writing, the `VariantArrayBuilder` does not support shredding,
+                // so the array is assembled by hand. See https://github.com/apache/arrow-rs/issues/7895
+                let metadata_column = BinaryViewArray::from_iter_values(std::iter::repeat_n(
+                    EMPTY_VARIANT_METADATA_BYTES,
+                    3,
+                ));
+                let typed_value_column = $typed_value_gen();
+
+                let shredded = StructArrayBuilder::new()
+                    .with_field("metadata", Arc::new(metadata_column), false)
+                    .with_field("typed_value", Arc::new(typed_value_column), true)
+                    .build();
+
+                let variant_array =
+                    VariantArray::try_new(&shredded).expect("should create variant array");
+                ArrayRef::from(variant_array)
+            }
+        };
+    }
+
+    // Fixtures: perfectly shredded string columns, one per Arrow string representation,
+    // all holding "foo", "bar", "baz".
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_variant_array, || {
+        StringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    });
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_utf8_variant_array, || {
+        LargeStringArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    });
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_utf8_view_variant_array, || {
+        StringViewArray::from(vec![Some("foo"), Some("bar"), Some("baz")])
+    });
+
+    // Fixture: perfectly shredded boolean column holding true, false, true.
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_bool_variant_array, || {
+        BooleanArray::from(vec![Some(true), Some(false), Some(true)])
+    });
+
+    /// Generates `fn $func() -> ArrayRef` returning a perfectly "shredded" variant array whose
+    /// `typed_value` column is a `$array_type` holding the values 1, 2 and 3 converted to
+    /// `$primitive_type`.
+    ///
+    /// The schema of the corresponding `StructArray` would look like this
+    /// (shown for `$array_type = Int32Array`):
+    ///
+    /// ```text
+    /// StructArray {
+    ///   metadata: BinaryViewArray,
+    ///   typed_value: Int32Array,
+    /// }
+    /// ```
+    macro_rules! numeric_perfectly_shredded_variant_array_fn {
+        ($func:ident, $array_type:ident, $primitive_type:ty) => {
+            perfectly_shredded_variant_array_fn!($func, || {
+                let typed_values = (1u8..=3)
+                    .map(|v| Some(<$primitive_type>::try_from(v).unwrap()))
+                    .collect::<Vec<_>>();
+                $array_type::from(typed_values)
+            });
+        };
+    }
+
+    // Fixtures: one perfectly shredded numeric variant array per numeric type. Each invocation
+    // names the generated function, the Arrow array type, and the matching Rust primitive.
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_int8_variant_array,
+        Int8Array,
+        i8
+    );
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_int16_variant_array,
+        Int16Array,
+        i16
+    );
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_int32_variant_array,
+        Int32Array,
+        i32
+    );
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_int64_variant_array,
+        Int64Array,
+        i64
+    );
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_float32_variant_array,
+        Float32Array,
+        f32
+    );
+    numeric_perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_float64_variant_array,
+        Float64Array,
+        f64
+    );
+
+    // Fixture: perfectly shredded microsecond timestamps without a timezone,
+    // including one negative value.
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_micro_ntz_variant_array,
+        || {
+            arrow::array::TimestampMicrosecondArray::from(vec![
+                Some(-456000),
+                Some(1758602096000001),
+                Some(1758602096000002),
+            ])
+        }
+    );
+
+    // Requesting the same unit and no timezone returns the values unchanged.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_micro_ntz,
+        DataType::Timestamp(TimeUnit::Microsecond, None),
+        perfectly_shredded_timestamp_micro_ntz_variant_array,
+        arrow::array::TimestampMicrosecondArray::from(vec![
+            Some(-456000),
+            Some(1758602096000001),
+            Some(1758602096000002),
+        ])
+    );
+
+    // Test converting microseconds to nanoseconds: every expected value is the input
+    // multiplied by 1000.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
+        DataType::Timestamp(TimeUnit::Nanosecond, None),
+        perfectly_shredded_timestamp_micro_ntz_variant_array,
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-456000000),
+            Some(1758602096000001000),
+            Some(1758602096000002000)
+        ])
+    );
+
+    // Fixture: perfectly shredded microsecond timestamps with the "+00:00" timezone.
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
+        arrow::array::TimestampMicrosecondArray::from(vec![
+            Some(-456000),
+            Some(1758602096000001),
+            Some(1758602096000002),
+        ])
+        .with_timezone("+00:00")
+    });
+
+    // Requesting the same unit and timezone returns the values unchanged.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_micro,
+        DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_micro_variant_array,
+        arrow::array::TimestampMicrosecondArray::from(vec![
+            Some(-456000),
+            Some(1758602096000001),
+            Some(1758602096000002),
+        ])
+        .with_timezone("+00:00")
+    );
+
+    // Test converting microseconds to nanoseconds: every expected value is the input
+    // multiplied by 1000, and the timezone is kept.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_as_nano,
+        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_micro_variant_array,
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-456000000),
+            Some(1758602096000001000),
+            Some(1758602096000002000)
+        ])
+        .with_timezone("+00:00")
+    );
+
+    // Fixture: perfectly shredded nanosecond timestamps without a timezone,
+    // including one negative value.
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_nano_ntz_variant_array,
+        || {
+            arrow::array::TimestampNanosecondArray::from(vec![
+                Some(-4999999561),
+                Some(1758602096000000001),
+                Some(1758602096000000002),
+            ])
+        }
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
+        || {
+            arrow::array::TimestampMicrosecondArray::from(vec![
+                Some(1234),       // can't be cast to second & millisecond
+                Some(1234000),    // can be cast to millisecond, but not second
+                Some(1234000000), // can be cast to second & millisecond
+            ])
+            .with_timezone("+00:00")
+        }
+    );
+
+    // The following two tests cover the micro with timezone -> milli/second cases.
+    // The three input items include values that can be cast safely and values that can't.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_second,
+        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampSecondArray::from(vec![
+            None,
+            None, // Return None if can't be cast to second safely
+            Some(1234)
+        ])
+        .with_timezone("+00:00")
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_as_timestamp_milli,
+        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_micro_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampMillisecondArray::from(vec![
+            None, // Return None if can't be cast to millisecond safely
+            Some(1234),
+            Some(1234000)
+        ])
+        .with_timezone("+00:00")
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
+        || {
+            arrow::array::TimestampMicrosecondArray::from(vec![
+                Some(1234),       // can't be cast to second & millisecond
+                Some(1234000),    // can be cast to millisecond, but not second
+                Some(1234000000), // can be cast to second & millisecond
+            ])
+        }
+    );
+
+    // The following two tests cover the micro_ntz -> milli/second cases.
+    // The three input items include values that can be cast safely and values that can't.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_second,
+        DataType::Timestamp(TimeUnit::Second, None),
+        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampSecondArray::from(vec![
+            None,
+            None, // Return None if can't be cast to second safely
+            Some(1234)
+        ])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_micro_ntz_as_timestamp_milli,
+        DataType::Timestamp(TimeUnit::Millisecond, None),
+        perfectly_shredded_timestamp_micro_ntz_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampMillisecondArray::from(vec![
+            None, // Return None if can't be cast to millisecond safely
+            Some(1234),
+            Some(1234000)
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
+        || {
+            arrow::array::TimestampNanosecondArray::from(vec![
+                Some(1234000),       // can't be cast to second & millisecond
+                Some(1234000000),    // can be cast to millisecond, but not second
+                Some(1234000000000), // can be cast to second & millisecond
+            ])
+            .with_timezone("+00:00")
+        }
+    );
+
+    // The following two tests cover the nano with timezone -> milli/second cases.
+    // The three input items include values that can be cast safely and values that can't.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_second,
+        DataType::Timestamp(TimeUnit::Second, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampSecondArray::from(vec![
+            None,
+            None, // Return None if can't be cast to second safely
+            Some(1234)
+        ])
+        .with_timezone("+00:00")
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_milli,
+        DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_nano_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampMillisecondArray::from(vec![
+            None, // Return None if can't be cast to millisecond safely
+            Some(1234),
+            Some(1234000)
+        ])
+        .with_timezone("+00:00")
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
+        || {
+            arrow::array::TimestampNanosecondArray::from(vec![
+                Some(1234000),       // can't be cast to second & millisecond
+                Some(1234000000),    // can be cast to millisecond, but not second
+                Some(1234000000000), // can be cast to second & millisecond
+            ])
+        }
+    );
+
+    // The following two tests cover the nano_ntz -> milli/second cases.
+    // The three input items include values that can be cast safely and values that can't.
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_second,
+        DataType::Timestamp(TimeUnit::Second, None),
+        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampSecondArray::from(vec![
+            None,
+            None, // Return None if can't be cast to second safely
+            Some(1234)
+        ])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_milli,
+        DataType::Timestamp(TimeUnit::Millisecond, None),
+        perfectly_shredded_timestamp_nano_ntz_variant_array_for_second_and_milli_second,
+        arrow::array::TimestampMillisecondArray::from(vec![
+            None, // Return None if can't be cast to millisecond safely
+            Some(1234),
+            Some(1234000)
+        ])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_ntz_as_timestamp_nano_ntz,
+        DataType::Timestamp(TimeUnit::Nanosecond, None),
+        perfectly_shredded_timestamp_nano_ntz_variant_array,
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-4999999561),
+            Some(1758602096000000001),
+            Some(1758602096000000002),
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_nano_variant_array, || {
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-4999999561),
+            Some(1758602096000000001),
+            Some(1758602096000000002),
+        ])
+        .with_timezone("+00:00")
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_timestamp_nano_as_timestamp_nano,
+        DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
+        perfectly_shredded_timestamp_nano_variant_array,
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-4999999561),
+            Some(1758602096000000001),
+            Some(1758602096000000002),
+        ])
+        .with_timezone("+00:00")
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_date_variant_array, || {
+        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_date_as_date,
+        DataType::Date32,
+        perfectly_shredded_date_variant_array,
+        Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_date_as_date64,
+        DataType::Date64,
+        perfectly_shredded_date_variant_array,
+        Date64Array::from(vec![
+            Some(-1066608000000),
+            Some(1519430400000),
+            Some(1728000000000)
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
+        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_time_as_time,
+        DataType::Time64(TimeUnit::Microsecond),
+        perfectly_shredded_time_variant_array,
+        Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_time_as_time64_nano,
+        DataType::Time64(TimeUnit::Nanosecond),
+        perfectly_shredded_time_variant_array,
+        Time64NanosecondArray::from(vec![
+            Some(12345000000),
+            Some(87654000000),
+            Some(135792000000)
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array_for_time32, || {
+        Time64MicrosecondArray::from(vec![
+            Some(1234),        // This can't be cast to Time32 losslessly
+            Some(7654000),     // This can be cast to Time32(Millisecond), but not Time32(Second)
+            Some(35792000000), // This can be cast to Time32(Second) & Time32(Millisecond)
+        ])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_time_as_time32_second,
+        DataType::Time32(TimeUnit::Second),
+        perfectly_shredded_time_variant_array_for_time32,
+        Time32SecondArray::from(vec![
+            None,
+            None, // Return None if can't be cast to Time32(Second) safely
+            Some(35792)
+        ])
+    );
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_time_as_time32_milli,
+        DataType::Time32(TimeUnit::Millisecond),
+        perfectly_shredded_time_variant_array_for_time32,
+        Time32MillisecondArray::from(vec![
+            None, // Return None if can't be cast to Time32(Millisecond) safely
+            Some(7654),
+            Some(35792000)
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
+        let mut builder = NullBuilder::new();
+        builder.append_nulls(3);
+        builder.finish()
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_null_as_null,
+        DataType::Null,
+        perfectly_shredded_null_variant_array,
+        arrow::array::NullArray::new(3)
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array_with_int, || {
+        Int32Array::from(vec![Some(32), Some(64), Some(48)])
+    });
+
+    // Null values are appended when a type mismatch happens in safe mode
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_null_with_type_missmatch_in_safe_mode,
+        DataType::Null,
+        perfectly_shredded_null_variant_array_with_int,
+        arrow::array::NullArray::new(3)
+    );
+
+    // We'll return an error if type miss match happens in strict mode
+    #[test]
+    fn get_variant_perfectly_shredded_null_as_null_with_type_missmatch_in_strict_mode() {
+        let array = perfectly_shredded_null_variant_array_with_int();
+        let field = Field::new("typed_value", DataType::Null, true);
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(CastOptions {
+                safe: false,
+                format_options: FormatOptions::default(),
+            });
+
+        let result = variant_get(&array, options);
+
+        assert!(result.is_err());
+        let error_msg = format!("{}", result.unwrap_err());
+        assert!(
+            error_msg
+                .contains("Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])"),
+            "Expected=[Cast error: Failed to extract primitive of type Null from variant Int32(32) at path VariantPath([])],\
+                Got error message=[{}]",
+            error_msg
+        );
+    }
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal4_variant_array, || {
+        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
+            .with_precision_and_scale(5, 2)
+            .unwrap()
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal4_as_decimal4,
+        DataType::Decimal32(5, 2),
+        perfectly_shredded_decimal4_variant_array,
+        Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
+            .with_precision_and_scale(5, 2)
+            .unwrap()
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_decimal8_variant_array_cast2decimal32,
+        || {
+            Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
+                .with_precision_and_scale(6, 1)
+                .unwrap()
+        }
+    );
+
+    // The input is cast to Decimal32 when it is transformed to Variant.
+    // This test covers the path DataType::Decimal64 (the original array)
+    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal64 (the result array)
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal8_through_decimal32_as_decimal8,
+        DataType::Decimal64(6, 1),
+        perfectly_shredded_decimal8_variant_array_cast2decimal32,
+        Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
+            .with_precision_and_scale(6, 1)
+            .unwrap()
+    );
+
+    // The fixture and test below cover the path DataType::Decimal64 (the original array)
+    // -> Variant::Decimal8 (VariantArray) -> DataType::Decimal64 (the result array)
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal8_variant_array, || {
+        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
+            .with_precision_and_scale(10, 1)
+            .unwrap()
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal8_as_decimal8,
+        DataType::Decimal64(10, 1),
+        perfectly_shredded_decimal8_variant_array,
+        Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
+            .with_precision_and_scale(10, 1)
+            .unwrap()
+    );
+
+    // This fixture exercises the path DataType::Decimal128 (the original array)
+    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_decimal16_within_decimal4_variant_array,
+        || {
+            Decimal128Array::from(vec![
+                Some(i128::from(1234589)),
+                Some(i128::from(2344444)),
+                Some(i128::from(-1234789)),
+            ])
+            .with_precision_and_scale(7, 3)
+            .unwrap()
+        }
+    );
+
+    // This test covers the path DataType::Decimal128 (the original array)
+    // -> Variant::Decimal4 (VariantArray) -> DataType::Decimal128 (the result array)
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal16_within_decimal4_as_decimal16,
+        DataType::Decimal128(7, 3),
+        perfectly_shredded_decimal16_within_decimal4_variant_array,
+        Decimal128Array::from(vec![
+            Some(i128::from(1234589)),
+            Some(i128::from(2344444)),
+            Some(i128::from(-1234789)),
+        ])
+        .with_precision_and_scale(7, 3)
+        .unwrap()
+    );
+
+    perfectly_shredded_variant_array_fn!(
+        perfectly_shredded_decimal16_within_decimal8_variant_array,
+        || {
+            Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
+                .with_precision_and_scale(10, 1)
+                .unwrap()
+        }
+    );
+
+    // This test covers the path DataType::Decimal128 (the original array)
+    // -> Variant::Decimal8 (VariantArray) -> DataType::Decimal128 (the result array)
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal16_within8_as_decimal16,
+        DataType::Decimal128(10, 1),
+        perfectly_shredded_decimal16_within_decimal8_variant_array,
+        Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
+            .with_precision_and_scale(10, 1)
+            .unwrap()
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal16_variant_array, || {
+        Decimal128Array::from(vec![
+            Some(i128::from_str("12345678901234567899").unwrap()),
+            Some(i128::from_str("23445677483748324300").unwrap()),
+            Some(i128::from_str("-12345678901234567899").unwrap()),
+        ])
+        .with_precision_and_scale(20, 3)
+        .unwrap()
+    });
+
+    // This test covers the path DataType::Decimal128 (the original array)
+    // -> Variant::Decimal16 (VariantArray) -> DataType::Decimal128 (the result array)
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_decimal16_as_decimal16,
+        DataType::Decimal128(20, 3),
+        perfectly_shredded_decimal16_variant_array,
+        Decimal128Array::from(vec![
+            Some(i128::from_str("12345678901234567899").unwrap()),
+            Some(i128::from_str("23445677483748324300").unwrap()),
+            Some(i128::from_str("-12345678901234567899").unwrap())
+        ])
+        .with_precision_and_scale(20, 3)
+        .unwrap()
+    );
+
+    /// Asserts that `variant_get` with default options yields exactly `$array_expected`.
+    macro_rules! assert_variant_get_as_variant_array_with_default_option {
+        ($variant_array: expr, $array_expected: expr) => {{
+            let options = GetOptions::new();
+            let array = $variant_array;
+            let result = variant_get(&array, options).unwrap();
+            // expect the result is a VariantArray
+            let result = VariantArray::try_new(&result).unwrap();
+            // Bind once so the expected expression is not evaluated (and built) twice.
+            let expected = $array_expected;
+            assert_eq!(result.len(), expected.len());
+            for (idx, item) in expected.into_iter().enumerate() {
+                match item {
+                    Some(item) => assert_eq!(result.value(idx), item),
+                    None => assert!(result.is_null(idx)),
+                }
+            }
+        }};
+    }
+
+    partially_shredded_variant_array_gen!(
+        partially_shredded_timestamp_micro_ntz_variant_array,
+        || {
+            arrow::array::TimestampMicrosecondArray::from(vec![
+                Some(-456000),
+                None,
+                None,
+                Some(1758602096000000),
+            ])
+        }
+    );
+
+    #[test]
+    fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() {
+        let array = partially_shredded_timestamp_micro_ntz_variant_array();
+        assert_variant_get_as_variant_array_with_default_option!(
+            array,
+            vec![
+                Some(Variant::from(
+                    DateTime::from_timestamp_micros(-456000i64)
+                        .unwrap()
+                        .naive_utc(),
+                )),
+                None,
+                Some(Variant::from("n/a")),
+                Some(Variant::from(
+                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
+                        .unwrap()
+                        .naive_utc(),
+                )),
+            ]
+        )
+    }
+
+    partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || {
+        arrow::array::TimestampMicrosecondArray::from(vec![
+            Some(-456000),
+            None,
+            None,
+            Some(1758602096000000),
+        ])
+        .with_timezone("+00:00")
+    });
+
+    #[test]
+    fn get_variant_partial_shredded_timestamp_micro_as_variant() {
+        let array = partially_shredded_timestamp_micro_variant_array();
+        assert_variant_get_as_variant_array_with_default_option!(
+            array,
+            vec![
+                Some(Variant::from(
+                    DateTime::from_timestamp_micros(-456000i64)
+                        .unwrap()
+                        .to_utc(),
+                )),
+                None,
+                Some(Variant::from("n/a")),
+                Some(Variant::from(
+                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
+                        .unwrap()
+                        .to_utc(),
+                )),
+            ]
+        )
+    }
+
+    partially_shredded_variant_array_gen!(
+        partially_shredded_timestamp_nano_ntz_variant_array,
+        || {
+            arrow::array::TimestampNanosecondArray::from(vec![
+                Some(-4999999561),
+                None,
+                None,
+                Some(1758602096000000000),
+            ])
+        }
+    );
+
+    #[test]
+    fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() {
+        let array = partially_shredded_timestamp_nano_ntz_variant_array();
+
+        assert_variant_get_as_variant_array_with_default_option!(
+            array,
+            vec![
+                Some(Variant::from(
+                    DateTime::from_timestamp(-5, 439).unwrap().naive_utc()
+                )),
+                None,
+                Some(Variant::from("n/a")),
+                Some(Variant::from(
+                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
+                        .unwrap()
+                        .naive_utc()
+                )),
+            ]
+        )
+    }
+
+    partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || {
+        arrow::array::TimestampNanosecondArray::from(vec![
+            Some(-4999999561),
+            None,
+            None,
+            Some(1758602096000000000),
+        ])
+        .with_timezone("+00:00")
+    });
+
+    #[test]
+    fn get_variant_partial_shredded_timestamp_nano_as_variant() {
+        let array = partially_shredded_timestamp_nano_variant_array();
+
+        assert_variant_get_as_variant_array_with_default_option!(
+            array,
+            vec![
+                Some(Variant::from(
+                    DateTime::from_timestamp(-5, 439).unwrap().to_utc()
+                )),
+                None,
+                Some(Variant::from("n/a")),
+                Some(Variant::from(
+                    DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00")
+                        .unwrap()
+                        .to_utc()
+                )),
+            ]
+        )
+    }
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || {
+        BinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_binary_as_binary,
+        DataType::Binary,
+        perfectly_shredded_binary_variant_array,
+        BinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_large_binary_variant_array, || {
+        LargeBinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_large_binary_as_large_binary,
+        DataType::LargeBinary,
+        perfectly_shredded_large_binary_variant_array,
+        LargeBinaryArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    );
+
+    perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_view_variant_array, || {
+        BinaryViewArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    });
+
+    perfectly_shredded_to_arrow_primitive_test!(
+        get_variant_perfectly_shredded_binary_view_as_binary_view,
+        DataType::BinaryView,
+        perfectly_shredded_binary_view_variant_array,
+        BinaryViewArray::from(vec![
+            Some(b"Apache" as &[u8]),
+            Some(b"Arrow-rs" as &[u8]),
+            Some(b"Parquet-variant" as &[u8]),
+        ])
+    );
+
+    /// Define `$func`, a function returning a partially "shredded" variant array whose
+    /// `typed_value` column is a `$array_type` built from `$primitive_type` values.
+    ///
+    /// Based on the example from [the doc]
+    ///
+    /// [the doc]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?tab=t.0
+    ///
+    /// ```text
+    /// 34
+    /// null (an Arrow NULL, not a Variant::Null)
+    /// "n/a" (a string)
+    /// 100
+    /// ```
+    ///
+    /// The schema of the corresponding `StructArray` looks like this (shown for `Int32Array`):
+    ///
+    /// ```text
+    /// StructArray {
+    ///   metadata: BinaryViewArray,
+    ///   value: BinaryViewArray,
+    ///   typed_value: Int32Array,
+    /// }
+    /// ```
+    macro_rules! numeric_partially_shredded_variant_array_fn {
+        ($func:ident, $array_type:ident, $primitive_type:ty) => {
+            partially_shredded_variant_array_gen!($func, || $array_type::from(vec![
+                Some(<$primitive_type>::try_from(34u8).unwrap()), // row 0 is shredded, so it has a value
+                None,                                             // row 1 is null, so no value
+                None, // row 2 is a string, so no typed value
+                Some(<$primitive_type>::try_from(100u8).unwrap()), // row 3 is shredded, so it has a value
+            ]));
+        };
+    }
+
+    /// Generates `fn $func() -> ArrayRef` building a 4-row partially shredded variant array.
+    macro_rules! partially_shredded_variant_array_gen {
+        ($func:ident, $typed_array_gen: expr) => {
+            fn $func() -> ArrayRef {
+                // At the time of writing, the `VariantArrayBuilder` does not support shredding,
+                // so the array is constructed manually. See https://github.com/apache/arrow-rs/issues/7895
+                let (metadata, string_value) = {
+                    let mut builder = parquet_variant::VariantBuilder::new();
+                    builder.append_value("n/a");
+                    builder.finish()
+                };
+                let nulls = NullBuffer::from(vec![
+                    true,  // row 0 non null
+                    false, // row 1 is null
+                    true,  // row 2 non null
+                    true,  // row 3 non null
+                ]);
+
+                // metadata is the same for all rows
+                let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4));
+
+                // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY
+                // about why row 1 holds an empty but non-null value.
+                let values = BinaryViewArray::from(vec![
+                    None,                // row 0 is shredded, so no value
+                    Some(b"" as &[u8]),  // row 1 is null, so empty value (see the link above)
+                    Some(&string_value), // row 2 is not shredded: the encoded string "n/a"
+                    None,                // row 3 is shredded, so no value
+                ]);
+
+                let typed_value = $typed_array_gen();
+
+                let struct_array = StructArrayBuilder::new()
+                    .with_field("metadata", Arc::new(metadata), false)
+                    .with_field("typed_value", Arc::new(typed_value), true)
+                    .with_field("value", Arc::new(values), true)
+                    .with_nulls(nulls)
+                    .build();
+
+                ArrayRef::from(
+                    VariantArray::try_new(&struct_array).expect("should create variant array"),
+                )
+            }
+        };
+    }
+
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_int8_variant_array,
+        Int8Array,
+        i8
+    );
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_int16_variant_array,
+        Int16Array,
+        i16
+    );
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_int32_variant_array,
+        Int32Array,
+        i32
+    );
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_int64_variant_array,
+        Int64Array,
+        i64
+    );
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_float32_variant_array,
+        Float32Array,
+        f32
+    );
+    numeric_partially_shredded_variant_array_fn!(
+        partially_shredded_float64_variant_array,
+        Float64Array,
+        f64
+    );
+
+    partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || {
+        arrow::array::BooleanArray::from(vec![
+            Some(true),  // row 0 is shredded, so it has a value
+            None,        // row 1 is null, so no value
+            None,        // row 2 is a string, so no typed value
+            Some(false), // row 3 is shredded, so it has a value
+        ])
+    });
+
+    partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || {
+        StringArray::from(vec![
+            Some("hello"), // row 0 is shredded
+            None,          // row 1 is null
+            None,          // row 2 is a string
+            Some("world"), // row 3 is shredded
+        ])
+    });
+
+    partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || {
+        Date32Array::from(vec![
+            Some(20348), // row 0 is shredded, 2025-09-17
+            None,        // row 1 is null
+            None,        // row 2 is a string, not a date
+            Some(20340), // row 3 is shredded, 2025-09-09
+        ])
+    });
+
+    /// Build an "all null" variant column as an `ArrayRef`,
+    /// i.e. three rows that are all Arrow NULLs:
+    ///
+    /// ```text
+    /// null
+    /// null
+    /// null
+    /// ```
+    ///
+    /// The backing `StructArray` carries only the `metadata` field:
+    ///
+    /// ```text
+    /// StructArray {
+    ///   metadata: BinaryViewArray,
+    /// }
+    /// ```
+    fn all_null_variant_array() -> ArrayRef {
+        const NUM_ROWS: usize = 3;
+
+        // every row gets the same (empty) metadata bytes,
+        // even though all of the rows are null
+        let metadata_bytes = std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, NUM_ROWS);
+        let metadata_column = BinaryViewArray::from_iter_values(metadata_bytes);
+
+        // a `false` validity entry marks the row as null
+        let validity = NullBuffer::from(vec![false; NUM_ROWS]);
+
+        let all_null_struct = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_column), false)
+            .with_nulls(validity)
+            .build();
+
+        Arc::new(all_null_struct)
+    }
+    /// Takes a manually constructed shredded variant array for objects such as
+    /// {"x": 1, "y": "foo"} and {"x": 42}, and reads the "x" field back
+    /// through variant_get as a VariantArray.
+    #[test]
+    fn test_shredded_object_field_access() {
+        let input = shredded_object_with_x_field_variant_array();
+
+        // No target type is requested, so the "x" field comes back as variants
+        let x_path = VariantPath::from("x");
+        let extracted = variant_get(&input, GetOptions::new_with_path(x_path)).unwrap();
+        let x_values = VariantArray::try_new(&extracted).unwrap();
+
+        // One value per input row: x=1 for row 0 and x=42 for row 1
+        let expected = [Variant::Int32(1), Variant::Int32(42)];
+        assert_eq!(x_values.len(), expected.len());
+        for (row, want) in expected.into_iter().enumerate() {
+            assert_eq!(x_values.value(row), want);
+        }
+    }
+
+    /// Extracts the shredded "x" field while asking `variant_get` to return it
+    /// as a non-nullable Int32 column instead of a variant.
+    #[test]
+    fn test_shredded_object_field_as_int32() {
+        let input = shredded_object_with_x_field_variant_array();
+
+        // Requested output type: Int32 field named "x"
+        let as_type = FieldRef::from(Field::new("x", DataType::Int32, false));
+        let request = GetOptions::new_with_path(VariantPath::from("x")).with_as_type(Some(as_type));
+        let actual = variant_get(&input, request).unwrap();
+
+        // Both rows carry a shredded Int32 value
+        let int32_values = Int32Array::from(vec![Some(1), Some(42)]);
+        let expected: ArrayRef = Arc::new(int32_values);
+        assert_eq!(&actual, &expected);
+    }
+
+    /// Helper function to create a shredded variant array representing objects
+    ///
+    /// Logical rows this fixture is meant to model:
+    /// Row 0: {"x": 1, "y": "foo"}
+    /// Row 1: {"x": 42}
+    ///
+    /// Physical layout assembled below:
+    /// - metadata: identical bytes for both rows, produced by building the
+    ///   object {"x": 42, "y": "foo"}
+    /// - value: row 0 holds the value bytes of that same built object (so it
+    ///   encodes both "x" and "y", not only {"y": "foo"}); row 1 holds an
+    ///   empty object
+    /// - typed_value: StructArray with a single field "x" (ShreddedVariantFieldArray)
+    /// - The "x" field has only a `typed_value` Int32 column ([1, 42]) and no
+    ///   `value` column (perfect shredding)
+    fn shredded_object_with_x_field_variant_array() -> ArrayRef {
+        // Build {"x": 42, "y": "foo"} once; `finish()` yields the metadata bytes and
+        // the value bytes of that whole object.
+        // NOTE(review): despite its name, `y_field_value` therefore also contains "x".
+        let (metadata, y_field_value) = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut obj = builder.new_object();
+            obj.insert("x", Variant::Int32(42));
+            obj.insert("y", Variant::from("foo"));
+            obj.finish();
+            builder.finish()
+        };
+
+        // Create metadata array (same for both rows)
+        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
+
+        // Create the top-level `value` column:
+        // Row 0: the value bytes built above, i.e. {"x": 42, "y": "foo"}
+        // Row 1: {} (empty object - no unshredded fields)
+        // The empty object is built with a separate builder; only its value bytes are kept.
+        let empty_object_value = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+
+        let value_array = BinaryViewArray::from(vec![
+            Some(y_field_value.as_slice()),      // Row 0 has {"x": 42, "y": "foo"}
+            Some(empty_object_value.as_slice()), // Row 1 has {}
+        ]);
+
+        // Create the "x" field as a ShreddedVariantFieldArray
+        // This represents the shredded Int32 values for the "x" field
+        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]);
+
+        // For perfect shredding of the x field, no "value" column, only typed_value
+        let x_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(x_field_typed_value), true)
+            .build();
+
+        // Wrap the x field struct in a ShreddedVariantFieldArray
+        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
+            .expect("should create ShreddedVariantFieldArray");
+
+        // Create the main typed_value as a struct containing the "x" field
+        // (the data type is read before `x_field_shredded` is moved into the struct)
+        let typed_value_fields = Fields::from(vec![Field::new(
+            "x",
+            x_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let typed_value_struct = StructArray::try_new(
+            typed_value_fields,
+            vec![ArrayRef::from(x_field_shredded)],
+            None, // No nulls - both rows have the object structure
+        )
+        .unwrap();
+
+        // Assemble the top-level struct: metadata + value + typed_value
+        let main_struct = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("value", Arc::new(value_array), true)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .build();
+
+        Arc::new(main_struct)
+    }
+
+    /// Exploratory test: prints how several path spellings are parsed by
+    /// `VariantPath`, then calls `variant_get` with the joined path a -> x and
+    /// prints the outcome. It makes no assertions about the result.
+    #[test]
+    fn test_simple_nested_path_support() {
+        // Part 1: show the elements each path spelling produces
+        println!("Testing path parsing:");
+
+        let single = VariantPath::from("x");
+        let single_elements: Vec<_> = single.iter().collect();
+        println!(
+            "  'x' -> {} elements: {:?}",
+            single_elements.len(),
+            single_elements
+        );
+
+        let dotted = VariantPath::from("a.x");
+        let dotted_elements: Vec<_> = dotted.iter().collect();
+        println!(
+            "  'a.x' -> {} elements: {:?}",
+            dotted_elements.len(),
+            dotted_elements
+        );
+
+        let dollar_prefixed = VariantPath::from("$.a.x");
+        let dollar_prefixed_elements: Vec<_> = dollar_prefixed.iter().collect();
+        println!(
+            "  '$.a.x' -> {} elements: {:?}",
+            dollar_prefixed_elements.len(),
+            dollar_prefixed_elements
+        );
+
+        let joined = VariantPath::from("a").join("x");
+        let joined_elements: Vec<_> = joined.iter().collect();
+        println!(
+            "  VariantPath::from('a').join('x') -> {} elements: {:?}",
+            joined_elements.len(),
+            joined_elements
+        );
+
+        // Part 2: run variant_get on the single-level fixture with a two-element path
+        let array = shredded_object_with_x_field_variant_array();
+        let request = GetOptions::new_with_path(VariantPath::from("a").join("x"));
+
+        if let Err(e) = variant_get(&array, request) {
+            println!("Nested path 'a.x' error: {}", e);
+
+            // Render the error once and look for either "not implemented" spelling
+            let message = e.to_string();
+            let not_implemented = ["Not yet implemented", "NotYetImplemented"]
+                .iter()
+                .any(|needle| message.contains(*needle));
+
+            if not_implemented {
+                println!("This is expected - nested paths are not implemented");
+            } else {
+                // Any other error is also tolerated for now
+                println!("This shows nested paths need implementation");
+            }
+        } else {
+            println!("Nested path 'a.x' works unexpectedly!");
+        }
+    }
+
+    /// Depth 0 of the Int32 conversion scenarios: direct access to field "x".
+    ///
+    /// The same `variant_get` request is run against an unshredded array and
+    /// against a shredded array, and the resulting Int32 columns are checked.
+    #[test]
+    fn test_depth_0_int32_conversion() {
+        // Requests path "x" as a nullable Int32 column named "result"
+        let get_x_as_int32 = |input: &ArrayRef| -> ArrayRef {
+            let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
+            let request =
+                GetOptions::new_with_path(VariantPath::from("x")).with_as_type(Some(as_type));
+            variant_get(input, request).unwrap()
+        };
+
+        println!("=== Testing Depth 0: Direct field access ===");
+
+        // Unshredded input: [{"x": 42}, {"x": "foo"}, {"y": 10}]
+        let unshredded_input = create_depth_0_test_data();
+        let from_unshredded = get_x_as_int32(&unshredded_input);
+        let unshredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(42), // {"x": 42} -> 42
+            None,     // {"x": "foo"} -> NULL (type mismatch)
+            None,     // {"y": 10} -> NULL (field missing)
+        ]));
+        assert_eq!(&from_unshredded, &unshredded_expected);
+        println!("Depth 0 (unshredded) passed");
+
+        // Shredded input: two rows, with "x" shredded where it is an Int32
+        let shredded_input = create_depth_0_shredded_test_data_simple();
+        let from_shredded = get_x_as_int32(&shredded_input);
+        let shredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(42), // {"x": 42} -> 42 (from typed_value)
+            None,     // {"x": "foo"} -> NULL (type mismatch, from value field)
+        ]));
+        assert_eq!(&from_shredded, &shredded_expected);
+        println!("Depth 0 (shredded) passed");
+    }
+
+    /// Depth 1 of the Int32 conversion scenarios: nested access via "a.x".
+    ///
+    /// Checks the unshredded fixture first and the shredded fixture second;
+    /// both are expected to produce the same Int32 column.
+    #[test]
+    fn test_depth_1_int32_conversion() {
+        /// Runs `variant_get` for the dot-notation path "a.x", asking for a
+        /// nullable Int32 column named "result".
+        fn get_a_x_as_int32(input: &ArrayRef) -> ArrayRef {
+            let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
+            let request =
+                GetOptions::new_with_path(VariantPath::from("a.x")).with_as_type(Some(as_type));
+            variant_get(input, request).unwrap()
+        }
+
+        println!("=== Testing Depth 1: Single nested field access ===");
+
+        // Unshredded rows taken from the GitHub issue
+        let unshredded_input = create_nested_path_test_data();
+        let from_unshredded = get_a_x_as_int32(&unshredded_input);
+        let unshredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(55), // {"a": {"x": 55}} -> 55
+            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch)
+        ]));
+        assert_eq!(&from_unshredded, &unshredded_expected);
+        println!("Depth 1 (unshredded) passed");
+
+        // Shredded rows: "a" and its child "x" are both shredded columns
+        let shredded_input = create_depth_1_shredded_test_data_working();
+        let from_shredded = get_a_x_as_int32(&shredded_input);
+        let shredded_expected: ArrayRef = Arc::new(Int32Array::from(vec![
+            Some(55), // {"a": {"x": 55}} -> 55 (from nested shredded x)
+            None,     // {"a": {"x": "foo"}} -> NULL (type mismatch in nested value)
+        ]));
+        assert_eq!(&from_shredded, &shredded_expected);
+        println!("Depth 1 (shredded) passed");
+    }
+
+    /// Depth 2 of the Int32 conversion scenarios: doubly nested access via "a.b.x".
+    ///
+    /// The unshredded and the shredded fixture are processed in that order and
+    /// must both yield `[100, NULL, NULL]`.
+    #[test]
+    fn test_depth_2_int32_conversion() {
+        println!("=== Testing Depth 2: Double nested field access ===");
+
+        // Each case: fixture constructor + message printed once its assertion holds.
+        // Fixtures are built lazily inside the loop, one after the other.
+        let cases = [
+            (
+                // [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
+                create_depth_2_test_data as fn() -> ArrayRef,
+                "Depth 2 (unshredded) passed",
+            ),
+            (
+                // Same logical rows, with a -> b -> x as nested shredded columns
+                create_depth_2_shredded_test_data_working as fn() -> ArrayRef,
+                "Depth 2 (shredded) passed",
+            ),
+        ];
+
+        for (build_input, passed_message) in cases {
+            let input = build_input();
+
+            // Double nested dot notation, converted to a nullable Int32 column
+            let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
+            let request =
+                GetOptions::new_with_path(VariantPath::from("a.b.x")).with_as_type(Some(as_type));
+            let actual = variant_get(&input, request).unwrap();
+
+            let expected: ArrayRef = Arc::new(Int32Array::from(vec![
+                Some(100), // {"a": {"b": {"x": 100}}} -> 100
+                None,      // {"a": {"b": {"x": "bar"}}} -> NULL (type mismatch)
+                None,      // {"a": {"b": {"y": 200}}} -> NULL (field missing)
+            ]));
+            assert_eq!(&actual, &expected);
+            println!("{}", passed_message);
+        }
+    }
+
+    /// Smoke test for the paths that are expected to succeed today.
+    ///
+    /// Runs `variant_get` on the single-level shredded fixture with path "x" and
+    /// then with the joined path a -> x, unwrapping both results and printing
+    /// them. No assertion is made on the returned arrays.
+    #[test]
+    fn test_current_nested_path_functionality() {
+        let array = shredded_object_with_x_field_variant_array();
+
+        // Both lookups request a nullable Int32 column named "result"
+        let int32_result_field = || FieldRef::from(Field::new("result", DataType::Int32, true));
+
+        // Single-level path "x"
+        let single_request = GetOptions::new_with_path(VariantPath::from("x"))
+            .with_as_type(Some(int32_result_field()));
+        let single_result = variant_get(&array, single_request).unwrap();
+        println!("Single path 'x' works - result: {:?}", single_result);
+
+        // Two-element path built with `join`
+        let nested_request = GetOptions::new_with_path(VariantPath::from("a").join("x"))
+            .with_as_type(Some(int32_result_field()));
+        let nested_result = variant_get(&array, nested_request).unwrap();
+        println!("Nested path 'a.x' result: {:?}", nested_result);
+    }
+
+    /// Create unshredded test data for depth 0 (direct field access)
+    /// [{"x": 42}, {"x": "foo"}, {"y": 10}]
+    ///
+    /// Each JSON document is converted with `json_to_variant` and appended to a
+    /// `VariantArrayBuilder`, one row per document.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a document fails to convert. A failed conversion must not be
+    /// turned into a null row: the tests using this fixture expect NULL for the
+    /// type-mismatch and missing-field rows, so a silently nulled row would let
+    /// them pass for the wrong reason.
+    fn create_depth_0_test_data() -> ArrayRef {
+        let json_rows = [
+            r#"{"x": 42}"#,    // Row 0: "x" is an integer
+            r#"{"x": "foo"}"#, // Row 1: "x" is a string
+            r#"{"y": 10}"#,    // Row 2: missing "x" field
+        ];
+
+        let mut builder = crate::VariantArrayBuilder::new(json_rows.len());
+        for json_str in json_rows {
+            // Convert one document at a time, as a single-element string array
+            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
+            let variant_array = json_to_variant(&string_array)
+                .expect("test fixture JSON should convert to a variant");
+            builder.append_variant(variant_array.value(0));
+        }
+
+        ArrayRef::from(builder.build())
+    }
+
+    /// Create unshredded test data for depth 1 (single nested field)
+    /// This represents the exact scenarios from the GitHub issue: "a.x"
+    /// [{"a": {"x": 55}, "b": 42}, {"a": {"x": "foo"}, "b": 42}]
+    ///
+    /// Each JSON document is converted with `json_to_variant` and appended to a
+    /// `VariantArrayBuilder`, one row per document.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a document fails to convert. A failed conversion must not be
+    /// turned into a null row: the tests using this fixture expect NULL for the
+    /// type-mismatch row, so a silently nulled row would let them pass for the
+    /// wrong reason.
+    fn create_nested_path_test_data() -> ArrayRef {
+        let json_rows = [
+            r#"{"a": {"x": 55}, "b": 42}"#,    // Row 0: a.x is an integer
+            r#"{"a": {"x": "foo"}, "b": 42}"#, // Row 1: a.x is a string
+        ];
+
+        let mut builder = crate::VariantArrayBuilder::new(json_rows.len());
+        for json_str in json_rows {
+            // Convert one document at a time, as a single-element string array
+            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
+            let variant_array = json_to_variant(&string_array)
+                .expect("test fixture JSON should convert to a variant");
+            builder.append_variant(variant_array.value(0));
+        }
+
+        ArrayRef::from(builder.build())
+    }
+
+    /// Create unshredded test data for depth 2 (double nested field)
+    /// [{"a": {"b": {"x": 100}}}, {"a": {"b": {"x": "bar"}}}, {"a": {"b": {"y": 200}}}]
+    ///
+    /// Each JSON document is converted with `json_to_variant` and appended to a
+    /// `VariantArrayBuilder`, one row per document.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a document fails to convert. A failed conversion must not be
+    /// turned into a null row: the tests using this fixture expect NULL for the
+    /// type-mismatch and missing-field rows, so a silently nulled row would let
+    /// them pass for the wrong reason.
+    fn create_depth_2_test_data() -> ArrayRef {
+        let json_rows = [
+            r#"{"a": {"b": {"x": 100}}}"#,   // Row 0: a.b.x is an integer
+            r#"{"a": {"b": {"x": "bar"}}}"#, // Row 1: a.b.x is a string
+            r#"{"a": {"b": {"y": 200}}}"#,   // Row 2: missing "x" field
+        ];
+
+        let mut builder = crate::VariantArrayBuilder::new(json_rows.len());
+        for json_str in json_rows {
+            // Convert one document at a time, as a single-element string array
+            let string_array: ArrayRef = Arc::new(StringArray::from(vec![json_str]));
+            let variant_array = json_to_variant(&string_array)
+                .expect("test fixture JSON should convert to a variant");
+            builder.append_variant(variant_array.value(0));
+        }
+
+        ArrayRef::from(builder.build())
+    }
+
+    /// Builds a two-row shredded fixture for depth-0 access of field "x".
+    ///
+    /// Logical rows: [{"x": 42}, {"x": "foo"}]. The integer of row 0 lives in the
+    /// shredded `typed_value` column of "x"; row 1 keeps {"x": "foo"} in the
+    /// top-level `value` column because a string cannot be shredded as Int32.
+    fn create_depth_0_shredded_test_data_simple() -> ArrayRef {
+        // Encode {"x": "foo"} once: its metadata is shared by both rows and its
+        // value bytes become the row 1 fallback
+        let (metadata, string_x_value) = {
+            let mut object_builder = parquet_variant::VariantBuilder::new();
+            let mut object = object_builder.new_object();
+            object.insert("x", Variant::from("foo"));
+            object.finish();
+            object_builder.finish()
+        };
+
+        // Value bytes of an empty object; the metadata half is not needed
+        let (_, empty_object_value) = {
+            let mut object_builder = parquet_variant::VariantBuilder::new();
+            let object = object_builder.new_object();
+            object.finish();
+            object_builder.finish()
+        };
+
+        // Shredded "x" column: only a typed_value child, null where x is not an Int32
+        let x_typed_values = Int32Array::from(vec![Some(42), None]);
+        let x_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(x_typed_values), true)
+            .build();
+        let x_shredded = ShreddedVariantFieldArray::try_new(&x_struct)
+            .expect("should create ShreddedVariantFieldArray");
+
+        // Top-level typed_value: a struct whose single child is the "x" column
+        let x_field = Field::new("x", x_shredded.data_type().clone(), true);
+        let typed_value_column = StructArray::try_new(
+            Fields::from(vec![x_field]),
+            vec![ArrayRef::from(x_shredded)],
+            None,
+        )
+        .unwrap();
+
+        // Top-level metadata column (same bytes for both rows)
+        let metadata_column = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
+
+        // Top-level value column
+        let value_column = BinaryViewArray::from(vec![
+            Some(empty_object_value.as_slice()), // Row 0: {} (x shredded out)
+            Some(string_x_value.as_slice()),     // Row 1: {"x": "foo"} (fallback)
+        ]);
+
+        // Assemble metadata + value + typed_value into the final struct
+        let variant_struct = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_column), false)
+            .with_field("value", Arc::new(value_column), true)
+            .with_field("typed_value", Arc::new(typed_value_column), true)
+            .build();
+        Arc::new(variant_struct)
+    }
+
+    /// Create depth 1 shredded test data for "a.x" paths.
+    ///
+    /// Logical rows this fixture is meant to model:
+    /// - Row 0: {"a": {"x": 55}, "b": 42}
+    /// - Row 1: {"a": {"x": "foo"}, "b": 42}
+    ///
+    /// What is physically stored below (two rows):
+    /// - metadata: identical bytes for both rows, from building {"a": {"x": 55}, "b": 42}
+    /// - value: row 0 is an empty object, row 1 is the object {"fallback": "data"}
+    /// - typed_value.a.value: row 0 is null, row 1 is an empty object
+    /// - typed_value.a.typed_value.x.typed_value: Int32 [55, null]
+    ///
+    /// Note that neither the string "foo" nor "b": 42 is stored in any column;
+    /// row 1 simply has no Int32 value for a.x.
+    fn create_depth_1_shredded_test_data_working() -> ArrayRef {
+        // Create metadata following the working pattern from shredded_object_with_x_field_variant_array
+        // Only the metadata half of `finish()` is kept; the value bytes are discarded.
+        let (metadata, _) = {
+            // Create nested structure: {"a": {"x": 55}, "b": 42}
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut obj = builder.new_object();
+
+            // Create the nested "a" object
+            let mut a_obj = obj.new_object("a");
+            a_obj.insert("x", Variant::Int32(55));
+            a_obj.finish();
+
+            obj.insert("b", Variant::Int32(42));
+            obj.finish();
+            builder.finish()
+        };
+
+        // Same metadata bytes for both rows
+        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 2));
+
+        // Create value arrays for the fallback case
+        // Value bytes of an empty object (built with a separate builder, metadata discarded)
+        let empty_object_value = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+
+        // Row 1 fallback: value bytes of {"fallback": "data"}, built with a separate
+        // builder whose metadata is discarded.
+        // NOTE(review): the shared `metadata` above was built without a "fallback" key,
+        // so these bytes presumably cannot be resolved against it -- confirm this
+        // placeholder is never decoded by the tests.
+        let row1_fallback = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut obj = builder.new_object();
+            obj.insert("fallback", Variant::from("data"));
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+
+        let value_array = BinaryViewArray::from(vec![
+            Some(empty_object_value.as_slice()), // Row 0: {} (empty object; "b": 42 is not stored)
+            Some(row1_fallback.as_slice()), // Row 1: {"fallback": "data"} placeholder bytes
+        ]);
+
+        // Create the nested shredded structure
+        // Level 2: x field (the deepest level)
+        // Only a typed_value child: 55 for row 0, null for row 1
+        let x_typed_value = Int32Array::from(vec![Some(55), None]);
+        let x_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(x_typed_value), true)
+            .build();
+        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
+            .expect("should create ShreddedVariantFieldArray for x");
+
+        // Level 1: a field containing x field + value field for fallbacks
+        // The "a" field gets both typed_value (struct holding x) and value children
+
+        // Create the value bytes for "a": an empty object
+        let a_value_data = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+        let a_value_array = BinaryViewArray::from(vec![
+            None,                          // Row 0: x is shredded, so no value fallback needed
+            Some(a_value_data.as_slice()), // Row 1: empty object (the string "foo" is not stored)
+        ]);
+
+        // Schema of a.typed_value: a struct with the single shredded child "x"
+        // (the data type is read before `x_field_shredded` is moved into the struct)
+        let a_inner_fields = Fields::from(vec![Field::new(
+            "x",
+            x_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let a_inner_struct = StructArrayBuilder::new()
+            .with_field(
+                "typed_value",
+                Arc::new(
+                    StructArray::try_new(
+                        a_inner_fields,
+                        vec![ArrayRef::from(x_field_shredded)],
+                        None,
+                    )
+                    .unwrap(),
+                ),
+                true,
+            )
+            .with_field("value", Arc::new(a_value_array), true)
+            .build();
+        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
+            .expect("should create ShreddedVariantFieldArray for a");
+
+        // Level 0: main typed_value struct containing a field
+        let typed_value_fields = Fields::from(vec![Field::new(
+            "a",
+            a_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let typed_value_struct = StructArray::try_new(
+            typed_value_fields,
+            vec![ArrayRef::from(a_field_shredded)],
+            None,
+        )
+        .unwrap();
+
+        // Assemble the top-level struct: metadata + value + typed_value
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("value", Arc::new(value_array), true)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .build();
+
+        Arc::new(struct_array)
+    }
+
+    /// Create depth 2 shredded test data for "a.b.x" paths.
+    ///
+    /// Logical rows this fixture is meant to model:
+    /// - Row 0: {"a": {"b": {"x": 100}}}
+    /// - Row 1: {"a": {"b": {"x": "bar"}}}
+    /// - Row 2: {"a": {"b": {"y": 200}}}
+    ///
+    /// What is physically stored below (three rows):
+    /// - metadata: identical bytes for all rows, from building {"a": {"b": {"x": 100}}}
+    /// - value: an empty object for every row
+    /// - typed_value.a.value and typed_value.a.typed_value.b.value: null for row 0,
+    ///   an empty object for rows 1 and 2
+    /// - typed_value.a.typed_value.b.typed_value.x.typed_value: Int32 [100, null, null]
+    ///
+    /// Note that neither the string "bar" nor "y": 200 is stored in any column;
+    /// rows 1 and 2 simply have no Int32 value for a.b.x.
+    fn create_depth_2_shredded_test_data_working() -> ArrayRef {
+        // Create metadata following the working pattern
+        // Only the metadata half of `finish()` is kept; the value bytes are discarded.
+        let (metadata, _) = {
+            // Create deeply nested structure: {"a": {"b": {"x": 100}}}
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut obj = builder.new_object();
+
+            // Create the nested "a.b" structure
+            let mut a_obj = obj.new_object("a");
+            let mut b_obj = a_obj.new_object("b");
+            b_obj.insert("x", Variant::Int32(100));
+            b_obj.finish();
+            a_obj.finish();
+
+            obj.finish();
+            builder.finish()
+        };
+
+        // Same metadata bytes for all three rows
+        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3));
+
+        // Create value arrays for fallback cases
+        // Value bytes of an empty object (built with a separate builder, metadata discarded)
+        let empty_object_value = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+
+        // Top-level value column: every row stores the same empty object
+        let value_array = BinaryViewArray::from(vec![
+            Some(empty_object_value.as_slice()), // Row 0: fully shredded
+            Some(empty_object_value.as_slice()), // Row 1: empty object (simplified)
+            Some(empty_object_value.as_slice()), // Row 2: empty object (simplified)
+        ]);
+
+        // Create the deeply nested shredded structure: a.b.x
+
+        // Level 3: x field (deepest level)
+        // Only a typed_value child: 100 for row 0, null for rows 1 and 2
+        let x_typed_value = Int32Array::from(vec![Some(100), None, None]);
+        let x_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(x_typed_value), true)
+            .build();
+        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
+            .expect("should create ShreddedVariantFieldArray for x");
+
+        // Level 2: b field containing x field + value field
+        // The value bytes for "b" are an empty object
+        let b_value_data = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+        let b_value_array = BinaryViewArray::from(vec![
+            None,                          // Row 0: x is shredded
+            Some(b_value_data.as_slice()), // Row 1: empty object ("bar" is not stored)
+            Some(b_value_data.as_slice()), // Row 2: empty object (y=200 is not stored)
+        ]);
+
+        // Schema of b.typed_value: a struct with the single shredded child "x"
+        // (the data type is read before `x_field_shredded` is moved into the struct)
+        let b_inner_fields = Fields::from(vec![Field::new(
+            "x",
+            x_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let b_inner_struct = StructArrayBuilder::new()
+            .with_field(
+                "typed_value",
+                Arc::new(
+                    StructArray::try_new(
+                        b_inner_fields,
+                        vec![ArrayRef::from(x_field_shredded)],
+                        None,
+                    )
+                    .unwrap(),
+                ),
+                true,
+            )
+            .with_field("value", Arc::new(b_value_array), true)
+            .build();
+        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct)
+            .expect("should create ShreddedVariantFieldArray for b");
+
+        // Level 1: a field containing b field + value field
+        // The value bytes for "a" are an empty object
+        let a_value_data = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            let (_, value) = builder.finish();
+            value
+        };
+        let a_value_array = BinaryViewArray::from(vec![
+            None,                          // Row 0: b is shredded
+            Some(a_value_data.as_slice()), // Row 1: empty object
+            Some(a_value_data.as_slice()), // Row 2: empty object
+        ]);
+
+        // Schema of a.typed_value: a struct with the single shredded child "b"
+        let a_inner_fields = Fields::from(vec![Field::new(
+            "b",
+            b_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let a_inner_struct = StructArrayBuilder::new()
+            .with_field(
+                "typed_value",
+                Arc::new(
+                    StructArray::try_new(
+                        a_inner_fields,
+                        vec![ArrayRef::from(b_field_shredded)],
+                        None,
+                    )
+                    .unwrap(),
+                ),
+                true,
+            )
+            .with_field("value", Arc::new(a_value_array), true)
+            .build();
+        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct)
+            .expect("should create ShreddedVariantFieldArray for a");
+
+        // Level 0: main typed_value struct containing a field
+        let typed_value_fields = Fields::from(vec![Field::new(
+            "a",
+            a_field_shredded.data_type().clone(),
+            true,
+        )]);
+        let typed_value_struct = StructArray::try_new(
+            typed_value_fields,
+            vec![ArrayRef::from(a_field_shredded)],
+            None,
+        )
+        .unwrap();
+
+        // Assemble the top-level struct: metadata + value + typed_value
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("value", Arc::new(value_array), true)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .build();
+
+        Arc::new(struct_array)
+    }
+
+    #[test]
+    fn test_strict_cast_options_downcast_failure() {
+        use arrow::compute::CastOptions;
+        use arrow::datatypes::{DataType, Field};
+        use arrow::error::ArrowError;
+        use parquet_variant::VariantPath;
+        use std::sync::Arc;
+
+        // Reuse the shared fixture whose typed_value column is Int32 (i.e. not a struct)
+        let variant_array = perfectly_shredded_int32_variant_array();
+
+        // Safe cast options: a field lookup that cannot resolve should produce NULLs
+        let safe_options = GetOptions {
+            path: VariantPath::from("nonexistent_field"),
+            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
+            cast_options: CastOptions::default(), // safe = true
+        };
+
+        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
+        let result = variant_get(&variant_array_ref, safe_options);
+        // Safe mode: the call succeeds and each of the 3 rows is NULL
+        assert!(result.is_ok());
+        let result_array = result.unwrap();
+        assert_eq!(result_array.len(), 3);
+        assert!(result_array.is_null(0));
+        assert!(result_array.is_null(1));
+        assert!(result_array.is_null(2));
+
+        // Strict cast options (safe: false): the same lookup must fail instead
+        let strict_options = GetOptions {
+            path: VariantPath::from("nonexistent_field"),
+            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
+            cast_options: CastOptions {
+                safe: false,
+                ..Default::default()
+            },
+        };
+
+        let result = variant_get(&variant_array_ref, strict_options);
+        // Strict mode: expect a CastError that names the field and the non-struct type
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(matches!(error, ArrowError::CastError(_)));
+        assert!(
+            error
+                .to_string()
+                .contains("Cannot access field 'nonexistent_field' on non-struct type")
+        );
+    }
+
+    #[test]
+    fn test_error_message_boolean_type_display() {
+        // One unshredded row holding an Int32, which cannot be read as a Boolean
+        let mut rows = VariantArrayBuilder::new(1);
+        rows.append_variant(Variant::Int32(123));
+        let input: ArrayRef = ArrayRef::from(rows.build());
+
+        // Strict casting (`safe: false`) makes the mismatch surface as an error
+        let strict = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", DataType::Boolean, true))),
+            cast_options: strict,
+        };
+        let msg = variant_get(&input, options).unwrap_err().to_string();
+        assert!(msg.contains("Failed to extract primitive of type Boolean"));
+    }
+
+    #[test]
+    fn test_error_message_numeric_type_display() {
+        // One unshredded row holding `true`, which cannot be read as a Float32
+        let mut rows = VariantArrayBuilder::new(1);
+        rows.append_variant(Variant::BooleanTrue);
+        let input: ArrayRef = ArrayRef::from(rows.build());
+
+        // Request Float32; strict casting (`safe: false`) forces an error
+        let strict = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", DataType::Float32, true))),
+            cast_options: strict,
+        };
+        let msg = variant_get(&input, options).unwrap_err().to_string();
+        assert!(msg.contains("Failed to extract primitive of type Float32"));
+    }
+
+    #[test]
+    fn test_error_message_temporal_type_display() {
+        // One unshredded row holding `false`, which cannot be read as a timestamp
+        let mut rows = VariantArrayBuilder::new(1);
+        rows.append_variant(Variant::BooleanFalse);
+        let input: ArrayRef = ArrayRef::from(rows.build());
+
+        // Request a nanosecond Timestamp without timezone; strict casting
+        // (`safe: false`) makes the mismatch surface as an error
+        let target = DataType::Timestamp(TimeUnit::Nanosecond, None);
+        let strict = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", target, true))),
+            cast_options: strict,
+        };
+
+        // The error text is expected to name the target type as "Timestamp(ns)"
+        let msg = variant_get(&input, options).unwrap_err().to_string();
+        assert!(msg.contains("Failed to extract primitive of type Timestamp(ns)"));
+    }
+
+    #[test]
+    fn test_null_buffer_union_for_shredded_paths() {
+        use arrow::compute::CastOptions;
+        use arrow::datatypes::{DataType, Field};
+        use parquet_variant::VariantPath;
+        use std::sync::Arc;
+
+        // Checks that null buffers are combined (unioned) while traversing a shredded path
+        // (the original notes attribute this requirement to reviewer scovich)
+
+        // Intended setup, per the original notes (fixture body is not shown here - TODO confirm):
+        // - The top-level variant array has some nulls
+        // - The nested typed_value also has some nulls
+        // - The result should be the union of both null buffers
+
+        let variant_array = create_depth_1_shredded_test_data_working();
+
+        // Extract path "a.x" as Int32 with safe casting; the result should union nulls from:
+        // 1. The top-level variant array nulls
+        // 2. The "a" field's typed_value nulls
+        // 3. The "x" field's typed_value nulls
+        let options = GetOptions {
+            path: VariantPath::from("a.x"),
+            as_type: Some(Arc::new(Field::new("result", DataType::Int32, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref: Arc<dyn Array> = variant_array.clone();
+        let result = variant_get(&variant_array_ref, options).unwrap();
+
+        // The output must have exactly one slot per input row
+        assert_eq!(result.len(), variant_array.len());
+
+        // Only rows 0 and 1 are checked below:
+        // Row 0: Should have valid data (path exists and is shredded as Int32)
+        // Row 1: Should be null (due to type mismatch - "foo" can't cast to Int32)
+        assert!(!result.is_null(0), "Row 0 should have valid Int32 data");
+        assert!(
+            result.is_null(1),
+            "Row 1 should be null due to type casting failure"
+        );
+
+        // Downcast to Int32Array and check the one valid value
+        let int32_result = result.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(int32_result.value(0), 55); // The valid Int32 value
+    }
+
+    #[test]
+    fn test_struct_null_mask_union_from_children() {
+        use arrow::compute::CastOptions;
+        use arrow::datatypes::{DataType, Field, Fields};
+        use parquet_variant::VariantPath;
+        use std::sync::Arc;
+
+        use arrow::array::StringArray;
+
+        // Checks child nulls and the struct-level null mask when some field casts fail
+        // (the original notes attribute this to reviewer scovich's concern about null masks)
+
+        // Three JSON objects; in rows 0 and 1 one of the two fields is a string, not an int
+        let json_strings = vec![
+            r#"{"a": 42, "b": "hello"}"#, // Row 0: a=42 (castable to int), b="hello" (not castable to int)
+            r#"{"a": "world", "b": 100}"#, // Row 1: a="world" (not castable to int), b=100 (castable to int)
+            r#"{"a": 55, "b": 77}"#,       // Row 2: a=55 (castable to int), b=77 (castable to int)
+        ];
+
+        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Request extraction as a struct with both fields as nullable Int32
+        // With safe casting, a field that fails to cast should become NULL in its child array
+        let struct_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(), // Extract the whole object as struct
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        let result = variant_get(&variant_array_ref, options).unwrap();
+
+        // The result must be a StructArray with one slot per input row
+        let struct_result = result.as_struct();
+        assert_eq!(struct_result.len(), 3);
+
+        // Child columns in request order: column 0 is "a", column 1 is "b"
+        let field_a = struct_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let field_b = struct_result
+            .column(1)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Verify field values and nulls
+        // Row 0: a=42 (valid), b=null (casting failure)
+        assert!(!field_a.is_null(0));
+        assert_eq!(field_a.value(0), 42);
+        assert!(field_b.is_null(0)); // "hello" can't cast to int
+
+        // Row 1: a=null (casting failure), b=100 (valid)
+        assert!(field_a.is_null(1)); // "world" can't cast to int
+        assert!(!field_b.is_null(1));
+        assert_eq!(field_b.value(1), 100);
+
+        // Row 2: a=55 (valid), b=77 (valid)
+        assert!(!field_a.is_null(2));
+        assert_eq!(field_a.value(2), 55);
+        assert!(!field_b.is_null(2));
+        assert_eq!(field_b.value(2), 77);
+
+        // Struct-level validity: a failed child cast must not null out the whole row
+        // Every input row is an object, so no struct slot is expected to be NULL
+        // (This tests that we're not incorrectly making the entire struct null when children fail)
+        assert!(!struct_result.is_null(0)); // Has valid field 'a'
+        assert!(!struct_result.is_null(1)); // Has valid field 'b'
+        assert!(!struct_result.is_null(2)); // Has both valid fields
+    }
+
+    #[test]
+    fn test_field_nullability_preservation() {
+        use arrow::compute::CastOptions;
+        use arrow::datatypes::{DataType, Field};
+        use parquet_variant::VariantPath;
+        use std::sync::Arc;
+
+        use arrow::array::StringArray;
+
+        // Extracts "x" as Int32 twice (nullable, then non-nullable as_type) and checks per-row nulls
+
+        let json_strings = vec![
+            r#"{"x": 42}"#,                  // Row 0: Valid int that should convert to Int32
+            r#"{"x": "not_a_number"}"#,      // Row 1: String that can't cast to Int32
+            r#"{"x": null}"#,                // Row 2: Explicit null value
+            r#"{"x": "hello"}"#,             // Row 3: Another string (wrong type)
+            r#"{"y": 100}"#,                 // Row 4: Missing "x" field (SQL NULL case)
+            r#"{"x": 127}"#, // Row 5: Small int (could be Int8, widening cast candidate)
+            r#"{"x": 32767}"#, // Row 6: Medium int (could be Int16, widening cast candidate)
+            r#"{"x": 2147483647}"#, // Row 7: Max Int32 value (fits in Int32)
+            r#"{"x": 9223372036854775807}"#, // Row 8: Large Int64 value (cannot convert to Int32)
+        ];
+
+        let string_array: Arc<dyn arrow::array::Array> = Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Test 1: nullable target field with safe casting (cast failures become NULL)
+        let nullable_field = Arc::new(Field::new("result", DataType::Int32, true));
+        let options_nullable = GetOptions {
+            path: VariantPath::from("x"),
+            as_type: Some(nullable_field.clone()),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        let result_nullable = variant_get(&variant_array_ref, options_nullable).unwrap();
+
+        // The result must be an Int32Array with one slot per input row
+        let int32_result = result_nullable
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(int32_result.len(), 9);
+
+        // Row 0: 42 converts successfully to Int32
+        assert!(!int32_result.is_null(0));
+        assert_eq!(int32_result.value(0), 42);
+
+        // Row 1: "not_a_number" fails to convert -> NULL
+        assert!(int32_result.is_null(1));
+
+        // Row 2: explicit null value -> NULL
+        assert!(int32_result.is_null(2));
+
+        // Row 3: "hello" (wrong type) fails to convert -> NULL
+        assert!(int32_result.is_null(3));
+
+        // Row 4: missing "x" field (SQL NULL case) -> NULL
+        assert!(int32_result.is_null(4));
+
+        // Row 5: 127 (small int, potential Int8 -> Int32 widening)
+        // Expected: the value is widened to Int32 (presumably parsed as Int8 - TODO confirm)
+        assert!(!int32_result.is_null(5));
+        assert_eq!(int32_result.value(5), 127);
+
+        // Row 6: 32767 (medium int, potential Int16 -> Int32 widening)
+        // Expected: the value is widened to Int32 (presumably parsed as Int16 - TODO confirm)
+        assert!(!int32_result.is_null(6));
+        assert_eq!(int32_result.value(6), 32767);
+
+        // Row 7: 2147483647 (max Int32, fits exactly)
+        // Expected: converts successfully
+        assert!(!int32_result.is_null(7));
+        assert_eq!(int32_result.value(7), 2147483647);
+
+        // Row 8: 9223372036854775807 (large Int64, cannot fit in Int32)
+        // Expected: conversion fails -> NULL under safe casting
+        assert!(int32_result.is_null(8));
+
+        // Test 2: non-nullable target field; safe casting should give the same nulls and values
+        let non_nullable_field = Arc::new(Field::new("result", DataType::Int32, false));
+        let options_non_nullable = GetOptions {
+            path: VariantPath::from("x"),
+            as_type: Some(non_nullable_field.clone()),
+            cast_options: CastOptions::default(), // safe=true by default
+        };
+
+        // Rebuild the variant array: the first one was moved into `variant_array_ref`
+        let variant_array_2 = json_to_variant(&string_array).unwrap();
+        let variant_array_ref_2 = ArrayRef::from(variant_array_2);
+        let result_non_nullable = variant_get(&variant_array_ref_2, options_non_nullable).unwrap();
+        let int32_result_2 = result_non_nullable
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Even with a non-nullable field, safe casting should still produce nulls for failures
+        assert_eq!(int32_result_2.len(), 9);
+
+        // Row 0: 42 converts successfully to Int32
+        assert!(!int32_result_2.is_null(0));
+        assert_eq!(int32_result_2.value(0), 42);
+
+        // Rows 1-4: All should be null due to safe casting behavior
+        // (non-nullable field specification doesn't override safe casting behavior)
+        assert!(int32_result_2.is_null(1)); // "not_a_number"
+        assert!(int32_result_2.is_null(2)); // explicit null
+        assert!(int32_result_2.is_null(3)); // "hello"
+        assert!(int32_result_2.is_null(4)); // missing field
+
+        // Rows 5-7: These should also convert successfully (numeric widening/fitting)
+        assert!(!int32_result_2.is_null(5)); // 127 (Int8 -> Int32)
+        assert_eq!(int32_result_2.value(5), 127);
+        assert!(!int32_result_2.is_null(6)); // 32767 (Int16 -> Int32)
+        assert_eq!(int32_result_2.value(6), 32767);
+        assert!(!int32_result_2.is_null(7)); // 2147483647 (fits in Int32)
+        assert_eq!(int32_result_2.value(7), 2147483647);
+
+        // Row 8: Large Int64 should fail conversion -> NULL
+        assert!(int32_result_2.is_null(8)); // 9223372036854775807 (too large for Int32)
+    }
+
+    #[test]
+    fn test_struct_extraction_subset_superset_schema_perfectly_shredded() {
+        // Fixture: 5 rows with fields "a", "b", "c" shredded as Int32, plus one top-level NULL
+        let variant_array = create_comprehensive_shredded_variant();
+
+        // Request struct with fields "a", "b", "d" (skip existing "c", add missing "d")
+        let struct_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+            Field::new("d", DataType::Int32, true),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let result = variant_get(&variant_array, options).unwrap();
+
+        // The result must be a StructArray with the 3 requested fields and 5 rows
+        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_result.len(), 5);
+        assert_eq!(struct_result.num_columns(), 3);
+
+        let field_a = struct_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let field_b = struct_result
+            .column(1)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let field_d = struct_result
+            .column(2)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Row 0: Normal values {"a": 1, "b": 2, "c": 3} → {a: 1, b: 2, d: NULL}
+        assert!(!struct_result.is_null(0));
+        assert_eq!(field_a.value(0), 1);
+        assert_eq!(field_b.value(0), 2);
+        assert!(field_d.is_null(0)); // Missing field "d"
+
+        // Row 1: Top-level NULL → struct-level NULL (child values are not checked)
+        assert!(struct_result.is_null(1));
+
+        // Row 2: Field "a" missing → {a: NULL, b: 2, d: NULL}
+        assert!(!struct_result.is_null(2));
+        assert!(field_a.is_null(2)); // Missing field "a"
+        assert_eq!(field_b.value(2), 2);
+        assert!(field_d.is_null(2)); // Missing field "d"
+
+        // Row 3: Field "b" missing → {a: 1, b: NULL, d: NULL}
+        assert!(!struct_result.is_null(3));
+        assert_eq!(field_a.value(3), 1);
+        assert!(field_b.is_null(3)); // Missing field "b"
+        assert!(field_d.is_null(3)); // Missing field "d"
+
+        // Row 4: Empty object {} → non-NULL struct whose fields are all NULL
+        assert!(!struct_result.is_null(4));
+        assert!(field_a.is_null(4)); // Empty object
+        assert!(field_b.is_null(4)); // Empty object
+        assert!(field_d.is_null(4)); // Missing field "d"
+    }
+
+    #[test]
+    fn test_nested_struct_extraction_perfectly_shredded() {
+        // Fixture rows: 0 = {"outer": {"inner": 42}}, 1 = inner NULL,
+        // 2 = outer NULL, 3 = top-level NULL
+        let variant_array = create_comprehensive_nested_shredded_variant();
+
+        // Request the nested struct type {"outer": {"inner": INT}}
+        let inner_field = Field::new("inner", DataType::Int32, true);
+        let inner_type = DataType::Struct(Fields::from(vec![inner_field]));
+        let outer_field = Field::new("outer", inner_type, true);
+        let result_type = DataType::Struct(Fields::from(vec![outer_field]));
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", result_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        // Empty path: each whole row is converted to the requested struct type
+        let result = variant_get(&variant_array, options).unwrap();
+
+        // Verify the result is a StructArray with a single "outer" column and 4 rows
+        let result_struct = result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(result_struct.len(), 4);
+        assert_eq!(result_struct.num_columns(), 1);
+
+        // Get the "outer" column, itself a struct with a single "inner" field
+        let outer_column = result_struct
+            .column(0)
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        assert_eq!(outer_column.num_columns(), 1);
+
+        // Get the "inner" field (Int32 values)
+        let inner_values = outer_column
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Row 0: Normal nested {"outer": {"inner": 42}}
+        assert!(!result_struct.is_null(0));
+        assert!(!outer_column.is_null(0));
+        assert_eq!(inner_values.value(0), 42);
+
+        // Row 1: "inner" is NULL → {outer: {inner: NULL}}
+        assert!(!result_struct.is_null(1));
+        assert!(!outer_column.is_null(1)); // "outer" struct is present, only "inner" is NULL
+        assert!(inner_values.is_null(1)); // "inner" value is NULL
+
+        // Row 2: "outer" is NULL → {outer: NULL}
+        assert!(!result_struct.is_null(2));
+        assert!(outer_column.is_null(2)); // "outer" column is NULL
+
+        // Row 3: Top-level NULL → struct-level NULL
+        assert!(result_struct.is_null(3));
+    }
+
+    #[test]
+    fn test_path_based_null_masks_one_step() {
+        // Fixture rows: 0 = nested value 42, 1 = inner NULL, 2 = outer NULL, 3 = top-level NULL
+        let variant_array = create_comprehensive_nested_shredded_variant();
+
+        // Follow one path step ("outer") and request the struct {"inner": INT} found there
+        let path = VariantPath::from("outer");
+        let inner_field = Field::new("inner", DataType::Int32, true);
+        let result_type = DataType::Struct(Fields::from(vec![inner_field]));
+
+        let options = GetOptions {
+            path,
+            as_type: Some(Arc::new(Field::new("result", result_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let result = variant_get(&variant_array, options).unwrap();
+
+        // Verify the result is a StructArray with a single "inner" column and 4 rows
+        let outer_result = result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(outer_result.len(), 4);
+        assert_eq!(outer_result.num_columns(), 1);
+
+        // Get the "inner" column (Int32 values); this shadows the `inner_field` Field above
+        let inner_field = outer_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Row 0: Normal nested {"outer": {"inner": 42}} → {"inner": 42}
+        assert!(!outer_result.is_null(0));
+        assert_eq!(inner_field.value(0), 42);
+
+        // Row 1: Inner field null {"outer": {"inner": null}} → {"inner": null}
+        assert!(!outer_result.is_null(1));
+        assert!(inner_field.is_null(1));
+
+        // Row 2: Outer field null {"outer": null} → null (entire struct is null)
+        assert!(outer_result.is_null(2));
+
+        // Row 3: Top-level null → null (entire struct is null)
+        assert!(outer_result.is_null(3));
+    }
+
+    #[test]
+    fn test_path_based_null_masks_two_steps() {
+        // Fixture rows: 0 = nested value 42, 1 = inner NULL,
+        // 2 = outer NULL, 3 = top-level NULL
+        let input = create_comprehensive_nested_shredded_variant();
+
+        // Walk two path steps ("outer", then "inner") and request the leaf as Int32
+        let target = Arc::new(Field::new("result", DataType::Int32, true));
+        let options = GetOptions {
+            path: VariantPath::from("outer").join("inner"),
+            as_type: Some(target),
+            cast_options: CastOptions::default(),
+        };
+
+        let extracted = variant_get(&input, options).unwrap();
+
+        // The output is expected to be an Int32Array with one slot per input row
+        let values = extracted.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.len(), 4);
+
+        // Row 0: {"outer": {"inner": 42}} → 42
+        assert!(!values.is_null(0));
+        assert_eq!(values.value(0), 42);
+
+        // Row 1: "inner" is NULL → NULL
+        assert!(values.is_null(1));
+
+        // Row 2: "outer" is NULL, so the path cannot be followed → NULL
+        assert!(values.is_null(2));
+
+        // Row 3: the whole row is NULL → NULL
+        assert!(values.is_null(3));
+    }
+
+    #[test]
+    fn test_struct_extraction_mixed_and_unshredded() {
+        // Fixture: partially shredded variant (x shredded, y not); body not shown here
+        let variant_array = create_mixed_and_unshredded_variant();
+
+        // Request a struct containing both the shredded field "x" and the unshredded field "y"
+        let struct_fields = Fields::from(vec![
+            Field::new("x", DataType::Int32, true),
+            Field::new("y", DataType::Int32, true),
+        ]);
+        let struct_type = DataType::Struct(struct_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let result = variant_get(&variant_array, options).unwrap();
+
+        // The call must succeed and yield a StructArray with 2 columns and 4 rows
+        let struct_result = result.as_any().downcast_ref::<StructArray>().unwrap();
+        assert_eq!(struct_result.len(), 4);
+        assert_eq!(struct_result.num_columns(), 2);
+
+        let field_x = struct_result
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        let field_y = struct_result
+            .column(1)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+
+        // Row 0: {"x": 1, "y": 42} - x from shredded, y from value field
+        assert_eq!(field_x.value(0), 1);
+        assert_eq!(field_y.value(0), 42);
+
+        // Row 1: {"x": 2} - x from shredded, y missing → NULL
+        assert_eq!(field_x.value(1), 2);
+        assert!(field_y.is_null(1));
+
+        // Row 2: {"x": 3, "y": null} - x from shredded, y explicitly null in value
+        assert_eq!(field_x.value(2), 3);
+        assert!(field_y.is_null(2));
+
+        // Row 3: top-level null - entire struct row should be null
+        assert!(struct_result.is_null(3));
+    }
+
+    /// Demonstrates a known gap: extracting a nested struct from fully unshredded input.
+    /// The call is expected to return a "Not yet implemented" error; the test asserts that error.
+    #[test]
+    fn test_struct_row_builder_gap_demonstration() {
+        // Build the input from JSON text, so it is completely unshredded (no typed_value at all)
+        let json_strings = vec![
+            r#"{"outer": {"inner": 42}}"#,
+            r#"{"outer": {"inner": 100}}"#,
+        ];
+        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
+        let variant_array = json_to_variant(&string_array).unwrap();
+
+        // Request the nested struct type {"outer": {"inner": INT}}; this is the unsupported case
+        let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
+        let inner_struct_type = DataType::Struct(inner_fields);
+        let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]);
+        let outer_struct_type = DataType::Struct(outer_fields);
+
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new("result", outer_struct_type, true))),
+            cast_options: CastOptions::default(),
+        };
+
+        let variant_array_ref = ArrayRef::from(variant_array);
+        let result = variant_get(&variant_array_ref, options);
+
+        // Expect an error whose message contains "Not yet implemented"
+        assert!(result.is_err());
+        let error = result.unwrap_err();
+        assert!(error.to_string().contains("Not yet implemented"));
+    }
+
+    /// Builds a 5-row shredded variant array: fields "a", "b", "c" as Int32, no `value` column.
+    /// Rows: 0 = a, b and c set, 1 = top-level null, 2 = only b, 3 = only a, 4 = empty object
+    fn create_comprehensive_shredded_variant() -> ArrayRef {
+        let (metadata, _) = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let obj = builder.new_object();
+            obj.finish();
+            builder.finish()
+        };
+
+        // Top-level validity bitmap: only row 1 is marked null
+        let nulls = NullBuffer::from(vec![
+            true,  // row 0: normal values
+            false, // row 1: top-level null
+            true,  // row 2: missing field a
+            true,  // row 3: missing field b
+            true,  // row 4: empty object
+        ]);
+
+        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 5));
+
+        // Each shredded field is a struct holding only a nullable Int32 `typed_value` column
+        // Field "a": present in rows 0,3 (missing in rows 1,2,4)
+        let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
+        let a_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(a_field_typed_value), true)
+            .build();
+        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct)
+            .expect("should create ShreddedVariantFieldArray for a");
+
+        // Field "b": present in rows 0,2 (missing in rows 1,3,4)
+        let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
+        let b_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(b_field_typed_value), true)
+            .build();
+        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct)
+            .expect("should create ShreddedVariantFieldArray for b");
+
+        // Field "c": present in row 0 only (missing in all other rows)
+        let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
+        let c_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(c_field_typed_value), true)
+            .build();
+        let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct)
+            .expect("should create ShreddedVariantFieldArray for c");
+
+        // Top-level typed_value struct with children "a", "b", "c" and no null buffer of its own
+        let typed_value_fields = Fields::from(vec![
+            Field::new("a", a_field_shredded.data_type().clone(), true),
+            Field::new("b", b_field_shredded.data_type().clone(), true),
+            Field::new("c", c_field_shredded.data_type().clone(), true),
+        ]);
+        let typed_value_struct = StructArray::try_new(
+            typed_value_fields,
+            vec![
+                ArrayRef::from(a_field_shredded),
+                ArrayRef::from(b_field_shredded),
+                ArrayRef::from(c_field_shredded),
+            ],
+            None,
+        )
+        .unwrap();
+
+        // Final struct: non-nullable metadata + nullable typed_value, with the top-level nulls
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .with_nulls(nulls)
+            .build();
+
+        Arc::new(struct_array)
+    }
+
+    /// Builds a four-row nested shredded variant in which each row exercises a different null pattern.
+    /// The data is a 3-level structure: variant -> outer -> inner (INT value).
+    /// The shredding schema is: {"metadata": BINARY, "typed_value": {"outer": {"typed_value": {"inner": {"typed_value": INT}}}}}
+    /// Rows: 0 = normal nested value (42), 1 = inner field null, 2 = outer field null, 3 = top-level null
+    fn create_comprehensive_nested_shredded_variant() -> ArrayRef {
+        // Innermost level: a shredded field whose typed_value column is Int32
+        // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null
+        let inner_typed_value = Int32Array::from(vec![Some(42), None, None, None]); // only row 0 holds a value
+        let inner = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(inner_typed_value), true)
+            .build();
+        let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap();
+
+        let outer_typed_value_nulls = NullBuffer::from(vec![
+            true,  // row 0: inner struct exists with typed_value=42
+            false, // row 1: inner field NULL
+            false, // row 2: outer field NULL
+            false, // row 3: top-level NULL
+        ]);
+        let outer_typed_value = StructArrayBuilder::new()
+            .with_field("inner", ArrayRef::from(inner), false)
+            .with_nulls(outer_typed_value_nulls)
+            .build();
+
+        let outer = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(outer_typed_value), true)
+            .build();
+        let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap();
+
+        let typed_value_nulls = NullBuffer::from(vec![
+            true,  // row 0: inner struct exists with typed_value=42
+            true,  // row 1: inner field NULL (the null is recorded one level down)
+            false, // row 2: outer field NULL
+            false, // row 3: top-level NULL
+        ]);
+        let typed_value = StructArrayBuilder::new()
+            .with_field("outer", ArrayRef::from(outer), false)
+            .with_nulls(typed_value_nulls)
+            .build();
+
+        // Assemble the top-level struct (metadata + typed_value); only row 3 is null at this level
+        let metadata_array =
+            BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4));
+        let nulls = NullBuffer::from(vec![
+            true,  // row 0: inner struct exists with typed_value=42
+            true,  // row 1: inner field NULL
+            true,  // row 2: outer field NULL
+            false, // row 3: top-level NULL
+        ]);
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("typed_value", Arc::new(typed_value), true)
+            .with_nulls(nulls)
+            .build();
+
+        Arc::new(struct_array)
+    }
+
+    /// Builds a four-row partially shredded variant (value + typed_value columns), including null scenarios
+    /// Field "x" is globally shredded, field "y" is never shredded
+    fn create_mixed_and_unshredded_variant() -> ArrayRef {
+        // Create spec-compliant mixed shredding:
+        // - Field "x" is globally shredded (has typed_value column)
+        // - Field "y" is never shredded (only appears in value field when present)
+
+        let (metadata, y_field_value) = { // metadata and value bytes for {"y": 42}
+            let mut builder = parquet_variant::VariantBuilder::new();
+            let mut obj = builder.new_object();
+            obj.insert("y", Variant::from(42));
+            obj.finish();
+            builder.finish()
+        };
+
+        let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); // same metadata for all 4 rows
+
+        // Value field contains objects with unshredded fields only (never contains "x")
+        // Row 0: {"y": 42} - x is shredded out, y remains in value
+        // Row 1: {} - x is shredded out, y is missing
+        // Row 2: {"y": null} - x is shredded out, y explicitly null
+        // Row 3: top-level null (encoded in VariantArray's null mask, but fields contain valid data)
+
+        let empty_object_value = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            builder.new_object().finish();
+            let (_, value) = builder.finish(); // only the value bytes are kept
+            value
+        };
+
+        let y_null_value = {
+            let mut builder = parquet_variant::VariantBuilder::new();
+            builder.new_object().with_field("y", Variant::Null).finish();
+            let (_, value) = builder.finish(); // only the value bytes are kept
+            value
+        };
+
+        let value_array = BinaryViewArray::from(vec![
+            Some(y_field_value.as_slice()),      // Row 0: {"y": 42}
+            Some(empty_object_value.as_slice()), // Row 1: {}
+            Some(y_null_value.as_slice()),       // Row 2: {"y": null}
+            Some(empty_object_value.as_slice()), // Row 3: top-level null (but value field contains valid data)
+        ]);
+
+        // Create shredded field "x" (globally shredded - never appears in value field)
+        // For top-level null row, the field still needs valid content (not null)
+        let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]); // row 3's 0 is a placeholder
+        let x_field_struct = StructArrayBuilder::new()
+            .with_field("typed_value", Arc::new(x_field_typed_value), true)
+            .build();
+        let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct)
+            .expect("should create ShreddedVariantFieldArray for x");
+
+        // Create main typed_value struct (only contains shredded fields)
+        let typed_value_struct = StructArrayBuilder::new()
+            .with_field("x", ArrayRef::from(x_field_shredded), false)
+            .build();
+
+        // Build VariantArray with both value and typed_value (PartiallyShredded)
+        // Top-level null is encoded in the main StructArray's null mask
+        let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null
+        let struct_array = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata_array), false)
+            .with_field("value", Arc::new(value_array), true)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .with_nulls(variant_nulls)
+            .build();
+
+        Arc::new(struct_array)
+    }
+
+    #[test]
+    fn get_decimal32_rescaled_to_scale2() {
+        // Decimal4 values at scales 2, 3 and 0, a null, and one Decimal8 value, all read back as Decimal32(9, 2)
+        let mut builder = crate::VariantArrayBuilder::new(5);
+        builder.append_variant(VariantDecimal4::try_new(1234, 2).unwrap().into()); // 12.34
+        builder.append_variant(VariantDecimal4::try_new(1234, 3).unwrap().into()); // 1.234
+        builder.append_variant(VariantDecimal4::try_new(1234, 0).unwrap().into()); // 1234
+        builder.append_null();
+        builder.append_variant(
+            VariantDecimal8::try_new((VariantDecimal4::MAX_UNSCALED_VALUE as i64) + 1, 3)
+                .unwrap()
+                .into(),
+        ); // one above the Decimal4 maximum, but at scale 3: should fit into Decimal32 once rescaled to 2
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), 1234); // 12.34: already at scale 2, unchanged
+        assert_eq!(result.value(1), 123); // 1.234 -> 1.23
+        assert_eq!(result.value(2), 123400); // 1234 -> 1234.00
+        assert!(result.is_null(3)); // row added with append_null
+        assert_eq!(
+            result.value(4),
+            VariantDecimal4::MAX_UNSCALED_VALUE / 10 + 1
+        ); // should not be null as the final result fits into Decimal32
+    }
+
+    #[test]
+    fn get_decimal32_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(7);
+        builder.append_variant(VariantDecimal4::try_new(1235, 0).unwrap().into()); // 1235, expected 124 at scale -1
+        builder.append_variant(VariantDecimal4::try_new(1245, 0).unwrap().into()); // 1245, expected 125 at scale -1
+        builder.append_variant(VariantDecimal4::try_new(-1235, 0).unwrap().into()); // -1235, expected -124 at scale -1
+        builder.append_variant(VariantDecimal4::try_new(-1245, 0).unwrap().into()); // -1245, expected -125 at scale -1
+        builder.append_variant(VariantDecimal4::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
+        builder.append_variant(VariantDecimal4::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
+        builder.append_variant(VariantDecimal4::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, -1), true); // scale -1: stored values count tens
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), -1);
+        // The expectations below pin ties (a dropped digit of exactly 5) to round away from zero
+        assert_eq!(result.value(0), 124);
+        assert_eq!(result.value(1), 125);
+        assert_eq!(result.value(2), -124);
+        assert_eq!(result.value(3), -125);
+        assert_eq!(result.value(4), 1);
+        assert!(result.is_valid(5)); // rounding to zero must yield a valid 0, not a null
+        assert_eq!(result.value(5), 0);
+        assert_eq!(result.value(6), 1);
+    }
+
+    #[test]
+    fn get_decimal32_large_scale_reduction() {
+        let mut builder = crate::VariantArrayBuilder::new(2); // rows: -MAX and +MAX of Decimal4, both at scale 0
+        builder.append_variant(
+            VariantDecimal4::try_new(-VariantDecimal4::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, -9), true); // first pass: scale -9
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), -9);
+        assert_eq!(result.value(0), -1); // expected to round to -1 unit of 10^9
+        assert_eq!(result.value(1), 1); // expected to round to +1 unit of 10^9
+
+        let field = Field::new("result", DataType::Decimal32(9, -10), true); // second pass: one more digit of reduction
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert_eq!(result.precision(), 9);
+        assert_eq!(result.scale(), -10);
+        assert!(result.is_valid(0)); // at scale -10 both rows are expected to be a valid 0, not null
+        assert_eq!(result.value(0), 0);
+        assert!(result.is_valid(1));
+        assert_eq!(result.value(1), 0);
+    }
+
+    #[test]
+    fn get_decimal32_precision_overflow_safe() {
+        // Neither value fits Decimal32(2, 2) after scaling and rounding
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(
+            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 9)
+                .unwrap()
+                .into(),
+        ); // integer value round up overflows
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(2, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); // default cast options
+        let result = variant_get(&variant_array, options).unwrap(); // the call itself must succeed
+        let result = result.as_any().downcast_ref::<Decimal32Array>().unwrap();
+
+        assert!(result.is_null(0)); // overflowing row is expected to become null
+        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
+    }
+
+    #[test]
+    fn get_decimal32_precision_overflow_unsafe_errors() {
+        let mut builder = crate::VariantArrayBuilder::new(1); // single row: Decimal4 MAX at scale 0
+        builder.append_variant(
+            VariantDecimal4::try_new(VariantDecimal4::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal32(9, 2), true);
+        let cast_options = CastOptions {
+            safe: false, // a failed cast must surface as an error
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(cast_options);
+        let err = variant_get(&variant_array, options).unwrap_err(); // the whole call is expected to fail
+
+        assert!(
+            err.to_string().contains(
+                "Failed to cast to Decimal32(precision=9, scale=2) from variant Decimal4"
+            )
+        );
+    }
+
+    #[test]
+    fn get_decimal64_rescaled_to_scale2() {
+        let mut builder = crate::VariantArrayBuilder::new(5);
+        builder.append_variant(VariantDecimal8::try_new(1234, 2).unwrap().into()); // 12.34
+        builder.append_variant(VariantDecimal8::try_new(1234, 3).unwrap().into()); // 1.234
+        builder.append_variant(VariantDecimal8::try_new(1234, 0).unwrap().into()); // 1234
+        builder.append_null();
+        builder.append_variant(
+            VariantDecimal16::try_new((VariantDecimal8::MAX_UNSCALED_VALUE as i128) + 1, 3)
+                .unwrap()
+                .into(),
+        ); // one above the Decimal8 maximum, but at scale 3: should fit into Decimal64 once rescaled to 2
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal64(18, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
+
+        assert_eq!(result.precision(), 18);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), 1234); // 12.34: already at scale 2, unchanged
+        assert_eq!(result.value(1), 123); // 1.234 -> 1.23
+        assert_eq!(result.value(2), 123400); // 1234 -> 1234.00
+        assert!(result.is_null(3)); // row added with append_null
+        assert_eq!(
+            result.value(4),
+            VariantDecimal8::MAX_UNSCALED_VALUE / 10 + 1
+        ); // should not be null as the final result fits into Decimal64
+    }
+
+    #[test]
+    fn get_decimal64_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(7);
+        builder.append_variant(VariantDecimal8::try_new(1235, 0).unwrap().into()); // 1235, expected 124 at scale -1
+        builder.append_variant(VariantDecimal8::try_new(1245, 0).unwrap().into()); // 1245, expected 125 at scale -1
+        builder.append_variant(VariantDecimal8::try_new(-1235, 0).unwrap().into()); // -1235, expected -124 at scale -1
+        builder.append_variant(VariantDecimal8::try_new(-1245, 0).unwrap().into()); // -1245, expected -125 at scale -1
+        builder.append_variant(VariantDecimal8::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
+        builder.append_variant(VariantDecimal8::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
+        builder.append_variant(VariantDecimal8::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal64(18, -1), true); // scale -1: stored values count tens
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
+
+        assert_eq!(result.precision(), 18);
+        assert_eq!(result.scale(), -1);
+        // The expectations below pin ties (a dropped digit of exactly 5) to round away from zero
+        assert_eq!(result.value(0), 124);
+        assert_eq!(result.value(1), 125);
+        assert_eq!(result.value(2), -124);
+        assert_eq!(result.value(3), -125);
+        assert_eq!(result.value(4), 1);
+        assert!(result.is_valid(5)); // rounding to zero must yield a valid 0, not a null
+        assert_eq!(result.value(5), 0);
+        assert_eq!(result.value(6), 1);
+    }
+
+    #[test]
+    fn get_decimal64_large_scale_reduction() {
+        let mut builder = crate::VariantArrayBuilder::new(2); // rows: -MAX and +MAX of Decimal8, both at scale 0
+        builder.append_variant(
+            VariantDecimal8::try_new(-VariantDecimal8::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal64(18, -18), true); // first pass: scale -18
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
+
+        assert_eq!(result.precision(), 18);
+        assert_eq!(result.scale(), -18);
+        assert_eq!(result.value(0), -1); // expected to round to -1 unit of 10^18
+        assert_eq!(result.value(1), 1); // expected to round to +1 unit of 10^18
+
+        let field = Field::new("result", DataType::Decimal64(18, -19), true); // second pass: one more digit of reduction
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
+
+        assert_eq!(result.precision(), 18);
+        assert_eq!(result.scale(), -19);
+        assert!(result.is_valid(0)); // at scale -19 both rows are expected to be a valid 0, not null
+        assert_eq!(result.value(0), 0);
+        assert!(result.is_valid(1));
+        assert_eq!(result.value(1), 0);
+    }
+
+    #[test]
+    fn get_decimal64_precision_overflow_safe() {
+        // Neither value fits Decimal64(2, 2) after scaling and rounding
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(
+            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 18)
+                .unwrap()
+                .into(),
+        ); // integer value round up overflows
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal64(2, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); // default cast options
+        let result = variant_get(&variant_array, options).unwrap(); // the call itself must succeed
+        let result = result.as_any().downcast_ref::<Decimal64Array>().unwrap();
+
+        assert!(result.is_null(0)); // overflowing row is expected to become null
+        assert!(result.is_null(1)); // the rounded-up value is expected not to fit precision 2 either
+    }
+
+    #[test]
+    fn get_decimal64_precision_overflow_unsafe_errors() {
+        let mut builder = crate::VariantArrayBuilder::new(1); // single row: Decimal8 MAX at scale 0
+        builder.append_variant(
+            VariantDecimal8::try_new(VariantDecimal8::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal64(18, 2), true);
+        let cast_options = CastOptions {
+            safe: false, // a failed cast must surface as an error
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(cast_options);
+        let err = variant_get(&variant_array, options).unwrap_err(); // the whole call is expected to fail
+
+        assert!(
+            err.to_string().contains(
+                "Failed to cast to Decimal64(precision=18, scale=2) from variant Decimal8"
+            )
+        );
+    }
+
+    #[test]
+    fn get_decimal128_rescaled_to_scale2() {
+        let mut builder = crate::VariantArrayBuilder::new(4);
+        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34
+        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234
+        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234
+        builder.append_null();
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal128(38, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
+
+        assert_eq!(result.precision(), 38);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), 1234); // 12.34: already at scale 2, unchanged
+        assert_eq!(result.value(1), 123); // 1.234 -> 1.23
+        assert_eq!(result.value(2), 123400); // 1234 -> 1234.00
+        assert!(result.is_null(3)); // row added with append_null
+    }
+
+    #[test]
+    fn get_decimal128_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(7);
+        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into()); // 1235, expected 124 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into()); // 1245, expected 125 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into()); // -1235, expected -124 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into()); // -1245, expected -125 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
+        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
+        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal128(38, -1), true); // scale -1: stored values count tens
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
+
+        assert_eq!(result.precision(), 38);
+        assert_eq!(result.scale(), -1);
+        // The expectations below pin ties (a dropped digit of exactly 5) to round away from zero
+        assert_eq!(result.value(0), 124);
+        assert_eq!(result.value(1), 125);
+        assert_eq!(result.value(2), -124);
+        assert_eq!(result.value(3), -125);
+        assert_eq!(result.value(4), 1);
+        assert!(result.is_valid(5)); // rounding to zero must yield a valid 0, not a null
+        assert_eq!(result.value(5), 0);
+        assert_eq!(result.value(6), 1);
+    }
+
+    #[test]
+    fn get_decimal128_precision_overflow_safe() {
+        // Neither value fits Decimal128(2, 2) after scaling and rounding
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 38)
+                .unwrap()
+                .into(),
+        ); // integer value round up overflows
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal128(2, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); // default cast options
+        let result = variant_get(&variant_array, options).unwrap(); // the call itself must succeed
+        let result = result.as_any().downcast_ref::<Decimal128Array>().unwrap();
+
+        assert!(result.is_null(0)); // overflowing row is expected to become null
+        assert!(result.is_null(1)); // should overflow because 1.00 does not fit into precision (2)
+    }
+
+    #[test]
+    fn get_decimal128_precision_overflow_unsafe_errors() {
+        let mut builder = crate::VariantArrayBuilder::new(1); // single row: Decimal16 MAX at scale 0
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal128(38, 2), true);
+        let cast_options = CastOptions {
+            safe: false, // a failed cast must surface as an error
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(cast_options);
+        let err = variant_get(&variant_array, options).unwrap_err(); // the whole call is expected to fail
+
+        assert!(err.to_string().contains(
+            "Failed to cast to Decimal128(precision=38, scale=2) from variant Decimal16"
+        ));
+    }
+
+    #[test]
+    fn get_decimal256_rescaled_to_scale2() {
+        // Decimal16 source values at scales 2, 3 and 0 plus a null, all read back as Decimal256(76, 2)
+        let mut builder = crate::VariantArrayBuilder::new(4);
+        builder.append_variant(VariantDecimal16::try_new(1234, 2).unwrap().into()); // 12.34
+        builder.append_variant(VariantDecimal16::try_new(1234, 3).unwrap().into()); // 1.234
+        builder.append_variant(VariantDecimal16::try_new(1234, 0).unwrap().into()); // 1234
+        builder.append_null();
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal256(76, 2), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
+
+        assert_eq!(result.precision(), 76);
+        assert_eq!(result.scale(), 2);
+        assert_eq!(result.value(0), i256::from_i128(1234)); // 12.34: already at scale 2, unchanged
+        assert_eq!(result.value(1), i256::from_i128(123)); // 1.234 -> 1.23
+        assert_eq!(result.value(2), i256::from_i128(123400)); // 1234 -> 1234.00
+        assert!(result.is_null(3)); // row added with append_null
+    }
+
+    #[test]
+    fn get_decimal256_scale_down_rounding() {
+        let mut builder = crate::VariantArrayBuilder::new(7);
+        builder.append_variant(VariantDecimal16::try_new(1235, 0).unwrap().into()); // 1235, expected 124 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(1245, 0).unwrap().into()); // 1245, expected 125 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(-1235, 0).unwrap().into()); // -1235, expected -124 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(-1245, 0).unwrap().into()); // -1245, expected -125 at scale -1
+        builder.append_variant(VariantDecimal16::try_new(1235, 2).unwrap().into()); // 12.35 rounded down to 10 for scale -1
+        builder.append_variant(VariantDecimal16::try_new(1235, 3).unwrap().into()); // 1.235 rounded down to 0 for scale -1
+        builder.append_variant(VariantDecimal16::try_new(5235, 3).unwrap().into()); // 5.235 rounded up to 10 for scale -1
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal256(76, -1), true); // scale -1: stored values count tens
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field)));
+        let result = variant_get(&variant_array, options).unwrap();
+        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
+
+        assert_eq!(result.precision(), 76);
+        assert_eq!(result.scale(), -1);
+        // The expectations below pin ties (a dropped digit of exactly 5) to round away from zero
+        assert_eq!(result.value(0), i256::from_i128(124));
+        assert_eq!(result.value(1), i256::from_i128(125));
+        assert_eq!(result.value(2), i256::from_i128(-124));
+        assert_eq!(result.value(3), i256::from_i128(-125));
+        assert_eq!(result.value(4), i256::from_i128(1));
+        assert!(result.is_valid(5)); // rounding to zero must yield a valid 0, not a null
+        assert_eq!(result.value(5), i256::from_i128(0));
+        assert_eq!(result.value(6), i256::from_i128(1));
+    }
+
+    #[test]
+    fn get_decimal256_precision_overflow_safe() {
+        // Rescaling goes past Decimal128's max precision (38): row 0 still fits Decimal256(76, 39), row 1 does not
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal256(76, 39), true);
+        let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); // default cast options
+        let result = variant_get(&variant_array, options).unwrap(); // the call itself must succeed
+        let result = result.as_any().downcast_ref::<Decimal256Array>().unwrap();
+
+        // Input is Decimal16 with integer = 10^38-1 and scale = 1, target scale = 39
+        // So expected integer is (10^38-1) * 10^(39-1) = (10^38-1) * 10^38
+        let base = i256::from_i128(10);
+        let factor = base.checked_pow(38).unwrap();
+        let expected = i256::from_i128(VariantDecimal16::MAX_UNSCALED_VALUE)
+            .checked_mul(factor)
+            .unwrap();
+        assert_eq!(result.value(0), expected);
+        assert!(result.is_null(1)); // scale 0 -> 39 needs a 10^39 factor (77 digits), so null is expected
+    }
+
+    #[test]
+    fn get_decimal256_precision_overflow_unsafe_errors() {
+        // Rescaling to scale 39 goes past Decimal128's max precision (38); with safe=false this must error
+        let mut builder = crate::VariantArrayBuilder::new(2);
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 1)
+                .unwrap()
+                .into(),
+        );
+        builder.append_variant(
+            VariantDecimal16::try_new(VariantDecimal16::MAX_UNSCALED_VALUE, 0)
+                .unwrap()
+                .into(),
+        );
+        let variant_array: ArrayRef = ArrayRef::from(builder.build());
+
+        let field = Field::new("result", DataType::Decimal256(76, 39), true);
+        let cast_options = CastOptions {
+            safe: false, // a failed cast must surface as an error
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(cast_options);
+        let err = variant_get(&variant_array, options).unwrap_err(); // the whole call is expected to fail
+
+        assert!(err.to_string().contains(
+            "Failed to cast to Decimal256(precision=76, scale=39) from variant Decimal16"
+        ));
+    }
+
+    #[test]
+    fn get_non_supported_temporal_types_error() {
+        let values = vec![None, Some(Variant::Null), Some(Variant::BooleanFalse)];
+        let variant_array: ArrayRef = ArrayRef::from(VariantArray::from_iter(values));
+
+        let test_cases = vec![ // one Duration and one Interval target type
+            FieldRef::from(Field::new(
+                "result",
+                DataType::Duration(TimeUnit::Microsecond),
+                true,
+            )),
+            FieldRef::from(Field::new(
+                "result",
+                DataType::Interval(IntervalUnit::YearMonth),
+                true,
+            )),
+        ];
+
+        for field in test_cases { // each requested type must make variant_get fail, even with default cast options
+            let options = GetOptions::new().with_as_type(Some(field));
+            let err = variant_get(&variant_array, options).unwrap_err();
+            assert!(
+                err.to_string()
+                    .contains("Casting Variant to duration/interval types is not supported")
+            );
+        }
+    }
+
+    fn invalid_time_variant_array() -> ArrayRef { // three identical Int64 rows that cannot be read as a time of day
+        let mut builder = VariantArrayBuilder::new(3);
+        // 86401000000 µs = 24h + 1s: invalid for Time64Microsecond (a full day is 86400000000 µs)
+        builder.append_variant(Variant::Int64(86401000000));
+        builder.append_variant(Variant::Int64(86401000000));
+        builder.append_variant(Variant::Int64(86401000000));
+        Arc::new(builder.build().into_inner())
+    }
+
+    #[test]
+    fn test_variant_get_error_when_cast_failure_and_safe_false() {
+        let variant_array = invalid_time_variant_array(); // rows hold Int64 values outside the time-of-day range
+
+        let field = Field::new("result", DataType::Time64(TimeUnit::Microsecond), true);
+        let cast_options = CastOptions {
+            safe: false, // Will error on cast failure
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(field)))
+            .with_cast_options(cast_options);
+        let err = variant_get(&variant_array, options).unwrap_err(); // the whole call is expected to fail
+        assert!(
+            err.to_string().contains(
+                "Cast error: Failed to extract primitive of type Time64(µs) from variant Int64(86401000000) at path VariantPath([])"
+            ),
+            "actual: {err}",
+        );
+    }
+
+    // With `safe: true`, values the requested type cannot hold come back as nulls.
+    #[test]
+    fn test_variant_get_return_null_when_cast_failure_and_safe_true() {
+        let lenient = CastOptions {
+            safe: true, // cast failures become nulls
+            ..Default::default()
+        };
+        let as_type =
+            FieldRef::from(Field::new("result", DataType::Time64(TimeUnit::Microsecond), true));
+        let options = GetOptions::new()
+            .with_as_type(Some(as_type))
+            .with_cast_options(lenient);
+
+        let result = variant_get(&invalid_time_variant_array(), options).unwrap();
+
+        // All three input rows hold the same out-of-range value, so all must be null.
+        assert_eq!(3, result.len());
+        assert!((0..3).all(|row| result.is_null(row)));
+    }
+
+    // Reading a perfectly shredded Int32 column back as Int32 is expected to return
+    // the existing `typed_value` Arc itself, not a copy of it.
+    #[test]
+    fn test_perfect_shredding_returns_same_arc_ptr() {
+        let input = perfectly_shredded_int32_variant_array();
+        let shredded = VariantArray::try_new(&input).unwrap();
+        let expected = shredded.typed_value_field().unwrap().clone();
+
+        let as_type = FieldRef::from(Field::new("result", DataType::Int32, true));
+        let options = GetOptions::new().with_as_type(Some(as_type));
+        let actual = variant_get(&input, options).unwrap();
+        assert!(Arc::ptr_eq(&expected, &actual));
+    }
+
+    #[test]
+    fn test_perfect_shredding_three_typed_value_columns() {
+        // Build three perfectly shredded columns, then read each back. Column 1: Int32, all null
+        let all_nulls_values: Arc<Int32Array> = Arc::new(Int32Array::from(vec![
+            Option::<i32>::None,
+            Option::<i32>::None,
+            Option::<i32>::None,
+        ]));
+        let all_nulls_erased: ArrayRef = all_nulls_values.clone();
+        let all_nulls_field =
+            ShreddedVariantFieldArray::from_parts(None, Some(all_nulls_erased.clone()), None);
+        let all_nulls_type = all_nulls_field.data_type().clone();
+        let all_nulls_struct: ArrayRef = ArrayRef::from(all_nulls_field);
+
+        // Column 2: perfectly shredded Int32 primitive with a null in the middle row
+        let some_nulls_values: Arc<Int32Array> =
+            Arc::new(Int32Array::from(vec![Some(10), None, Some(30)]));
+        let some_nulls_erased: ArrayRef = some_nulls_values.clone();
+        let some_nulls_field =
+            ShreddedVariantFieldArray::from_parts(None, Some(some_nulls_erased.clone()), None);
+        let some_nulls_type = some_nulls_field.data_type().clone();
+        let some_nulls_struct: ArrayRef = ArrayRef::from(some_nulls_field);
+
+        // Column 3: perfectly shredded struct with a single shredded Int32 child named `inner`
+        let inner_values: Arc<Int32Array> =
+            Arc::new(Int32Array::from(vec![Some(111), None, Some(333)]));
+        let inner_erased: ArrayRef = inner_values.clone();
+        let inner_field =
+            ShreddedVariantFieldArray::from_parts(None, Some(inner_erased.clone()), None);
+        let inner_field_type = inner_field.data_type().clone();
+        let inner_struct_array: ArrayRef = ArrayRef::from(inner_field);
+
+        let nested_struct = Arc::new(
+            StructArray::try_new(
+                Fields::from(vec![Field::new("inner", inner_field_type, true)]),
+                vec![inner_struct_array],
+                None,
+            )
+            .unwrap(),
+        );
+        let nested_struct_erased: ArrayRef = nested_struct.clone();
+        let struct_field =
+            ShreddedVariantFieldArray::from_parts(None, Some(nested_struct_erased.clone()), None);
+        let struct_field_type = struct_field.data_type().clone();
+        let struct_field_struct: ArrayRef = ArrayRef::from(struct_field);
+
+        // Assemble the top-level typed_value struct from the three columns, keyed by field name
+        let typed_value_struct = StructArray::try_new(
+            Fields::from(vec![
+                Field::new("all_nulls", all_nulls_type, true),
+                Field::new("some_nulls", some_nulls_type, true),
+                Field::new("struct_field", struct_field_type, true),
+            ]),
+            vec![all_nulls_struct, some_nulls_struct, struct_field_struct],
+            None,
+        )
+        .unwrap();
+
+        let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
+            EMPTY_VARIANT_METADATA_BYTES,
+            all_nulls_values.len(),
+        ));
+        let variant_struct = StructArrayBuilder::new()
+            .with_field("metadata", Arc::new(metadata), false)
+            .with_field("typed_value", Arc::new(typed_value_struct), true)
+            .build();
+        let variant_array: ArrayRef = VariantArray::try_new(&variant_struct).unwrap().into();
+
+        // Case 1: the all-null column read as Int32 must be the very Arc built for column 1
+        let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
+        let all_nulls_result = variant_get(
+            &variant_array,
+            GetOptions::new_with_path(VariantPath::from("all_nulls"))
+                .with_as_type(Some(all_nulls_field_ref)),
+        )
+        .unwrap();
+        assert!(Arc::ptr_eq(&all_nulls_result, &all_nulls_erased)); // same allocation
+
+        // Case 2: the partially null column read as Int32 must likewise be column 2's own Arc
+        let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
+        let some_nulls_result = variant_get(
+            &variant_array,
+            GetOptions::new_with_path(VariantPath::from("some_nulls"))
+                .with_as_type(Some(some_nulls_field_ref)),
+        )
+        .unwrap();
+        assert!(Arc::ptr_eq(&some_nulls_result, &some_nulls_erased)); // same allocation
+
+        // Case 3: the struct column read as Struct<inner: Int32> is checked by value, not by Arc
+        let struct_child_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
+        let struct_field_ref = FieldRef::from(Field::new(
+            "result",
+            DataType::Struct(struct_child_fields.clone()),
+            true,
+        ));
+        let struct_result = variant_get(
+            &variant_array,
+            GetOptions::new_with_path(VariantPath::from("struct_field"))
+                .with_as_type(Some(struct_field_ref)),
+        )
+        .unwrap();
+        let struct_array = struct_result
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+        assert_eq!(struct_array.len(), 3);
+        assert_eq!(struct_array.null_count(), 0);
+
+        let inner_values_result = struct_array
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(inner_values_result.len(), 3);
+        assert_eq!(inner_values_result.value(0), 111);
+        assert!(inner_values_result.is_null(1));
+        assert_eq!(inner_values_result.value(2), 333);
+    }
+}
diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs
new file mode 100644
index 000000000000..172bd4811bc3
--- /dev/null
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -0,0 +1,1008 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::shred_variant::{
+    VariantToShreddedVariantRowBuilder, make_variant_to_shredded_variant_arrow_row_builder,
+};
+use crate::type_conversion::{
+    PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
+};
+use crate::variant_array::ShreddedVariantFieldArray;
+use crate::{VariantArray, VariantValueArrayBuilder};
+use arrow::array::{
+    ArrayRef, ArrowNativeTypeOp, BinaryBuilder, BinaryLikeArrayBuilder, BinaryViewArray,
+    BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, GenericListArray,
+    GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
+    OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
+};
+use arrow::buffer::{OffsetBuffer, ScalarBuffer};
+use arrow::compute::{CastOptions, DecimalCast};
+use arrow::datatypes::{self, DataType, DecimalType};
+use arrow::error::{ArrowError, Result};
+use arrow_schema::{FieldRef, TimeUnit};
+use parquet_variant::{Variant, VariantList, VariantPath};
+use std::sync::Arc;
+
+/// Row-at-a-time builder for converting variant values into an Arrow array.
+///
+/// Useful for `variant_get` kernels that need to extract specific paths from variant values,
+/// possibly with casting of leaf values to specific types.
+pub(crate) enum VariantToArrowRowBuilder<'a> {
+    Primitive(PrimitiveVariantToArrowRowBuilder<'a>), // a primitive output type was requested
+    BinaryVariant(VariantToBinaryVariantArrowRowBuilder), // no output type was requested
+
+    // Path extraction wrapper: looks up a path in each value, then feeds a boxed inner builder
+    WithPath(VariantPathRowBuilder<'a>),
+}
+
+impl<'a> VariantToArrowRowBuilder<'a> {
+    /// Appends a null row to whichever concrete builder this enum wraps.
+    pub fn append_null(&mut self) -> Result<()> {
+        match self {
+            Self::Primitive(inner) => inner.append_null(),
+            Self::BinaryVariant(inner) => inner.append_null(),
+            Self::WithPath(inner) => inner.append_null(),
+        }
+    }
+
+    /// Appends `value`, forwarding the wrapped builder's `Result<bool>` unchanged.
+    pub fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        match self {
+            Self::Primitive(inner) => inner.append_value(&value),
+            Self::BinaryVariant(inner) => inner.append_value(value),
+            Self::WithPath(inner) => inner.append_value(value),
+        }
+    }
+
+    /// Consumes the builder and returns the array produced by the wrapped builder.
+    pub fn finish(self) -> Result<ArrayRef> {
+        match self {
+            Self::Primitive(inner) => inner.finish(),
+            Self::BinaryVariant(inner) => inner.finish(),
+            Self::WithPath(inner) => inner.finish(),
+        }
+    }
+}
+
+/// Builds the row builder for the requested output `data_type` (or an unshredded variant
+/// builder when none is given), adding path extraction when `path` is non-empty.
+pub(crate) fn make_variant_to_arrow_row_builder<'a>(
+    metadata: &BinaryViewArray,
+    path: VariantPath<'a>,
+    data_type: Option<&'a DataType>,
+    cast_options: &'a CastOptions,
+    capacity: usize,
+) -> Result<VariantToArrowRowBuilder<'a>> {
+    use VariantToArrowRowBuilder::*;
+
+    let leaf = match data_type {
+        // Without a requested type the output stays an unshredded VariantArray.
+        None => BinaryVariant(VariantToBinaryVariantArrowRowBuilder::new(
+            metadata.clone(),
+            capacity,
+        )),
+        Some(DataType::Struct(_)) => {
+            return Err(ArrowError::NotYetImplemented(
+                "Converting unshredded variant objects to arrow structs".to_string(),
+            ));
+        }
+        Some(
+            DataType::List(_)
+            | DataType::LargeList(_)
+            | DataType::ListView(_)
+            | DataType::LargeListView(_)
+            | DataType::FixedSizeList(..),
+        ) => {
+            return Err(ArrowError::NotYetImplemented(
+                "Converting unshredded variant arrays to arrow lists".to_string(),
+            ));
+        }
+        Some(leaf_type) => Primitive(make_primitive_variant_to_arrow_row_builder(
+            leaf_type,
+            cast_options,
+            capacity,
+        )?),
+    };
+
+    if path.is_empty() {
+        return Ok(leaf);
+    }
+    Ok(WithPath(VariantPathRowBuilder {
+        builder: Box::new(leaf),
+        path,
+    }))
+}
+
+/// Builder for converting primitive variant values to Arrow arrays, with one variant per
+/// supported target type. It is used by both `VariantToArrowRowBuilder` (above) and
+/// `VariantToShreddedPrimitiveVariantRowBuilder` (in `shred_variant.rs`).
+pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
+    Null(VariantToNullArrowRowBuilder<'a>),
+    Boolean(VariantToBooleanArrowRowBuilder<'a>),
+    Int8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int8Type>),
+    Int16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int16Type>),
+    Int32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int32Type>),
+    Int64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Int64Type>),
+    UInt8(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt8Type>),
+    UInt16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt16Type>),
+    UInt32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt32Type>),
+    UInt64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::UInt64Type>),
+    Float16(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float16Type>),
+    Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
+    Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
+    Decimal32(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal32Type>),
+    Decimal64(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal64Type>),
+    Decimal128(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal128Type>),
+    Decimal256(VariantToDecimalArrowRowBuilder<'a, datatypes::Decimal256Type>),
+    TimestampSecond(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampSecondType>),
+    TimestampSecondNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampSecondType>),
+    TimestampMilli(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMillisecondType>),
+    TimestampMilliNtz(
+        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMillisecondType>,
+    ),
+    TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
+    TimestampMicroNtz(
+        VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
+    ),
+    TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
+    TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
+    Time32Second(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32SecondType>),
+    Time32Milli(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time32MillisecondType>),
+    Time64Micro(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64MicrosecondType>),
+    Time64Nano(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Time64NanosecondType>),
+    Date32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
+    Date64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date64Type>),
+    Uuid(VariantToUuidArrowRowBuilder<'a>),
+    String(VariantToStringArrowBuilder<'a, StringBuilder>),
+    LargeString(VariantToStringArrowBuilder<'a, LargeStringBuilder>),
+    StringView(VariantToStringArrowBuilder<'a, StringViewBuilder>),
+    Binary(VariantToBinaryArrowRowBuilder<'a, BinaryBuilder>),
+    LargeBinary(VariantToBinaryArrowRowBuilder<'a, LargeBinaryBuilder>),
+    BinaryView(VariantToBinaryArrowRowBuilder<'a, BinaryViewBuilder>),
+}
+
+impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
+    /// Appends a null row to whichever typed builder this enum wraps.
+    pub fn append_null(&mut self) -> Result<()> {
+        match self {
+            Self::Null(inner) => inner.append_null(),
+            Self::Boolean(inner) => inner.append_null(),
+            Self::Int8(inner) => inner.append_null(),
+            Self::Int16(inner) => inner.append_null(),
+            Self::Int32(inner) => inner.append_null(),
+            Self::Int64(inner) => inner.append_null(),
+            Self::UInt8(inner) => inner.append_null(),
+            Self::UInt16(inner) => inner.append_null(),
+            Self::UInt32(inner) => inner.append_null(),
+            Self::UInt64(inner) => inner.append_null(),
+            Self::Float16(inner) => inner.append_null(),
+            Self::Float32(inner) => inner.append_null(),
+            Self::Float64(inner) => inner.append_null(),
+            Self::Decimal32(inner) => inner.append_null(),
+            Self::Decimal64(inner) => inner.append_null(),
+            Self::Decimal128(inner) => inner.append_null(),
+            Self::Decimal256(inner) => inner.append_null(),
+            Self::TimestampSecond(inner) => inner.append_null(),
+            Self::TimestampSecondNtz(inner) => inner.append_null(),
+            Self::TimestampMilli(inner) => inner.append_null(),
+            Self::TimestampMilliNtz(inner) => inner.append_null(),
+            Self::TimestampMicro(inner) => inner.append_null(),
+            Self::TimestampMicroNtz(inner) => inner.append_null(),
+            Self::TimestampNano(inner) => inner.append_null(),
+            Self::TimestampNanoNtz(inner) => inner.append_null(),
+            Self::Time32Second(inner) => inner.append_null(),
+            Self::Time32Milli(inner) => inner.append_null(),
+            Self::Time64Micro(inner) => inner.append_null(),
+            Self::Time64Nano(inner) => inner.append_null(),
+            Self::Date32(inner) => inner.append_null(),
+            Self::Date64(inner) => inner.append_null(),
+            Self::Uuid(inner) => inner.append_null(),
+            Self::String(inner) => inner.append_null(),
+            Self::LargeString(inner) => inner.append_null(),
+            Self::StringView(inner) => inner.append_null(),
+            Self::Binary(inner) => inner.append_null(),
+            Self::LargeBinary(inner) => inner.append_null(),
+            Self::BinaryView(inner) => inner.append_null(),
+        }
+    }
+
+    /// Appends `value`, forwarding the wrapped builder's `Result<bool>` unchanged.
+    pub fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        match self {
+            Self::Null(inner) => inner.append_value(value),
+            Self::Boolean(inner) => inner.append_value(value),
+            Self::Int8(inner) => inner.append_value(value),
+            Self::Int16(inner) => inner.append_value(value),
+            Self::Int32(inner) => inner.append_value(value),
+            Self::Int64(inner) => inner.append_value(value),
+            Self::UInt8(inner) => inner.append_value(value),
+            Self::UInt16(inner) => inner.append_value(value),
+            Self::UInt32(inner) => inner.append_value(value),
+            Self::UInt64(inner) => inner.append_value(value),
+            Self::Float16(inner) => inner.append_value(value),
+            Self::Float32(inner) => inner.append_value(value),
+            Self::Float64(inner) => inner.append_value(value),
+            Self::Decimal32(inner) => inner.append_value(value),
+            Self::Decimal64(inner) => inner.append_value(value),
+            Self::Decimal128(inner) => inner.append_value(value),
+            Self::Decimal256(inner) => inner.append_value(value),
+            Self::TimestampSecond(inner) => inner.append_value(value),
+            Self::TimestampSecondNtz(inner) => inner.append_value(value),
+            Self::TimestampMilli(inner) => inner.append_value(value),
+            Self::TimestampMilliNtz(inner) => inner.append_value(value),
+            Self::TimestampMicro(inner) => inner.append_value(value),
+            Self::TimestampMicroNtz(inner) => inner.append_value(value),
+            Self::TimestampNano(inner) => inner.append_value(value),
+            Self::TimestampNanoNtz(inner) => inner.append_value(value),
+            Self::Time32Second(inner) => inner.append_value(value),
+            Self::Time32Milli(inner) => inner.append_value(value),
+            Self::Time64Micro(inner) => inner.append_value(value),
+            Self::Time64Nano(inner) => inner.append_value(value),
+            Self::Date32(inner) => inner.append_value(value),
+            Self::Date64(inner) => inner.append_value(value),
+            Self::Uuid(inner) => inner.append_value(value),
+            Self::String(inner) => inner.append_value(value),
+            Self::LargeString(inner) => inner.append_value(value),
+            Self::StringView(inner) => inner.append_value(value),
+            Self::Binary(inner) => inner.append_value(value),
+            Self::LargeBinary(inner) => inner.append_value(value),
+            Self::BinaryView(inner) => inner.append_value(value),
+        }
+    }
+
+    /// Consumes the builder and returns the array produced by the wrapped builder.
+    pub fn finish(self) -> Result<ArrayRef> {
+        match self {
+            Self::Null(inner) => inner.finish(),
+            Self::Boolean(inner) => inner.finish(),
+            Self::Int8(inner) => inner.finish(),
+            Self::Int16(inner) => inner.finish(),
+            Self::Int32(inner) => inner.finish(),
+            Self::Int64(inner) => inner.finish(),
+            Self::UInt8(inner) => inner.finish(),
+            Self::UInt16(inner) => inner.finish(),
+            Self::UInt32(inner) => inner.finish(),
+            Self::UInt64(inner) => inner.finish(),
+            Self::Float16(inner) => inner.finish(),
+            Self::Float32(inner) => inner.finish(),
+            Self::Float64(inner) => inner.finish(),
+            Self::Decimal32(inner) => inner.finish(),
+            Self::Decimal64(inner) => inner.finish(),
+            Self::Decimal128(inner) => inner.finish(),
+            Self::Decimal256(inner) => inner.finish(),
+            Self::TimestampSecond(inner) => inner.finish(),
+            Self::TimestampSecondNtz(inner) => inner.finish(),
+            Self::TimestampMilli(inner) => inner.finish(),
+            Self::TimestampMilliNtz(inner) => inner.finish(),
+            Self::TimestampMicro(inner) => inner.finish(),
+            Self::TimestampMicroNtz(inner) => inner.finish(),
+            Self::TimestampNano(inner) => inner.finish(),
+            Self::TimestampNanoNtz(inner) => inner.finish(),
+            Self::Time32Second(inner) => inner.finish(),
+            Self::Time32Milli(inner) => inner.finish(),
+            Self::Time64Micro(inner) => inner.finish(),
+            Self::Time64Nano(inner) => inner.finish(),
+            Self::Date32(inner) => inner.finish(),
+            Self::Date64(inner) => inner.finish(),
+            Self::Uuid(inner) => inner.finish(),
+            Self::String(inner) => inner.finish(),
+            Self::LargeString(inner) => inner.finish(),
+            Self::StringView(inner) => inner.finish(),
+            Self::Binary(inner) => inner.finish(),
+            Self::LargeBinary(inner) => inner.finish(),
+            Self::BinaryView(inner) => inner.finish(),
+        }
+    }
+}
+
+/// Returns the row builder for the primitive target `data_type`, or an error if unsupported.
+pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
+    data_type: &'a DataType,
+    cast_options: &'a CastOptions,
+    capacity: usize,
+) -> Result<PrimitiveVariantToArrowRowBuilder<'a>> {
+    use PrimitiveVariantToArrowRowBuilder::*;
+    // Arm order matters: each specific pattern precedes the catch-all for the same type.
+    let builder =
+        match data_type {
+            DataType::Null => Null(VariantToNullArrowRowBuilder::new(cast_options, capacity)),
+            DataType::Boolean => {
+                Boolean(VariantToBooleanArrowRowBuilder::new(cast_options, capacity))
+            }
+            DataType::Int8 => Int8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int16 => Int16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int32 => Int32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Int64 => Int64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt8 => UInt8(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt16 => UInt16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt32 => UInt32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::UInt64 => UInt64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float16 => Float16(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float32 => Float32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Float64 => Float64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Decimal32(precision, scale) => Decimal32(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
+            ),
+            DataType::Decimal64(precision, scale) => Decimal64(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
+            ),
+            DataType::Decimal128(precision, scale) => Decimal128(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
+            ),
+            DataType::Decimal256(precision, scale) => Decimal256(
+                VariantToDecimalArrowRowBuilder::new(cast_options, capacity, *precision, *scale)?,
+            ),
+            DataType::Date32 => Date32(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Date64 => Date64(VariantToPrimitiveArrowRowBuilder::new(
+                cast_options,
+                capacity,
+            )),
+            DataType::Time32(TimeUnit::Second) => Time32Second(
+                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Time32(TimeUnit::Millisecond) => Time32Milli(
+                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Time32(t) => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "The unit for Time32 must be second/millisecond, received {t:?}"
+                )));
+            }
+            DataType::Time64(TimeUnit::Microsecond) => Time64Micro(
+                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Time64(TimeUnit::Nanosecond) => Time64Nano(
+                VariantToPrimitiveArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Time64(t) => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "The unit for Time64 must be micro/nano seconds, received {t:?}"
+                )));
+            }
+            DataType::Timestamp(TimeUnit::Second, None) => TimestampSecondNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Second, tz) => TimestampSecond(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
+            ),
+            DataType::Timestamp(TimeUnit::Millisecond, None) => TimestampMilliNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Millisecond, tz) => TimestampMilli(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
+            ),
+            DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
+                VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
+            ),
+            DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
+                VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
+            ),
+            DataType::Duration(_) | DataType::Interval(_) => {
+                return Err(ArrowError::InvalidArgumentError(
+                    "Casting Variant to duration/interval types is not supported. \
+                    The Variant format does not define duration/interval types."
+                        .to_string(),
+                ));
+            }
+            DataType::Binary => Binary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity)),
+            DataType::LargeBinary => {
+                LargeBinary(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
+            }
+            DataType::BinaryView => {
+                BinaryView(VariantToBinaryArrowRowBuilder::new(cast_options, capacity))
+            }
+            DataType::FixedSizeBinary(16) => {
+                Uuid(VariantToUuidArrowRowBuilder::new(cast_options, capacity))
+            }
+            DataType::FixedSizeBinary(_) => {
+                return Err(ArrowError::NotYetImplemented(format!(
+                    "DataType {data_type:?} not yet implemented"
+                )));
+            }
+            DataType::Utf8 => String(VariantToStringArrowBuilder::new(cast_options, capacity)),
+            DataType::LargeUtf8 => {
+                LargeString(VariantToStringArrowBuilder::new(cast_options, capacity))
+            }
+            DataType::Utf8View => {
+                StringView(VariantToStringArrowBuilder::new(cast_options, capacity))
+            }
+            DataType::List(_)
+            | DataType::LargeList(_)
+            | DataType::ListView(_)
+            | DataType::LargeListView(_)
+            | DataType::FixedSizeList(..)
+            | DataType::Struct(_)
+            | DataType::Map(..)
+            | DataType::Union(..)
+            | DataType::Dictionary(..)
+            | DataType::RunEndEncoded(..) => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Casting to {data_type:?} is not applicable for primitive Variant types"
+                )));
+            }
+        };
+    Ok(builder)
+}
+
+pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
+    List(VariantToListArrowRowBuilder<'a, i32, false>), // built for DataType::List
+    LargeList(VariantToListArrowRowBuilder<'a, i64, false>), // built for DataType::LargeList
+    ListView(VariantToListArrowRowBuilder<'a, i32, true>), // built for DataType::ListView
+    LargeListView(VariantToListArrowRowBuilder<'a, i64, true>), // built for DataType::LargeListView
+}
+
+impl<'a> ArrayVariantToArrowRowBuilder<'a> {
+    pub(crate) fn try_new(
+        data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        use ArrayVariantToArrowRowBuilder::*;
+
+        // Make List/ListView builders without repeating the constructor boilerplate.
+        macro_rules! make_list_builder {
+            ($variant:ident, $offset:ty, $is_view:expr, $field:ident) => {
+                $variant(VariantToListArrowRowBuilder::<$offset, $is_view>::try_new(
+                    $field.clone(),
+                    $field.data_type(),
+                    cast_options,
+                    capacity,
+                )?)
+            };
+        }
+
+        let builder = match data_type {
+            DataType::List(field) => make_list_builder!(List, i32, false, field),
+            DataType::LargeList(field) => make_list_builder!(LargeList, i64, false, field),
+            DataType::ListView(field) => make_list_builder!(ListView, i32, true, field),
+            DataType::LargeListView(field) => make_list_builder!(LargeListView, i64, true, field),
+            DataType::FixedSizeList(..) => {
+                return Err(ArrowError::NotYetImplemented(
+                    "Converting unshredded variant arrays to arrow fixed-size lists".to_string(),
+                ));
+            }
+            other => {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "Casting to {other:?} is not applicable for array Variant types"
+                )));
+            }
+        };
+        Ok(builder)
+    }
+
+    pub(crate) fn append_null(&mut self) {
+        match self {
+            Self::List(builder) => builder.append_null(),
+            Self::LargeList(builder) => builder.append_null(),
+            Self::ListView(builder) => builder.append_null(),
+            Self::LargeListView(builder) => builder.append_null(),
+        }
+    }
+
+    pub(crate) fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
+        match self {
+            Self::List(inner) => inner.append_value(list), // same call for each offset/view kind
+            Self::LargeList(inner) => inner.append_value(list),
+            Self::ListView(inner) => inner.append_value(list),
+            Self::LargeListView(inner) => inner.append_value(list),
+        }
+    }
+
+    pub(crate) fn finish(self) -> Result<ArrayRef> {
+        match self {
+            Self::List(inner) => inner.finish(), // consumes the wrapped builder
+            Self::LargeList(inner) => inner.finish(),
+            Self::ListView(inner) => inner.finish(),
+            Self::LargeListView(inner) => inner.finish(),
+        }
+    }
+}
+
+/// Thin adapter that extracts one specific path from each incoming variant value and hands the
+/// extracted value to a nested builder.
+pub(crate) struct VariantPathRowBuilder<'a> {
+    builder: Box<VariantToArrowRowBuilder<'a>>,
+    path: VariantPath<'a>,
+}
+
+impl<'a> VariantPathRowBuilder<'a> {
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null()
+    }
+
+    /// Appends the value found at `path`; a missing path appends a null and yields `Ok(false)`.
+    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        let Some(found) = value.get_path(&self.path) else {
+            self.builder.append_null()?;
+            return Ok(false);
+        };
+        self.builder.append_value(found)
+    }
+
+    fn finish(self) -> Result<ArrayRef> {
+        self.builder.finish()
+    }
+}
+
+macro_rules! define_variant_to_primitive_builder { // emits a builder struct with new/append_null/append_value/finish
+    (struct $name:ident<$lifetime:lifetime $(, $generic:ident: $bound:path )?>
+    |$array_param:ident $(, $field:ident: $field_type:ty)?| -> $builder_name:ident $(< $array_type:ty >)? { $init_expr: expr },
+    |$value: ident| $value_transform:expr,
+    type_name: $type_name:expr) => {
+        pub(crate) struct $name<$lifetime $(, $generic : $bound )?>
+        {
+            builder: $builder_name $(<$array_type>)?, // concrete array builder created by $init_expr
+            cast_options: &$lifetime CastOptions<$lifetime>, // only `safe` is read, in append_value
+        }
+
+        impl<$lifetime $(, $generic: $bound+ )?> $name<$lifetime $(, $generic )?> {
+            fn new(
+                cast_options: &$lifetime CastOptions<$lifetime>,
+                $array_param: usize, // named by the invocation so that $init_expr can refer to it
+                // optional extra constructor argument, declared so that $init_expr can use it
+                $( $field: $field_type, )?
+            ) -> Self {
+                Self {
+                    builder: $init_expr,
+                    cast_options,
+                }
+            }
+
+            fn append_null(&mut self) -> Result<()> {
+                self.builder.append_null();
+                Ok(())
+            }
+
+            fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
+                if let Some(v) = $value_transform { // `None` means the conversion failed
+                    self.builder.append_value(v);
+                    Ok(true)
+                } else {
+                    if !self.cast_options.safe {
+                        // Strict mode (`safe == false`): a failed conversion is a CastError
+                        return Err(ArrowError::CastError(format!(
+                            "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
+                            $type_name,
+                            $value
+                        )));
+                    }
+                    // Safe mode: a failed conversion appends null and reports `Ok(false)`
+                    self.builder.append_null();
+                    Ok(false)
+                }
+            }
+
+            // `mut` is not needed for builders whose `finish` takes `self` by value
+            // (e.g. `FakeNullBuilder`), so silence the resulting unused_mut warning
+            #[allow(unused_mut)]
+            fn finish(mut self) -> Result<ArrayRef> {
+                Ok(Arc::new(self.builder.finish()))
+            }
+        }
+    }
+}
+
+define_variant_to_primitive_builder!(
+    struct VariantToStringArrowBuilder<'a, B: StringLikeArrayBuilder>
+    |capacity| -> B { B::with_capacity(capacity) },
+    |value| value.as_string(), // `None` is handled by the macro as a failed conversion
+    type_name: B::type_name() // only used in the cast error message
+);
+
+define_variant_to_primitive_builder!(
+    struct VariantToBooleanArrowRowBuilder<'a>
+    |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
+    |value|  value.as_boolean(), // `None` is handled by the macro as a failed conversion
+    type_name: datatypes::BooleanType::DATA_TYPE // only used in the cast error message
+);
+
+define_variant_to_primitive_builder!(
+    struct VariantToPrimitiveArrowRowBuilder<'a, T:PrimitiveFromVariant>
+    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
+    |value| T::from_variant(value), // conversion comes from T's PrimitiveFromVariant impl
+    type_name: T::DATA_TYPE
+);
+
+define_variant_to_primitive_builder!(
+    struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
+    |capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
+    |value| T::from_variant(value), // NOTE(review): `<true>` appears to select NTZ - confirm
+    type_name: T::DATA_TYPE
+);
+
+define_variant_to_primitive_builder!(
+    struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
+    |capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
+        PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz) // extra `tz` argument
+    },
+    |value| T::from_variant(value),
+    type_name: T::DATA_TYPE
+);
+
+define_variant_to_primitive_builder!(
+    struct VariantToBinaryArrowRowBuilder<'a, B: BinaryLikeArrayBuilder>
+    |capacity| -> B { B::with_capacity(capacity) },
+    |value| value.as_u8_slice(), // `None` is handled by the macro as a failed conversion
+    type_name: B::type_name() // only used in the cast error message
+);
+
+/// Row builder that converts variant values into an Arrow decimal array of type `T`
+pub(crate) struct VariantToDecimalArrowRowBuilder<'a, T>
+where
+    T: DecimalType,
+    T::Native: DecimalCast,
+{
+    builder: PrimitiveBuilder<T>,
+    cast_options: &'a CastOptions<'a>,
+    precision: u8, // target precision, also applied to `builder` in `new`
+    scale: i8, // target scale, also applied to `builder` in `new`
+}
+
+impl<'a, T> VariantToDecimalArrowRowBuilder<'a, T>
+where
+    T: DecimalType,
+    T::Native: DecimalCast,
+{
+    fn new(
+        cast_options: &'a CastOptions<'a>,
+        capacity: usize,
+        precision: u8,
+        scale: i8,
+    ) -> Result<Self> {
+        let builder = PrimitiveBuilder::<T>::with_capacity(capacity)
+            .with_precision_and_scale(precision, scale)?;
+        Ok(Self {
+            builder,
+            cast_options,
+            precision,
+            scale,
+        })
+    }
+
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        if let Some(raw) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
+            self.builder.append_value(raw);
+            return Ok(true);
+        }
+        if !self.cast_options.safe {
+            return Err(ArrowError::CastError(format!(
+                "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
+                T::PREFIX,
+                self.precision,
+                self.scale,
+                value
+            )));
+        }
+        self.builder.append_null(); // safe mode: a failed conversion becomes a null row
+        Ok(false)
+    }
+
+    fn finish(mut self) -> Result<ArrayRef> {
+        Ok(Arc::new(self.builder.finish()))
+    }
+}
+
+/// Builder for converting variant values to FixedSizeBinary(16) for UUIDs
+pub(crate) struct VariantToUuidArrowRowBuilder<'a> {
+    builder: FixedSizeBinaryBuilder,
+    cast_options: &'a CastOptions<'a>,
+}
+
+impl<'a> VariantToUuidArrowRowBuilder<'a> {
+    fn new(cast_options: &'a CastOptions<'a>, capacity: usize) -> Self {
+        Self {
+            builder: FixedSizeBinaryBuilder::with_capacity(capacity, 16),
+            cast_options,
+        }
+    }
+
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        Ok(())
+    }
+
+    /// Appends the bytes of the UUID held by `value` and returns `Ok(true)`.
+    ///
+    /// A non-UUID value appends null (`Ok(false)`) when `safe` is set, else yields a CastError.
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        let Some(uuid) = value.as_uuid() else {
+            if self.cast_options.safe {
+                self.builder.append_null();
+                return Ok(false);
+            }
+            return Err(ArrowError::CastError(format!(
+                "Failed to extract UUID from variant {value:?}",
+            )));
+        };
+        self.builder
+            .append_value(uuid.as_bytes())
+            .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
+        Ok(true)
+    }
+
+    fn finish(mut self) -> Result<ArrayRef> {
+        Ok(Arc::new(self.builder.finish()))
+    }
+}
+
+/// Row builder that turns variant lists into Arrow list or list-view arrays.
+///
+/// `O` is the offset type; `IS_VIEW` picks `GenericListViewArray` over `GenericListArray`.
+pub(crate) struct VariantToListArrowRowBuilder<'a, O, const IS_VIEW: bool>
+where
+    O: OffsetSizeTrait + ArrowNativeTypeOp,
+{
+    field: FieldRef,
+    offsets: Vec<O>,
+    element_builder: Box<VariantToShreddedVariantRowBuilder<'a>>,
+    nulls: NullBufferBuilder,
+    current_offset: O,
+}
+
+impl<'a, O, const IS_VIEW: bool> VariantToListArrowRowBuilder<'a, O, IS_VIEW>
+where
+    O: OffsetSizeTrait + ArrowNativeTypeOp,
+{
+    fn try_new(
+        field: FieldRef,
+        element_data_type: &'a DataType,
+        cast_options: &'a CastOptions,
+        capacity: usize,
+    ) -> Result<Self> {
+        if capacity >= isize::MAX as usize {
+            return Err(ArrowError::ComputeError(
+                "Capacity exceeds isize::MAX when reserving list offsets".to_string(),
+            ));
+        }
+        let mut offsets = Vec::with_capacity(capacity + 1);
+        offsets.push(O::ZERO);
+        let element_builder = make_variant_to_shredded_variant_arrow_row_builder(
+            element_data_type,
+            cast_options,
+            capacity,
+            false,
+        )?;
+        Ok(Self {
+            field,
+            offsets,
+            element_builder: Box::new(element_builder),
+            nulls: NullBufferBuilder::new(capacity),
+            current_offset: O::ZERO,
+        })
+    }
+
+    /// Records a null row: the end offset repeats, so the row spans no elements.
+    fn append_null(&mut self) {
+        self.nulls.append_null();
+        self.offsets.push(self.current_offset);
+    }
+
+    /// Appends every element of `list` to the element builder and closes the row.
+    fn append_value(&mut self, list: VariantList<'_, '_>) -> Result<()> {
+        for element in list.iter() {
+            self.element_builder.append_value(element)?;
+            self.current_offset = self.current_offset.add_checked(O::ONE)?;
+        }
+        self.nulls.append_non_null();
+        self.offsets.push(self.current_offset);
+        Ok(())
+    }
+
+    fn finish(mut self) -> Result<ArrayRef> {
+        let (value, typed_value, element_nulls) = self.element_builder.finish()?;
+        let elements =
+            ShreddedVariantFieldArray::from_parts(Some(value), Some(typed_value), element_nulls);
+        // Same field as requested, except that it now carries the shredded element type.
+        let field = Arc::new(
+            self.field
+                .as_ref()
+                .clone()
+                .with_data_type(elements.data_type().clone()),
+        );
+        let values = ArrayRef::from(elements);
+        let validity = self.nulls.finish();
+        if !IS_VIEW {
+            let offsets = OffsetBuffer::<O>::new(ScalarBuffer::from(self.offsets));
+            let list_array = GenericListArray::<O>::new(field, offsets, values, validity);
+            return Ok(Arc::new(list_array));
+        }
+        // List views store explicit sizes: the gap between each pair of consecutive offsets.
+        let sizes: Vec<O> = self.offsets.windows(2).map(|w| w[1] - w[0]).collect();
+        // NOTE: `offsets` is never empty (constructor pushes an entry); drop the last end offset
+        self.offsets.pop();
+        let list_view_array = GenericListViewArray::<O>::new(
+            field,
+            ScalarBuffer::from(self.offsets),
+            ScalarBuffer::from(sizes),
+            values,
+            validity,
+        );
+        Ok(Arc::new(list_view_array))
+    }
+}
+
+/// Builder for creating VariantArray output (for path extraction without type conversion)
+pub(crate) struct VariantToBinaryVariantArrowRowBuilder {
+    metadata: BinaryViewArray,
+    builder: VariantValueArrayBuilder,
+    nulls: NullBufferBuilder,
+}
+
+impl VariantToBinaryVariantArrowRowBuilder {
+    /// Creates a builder around `metadata`, passing `capacity` to both internal builders.
+    fn new(metadata: BinaryViewArray, capacity: usize) -> Self {
+        Self {
+            metadata,
+            builder: VariantValueArrayBuilder::new(capacity),
+            nulls: NullBufferBuilder::new(capacity),
+        }
+    }
+
+    /// Appends a null row to both the value builder and the null buffer.
+    fn append_null(&mut self) -> Result<()> {
+        self.builder.append_null();
+        self.nulls.append_null();
+        Ok(())
+    }
+
+    /// Stores `value` as-is and marks the row as non-null; always returns `Ok(true)`.
+    fn append_value(&mut self, value: Variant<'_, '_>) -> Result<bool> {
+        self.builder.append_value(value);
+        self.nulls.append_non_null();
+        Ok(true)
+    }
+
+    /// Assembles the final `VariantArray` from the metadata, the built values and the
+    /// null buffer.
+    fn finish(mut self) -> Result<ArrayRef> {
+        let values = self.builder.build()?;
+        let validity = self.nulls.finish();
+        // `typed_value` is `None`: values are kept only in the binary `value` column.
+        let variant_array = VariantArray::from_parts(self.metadata, Some(values), None, validity);
+        Ok(ArrayRef::from(variant_array))
+    }
+}
+
+#[derive(Default)]
+struct FakeNullBuilder {
+    item_count: usize, // rows appended so far; values and nulls are counted alike
+}
+
+impl FakeNullBuilder {
+    fn append_value(&mut self, _: ()) {
+        self.item_count += 1; // the unit payload carries no data; only the row count grows
+    }
+
+    fn append_null(&mut self) {
+        self.item_count += 1; // same bookkeeping as append_value
+    }
+
+    fn finish(self) -> NullArray {
+        NullArray::new(self.item_count) // the output is built from the row count alone
+    }
+}
+
+define_variant_to_primitive_builder!(
+    struct VariantToNullArrowRowBuilder<'a>
+    |_capacity| -> FakeNullBuilder { FakeNullBuilder::default() }, // capacity hint is unused
+    |value| value.as_null(), // a `None` result is handled by the macro as a failed conversion
+    type_name: "Null" // only used in the cast error message
+);
+
+#[cfg(test)]
+mod tests {
+    use super::make_primitive_variant_to_arrow_row_builder;
+    use arrow::compute::CastOptions;
+    use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
+    use arrow::error::ArrowError;
+    use std::sync::Arc;
+
+    #[test]
+    fn make_primitive_builder_rejects_non_primitive_types() {
+        let cast_options = CastOptions::default();
+        let item_field = Arc::new(Field::new("item", DataType::Int32, true));
+        let struct_fields = Fields::from(vec![Field::new("child", DataType::Int32, true)]);
+        let map_entries_field = Arc::new(Field::new(
+            "entries",
+            DataType::Struct(Fields::from(vec![
+                Field::new("key", DataType::Utf8, false),
+                Field::new("value", DataType::Float64, true),
+            ])),
+            true,
+        ));
+        let union_fields =
+            UnionFields::try_new(vec![1], vec![Field::new("child", DataType::Int32, true)])
+                .unwrap();
+        let run_ends_field = Arc::new(Field::new("run_ends", DataType::Int32, false));
+        let ree_values_field = Arc::new(Field::new("values", DataType::Utf8, true));
+
+        // Every nested or encoded type below must be refused by the primitive builder factory.
+        let rejected_types = [
+            DataType::List(item_field.clone()),
+            DataType::LargeList(item_field.clone()),
+            DataType::ListView(item_field.clone()),
+            DataType::LargeListView(item_field.clone()),
+            DataType::FixedSizeList(item_field, 2),
+            DataType::Struct(struct_fields),
+            DataType::Map(map_entries_field, false),
+            DataType::Union(union_fields, UnionMode::Dense),
+            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            DataType::RunEndEncoded(run_ends_field, ree_values_field),
+        ];
+
+        for data_type in rejected_types {
+            let err = make_primitive_variant_to_arrow_row_builder(&data_type, &cast_options, 1)
+                .err()
+                .unwrap_or_else(|| panic!("non-primitive type {data_type:?} should be rejected"));
+
+            // The rejection must be an InvalidArgumentError that names the offending type.
+            match err {
+                ArrowError::InvalidArgumentError(msg) => {
+                    assert!(msg.contains(&format!("{data_type:?}")));
+                }
+                other => panic!("expected InvalidArgumentError, got {other:?}"),
+            }
+        }
+    }
+}
diff --git a/parquet-variant-json/Cargo.toml b/parquet-variant-json/Cargo.toml
new file mode 100644
index 000000000000..f9550adc26af
--- /dev/null
+++ b/parquet-variant-json/Cargo.toml
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "parquet-variant-json"
+version = { workspace = true }
+license = { workspace = true }
+description = "Apache Parquet Variant to/from JSON"
+homepage = { workspace = true }
+repository = { workspace = true }
+authors = { workspace = true }
+keywords = ["arrow", "parquet", "variant"]
+readme = "../parquet-variant/README.md"
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+
+[dependencies]
+arrow-schema = { workspace = true }
+parquet-variant = { workspace = true }
+chrono = { workspace = true }
+serde_json = "1.0"
+base64 = "0.22"
+uuid = "1.18.0"
+
+
+[lib]
+name = "parquet_variant_json"
+bench = false
+
+[dev-dependencies]
diff --git a/parquet-variant-json/src/from_json.rs b/parquet-variant-json/src/from_json.rs
new file mode 100644
index 000000000000..4c22785ef106
--- /dev/null
+++ b/parquet-variant-json/src/from_json.rs
@@ -0,0 +1,670 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for parsing JSON strings as Variant
+
+use arrow_schema::ArrowError;
+use parquet_variant::{ObjectFieldBuilder, Variant, VariantBuilderExt};
+use serde_json::{Number, Value};
+
+/// Converts a JSON string to Variant using a [`VariantBuilderExt`], such as
+/// [`VariantBuilder`].
+///
+/// The resulting `metadata` and `value` buffers can be
+/// extracted using `builder.finish()`
+///
+/// # Arguments
+/// * `json` - The JSON string to parse as Variant.
+///
+/// # Returns
+///
+/// * `Ok(())` if successful
+/// * `Err` if `json` is not valid JSON or the parsed value cannot be appended
+///
+/// [`VariantBuilder`]: parquet_variant::VariantBuilder
+///
+/// ```rust
+/// # use parquet_variant::VariantBuilder;
+/// # use parquet_variant_json::{JsonToVariant, VariantToJson};
+///
+/// let mut variant_builder = VariantBuilder::new();
+/// let person_string = "{\"name\":\"Alice\", \"age\":30, ".to_string()
+/// + "\"email\":\"alice@example.com\", \"is_active\": true, \"score\": 95.7,"
+/// + "\"additional_info\": null}";
+/// variant_builder.append_json(&person_string)?;
+///
+/// let (metadata, value) = variant_builder.finish();
+///
+/// let variant = parquet_variant::Variant::try_new(&metadata, &value)?;
+///
+/// let json_result = variant.to_json_string()?;
+/// let json_value = variant.to_json_value()?;
+///
+/// let mut buffer = Vec::new();
+/// variant.to_json(&mut buffer)?;
+/// let buffer_result = String::from_utf8(buffer)?;
+/// assert_eq!(json_result, "{\"additional_info\":null,\"age\":30,".to_string() +
+/// "\"email\":\"alice@example.com\",\"is_active\":true,\"name\":\"Alice\",\"score\":95.7}");
+/// assert_eq!(json_result, buffer_result);
+/// assert_eq!(json_result, serde_json::to_string(&json_value)?);
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
+pub trait JsonToVariant {
+    /// Parses `json` and appends the resulting Variant to this builder
+    fn append_json(&mut self, json: &str) -> Result<(), ArrowError>;
+}
+
+impl<T: VariantBuilderExt> JsonToVariant for T {
+    fn append_json(&mut self, json: &str) -> Result<(), ArrowError> {
+        // Parse with serde_json first, then replay the parsed tree into the builder.
+        let parsed: Value = serde_json::from_str(json)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("JSON format error: {e}")))?;
+
+        append_json(&parsed, self)
+    }
+}
+
+/// Converts a JSON number into a Variant.
+///
+/// Integers that fit in `i64` use the narrowest of Int8/Int16/Int32/Int64; every other number
+/// falls back to a Double. An error is returned only if the number has no `f64` form.
+fn variant_from_number<'m, 'v>(n: &Number) -> Result<Variant<'m, 'v>, ArrowError> {
+    if let Some(i) = n.as_i64() {
+        // Find minimum Integer width to fit
+        return Ok(if let Ok(v) = i8::try_from(i) {
+            v.into()
+        } else if let Ok(v) = i16::try_from(i) {
+            v.into()
+        } else if let Ok(v) = i32::try_from(i) {
+            v.into()
+        } else {
+            i.into()
+        });
+    }
+    // Todo: Try decimal once we implement custom JSON parsing where we have access to strings
+    // Try double - currently json_to_variant does not produce decimal
+    n.as_f64().map(Variant::from).ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!("Failed to parse {n} as number"))
+    })
+}
+
+/// Recursively appends `json` to `builder`: scalars map to primitive Variants, arrays to
+/// Variant lists and objects to Variant objects.
+pub fn append_json(json: &Value, builder: &mut impl VariantBuilderExt) -> Result<(), ArrowError> {
+    match json {
+        Value::Null => builder.append_value(Variant::Null),
+        Value::Bool(flag) => builder.append_value(*flag),
+        Value::Number(number) => builder.append_value(variant_from_number(number)?),
+        Value::String(text) => builder.append_value(text.as_str()),
+        Value::Array(items) => {
+            let mut list_builder = builder.try_new_list()?;
+            for item in items {
+                append_json(item, &mut list_builder)?;
+            }
+            list_builder.finish();
+        }
+        Value::Object(members) => {
+            let mut obj_builder = builder.try_new_object()?;
+            for (key, member) in members.iter() {
+                let mut field_builder = ObjectFieldBuilder::new(key, &mut obj_builder);
+                append_json(member, &mut field_builder)?;
+            }
+            obj_builder.finish();
+        }
+    };
+    Ok(())
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::VariantToJson;
+    use arrow_schema::ArrowError;
+    use parquet_variant::{
+        ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16,
+    };
+
+    /// One JSON-to-Variant test case: the JSON text and the Variant it must parse to.
+    struct JsonToVariantTest<'a> {
+        json: &'a str,
+        expected: Variant<'a, 'a>,
+    }
+
+    impl JsonToVariantTest<'_> {
+        fn run(self) -> Result<(), ArrowError> {
+            let mut builder = VariantBuilder::new();
+            builder.append_json(self.json)?;
+            let (metadata, value) = builder.finish();
+            assert_eq!(Variant::try_new(&metadata, &value)?, self.expected);
+            Ok(())
+        }
+    }
+
+    #[test]
+    fn test_json_to_variant_null() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "null",
+            expected: Variant::Null,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "true",
+            expected: Variant::BooleanTrue,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_boolean_false() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "false",
+            expected: Variant::BooleanFalse,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "  127 ", // i8::MAX, padded with whitespace that parsing must tolerate
+            expected: Variant::Int8(127),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "  -128 ", // i8::MIN, the smallest value that still fits Int8
+            expected: Variant::Int8(-128),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_int16() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "  27134  ", // does not fit in i8, so Int16 is the narrowest width
+            expected: Variant::Int16(27134),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_int32() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: " -32767431  ", // does not fit in i16, so Int32 is the narrowest width
+            expected: Variant::Int32(-32767431),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_int64() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "92842754201389", // does not fit in i32, so the full Int64 width is used
+            expected: Variant::Int64(92842754201389),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "1.23",
+            expected: Variant::from(VariantDecimal4::try_new(123, 2)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "99999999.9",
+            expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "-99999999.9",
+            expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "0.999999999",
+            expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "0.000000001",
+            expected: Variant::from(VariantDecimal4::try_new(1, 9)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "-0.999999999",
+            expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "999999999.0",
+            expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "-999999999.0",
+            expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "0.999999999999999999",
+            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "9999999999999999.99",
+            expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "-9999999999999999.99",
+            expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "9999999999999999999", // integer larger than i64
+            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "0.9999999999999999999",
+            expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "79228162514264337593543950335", // 2 ^ 96 - 1
+            expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?),
+        }
+        .run()
+    }
+
+    #[ignore = "JSON numbers are not parsed into decimal variants yet"]
+    #[test]
+    fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "7.9228162514264337593543950335", // using scale higher than this falls into double
+            // since the max scale is 28.
+            expected: Variant::from(VariantDecimal16::try_new(
+                79228162514264337593543950335,
+                28,
+            )?),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_double_precision() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "0.79228162514264337593543950335", // more digits than the expected f64 keeps
+            expected: Variant::Double(0.792_281_625_142_643_4_f64),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "15e-1",
+            expected: Variant::Double(15e-1f64),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "-15e-1",
+            expected: Variant::Double(-15e-1f64),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_short_string() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: "\"harsh\"",
+            expected: Variant::ShortString(ShortString::try_new("harsh")?),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: &format!("\"{}\"", "a".repeat(63)), // 63 characters is still a ShortString
+            expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_long_string() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: &format!("\"{}\"", "a".repeat(64)), // 64 characters: expected as a regular String
+            expected: Variant::String(&"a".repeat(64)),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_very_long_string() -> Result<(), ArrowError> {
+        JsonToVariantTest {
+            json: &format!("\"{}\"", "b".repeat(100000)),
+            expected: Variant::String(&"b".repeat(100000)),
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_array_simple() -> Result<(), ArrowError> {
+        let mut variant_builder = VariantBuilder::new(); // builds the expected variant by hand
+        let mut list_builder = variant_builder.new_list();
+        list_builder.append_value(Variant::Int8(127));
+        list_builder.append_value(Variant::Int16(128)); // 128 is one past i8::MAX
+        list_builder.append_value(Variant::Int32(-32767431));
+        list_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        JsonToVariantTest {
+            json: "[127, 128, -32767431]", // each element is expected at its own narrowest width
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> {
+        let mut variant_builder = VariantBuilder::new();
+        let mut list_builder = variant_builder.new_list();
+        let mut object_builder_inner = list_builder.new_object();
+        object_builder_inner.insert("age", Variant::Int8(32));
+        object_builder_inner.finish();
+        list_builder.append_value(Variant::Int16(128));
+        list_builder.append_value(Variant::BooleanFalse);
+        list_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        JsonToVariantTest {
+            json: "[{\"age\": 32}, 128, false]",
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> {
+        // u16 offset - 128 i8's + 1 "true" = 257 bytes
+        let mut variant_builder = VariantBuilder::new();
+        let mut list_builder = variant_builder.new_list();
+        for _ in 0..128 {
+            list_builder.append_value(Variant::Int8(1));
+        }
+        list_builder.append_value(Variant::BooleanTrue);
+        list_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        JsonToVariantTest {
+            json: &format!("[{} true]", "1, ".repeat(128)),
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> {
+        // verify u24, and large_size
+        let mut variant_builder = VariantBuilder::new();
+        let mut list_builder = variant_builder.new_list();
+        for _ in 0..256 {
+            let mut list_builder_inner = list_builder.new_list();
+            for _ in 0..255 {
+                list_builder_inner.append_value(Variant::Null);
+            }
+            list_builder_inner.finish();
+        }
+        list_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let intermediate = format!("[{}]", vec!["null"; 255].join(", "));
+        let json = format!("[{}]", vec![intermediate; 256].join(", "));
+        JsonToVariantTest {
+            json: json.as_str(),
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_object_simple() -> Result<(), ArrowError> {
+        let mut variant_builder = VariantBuilder::new();
+        let mut object_builder = variant_builder.new_object();
+        object_builder.insert("a", Variant::Int8(3));
+        object_builder.insert("b", Variant::Int8(2));
+        object_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        JsonToVariantTest {
+            json: "{\"b\": 2, \"a\": 1, \"a\": 3}",
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_object_complex() -> Result<(), ArrowError> {
+        let mut variant_builder = VariantBuilder::new();
+        let mut object_builder = variant_builder.new_object();
+        let mut inner_list_builder = object_builder.new_list("booleans");
+        inner_list_builder.append_value(Variant::BooleanTrue);
+        inner_list_builder.append_value(Variant::BooleanFalse);
+        inner_list_builder.finish();
+        object_builder.insert("null", Variant::Null);
+        let mut inner_list_builder = object_builder.new_list("numbers");
+        inner_list_builder.append_value(Variant::Int8(4));
+        inner_list_builder.append_value(Variant::Double(-3e0));
+        inner_list_builder.append_value(Variant::Double(1001e-3));
+        inner_list_builder.finish();
+        object_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        JsonToVariantTest {
+            json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}",
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> {
+        // 256 elements (keys: 000-255) - each element is an object of 256 elements (240-495) - each
+        // element a list of numbers from 0-127
+        let keys: Vec<String> = (0..=255).map(|n| format!("{n:03}")).collect();
+        let innermost_list: String = format!(
+            "[{}]",
+            (0..=127)
+                .map(|n| format!("{n}"))
+                .collect::<Vec<_>>()
+                .join(",")
+        );
+        let inner_keys: Vec<String> = (240..=495).map(|n| format!("{n}")).collect();
+        let inner_object = format!(
+            "{{{}:{}}}",
+            inner_keys
+                .iter()
+                .map(|k| format!("\"{k}\""))
+                .collect::<Vec<String>>()
+                .join(format!(":{innermost_list},").as_str()),
+            innermost_list
+        );
+        let json = format!(
+            "{{{}:{}}}",
+            keys.iter()
+                .map(|k| format!("\"{k}\""))
+                .collect::<Vec<String>>()
+                .join(format!(":{inner_object},").as_str()),
+            inner_object
+        );
+        // Manually verify raw JSON value size
+        let mut variant_builder = VariantBuilder::new();
+        variant_builder.append_json(&json)?;
+        let (metadata, value) = variant_builder.finish();
+        let v = Variant::try_new(&metadata, &value)?;
+        let output_string = v.to_json_string()?;
+        assert_eq!(output_string, json);
+        // Verify metadata size = 1 + 2 + 2 * 497 + 3 * 496
+        assert_eq!(metadata.len(), 2485);
+        // Verify value size.
+        // Size of innermost_list: 1 + 1 + 2*(128 + 1) + 2*128 = 516
+        // Size of inner object: 1 + 4 + 2*256 + 3*(256 + 1) + 256 * 516 = 133384
+        // Size of json: 1 + 4 + 2*256 + 4*(256 + 1) + 256 * 133384 = 34147849
+        assert_eq!(value.len(), 34147849);
+
+        let mut variant_builder = VariantBuilder::new();
+        let mut object_builder = variant_builder.new_object();
+        keys.iter().for_each(|key| {
+            let mut inner_object_builder = object_builder.new_object(key);
+            inner_keys.iter().for_each(|inner_key| {
+                let mut list_builder = inner_object_builder.new_list(inner_key);
+                for i in 0..=127 {
+                    list_builder.append_value(Variant::Int8(i));
+                }
+                list_builder.finish();
+            });
+            inner_object_builder.finish();
+        });
+        object_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        JsonToVariantTest {
+            json: &json,
+            expected: variant,
+        }
+        .run()
+    }
+
+    #[test]
+    fn test_json_to_variant_unicode() -> Result<(), ArrowError> {
+        let json = "{\"爱\":\"अ\",\"a\":1}";
+        let mut variant_builder = VariantBuilder::new();
+        variant_builder.append_json(json)?;
+        let (metadata, value) = variant_builder.finish();
+        let v = Variant::try_new(&metadata, &value)?;
+        let output_string = v.to_json_string()?;
+        assert_eq!(output_string, "{\"a\":1,\"爱\":\"अ\"}");
+        let mut variant_builder = VariantBuilder::new();
+        let mut object_builder = variant_builder.new_object();
+        object_builder.insert("a", Variant::Int8(1));
+        object_builder.insert("爱", Variant::ShortString(ShortString::try_new("अ")?));
+        object_builder.finish();
+        let (metadata, value) = variant_builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        assert_eq!(
+            value,
+            &[
+                2u8, 2u8, 0u8, 1u8, 0u8, 2u8, 6u8, 12u8, 1u8, 13u8, 0xe0u8, 0xa4u8, 0x85u8
+            ]
+        );
+        assert_eq!(
+            metadata,
+            &[17u8, 2u8, 0u8, 1u8, 4u8, 97u8, 0xe7u8, 0x88u8, 0xb1u8]
+        );
+        JsonToVariantTest {
+            json,
+            expected: variant,
+        }
+        .run()
+    }
+}
diff --git a/parquet-variant-json/src/lib.rs b/parquet-variant-json/src/lib.rs
new file mode 100644
index 000000000000..6b42b15bd480
--- /dev/null
+++ b/parquet-variant-json/src/lib.rs
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Conversion between [JSON] and the [Variant Binary Encoding] from [Apache Parquet].
+//!
+//! [JSON]: https://www.json.org/json-en.html
+//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
+//! [Apache Parquet]: https://parquet.apache.org/
+//!
+//! * See [`JsonToVariant`] trait for converting a JSON string to a Variant.
+//! * See [`VariantToJson`] trait for converting a Variant to a JSON string.
+//!
+//! ## 🚧 Work In Progress
+//!
+//! This crate is under active development and is not yet ready for production use.
+//! If you are interested in helping, you can find more information on the GitHub [Variant issue].
+//!
+//! [Variant issue]: https://github.com/apache/arrow-rs/issues/6736
+
+mod from_json;
+mod to_json;
+
+pub use from_json::{JsonToVariant, append_json};
+pub use to_json::VariantToJson;
diff --git a/parquet-variant-json/src/to_json.rs b/parquet-variant-json/src/to_json.rs
new file mode 100644
index 000000000000..707b1fe0a38f
--- /dev/null
+++ b/parquet-variant-json/src/to_json.rs
@@ -0,0 +1,1341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Module for converting Variant data to JSON format
+use arrow_schema::ArrowError;
+use base64::{Engine as _, engine::general_purpose};
+use chrono::Timelike;
+use parquet_variant::{Variant, VariantList, VariantObject};
+use serde_json::Value;
+use std::io::Write;
+
+/// Extension trait for converting Variants to JSON
+pub trait VariantToJson {
+    /// Convert [`Variant`] to JSON, writing the output to any type that implements [`Write`]
+    ///
+    /// This is efficient for streaming or when you want to control the output destination.
+    ///
+    /// See [`VariantToJson::to_json_string`] for a convenience function that returns a
+    /// JSON string.
+    ///
+    /// # Arguments
+    ///
+    /// * `self` - The Variant value to convert
+    /// * `buffer` - Writer to output JSON to
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(())` if successful
+    /// * `Err` with error details if conversion fails
+    ///
+    /// # Examples
+    ///
+    ///
+    /// ```rust
+    /// # use parquet_variant::{Variant};
+    /// # use parquet_variant_json::VariantToJson;
+    /// # use arrow_schema::ArrowError;
+    /// let variant = Variant::from("Hello, World!");
+    /// let mut buffer = Vec::new();
+    /// variant.to_json(&mut buffer)?;
+    /// assert_eq!(String::from_utf8(buffer).unwrap(), "\"Hello, World!\"");
+    /// # Ok::<(), ArrowError>(())
+    /// ```
+    ///
+    /// # Example: Create a [`Variant::Object`] and convert to JSON
+    /// ```rust
+    /// # use parquet_variant::{Variant, VariantBuilder};
+    /// # use parquet_variant_json::VariantToJson;
+    /// # use arrow_schema::ArrowError;
+    /// let mut builder = VariantBuilder::new();
+    /// // Create an object builder that will write fields to the object
+    /// let mut object_builder = builder.new_object();
+    /// object_builder.insert("first_name", "Jiaying");
+    /// object_builder.insert("last_name", "Li");
+    /// object_builder.finish();
+    /// // Finish the builder to get the metadata and value
+    /// let (metadata, value) = builder.finish();
+    /// // Create the Variant and convert to JSON
+    /// let variant = Variant::try_new(&metadata, &value)?;
+    /// let mut writer = Vec::new();
+    /// variant.to_json(&mut writer)?;
+    /// assert_eq!(br#"{"first_name":"Jiaying","last_name":"Li"}"#, writer.as_slice());
+    /// # Ok::<(), ArrowError>(())
+    /// ```
+    fn to_json(&self, buffer: &mut impl Write) -> Result<(), ArrowError>;
+
+    /// Convert [`Variant`] to JSON [`String`]
+    ///
+    /// This is a convenience function that converts a Variant to a JSON string.
+    /// This is the same as calling [`VariantToJson::to_json`] with a [`Vec`].
+    /// It's the simplest way to get a JSON representation when you just need a String result.
+    ///
+    /// # Arguments
+    ///
+    /// * `self` - The Variant value to convert
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(String)` containing the JSON representation
+    /// * `Err` with error details if conversion fails
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use parquet_variant::{Variant};
+    /// # use parquet_variant_json::VariantToJson;
+    /// # use arrow_schema::ArrowError;
+    /// let variant = Variant::Int32(42);
+    /// let json = variant.to_json_string()?;
+    /// assert_eq!(json, "42");
+    /// # Ok::<(), ArrowError>(())
+    /// ```
+    ///
+    /// # Example: Create a [`Variant::Object`] and convert to JSON
+    ///
+    /// This example shows how to create an object with two fields and convert it to JSON:
+    /// ```json
+    /// {
+    ///   "first_name": "Jiaying",
+    ///   "last_name": "Li"
+    /// }
+    /// ```
+    ///
+    /// ```rust
+    /// # use parquet_variant::{Variant, VariantBuilder};
+    /// # use parquet_variant_json::VariantToJson;
+    /// # use arrow_schema::ArrowError;
+    /// let mut builder = VariantBuilder::new();
+    /// // Create an object builder that will write fields to the object
+    /// let mut object_builder = builder.new_object();
+    /// object_builder.insert("first_name", "Jiaying");
+    /// object_builder.insert("last_name", "Li");
+    /// object_builder.finish();
+    /// // Finish the builder to get the metadata and value
+    /// let (metadata, value) = builder.finish();
+    /// // Create the Variant and convert to JSON
+    /// let variant = Variant::try_new(&metadata, &value)?;
+    /// let json = variant.to_json_string()?;
+    /// assert_eq!(r#"{"first_name":"Jiaying","last_name":"Li"}"#, json);
+    /// # Ok::<(), ArrowError>(())
+    /// ```
+    fn to_json_string(&self) -> Result<String, ArrowError>;
+
+    /// Convert [`Variant`] to [`serde_json::Value`]
+    ///
+    /// This function converts a Variant to a [`serde_json::Value`], which is useful
+    /// when you need to work with the JSON data programmatically or integrate with
+    /// other serde-based JSON processing.
+    ///
+    /// # Arguments
+    ///
+    /// * `self` - The Variant value to convert
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(Value)` containing the JSON value
+    /// * `Err` with error details if conversion fails
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use parquet_variant::{Variant};
+    /// # use parquet_variant_json::VariantToJson;
+    /// # use serde_json::Value;
+    /// # use arrow_schema::ArrowError;
+    /// let variant = Variant::from("hello");
+    /// let json_value = variant.to_json_value()?;
+    /// assert_eq!(json_value, Value::String("hello".to_string()));
+    /// # Ok::<(), ArrowError>(())
+    /// ```
+    fn to_json_value(&self) -> Result<Value, ArrowError>;
+}
+
+impl<'m, 'v> VariantToJson for Variant<'m, 'v> {
+    fn to_json(&self, buffer: &mut impl Write) -> Result<(), ArrowError> {
+        match self {
+            Variant::Null => write!(buffer, "null")?,
+            Variant::BooleanTrue => write!(buffer, "true")?,
+            Variant::BooleanFalse => write!(buffer, "false")?,
+            Variant::Int8(i) => write!(buffer, "{i}")?,
+            Variant::Int16(i) => write!(buffer, "{i}")?,
+            Variant::Int32(i) => write!(buffer, "{i}")?,
+            Variant::Int64(i) => write!(buffer, "{i}")?,
+            Variant::Float(f) => write!(buffer, "{f}")?,
+            Variant::Double(f) => write!(buffer, "{f}")?,
+            Variant::Decimal4(decimal) => write!(buffer, "{decimal}")?,
+            Variant::Decimal8(decimal) => write!(buffer, "{decimal}")?,
+            Variant::Decimal16(decimal) => write!(buffer, "{decimal}")?,
+            Variant::Date(date) => write!(buffer, "\"{}\"", format_date_string(date))?,
+            Variant::TimestampMicros(ts) | Variant::TimestampNanos(ts) => {
+                write!(buffer, "\"{}\"", ts.to_rfc3339())?
+            }
+            Variant::TimestampNtzMicros(ts) => {
+                write!(buffer, "\"{}\"", format_timestamp_ntz_string(ts, 6))?
+            }
+            Variant::TimestampNtzNanos(ts) => {
+                write!(buffer, "\"{}\"", format_timestamp_ntz_string(ts, 9))?
+            }
+            Variant::Time(time) => write!(buffer, "\"{}\"", format_time_ntz_str(time))?,
+            Variant::Binary(bytes) => {
+                // Encode binary as base64 string
+                let base64_str = format_binary_base64(bytes);
+                let json_str = serde_json::to_string(&base64_str).map_err(|e| {
+                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
+                })?;
+                write!(buffer, "{json_str}")?
+            }
+            Variant::String(s) => {
+                // Use serde_json to properly escape the string
+                let json_str = serde_json::to_string(s).map_err(|e| {
+                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
+                })?;
+                write!(buffer, "{json_str}")?
+            }
+            Variant::ShortString(s) => {
+                // Use serde_json to properly escape the string
+                let json_str = serde_json::to_string(s.as_str()).map_err(|e| {
+                    ArrowError::InvalidArgumentError(format!("JSON encoding error: {e}"))
+                })?;
+                write!(buffer, "{json_str}")?
+            }
+            Variant::Uuid(uuid) => {
+                write!(buffer, "\"{uuid}\"")?;
+            }
+            Variant::Object(obj) => {
+                convert_object_to_json(buffer, obj)?;
+            }
+            Variant::List(arr) => {
+                convert_array_to_json(buffer, arr)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn to_json_string(&self) -> Result<String, ArrowError> {
+        let mut buffer = Vec::new();
+        self.to_json(&mut buffer)?;
+        String::from_utf8(buffer)
+            .map_err(|e| ArrowError::InvalidArgumentError(format!("UTF-8 conversion error: {e}")))
+    }
+
+    fn to_json_value(&self) -> Result<Value, ArrowError> {
+        match self {
+            Variant::Null => Ok(Value::Null),
+            Variant::BooleanTrue => Ok(Value::Bool(true)),
+            Variant::BooleanFalse => Ok(Value::Bool(false)),
+            Variant::Int8(i) => Ok(Value::Number((*i).into())),
+            Variant::Int16(i) => Ok(Value::Number((*i).into())),
+            Variant::Int32(i) => Ok(Value::Number((*i).into())),
+            Variant::Int64(i) => Ok(Value::Number((*i).into())),
+            Variant::Float(f) => serde_json::Number::from_f64((*f).into())
+                .map(Value::Number)
+                .ok_or_else(|| ArrowError::InvalidArgumentError("Invalid float value".to_string())),
+            Variant::Double(f) => serde_json::Number::from_f64(*f)
+                .map(Value::Number)
+                .ok_or_else(|| {
+                    ArrowError::InvalidArgumentError("Invalid double value".to_string())
+                }),
+            Variant::Decimal4(decimal4) => {
+                let scale = decimal4.scale();
+                let integer = decimal4.integer();
+
+                let integer = if scale == 0 {
+                    integer
+                } else {
+                    let divisor = 10_i32.pow(scale as u32);
+                    if integer % divisor != 0 {
+                        // fall back to floating point
+                        return Ok(Value::from(integer as f64 / divisor as f64));
+                    }
+                    integer / divisor
+                };
+                Ok(Value::from(integer))
+            }
+            Variant::Decimal8(decimal8) => {
+                let scale = decimal8.scale();
+                let integer = decimal8.integer();
+
+                let integer = if scale == 0 {
+                    integer
+                } else {
+                    let divisor = 10_i64.pow(scale as u32);
+                    if integer % divisor != 0 {
+                        // fall back to floating point
+                        return Ok(Value::from(integer as f64 / divisor as f64));
+                    }
+                    integer / divisor
+                };
+                Ok(Value::from(integer))
+            }
+            Variant::Decimal16(decimal16) => {
+                let scale = decimal16.scale();
+                let integer = decimal16.integer();
+
+                let integer = if scale == 0 {
+                    integer
+                } else {
+                    let divisor = 10_i128.pow(scale as u32);
+                    if integer % divisor != 0 {
+                        // fall back to floating point
+                        return Ok(Value::from(integer as f64 / divisor as f64));
+                    }
+                    integer / divisor
+                };
+                // i128 has higher precision than any 64-bit type. Try a lossless narrowing cast to
+                // i64 or u64 first, falling back to a lossy narrowing cast to f64 if necessary.
+                let value = i64::try_from(integer)
+                    .map(Value::from)
+                    .or_else(|_| u64::try_from(integer).map(Value::from))
+                    .unwrap_or_else(|_| Value::from(integer as f64));
+                Ok(value)
+            }
+            Variant::Date(date) => Ok(Value::String(format_date_string(date))),
+            Variant::TimestampMicros(ts) | Variant::TimestampNanos(ts) => {
+                Ok(Value::String(ts.to_rfc3339()))
+            }
+            Variant::TimestampNtzMicros(ts) => {
+                Ok(Value::String(format_timestamp_ntz_string(ts, 6)))
+            }
+            Variant::TimestampNtzNanos(ts) => Ok(Value::String(format_timestamp_ntz_string(ts, 9))),
+            Variant::Time(time) => Ok(Value::String(format_time_ntz_str(time))),
+            Variant::Binary(bytes) => Ok(Value::String(format_binary_base64(bytes))),
+            Variant::String(s) => Ok(Value::String(s.to_string())),
+            Variant::ShortString(s) => Ok(Value::String(s.to_string())),
+            Variant::Uuid(uuid) => Ok(Value::String(uuid.to_string())),
+            Variant::Object(obj) => {
+                let map = obj
+                    .iter()
+                    .map(|(k, v)| v.to_json_value().map(|json_val| (k.to_string(), json_val)))
+                    .collect::<Result<_, _>>()?;
+                Ok(Value::Object(map))
+            }
+            Variant::List(arr) => {
+                let vec = arr
+                    .iter()
+                    .map(|element| element.to_json_value())
+                    .collect::<Result<_, _>>()?;
+                Ok(Value::Array(vec))
+            }
+        }
+    }
+}
+
+// Format string constants to avoid duplication and reduce errors
+const DATE_FORMAT: &str = "%Y-%m-%d";
+
+// Helper functions for consistent formatting
+fn format_date_string(date: &chrono::NaiveDate) -> String {
+    date.format(DATE_FORMAT).to_string()
+}
+
+/// Format a timezone-less timestamp as `YYYY-MM-DDTHH:MM:SS.f` with `precision` fractional digits
+fn format_timestamp_ntz_string(ts: &chrono::NaiveDateTime, precision: usize) -> String {
+    // `%.{precision}f` is chrono's fixed-width fractional-seconds specifier (callers pass 6 or 9)
+    let pattern = format!("%Y-%m-%dT%H:%M:%S%.{precision}f");
+    // Format exactly once: the rendered text is the result, not another pattern to re-apply
+    ts.format(&pattern).to_string()
+}
+
+fn format_binary_base64(bytes: &[u8]) -> String {
+    general_purpose::STANDARD.encode(bytes)
+}
+
+/// Format a time as `HH:MM:SS.f`, truncated to microseconds with trailing zeros removed
+fn format_time_ntz_str(time: &chrono::NaiveTime) -> String {
+    let base = time.format("%H:%M:%S").to_string();
+    let micros = time.nanosecond() / 1000;
+    if micros == 0 {
+        // A whole second still carries a single fractional digit
+        return format!("{base}.0");
+    }
+    // Zero-pad to six digits, then strip only *trailing* zeros. Leading zeros are
+    // significant: 1ms (001000 micros) must render as `.001`, not `.1`.
+    let fraction = format!("{micros:06}");
+    format!("{base}.{}", fraction.trim_end_matches('0'))
+}
+
+/// Serialize a [`VariantObject`] as a JSON object of the form `{"key":value,...}`
+///
+/// This function itself writes no whitespace between tokens.
+fn convert_object_to_json(buffer: &mut impl Write, obj: &VariantObject) -> Result<(), ArrowError> {
+    write!(buffer, "{{")?;
+
+    // Fields are written in the order the object's iterator yields them
+    for (index, (name, field)) in obj.iter().enumerate() {
+        // A comma precedes every field except the first
+        if index > 0 {
+            write!(buffer, ",")?;
+        }
+
+        // serde_json quotes and escapes the field name
+        let quoted_name = serde_json::to_string(name).map_err(|e| {
+            ArrowError::InvalidArgumentError(format!("JSON key encoding error: {e}"))
+        })?;
+        write!(buffer, "{quoted_name}:")?;
+
+        // The value (scalar, list or nested object) is written via `to_json`
+        field.to_json(buffer)?;
+    }
+
+    write!(buffer, "}}")?;
+    Ok(())
+}
+
+/// Serialize a [`VariantList`] as a JSON array of the form `[value,value,...]`
+fn convert_array_to_json(buffer: &mut impl Write, arr: &VariantList) -> Result<(), ArrowError> {
+    write!(buffer, "[")?;
+
+    for (position, item) in arr.iter().enumerate() {
+        // A comma precedes every element except the first
+        if position > 0 {
+            write!(buffer, ",")?;
+        }
+
+        // Each element is written via `to_json`, which handles nested lists and objects
+        item.to_json(buffer)?;
+    }
+
+    write!(buffer, "]")?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use chrono::{DateTime, NaiveDate, NaiveTime, Utc};
+    use parquet_variant::{VariantDecimal4, VariantDecimal8, VariantDecimal16};
+
+    #[test]
+    fn test_decimal_edge_cases() -> Result<(), ArrowError> {
+        // Test negative decimal
+        let negative_variant = Variant::from(VariantDecimal4::try_new(-12345, 3)?);
+        let negative_json = negative_variant.to_json_string()?;
+        assert_eq!(negative_json, "-12.345");
+
+        // Test large scale decimal
+        let large_scale_variant = Variant::from(VariantDecimal8::try_new(123456789, 6)?);
+        let large_scale_json = large_scale_variant.to_json_string()?;
+        assert_eq!(large_scale_json, "123.456789");
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_decimal16_to_json() -> Result<(), ArrowError> {
+        let variant = Variant::from(VariantDecimal16::try_new(123456789012345, 4)?);
+        let json = variant.to_json_string()?;
+        assert_eq!(json, "12345678901.2345");
+
+        let json_value = variant.to_json_value()?;
+        assert!(matches!(json_value, Value::Number(_)));
+
+        // Test very large number
+        let large_variant = Variant::from(VariantDecimal16::try_new(999999999999999999, 2)?);
+        let large_json = large_variant.to_json_string()?;
+        // Due to f64 precision limits, very large numbers may lose precision
+        assert!(
+            large_json.starts_with("9999999999999999")
+                || large_json.starts_with("10000000000000000")
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_date_to_json() -> Result<(), ArrowError> {
+        let date = NaiveDate::from_ymd_opt(2023, 12, 25).unwrap();
+        let variant = Variant::Date(date);
+        let json = variant.to_json_string()?;
+        assert_eq!(json, "\"2023-12-25\"");
+
+        let json_value = variant.to_json_value()?;
+        assert_eq!(json_value, Value::String("2023-12-25".to_string()));
+
+        // Test leap year date
+        let leap_date = NaiveDate::from_ymd_opt(2024, 2, 29).unwrap();
+        let leap_variant = Variant::Date(leap_date);
+        let leap_json = leap_variant.to_json_string()?;
+        assert_eq!(leap_json, "\"2024-02-29\"");
+        Ok(())
+    }
+
+    #[test]
+    fn test_timestamp_micros_to_json() -> Result<(), ArrowError> {
+        let timestamp = DateTime::parse_from_rfc3339("2023-12-25T10:30:45Z")
+            .unwrap()
+            .with_timezone(&Utc);
+        let variant = Variant::TimestampMicros(timestamp);
+        let json = variant.to_json_string()?;
+        // RFC 3339 output: no fractional part for whole seconds, UTC rendered as `+00:00`
+        assert_eq!(json, "\"2023-12-25T10:30:45+00:00\"");
+
+        let json_value = variant.to_json_value()?;
+        assert_eq!(json_value, Value::String("2023-12-25T10:30:45+00:00".to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_timestamp_ntz_micros_to_json() -> Result<(), ArrowError> {
+        // The second argument is a nanosecond count: 123456ns is 123 whole microseconds
+        let naive_timestamp = DateTime::from_timestamp(1703505045, 123456)
+            .unwrap()
+            .naive_utc();
+        let variant = Variant::TimestampNtzMicros(naive_timestamp);
+        let json = variant.to_json_string()?;
+        assert_eq!(json, "\"2023-12-25T11:50:45.000123\"");
+
+        let json_value = variant.to_json_value()?;
+        assert_eq!(json_value, Value::String("2023-12-25T11:50:45.000123".to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_time_to_json() -> Result<(), ArrowError> {
+        let naive_time = NaiveTime::from_num_seconds_from_midnight_opt(12345, 123460708).unwrap();
+        let variant = Variant::Time(naive_time);
+        let json = variant.to_json_string()?;
+        assert_eq!("\"03:25:45.12346\"", json);
+
+        let json_value = variant.to_json_value()?;
+        assert!(matches!(json_value, Value::String(_)));
+        Ok(())
+    }
+
+    #[test]
+    fn test_timestamp_nanos_to_json() -> Result<(), ArrowError> {
+        // Nanosecond digits and the UTC offset must both appear in the output
+        let parsed = DateTime::parse_from_rfc3339("2023-12-25T10:30:45.123456789Z").unwrap();
+        let variant = Variant::TimestampNanos(parsed.with_timezone(&Utc));
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "\"2023-12-25T10:30:45.123456789+00:00\"");
+
+        // The serde_json form must be a string value
+        let as_value = variant.to_json_value()?;
+        assert!(matches!(as_value, Value::String(_)));
+        Ok(())
+    }
+
+    #[test]
+    fn test_timestamp_ntz_nanos_to_json() -> Result<(), ArrowError> {
+        // Timezone-less variant: the expected text carries no UTC offset suffix
+        let utc_instant = DateTime::from_timestamp(1703505045, 123456789).unwrap();
+        let variant = Variant::TimestampNtzNanos(utc_instant.naive_utc());
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "\"2023-12-25T11:50:45.123456789\"");
+
+        // The serde_json form must be a string value
+        let as_value = variant.to_json_value()?;
+        assert!(matches!(as_value, Value::String(_)));
+        Ok(())
+    }
+
+    #[test]
+    fn test_binary_to_json() -> Result<(), ArrowError> {
+        let binary_data = b"Hello, World!";
+        let variant = Variant::Binary(binary_data);
+        let json = variant.to_json_string()?;
+
+        // Binary is rendered as a quoted, padded base64 string; pin the exact text so a
+        // wrong or missing encoding cannot slip through a bare length check
+        assert_eq!(json, "\"SGVsbG8sIFdvcmxkIQ==\"");
+
+        let json_value = variant.to_json_value()?;
+        assert_eq!(json_value, Value::String("SGVsbG8sIFdvcmxkIQ==".to_string()));
+
+        // Empty input encodes to an empty JSON string
+        let empty_variant = Variant::Binary(b"");
+        let empty_json = empty_variant.to_json_string()?;
+        assert_eq!(empty_json, "\"\"");
+
+        // Bytes that are not valid UTF-8 must still come out as a quoted string
+        let special_variant = Variant::Binary(&[0, 255, 128, 64]);
+        let special_json = special_variant.to_json_string()?;
+        assert!(special_json.starts_with('"') && special_json.ends_with('"'));
+        Ok(())
+    }
+
+    #[test]
+    fn test_string_to_json() -> Result<(), ArrowError> {
+        // A plain string is emitted verbatim inside double quotes
+        let variant = Variant::from("hello world");
+        assert_eq!(variant.to_json_string()?, "\"hello world\"");
+
+        let as_value = variant.to_json_value()?;
+        assert_eq!(as_value, Value::String("hello world".to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_short_string_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::ShortString;
+        // `ShortString::try_new` is fallible, so its error is propagated with `?`
+        let variant = Variant::ShortString(ShortString::try_new("short")?);
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "\"short\"");
+
+        let as_value = variant.to_json_value()?;
+        assert_eq!(as_value, Value::String("short".to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_uuid_to_json() -> Result<(), ArrowError> {
+        // The same hyphenated text is used as input and as the expected output
+        let uuid_text = "123e4567-e89b-12d3-a456-426614174000";
+        let variant = Variant::Uuid(uuid::Uuid::parse_str(uuid_text).unwrap());
+
+        // Text form: the UUID wrapped in double quotes
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "\"123e4567-e89b-12d3-a456-426614174000\"");
+
+        // Value form: the bare UUID text as a JSON string
+        assert_eq!(variant.to_json_value()?, Value::String(uuid_text.to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_string_escaping() -> Result<(), ArrowError> {
+        // Input holds a newline, a tab and double quotes, all of which need escaping
+        let raw = "hello\nworld\t\"quoted\"";
+        let variant = Variant::from(raw);
+        // The serialized text carries the escape sequences...
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "\"hello\\nworld\\t\\\"quoted\\\"\"");
+
+        // ...while the value form holds the original, unescaped characters
+        assert_eq!(variant.to_json_value()?, Value::String(raw.to_string()));
+        Ok(())
+    }
+
+    #[test]
+    fn test_json_buffer_writing() -> Result<(), ArrowError> {
+        // Serialize straight into an in-memory byte sink instead of a `String`
+        let mut sink = Vec::new();
+        Variant::Int8(123).to_json(&mut sink)?;
+        // The bytes written must be valid UTF-8 spelling the bare number
+        let text = String::from_utf8(sink)
+            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
+        assert_eq!(text, "123");
+        Ok(())
+    }
+
+    /// One JSON conversion case: a variant plus the output expected from each conversion path
+    struct JsonTest {
+        variant: Variant<'static, 'static>, // input handed to both conversions
+        expected_json: &'static str, // exact text expected from `to_json_string`
+        expected_value: Value, // value expected from `to_json_value` and from re-parsing the text
+    }
+
+    impl JsonTest {
+        /// Asserts that `actual` matches `expected`, naming `stage` and `variant` on failure.
+        ///
+        /// * Two integer numbers are compared exactly. Comparing them as `f64` would let
+        ///   distinct values near `i64::MAX` round to the same float and pass unnoticed.
+        /// * Any other pair of numbers is compared as `f64` within `f64::EPSILON`, since an
+        ///   integral float such as `0.0` is written as `0` and parses back as an integer.
+        /// * Everything else must be equal.
+        fn assert_value_eq(
+            stage: &str,
+            variant: &Variant<'_, '_>,
+            actual: &Value,
+            expected: &Value,
+        ) {
+            match (actual, expected) {
+                (Value::Number(a), Value::Number(e))
+                    if (a.is_i64() || a.is_u64()) && (e.is_i64() || e.is_u64()) =>
+                {
+                    assert_eq!(a, e, "{} mismatch for variant: {:?}", stage, variant);
+                }
+                (Value::Number(a), Value::Number(e)) => {
+                    // Fail loudly rather than defaulting an unconvertible number to 0.0
+                    let actual_f64 = a.as_f64().expect("actual number should fit in f64");
+                    let expected_f64 = e.as_f64().expect("expected number should fit in f64");
+                    assert!(
+                        (actual_f64 - expected_f64).abs() < f64::EPSILON,
+                        "{} mismatch for variant: {:?}, got {}, expected {}",
+                        stage,
+                        variant,
+                        actual_f64,
+                        expected_f64
+                    );
+                }
+                _ => {
+                    assert_eq!(actual, expected, "{} mismatch for variant: {:?}", stage, variant);
+                }
+            }
+        }
+
+        /// Runs the case in three steps: checks the JSON text, checks the `serde_json`
+        /// value, then checks that the text parses back to the expected value.
+        fn run(self) {
+            let json_string = self
+                .variant
+                .to_json_string()
+                .expect("variant_to_json_string should succeed");
+            assert_eq!(
+                json_string, self.expected_json,
+                "JSON string mismatch for variant: {:?}",
+                self.variant
+            );
+
+            let json_value = self
+                .variant
+                .to_json_value()
+                .expect("variant_to_json_value should succeed");
+            // Numbers get an integer-exact or float-tolerant comparison, others plain equality
+            Self::assert_value_eq("JSON value", &self.variant, &json_value, &self.expected_value);
+
+            // Verify roundtrip: JSON string should parse to same value
+            let parsed: Value =
+                serde_json::from_str(&json_string).expect("Generated JSON should be valid");
+            Self::assert_value_eq("Parsed JSON", &self.variant, &parsed, &self.expected_value);
+        }
+    }
+
+    #[test]
+    fn test_primitive_json_conversion() { // table of JsonTest cases for primitive variants
+        use parquet_variant::ShortString;
+
+        // Null
+        JsonTest {
+            variant: Variant::Null,
+            expected_json: "null",
+            expected_value: Value::Null,
+        }
+        .run();
+
+        // Booleans
+        JsonTest {
+            variant: Variant::BooleanTrue,
+            expected_json: "true",
+            expected_value: Value::Bool(true),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::BooleanFalse,
+            expected_json: "false",
+            expected_value: Value::Bool(false),
+        }
+        .run();
+
+        // Integers - Int8 42 and -128, then the max and min of Int16, Int32 and Int64
+        JsonTest {
+            variant: Variant::Int8(42),
+            expected_json: "42",
+            expected_value: Value::Number(42.into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int8(-128),
+            expected_json: "-128",
+            expected_value: Value::Number((-128).into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int16(32767),
+            expected_json: "32767",
+            expected_value: Value::Number(32767.into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int16(-32768),
+            expected_json: "-32768",
+            expected_value: Value::Number((-32768).into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int32(2147483647),
+            expected_json: "2147483647",
+            expected_value: Value::Number(2147483647.into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int32(-2147483648),
+            expected_json: "-2147483648",
+            expected_value: Value::Number((-2147483648).into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int64(9223372036854775807),
+            expected_json: "9223372036854775807",
+            expected_value: Value::Number(9223372036854775807i64.into()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Int64(-9223372036854775808),
+            expected_json: "-9223372036854775808",
+            expected_value: Value::Number((-9223372036854775808i64).into()),
+        }
+        .run();
+
+        // Floats and doubles
+        JsonTest {
+            variant: Variant::Float(3.5),
+            expected_json: "3.5",
+            expected_value: serde_json::Number::from_f64(3.5)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Float(0.0),
+            expected_json: "0",
+            expected_value: Value::Number(0.into()), // integer 0: the text "0" parses back as an integer
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Float(-1.5),
+            expected_json: "-1.5",
+            expected_value: serde_json::Number::from_f64(-1.5)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Double(std::f64::consts::E),
+            expected_json: "2.718281828459045",
+            expected_value: serde_json::Number::from_f64(std::f64::consts::E)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        // Decimals - the expected text places the point `scale` digits from the right
+        JsonTest {
+            variant: Variant::from(VariantDecimal4::try_new(12345, 2).unwrap()),
+            expected_json: "123.45",
+            expected_value: serde_json::Number::from_f64(123.45)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::from(VariantDecimal4::try_new(42, 0).unwrap()),
+            expected_json: "42",
+            expected_value: serde_json::Number::from_f64(42.0)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::from(VariantDecimal8::try_new(1234567890, 3).unwrap()),
+            expected_json: "1234567.89",
+            expected_value: serde_json::Number::from_f64(1234567.89)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::from(VariantDecimal16::try_new(123456789012345, 4).unwrap()),
+            expected_json: "12345678901.2345",
+            expected_value: serde_json::Number::from_f64(12345678901.2345)
+                .map(Value::Number)
+                .unwrap(),
+        }
+        .run();
+
+        // Strings - regular, empty, and ShortString
+        JsonTest {
+            variant: Variant::from("hello world"),
+            expected_json: "\"hello world\"",
+            expected_value: Value::String("hello world".to_string()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::from(""),
+            expected_json: "\"\"",
+            expected_value: Value::String("".to_string()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::ShortString(ShortString::try_new("test").unwrap()),
+            expected_json: "\"test\"",
+            expected_value: Value::String("test".to_string()),
+        }
+        .run();
+
+        // Date (timestamps are covered by their own tests, not here)
+        JsonTest {
+            variant: Variant::Date(NaiveDate::from_ymd_opt(2023, 12, 25).unwrap()),
+            expected_json: "\"2023-12-25\"",
+            expected_value: Value::String("2023-12-25".to_string()),
+        }
+        .run();
+
+        // Binary data (base64 encoded)
+        JsonTest {
+            variant: Variant::Binary(b"test"),
+            expected_json: "\"dGVzdA==\"", // base64 encoded "test"
+            expected_value: Value::String("dGVzdA==".to_string()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Binary(b""),
+            expected_json: "\"\"", // empty base64
+            expected_value: Value::String("".to_string()),
+        }
+        .run();
+
+        JsonTest {
+            variant: Variant::Binary(b"binary data"),
+            expected_json: "\"YmluYXJ5IGRhdGE=\"", // base64 encoded "binary data"
+            expected_value: Value::String("YmluYXJ5IGRhdGE=".to_string()),
+        }
+        .run();
+    }
+
+    #[test]
+    fn test_string_escaping_comprehensive() {
+        let control_chars = "line1\nline2\ttab\"quote\"\\backslash"; // newline, tab, quotes, backslash
+        JsonTest {
+            variant: Variant::from(control_chars),
+            expected_json: "\"line1\\nline2\\ttab\\\"quote\\\"\\\\backslash\"",
+            expected_value: Value::String(control_chars.to_string()),
+        }
+        .run();
+        let unicode = "Hello 世界 🌍"; // non-ASCII text is expected to pass through unescaped
+        JsonTest {
+            variant: Variant::from(unicode),
+            expected_json: "\"Hello 世界 🌍\"",
+            expected_value: Value::String(unicode.to_string()),
+        }
+        .run();
+    }
+
+    #[test]
+    fn test_buffer_writing_variants() -> Result<(), ArrowError> {
+        let variant = Variant::from("test buffer writing");
+
+        // First sink: a vector created with `Vec::new()`
+        let mut from_new = Vec::new();
+        variant.to_json(&mut from_new)?;
+        let text = String::from_utf8(from_new)
+            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
+        assert_eq!(text, "\"test buffer writing\"");
+
+        // Second sink: a vector created with the `vec![]` macro
+        let mut from_macro = vec![];
+        variant.to_json(&mut from_macro)?;
+        let text = String::from_utf8(from_macro)
+            .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
+        assert_eq!(text, "\"test buffer writing\"");
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_simple_object_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        // Create a simple object with string, i32, bool and f64 fields
+        let mut builder = VariantBuilder::new();
+
+        builder
+            .new_object()
+            .with_field("name", "Alice")
+            .with_field("age", 30i32)
+            .with_field("active", true)
+            .with_field("score", 95.5f64)
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let json = variant.to_json_string()?;
+
+        // Parse the JSON to verify structure; a parse failure panics via `unwrap`
+        let parsed: Value = serde_json::from_str(&json).unwrap();
+        let obj = parsed.as_object().expect("expected JSON object");
+        assert_eq!(obj.get("name"), Some(&Value::String("Alice".to_string())));
+        assert_eq!(obj.get("age"), Some(&Value::Number(30.into())));
+        assert_eq!(obj.get("active"), Some(&Value::Bool(true)));
+        assert!(matches!(obj.get("score"), Some(Value::Number(_)))); // only the type is checked
+        assert_eq!(obj.len(), 4);
+
+        // `to_json_value` must also yield an object
+        let json_value = variant.to_json_value()?;
+        assert!(matches!(json_value, Value::Object(_)));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_empty_object_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        // Open an object and close it immediately, without adding any field
+        let mut builder = VariantBuilder::new();
+        builder.new_object().finish();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        // Text form: an empty pair of braces
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "{}");
+
+        // Value form: an object holding an empty map
+        let as_value = variant.to_json_value()?;
+        let no_fields = serde_json::Map::new();
+        assert_eq!(as_value, Value::Object(no_fields));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_object_with_special_characters_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+
+        // Field values cover quotes, a newline, a tab, backslashes and an emoji
+        builder
+            .new_object()
+            .with_field("message", "Hello \"World\"\nWith\tTabs")
+            .with_field("path", "C:\\Users\\Alice\\Documents")
+            .with_field("unicode", "😀 Smiley")
+            .finish();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let rendered = variant.to_json_string()?;
+
+        // Quotes, control characters and backslashes are escaped; the emoji is kept as-is
+        assert!(rendered.contains("Hello \\\"World\\\"\\nWith\\tTabs"));
+        assert!(rendered.contains("C:\\\\Users\\\\Alice\\\\Documents"));
+        assert!(rendered.contains("😀 Smiley"));
+
+        // The text must still be parseable JSON describing an object
+        let reparsed: Value = serde_json::from_str(&rendered).unwrap();
+        assert!(matches!(reparsed, Value::Object(_)));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_simple_list_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        // A list holding the five i32 values 1 through 5
+        let mut builder = VariantBuilder::new();
+        builder
+            .new_list()
+            .with_value(1i32)
+            .with_value(2i32)
+            .with_value(3i32)
+            .with_value(4i32)
+            .with_value(5i32)
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        assert_eq!(variant.to_json_string()?, "[1,2,3,4,5]"); // compact, no whitespace
+
+        // Value form: spot-check the length and both ends of the array
+        let as_value = variant.to_json_value()?;
+        let items = as_value.as_array().expect("expected JSON array");
+        assert_eq!(items.len(), 5);
+        assert_eq!(items[0], Value::Number(1.into()));
+        assert_eq!(items[4], Value::Number(5.into()));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_empty_list_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        // Open a list and close it immediately, without adding any element
+        let mut builder = VariantBuilder::new();
+        builder.new_list().finish();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+
+        // Text form: an empty pair of brackets
+        let rendered = variant.to_json_string()?;
+        assert_eq!(rendered, "[]");
+
+        // Value form: an array with no elements
+        let as_value = variant.to_json_value()?;
+        let no_items: Vec<Value> = vec![];
+        assert_eq!(as_value, Value::Array(no_items));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_mixed_type_list_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        // One list mixing a string, an integer, a boolean, a null and a double
+        let mut builder = VariantBuilder::new();
+        builder
+            .new_list()
+            .with_value("hello")
+            .with_value(42i32)
+            .with_value(true)
+            .with_value(()) // null
+            .with_value(std::f64::consts::PI)
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let rendered = variant.to_json_string()?;
+        // Re-parse the text and check each element in insertion order
+        let reparsed: Value = serde_json::from_str(&rendered).unwrap();
+        let items = reparsed.as_array().expect("expected JSON array");
+        assert_eq!(items.len(), 5);
+        assert_eq!(items[0], Value::String("hello".to_string()));
+        assert_eq!(items[1], Value::Number(42.into()));
+        assert_eq!(items[2], Value::Bool(true));
+        assert_eq!(items[3], Value::Null);
+        assert!(matches!(items[4], Value::Number(_))); // only the type of PI is checked
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_object_field_ordering_in_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+
+        {
+            // Insert the keys out of alphabetical order on purpose
+            let mut object = builder.new_object();
+            object.insert("zebra", "last");
+            object.insert("alpha", "first");
+            object.insert("beta", "second");
+            object.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let rendered = variant.to_json_string()?;
+
+        // Every key must be present with its value; key order itself is not asserted here
+        let reparsed: Value = serde_json::from_str(&rendered).unwrap();
+        let fields = reparsed.as_object().expect("expected JSON object");
+        assert_eq!(fields.len(), 3);
+        assert_eq!(fields.get("alpha"), Some(&Value::String("first".to_string())));
+        assert_eq!(fields.get("beta"), Some(&Value::String("second".to_string())));
+        assert_eq!(fields.get("zebra"), Some(&Value::String("last".to_string())));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_list_with_various_primitive_types_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+        // Seven elements: a string, an i32, two booleans, a double, a null and an i64
+        builder
+            .new_list()
+            .with_value("string_value")
+            .with_value(42i32)
+            .with_value(true)
+            .with_value(std::f64::consts::PI)
+            .with_value(false)
+            .with_value(()) // null
+            .with_value(100i64)
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let rendered = variant.to_json_string()?;
+
+        let reparsed: Value = serde_json::from_str(&rendered).unwrap();
+        let items = reparsed.as_array().expect("expected JSON array");
+        assert_eq!(items.len(), 7);
+        assert_eq!(items[0], Value::String("string_value".to_string()));
+        assert_eq!(items[1], Value::Number(42.into()));
+        assert_eq!(items[2], Value::Bool(true));
+        assert!(matches!(items[3], Value::Number(_))); // double: only the type is checked
+        assert_eq!(items[4], Value::Bool(false));
+        assert_eq!(items[5], Value::Null);
+        assert_eq!(items[6], Value::Number(100.into()));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_object_with_various_primitive_types_to_json() -> Result<(), ArrowError> {
+        use parquet_variant::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+        // Six fields: a string, an i32, a boolean, a double, a null and an i64
+        {
+            let mut object = builder.new_object();
+            object.insert("string_field", "test_string");
+            object.insert("int_field", 123i32);
+            object.insert("bool_field", true);
+            object.insert("float_field", 2.71f64);
+            object.insert("null_field", ());
+            object.insert("long_field", 999i64);
+            object.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value)?;
+        let rendered = variant.to_json_string()?;
+
+        let reparsed: Value = serde_json::from_str(&rendered).unwrap();
+        let fields = reparsed.as_object().expect("expected JSON object");
+        assert_eq!(fields.len(), 6);
+        assert_eq!(
+            fields.get("string_field"),
+            Some(&Value::String("test_string".to_string()))
+        );
+        assert_eq!(fields.get("int_field"), Some(&Value::Number(123.into())));
+        assert_eq!(fields.get("bool_field"), Some(&Value::Bool(true)));
+        assert!(matches!(fields.get("float_field"), Some(Value::Number(_))));
+        assert_eq!(fields.get("null_field"), Some(&Value::Null));
+        assert_eq!(fields.get("long_field"), Some(&Value::Number(999.into())));
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_decimal_precision_behavior() -> Result<(), ArrowError> {
+        // Test case that demonstrates f64 precision limits
+        // The unscaled value 2^53 + 1 is the smallest positive integer f64 cannot hold exactly
+        let high_precision_decimal8 = Variant::from(VariantDecimal8::try_new(
+            9007199254740993, // 2^53 + 1, exceeds f64 precision
+            6,
+        )?);
+
+        let json_string = high_precision_decimal8.to_json_string()?;
+        let json_value = high_precision_decimal8.to_json_value()?;
+
+        // No exact text is asserted for this value; the check is only that the two
+        // conversion paths agree with each other (even if the result is not exact)
+        let parsed: Value = serde_json::from_str(&json_string).unwrap();
+        assert_eq!(parsed, json_value);
+
+        // Test a fractional case whose trailing zeros are trimmed from the output
+        let exact_decimal = Variant::from(VariantDecimal8::try_new(
+            1234567890000, // Should result in 1234567.89 (trailing zeros trimmed)
+            6,
+        )?);
+
+        let json_string_exact = exact_decimal.to_json_string()?;
+        assert_eq!(json_string_exact, "1234567.89");
+
+        // Test integer case: every fractional digit is zero, so no decimal point is expected
+        let integer_decimal = Variant::from(VariantDecimal8::try_new(
+            42000000, // Should result in 42 (integer)
+            6,
+        )?);
+
+        let json_string_integer = integer_decimal.to_json_string()?;
+        assert_eq!(json_string_integer, "42");
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_float_nan_inf_handling() -> Result<(), ArrowError> {
+        // NaN must be rejected since JSON has no NaN (only `to_json_value` is exercised here)
+        let nan_variant = Variant::Float(f32::NAN);
+        let nan_result = nan_variant.to_json_value();
+        assert!(nan_result.is_err());
+        assert!(
+            nan_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid float value")
+        );
+
+        // Test positive infinity - should return an error since JSON doesn't support Infinity
+        let pos_inf_variant = Variant::Float(f32::INFINITY);
+        let pos_inf_result = pos_inf_variant.to_json_value();
+        assert!(pos_inf_result.is_err());
+        assert!(
+            pos_inf_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid float value")
+        );
+
+        // Test negative infinity - should return an error since JSON doesn't support -Infinity
+        let neg_inf_variant = Variant::Float(f32::NEG_INFINITY);
+        let neg_inf_result = neg_inf_variant.to_json_value();
+        assert!(neg_inf_result.is_err());
+        assert!(
+            neg_inf_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid float value")
+        );
+
+        // Same three cases for Double variants; the expected message says "double", not "float"
+        let nan_double_variant = Variant::Double(f64::NAN);
+        let nan_double_result = nan_double_variant.to_json_value();
+        assert!(nan_double_result.is_err());
+        assert!(
+            nan_double_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid double value")
+        );
+
+        let pos_inf_double_variant = Variant::Double(f64::INFINITY);
+        let pos_inf_double_result = pos_inf_double_variant.to_json_value();
+        assert!(pos_inf_double_result.is_err());
+        assert!(
+            pos_inf_double_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid double value")
+        );
+
+        let neg_inf_double_variant = Variant::Double(f64::NEG_INFINITY);
+        let neg_inf_double_result = neg_inf_double_variant.to_json_value();
+        assert!(neg_inf_double_result.is_err());
+        assert!(
+            neg_inf_double_result
+                .unwrap_err()
+                .to_string()
+                .contains("Invalid double value")
+        );
+
+        // Finite values must still convert; any error here fails the test through `?`
+        let normal_float = Variant::Float(std::f32::consts::PI);
+        let normal_result = normal_float.to_json_value()?;
+        assert!(matches!(normal_result, Value::Number(_)));
+
+        let normal_double = Variant::Double(std::f64::consts::E);
+        let normal_double_result = normal_double.to_json_value()?;
+        assert!(matches!(normal_double_result, Value::Number(_)));
+
+        Ok(())
+    }
+}
diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml
index 0065121726ac..51671d518910 100644
--- a/parquet-variant/Cargo.toml
+++ b/parquet-variant/Cargo.toml
@@ -17,9 +17,7 @@
 
 [package]
 name = "parquet-variant"
-# This package is still in development and thus the version does
-# not follow the versions of the rest of the crates in this repo.
-version = "0.1.0"
+version = { workspace = true }
 license = { workspace = true }
 description = "Apache Parquet Variant implementation in Rust"
 homepage = { workspace = true }
@@ -28,12 +26,39 @@ authors = { workspace = true }
 keywords = ["arrow", "parquet", "variant"]
 readme = "README.md"
 edition = { workspace = true }
-# needs a newer version than workspace due to
-# rror: `Option::<T>::unwrap` is not yet stable as a const fn
-rust-version = "1.83"
+rust-version = { workspace = true }
 
 [dependencies]
 arrow-schema = { workspace = true }
 chrono = { workspace = true }
+half = { version = "2.1", default-features = false }
+indexmap = "2.10.0"
+uuid = { version = "1.18.0", features = ["v4"]}
+
+simdutf8 = { workspace = true , optional = true }
 
 [lib]
+name = "parquet_variant"
+bench = false
+
+[dev-dependencies]
+paste = { version = "1.0" }
+criterion = { workspace = true, default-features = false }
+rand = { version = "0.9", default-features = false, features = [
+    "std",
+    "std_rng",
+    "thread_rng",
+] }
+
+[features]
+default = ["simdutf8"]
+# Enable SIMD UTF-8 validation
+simdutf8 = ["dep:simdutf8"]
+
+[[bench]]
+name = "variant_builder"
+harness = false
+
+[[bench]]
+name = "variant_validation"
+harness = false
diff --git a/parquet-variant/benches/variant_builder.rs b/parquet-variant/benches/variant_builder.rs
new file mode 100644
index 000000000000..420fa583ee1a
--- /dev/null
+++ b/parquet-variant/benches/variant_builder.rs
@@ -0,0 +1,524 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate parquet_variant;
+
+use criterion::*;
+
+use parquet_variant::{Variant, VariantBuilder};
+use rand::{
+    Rng, SeedableRng,
+    distr::{Alphanumeric, uniform::SampleUniform},
+    rngs::StdRng,
+};
+use std::{hint, ops::Range};
+
+fn random<T: SampleUniform + PartialEq + PartialOrd>(rng: &mut StdRng, range: Range<T>) -> T {
+    rng.random_range(range) // draws one value from the half-open `range`; `T` is inferred
+}
+
+// Generates a random alphanumeric string of 1 to 127 characters, which gives roughly a
+// 50/50 chance of it being a short or a long string
+fn random_string(rng: &mut StdRng) -> String {
+    let char_count: usize = rng.random_range(1..128);
+    let sampled = rng.sample_iter(&Alphanumeric).take(char_count);
+
+    // each sample is mapped through `char::from` before being collected
+    sampled.map(char::from).collect()
+}
+
+// Hands out strings from a fixed, pre-generated table in round-robin order
+struct RandomStringGenerator {
+    cursor: usize,
+    table: Vec<String>,
+}
+
+impl RandomStringGenerator {
+    // Fills the table with `capacity` strings produced by `random_string`
+    pub fn new(rng: &mut StdRng, capacity: usize) -> Self {
+        let table: Vec<String> = (0..capacity).map(|_| random_string(rng)).collect();
+        Self { cursor: 0, table }
+    }
+
+    // Returns the string under the cursor and advances the cursor, wrapping at the end.
+    // Panics on an empty table, because the table is indexed before the wrap-around.
+    pub fn next(&mut self) -> &str {
+        let current = self.cursor;
+        let entry = &self.table[current];
+        self.cursor = (current + 1) % self.table.len();
+        entry
+    }
+}
+
+// Creates an object by inserting 50,000 fields whose names count down from 1000
+fn bench_object_field_names_reverse_order(c: &mut Criterion) {
+    c.bench_function("bench_object_field_names_reverse_order", |bencher| {
+        let mut rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut rng, 117);
+        bencher.iter(|| {
+            let mut builder = VariantBuilder::new();
+            let mut object = builder.new_object();
+            for i in 0..50_000 {
+                let field_name = format!("{}", 1000 - i);
+                object.insert(&field_name, strings.next());
+            }
+
+            object.finish();
+            hint::black_box(builder.finish());
+        })
+    });
+}
+
+// Creates objects with a homogenous schema (same field names)
+/*
+    {
+        name: String,
+        age: i32,
+        likes_cilantro: bool,
+        comments: Long string
+        dishes: Vec<String>
+    }
+*/
+fn bench_object_same_schema(c: &mut Criterion) {
+    // The RNG and string table are created once and keep advancing across iterations
+    let mut rng = StdRng::seed_from_u64(42);
+    let mut strings = RandomStringGenerator::new(&mut rng, 117);
+
+    c.bench_function("bench_object_same_schema", |bencher| {
+        bencher.iter(|| {
+            for _ in 0..25_000 {
+                let mut builder = VariantBuilder::new();
+                let mut person = builder.new_object();
+                person.insert("name", strings.next());
+                person.insert("age", random::<u32>(&mut rng, 18..100) as i32);
+                person.insert("likes_cilantro", rng.random_bool(0.5));
+                person.insert("comments", strings.next());
+
+                let mut dishes = person.new_list("dishes");
+                for _ in 0..3 {
+                    dishes.append_value(strings.next());
+                }
+                dishes.finish();
+
+                person.finish();
+                hint::black_box(builder.finish());
+            }
+        })
+    });
+}
+
+// Creates a list of objects with the same schema (same field names)
+/*
+    {
+        name: String,
+        age: i32,
+        likes_cilantro: bool,
+        comments: Long string
+        dishes: Vec<String>
+    }
+*/
+fn bench_object_list_same_schema(c: &mut Criterion) {
+    c.bench_function("bench_object_list_same_schema", |bencher| {
+        let mut rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut rng, 101);
+
+        bencher.iter(|| {
+            // A single variant whose top-level value is a list of 25,000 objects
+            let mut builder = VariantBuilder::new();
+            let mut people = builder.new_list();
+
+            for _ in 0..25_000 {
+                let mut person = people.new_object();
+                person.insert("name", strings.next());
+                person.insert("age", random::<u32>(&mut rng, 18..100) as i32);
+                person.insert("likes_cilantro", rng.random_bool(0.5));
+                person.insert("comments", strings.next());
+
+                let mut dishes = person.new_list("dishes");
+                for _ in 0..3 {
+                    dishes.append_value(strings.next());
+                }
+                dishes.finish();
+
+                person.finish();
+            }
+
+            people.finish();
+            hint::black_box(builder.finish());
+        })
+    });
+}
+
+// Creates variant objects with an undefined schema (random field names)
+// Each field is a String, a nested Object, or a nested List with roughly equal probability
+fn bench_object_unknown_schema(c: &mut Criterion) {
+    c.bench_function("bench_object_unknown_schema", |bencher| {
+        let mut rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut rng, 1001);
+
+        bencher.iter(|| {
+            for _ in 0..200 {
+                let mut builder = VariantBuilder::new();
+                let mut object = builder.new_object();
+
+                for _ in 0..random::<u8>(&mut rng, 0..100) {
+                    if rng.random_bool(0.33) {
+                        // String field: the same table entry is used as name and value
+                        let key = strings.next();
+                        object.insert(key, key);
+                    } else if rng.random_bool(0.5) {
+                        // Object field: fewer than 25 string entries under a fixed field name
+                        let mut nested_object = object.new_object("rand_object");
+
+                        for _ in 0..random::<u8>(&mut rng, 0..25) {
+                            let key = strings.next();
+                            nested_object.insert(key, key);
+                        }
+
+                        nested_object.finish();
+                    } else {
+                        // List field: fewer than 25 string elements under a fixed field name
+                        let mut nested_list = object.new_list("rand_list");
+
+                        for _ in 0..random::<u8>(&mut rng, 0..25) {
+                            nested_list.append_value(strings.next());
+                        }
+
+                        nested_list.finish();
+                    }
+                }
+
+                object.finish();
+                hint::black_box(builder.finish());
+            }
+        })
+    });
+}
+
+// Creates a list of variant objects with an undefined schema (random field names)
+// Each field is a String, a nested Object, or a nested List with roughly equal probability
+fn bench_object_list_unknown_schema(c: &mut Criterion) {
+    c.bench_function("bench_object_list_unknown_schema", |bencher| {
+        let mut table_rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut table_rng, 1001);
+
+        bencher.iter(|| {
+            // Re-seeded on every iteration, so the sequence of random draws repeats,
+            // while the string table keeps advancing
+            let mut rng = StdRng::seed_from_u64(42);
+
+            let mut builder = VariantBuilder::new();
+            let mut objects = builder.new_list();
+
+            for _ in 0..200 {
+                let mut object = objects.new_object();
+
+                for _ in 0..random::<u8>(&mut rng, 0..100) {
+                    // One table entry is consumed per field, whichever kind is chosen
+                    let key = strings.next();
+
+                    if rng.random_bool(0.33) {
+                        object.insert(key, key);
+                    } else if rng.random_bool(0.5) {
+                        let mut nested_object = object.new_object("rand_object");
+
+                        for _ in 0..random::<u8>(&mut rng, 0..25) {
+                            let key = strings.next();
+                            nested_object.insert(key, key);
+                        }
+
+                        nested_object.finish();
+                    } else {
+                        let mut nested_list = object.new_list("rand_list");
+
+                        // Every element of this list is the same `key` string
+                        for _ in 0..random::<u8>(&mut rng, 0..25) {
+                            nested_list.append_value(key);
+                        }
+
+                        nested_list.finish();
+                    }
+                }
+
+                object.finish();
+            }
+
+            objects.finish();
+            hint::black_box(builder.finish());
+        })
+    });
+}
+
+// Creates objects with a partially homogenous schema (same field names)
+/*
+    {
+        "id": &[u8],        // Following are common across all objects
+        "span_id": &[u8],
+        "created": u32,
+        "ended": u32,
+        "span_name": String,
+
+        "attributes": {
+            // following fields are randomized
+        }
+    }
+*/
+fn bench_object_partially_same_schema(c: &mut Criterion) {
+    c.bench_function("bench_object_partially_same_schema", |bencher| {
+        // The string table is built once and shared by every iteration
+        let mut table_rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut table_rng, 117);
+
+        bencher.iter(|| {
+            // Re-seeded on every iteration, so the sequence of random draws repeats,
+            // while the string table keeps advancing
+            let mut rng = StdRng::seed_from_u64(42);
+
+            for _ in 0..200 {
+                let mut builder = VariantBuilder::new();
+                let mut span = builder.new_object();
+
+                // Fields shared by every object: two ids inserted as 16-byte slices,
+                // two random integers and one string from the table
+                let id = random::<i128>(&mut rng, 0..i128::MAX).to_le_bytes();
+                span.insert("id", id.as_slice());
+
+                let span_id = random::<i128>(&mut rng, 0..i128::MAX).to_le_bytes();
+                span.insert("span_id", span_id.as_slice());
+
+                span.insert("created", random::<u32>(&mut rng, 0..u32::MAX) as i32);
+                span.insert("ended", random::<u32>(&mut rng, 0..u32::MAX) as i32);
+                span.insert("span_name", strings.next());
+
+                // Randomized part: a nested object with fewer than 100 fields, each using
+                // one table string as both name and value
+                {
+                    let mut attributes = span.new_object("attributes");
+
+                    for _ in 0..random::<u8>(&mut rng, 0..100) {
+                        let key = strings.next();
+                        attributes.insert(key, key);
+                    }
+
+                    attributes.finish();
+                }
+
+                span.finish();
+                hint::black_box(builder.finish());
+            }
+        })
+    });
+}
+
+// Creates a list of variant objects with a partially homogenous schema (similar field names)
+/*
+    {
+        "id": &[u8],        // Following are common across all objects
+        "span_id": &[u8],
+        "created": u32,
+        "ended": u32,
+        "span_name": String,
+
+        "attributes": {
+            // following fields are randomized
+        }
+    }
+*/
+fn bench_object_list_partially_same_schema(c: &mut Criterion) {
+    c.bench_function("bench_object_list_partially_same_schema", |bencher| {
+        // The RNG and string table are created once, outside `iter`, so both keep
+        // advancing across iterations
+        let mut rng = StdRng::seed_from_u64(42);
+        let mut strings = RandomStringGenerator::new(&mut rng, 117);
+
+        bencher.iter(|| {
+            // A single variant whose top-level value is a list of 100 objects
+            let mut builder = VariantBuilder::new();
+
+            let mut spans = builder.new_list();
+
+            for _ in 0..100 {
+                let mut span = spans.new_object();
+
+                // Fields shared by every object: two ids inserted as 16-byte slices,
+                // two random integers and one string from the table
+                let id = random::<i128>(&mut rng, 0..i128::MAX).to_le_bytes();
+                span.insert("id", id.as_slice());
+
+                let span_id = random::<i128>(&mut rng, 0..i128::MAX).to_le_bytes();
+                span.insert("span_id", span_id.as_slice());
+
+                span.insert("created", random::<u32>(&mut rng, 0..u32::MAX) as i32);
+                span.insert("ended", random::<u32>(&mut rng, 0..u32::MAX) as i32);
+                span.insert("span_name", strings.next());
+
+                // Randomized part: a nested object with fewer than 100 fields, each using
+                // one table string as both name and value
+                {
+                    let mut attributes = span.new_object("attributes");
+
+                    for _ in 0..random::<u8>(&mut rng, 0..100) {
+                        let key = strings.next();
+                        attributes.insert(key, key);
+                    }
+
+                    attributes.finish();
+                }
+
+                span.finish();
+            }
+
+            spans.finish();
+            hint::black_box(builder.finish());
+        })
+    });
+}
+
+// Benchmark validation performance
+fn bench_validation_validated_vs_unvalidated(c: &mut Criterion) {
+    let mut rng = StdRng::seed_from_u64(42);
+    let mut strings = RandomStringGenerator::new(&mut rng, 117);
+
+    // Pre-generate 100 (metadata, value) pairs, each encoding one small object
+    let mut samples = Vec::new();
+    for _ in 0..100 {
+        let mut builder = VariantBuilder::new();
+        let mut object = builder.new_object();
+        object.insert("field1", strings.next());
+        object.insert("field2", rng.random::<i32>());
+        object.insert("field3", rng.random::<bool>());
+
+        let mut numbers = object.new_list("field4");
+        for _ in 0..10 {
+            numbers.append_value(rng.random::<i32>());
+        }
+        numbers.finish();
+
+        object.finish();
+        samples.push(builder.finish());
+    }
+
+    let mut group = c.benchmark_group("validation");
+
+    group.bench_function("validated_construction", |bencher| {
+        bencher.iter(|| {
+            // `try_new` returns a `Result`, unwrapped here
+            for (metadata, value) in &samples {
+                hint::black_box(Variant::try_new(metadata, value).unwrap());
+            }
+        })
+    });
+
+    group.bench_function("unvalidated_construction", |bencher| {
+        bencher.iter(|| {
+            // `new` hands back a `Variant` directly, with nothing to unwrap
+            for (metadata, value) in &samples {
+                hint::black_box(Variant::new(metadata, value));
+            }
+        })
+    });
+
+    group.bench_function("validation_cost", |bencher| {
+        // Create unvalidated variants first, outside the measured closure
+        let unvalidated: Vec<_> = samples
+            .iter()
+            .map(|(metadata, value)| Variant::new(metadata, value))
+            .collect();
+
+        bencher.iter(|| {
+            for variant in &unvalidated {
+                let validated = variant.clone().with_full_validation().unwrap();
+                hint::black_box(validated);
+            }
+        })
+    });
+
+    group.finish();
+}
+
+// Benchmark iteration performance on validated vs unvalidated variants
+fn bench_iteration_performance(c: &mut Criterion) {
+    let mut rng = StdRng::seed_from_u64(42);
+    // Build a list of 1000 objects, each with a `field_{i}` integer and a `nested_data` string
+    let mut builder = VariantBuilder::new();
+    let mut objects = builder.new_list();
+
+    for i in 0..1000 {
+        let field_name = format!("field_{i}");
+        let mut object = objects.new_object();
+        object.insert(&field_name, rng.random::<i32>());
+        object.insert("nested_data", format!("data_{i}").as_str());
+        object.finish();
+    }
+    objects.finish();
+
+    let (metadata, value) = builder.finish();
+    let validated = Variant::try_new(&metadata, &value).unwrap();
+    let unvalidated = Variant::new(&metadata, &value);
+
+    let mut group = c.benchmark_group("iteration");
+
+    group.bench_function("validated_iteration", |bencher| {
+        bencher.iter(|| {
+            if let Some(items) = validated.as_list() {
+                for item in items.iter() {
+                    hint::black_box(item);
+                }
+            }
+        })
+    });
+
+    group.bench_function("unvalidated_fallible_iteration", |bencher| {
+        bencher.iter(|| {
+            if let Some(items) = unvalidated.as_list() {
+                for item in items.iter_try().flatten() {
+                    hint::black_box(item);
+                }
+            }
+        })
+    });
+
+    group.finish();
+}
+
+fn bench_extend_metadata_builder(c: &mut Criterion) {
+    // 400,000 distinct field names, built once outside the measured closure
+    let field_names: Vec<String> = (0..400_000).map(|i| format!("id_{i}")).collect();
+
+    c.bench_function("bench_extend_metadata_builder", |bencher| {
+        bencher.iter(|| {
+            let names = field_names.iter().map(String::as_str);
+            hint::black_box(VariantBuilder::new().with_field_names(names));
+        })
+    });
+}
+
+criterion_group!(
+    benches, // name of the group, referenced by `criterion_main!` below
+    bench_object_field_names_reverse_order,
+    bench_object_same_schema,
+    bench_object_list_same_schema,
+    bench_object_unknown_schema,
+    bench_object_list_unknown_schema,
+    bench_object_partially_same_schema,
+    bench_object_list_partially_same_schema,
+    bench_validation_validated_vs_unvalidated,
+    bench_iteration_performance,
+    bench_extend_metadata_builder
+);
+
+criterion_main!(benches); // provides the entry point; the bench targets set `harness = false`
diff --git a/parquet-variant/benches/variant_validation.rs b/parquet-variant/benches/variant_validation.rs
new file mode 100644
index 000000000000..dcf7681a76ed
--- /dev/null
+++ b/parquet-variant/benches/variant_validation.rs
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate parquet_variant;
+
+use criterion::*;
+
+use parquet_variant::{Variant, VariantBuilder};
+
+fn generate_large_object() -> (Vec<u8>, Vec<u8>) {
+    // 126 fields (keys: 000-125) - each field is an object of 126 fields (keys: 125-250) - each
+    // of those is a list of the Int8 values 0-127
+    let mut builder = VariantBuilder::new();
+    let mut outer = builder.new_object();
+
+    for i in 0..=125 {
+        let outer_key = format!("{i:03}");
+        let mut inner = outer.new_object(&outer_key);
+
+        for j in 125..=250 {
+            let inner_key = format!("{j}");
+            let mut numbers = inner.new_list(&inner_key);
+
+            for k in 0..=127 {
+                numbers.append_value(Variant::Int8(k));
+            }
+            numbers.finish();
+        }
+        inner.finish();
+    }
+    outer.finish();
+
+    builder.finish()
+}
+
+fn generate_complex_object() -> (Vec<u8>, Vec<u8>) {
+    let mut builder = VariantBuilder::new();
+    let mut root = builder.new_object();
+    // "booleans": 1024 `true` values
+    let mut booleans = root.new_list("booleans");
+    for _ in 0..1024 {
+        booleans.append_value(Variant::BooleanTrue);
+    }
+    booleans.finish();
+
+    root.insert("null", Variant::Null);
+    // "numbers": 1024 repetitions of one Int8 followed by two Doubles
+    let mut numbers = root.new_list("numbers");
+    for _ in 0..1024 {
+        numbers.append_value(Variant::Int8(4));
+        numbers.append_value(Variant::Double(-3e0));
+        numbers.append_value(Variant::Double(1001e-3));
+    }
+    numbers.finish();
+
+    // "nested": 2048 integer fields whose names count down from 1024
+    let mut nested = root.new_object("nested");
+    for i in 0..2048 {
+        let key = format!("{}", 1024 - i);
+        nested.insert(&key, i);
+    }
+    nested.finish();
+
+    root.finish();
+    builder.finish()
+}
+
+fn generate_large_nested_list() -> (Vec<u8>, Vec<u8>) {
+    // 255 lists, each alternating 120 times between a null and a list of 20 doubles
+    let mut builder = VariantBuilder::new();
+    let mut outer = builder.new_list();
+    for _ in 0..255 {
+        let mut middle = outer.new_list();
+        for _ in 0..120 {
+            middle.append_value(Variant::Null);
+
+            let mut innermost = middle.new_list();
+            for _ in 0..20 {
+                innermost.append_value(Variant::Double(-3e0));
+            }
+            innermost.finish();
+        }
+        middle.finish();
+    }
+    outer.finish();
+    builder.finish()
+}
+
+// Generates a large object once, then measures `Variant::try_new` over its bytes
+fn bench_validate_large_object(c: &mut Criterion) {
+    let (metadata, value) = generate_large_object();
+    c.bench_function("bench_validate_large_object", |bencher| {
+        bencher.iter(|| {
+            std::hint::black_box(Variant::try_new(&metadata, &value).unwrap());
+        })
+    });
+}
+
+fn bench_validate_complex_object(c: &mut Criterion) {
+    let (metadata, value) = generate_complex_object(); // built once, outside the timed closure
+    c.bench_function("bench_validate_complex_object", |bencher| {
+        bencher.iter(|| {
+            std::hint::black_box(Variant::try_new(&metadata, &value).unwrap());
+        })
+    });
+}
+
+fn bench_validate_large_nested_list(c: &mut Criterion) {
+    let (metadata, value) = generate_large_nested_list(); // built once, outside the timed closure
+    c.bench_function("bench_validate_large_nested_list", |bencher| {
+        bencher.iter(|| {
+            std::hint::black_box(Variant::try_new(&metadata, &value).unwrap());
+        })
+    });
+}
+
+criterion_group!(
+    benches, // name of the group, referenced by `criterion_main!` below
+    bench_validate_large_object,
+    bench_validate_complex_object,
+    bench_validate_large_nested_list
+);
+
+criterion_main!(benches); // provides the entry point; the bench targets set `harness = false`
diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs
index 6cde4ce91125..e6122f062c38 100644
--- a/parquet-variant/src/builder.rs
+++ b/parquet-variant/src/builder.rs
@@ -14,12 +14,26 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::decoder::{VariantBasicType, VariantPrimitiveType};
-use crate::{ShortString, Variant};
-use std::collections::HashMap;
-
-const BASIC_TYPE_BITS: u8 = 2;
-const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+use crate::decoder::{OffsetSizeBytes, VariantBasicType, VariantPrimitiveType};
+use crate::{
+    ShortString, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantList,
+    VariantMetadata, VariantObject,
+};
+use arrow_schema::ArrowError;
+use chrono::Timelike;
+use uuid::Uuid;
+
+mod list;
+mod metadata;
+mod object;
+
+pub use list::*;
+pub use metadata::*;
+pub use object::*;
+
+pub(crate) const BASIC_TYPE_BITS: u8 = 2;
+pub(crate) const UNIX_EPOCH_DATE: chrono::NaiveDate =
+    chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
 
 fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
     (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
@@ -29,50 +43,439 @@ fn short_string_header(len: usize) -> u8 {
     (len as u8) << 2 | VariantBasicType::ShortString as u8
 }
 
-fn array_header(large: bool, offset_size: u8) -> u8 {
-    let large_bit = if large { 1 } else { 0 };
-    (large_bit << (BASIC_TYPE_BITS + 2))
-        | ((offset_size - 1) << BASIC_TYPE_BITS)
-        | VariantBasicType::Array as u8
+pub(crate) fn int_size(v: usize) -> OffsetSizeBytes {
+    match v {
+        ..=0xFF => OffsetSizeBytes::One, // fits in a single byte
+        ..=0xFFFF => OffsetSizeBytes::Two, // fits in two bytes
+        ..=0xFF_FFFF => OffsetSizeBytes::Three, // fits in three bytes
+        _ => OffsetSizeBytes::Four, // everything larger, including values above u32::MAX
+    }
+}
+
+/// Wrapper around a `Vec<u8>` that provides methods for appending
+/// primitive values, variant types, and metadata.
+///
+/// This is used internally by the builders to construct
+/// the `value` field for [`Variant`] values.
+///
+/// You can reuse an existing `Vec<u8>` by using the `from` impl
+#[derive(Debug, Default)]
+pub struct ValueBuilder(Vec<u8>);
+
+impl ValueBuilder {
+    /// Construct a `ValueBuilder` that writes to a new, empty underlying `Vec`
+    pub fn new() -> Self {
+        Self::default()
+    }
 }
 
-fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
-    let large_bit = if large { 1 } else { 0 };
-    (large_bit << (BASIC_TYPE_BITS + 4))
-        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
-        | ((offset_size - 1) << BASIC_TYPE_BITS)
-        | VariantBasicType::Object as u8
+/// Macro to generate the match statement shared by append_variant, try_append_variant, and
+/// append_variant_bytes -- each caller supplies its own arms for the object and list cases.
+macro_rules! variant_append_value {
+    ($builder:expr, $value:expr, $object_pat:pat => $object_arm:expr, $list_pat:pat => $list_arm:expr) => {
+        match $value {
+            Variant::Null => $builder.append_null(),
+            Variant::BooleanTrue => $builder.append_bool(true),
+            Variant::BooleanFalse => $builder.append_bool(false),
+            Variant::Int8(v) => $builder.append_int8(v),
+            Variant::Int16(v) => $builder.append_int16(v),
+            Variant::Int32(v) => $builder.append_int32(v),
+            Variant::Int64(v) => $builder.append_int64(v),
+            Variant::Date(v) => $builder.append_date(v),
+            Variant::Time(v) => $builder.append_time_micros(v),
+            Variant::TimestampMicros(v) => $builder.append_timestamp_micros(v),
+            Variant::TimestampNtzMicros(v) => $builder.append_timestamp_ntz_micros(v),
+            Variant::TimestampNanos(v) => $builder.append_timestamp_nanos(v),
+            Variant::TimestampNtzNanos(v) => $builder.append_timestamp_ntz_nanos(v),
+            Variant::Decimal4(decimal4) => $builder.append_decimal4(decimal4),
+            Variant::Decimal8(decimal8) => $builder.append_decimal8(decimal8),
+            Variant::Decimal16(decimal16) => $builder.append_decimal16(decimal16),
+            Variant::Float(v) => $builder.append_float(v),
+            Variant::Double(v) => $builder.append_double(v),
+            Variant::Binary(v) => $builder.append_binary(v),
+            Variant::String(s) => $builder.append_string(s),
+            Variant::ShortString(s) => $builder.append_short_string(s),
+            Variant::Uuid(v) => $builder.append_uuid(v),
+            $object_pat => $object_arm, // caller-supplied arm for objects
+            $list_pat => $list_arm, // caller-supplied arm for lists
+        }
+    };
 }
 
-fn int_size(v: usize) -> u8 {
-    match v {
-        0..=0xFF => 1,
-        0x100..=0xFFFF => 2,
-        0x10000..=0xFFFFFF => 3,
-        _ => 4,
+impl ValueBuilder {
+    fn append_u8(&mut self, term: u8) {
+        self.0.push(term); // appends one raw byte to the underlying buffer
+    }
+
+    fn append_slice(&mut self, other: &[u8]) {
+        self.0.extend_from_slice(other); // appends the bytes of `other` verbatim, in order
+    }
+
+    fn append_primitive_header(&mut self, primitive_type: VariantPrimitiveType) {
+        self.append_u8(primitive_header(primitive_type)); // single header byte for the type
+    }
+
+    /// Consumes the builder and returns the underlying `Vec<u8>` with every byte written so far
+    pub fn into_inner(self) -> Vec<u8> {
+        self.0
+    }
+
+    pub(crate) fn inner_mut(&mut self) -> &mut Vec<u8> {
+        &mut self.0 // direct mutable access to the raw buffer for crate-internal callers
+    }
+
+    // Variant types below
+
+    fn append_null(&mut self) {
+        self.append_primitive_header(VariantPrimitiveType::Null); // header only, no payload
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        // Booleans carry no payload: the value is encoded in the header's primitive type
+        let header_type = match value {
+            true => VariantPrimitiveType::BooleanTrue,
+            false => VariantPrimitiveType::BooleanFalse,
+        };
+        self.append_primitive_header(header_type);
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.append_primitive_header(VariantPrimitiveType::Int8);
+        self.append_slice(&value.to_le_bytes()); // payload: the single byte of the value
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.append_primitive_header(VariantPrimitiveType::Int16);
+        self.append_slice(&value.to_le_bytes()); // payload: 2 bytes, little-endian
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.append_primitive_header(VariantPrimitiveType::Int32);
+        self.append_slice(&value.to_le_bytes()); // payload: 4 bytes, little-endian
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.append_primitive_header(VariantPrimitiveType::Int64);
+        self.append_slice(&value.to_le_bytes()); // payload: 8 bytes, little-endian
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.append_primitive_header(VariantPrimitiveType::Float);
+        self.append_slice(&value.to_le_bytes()); // payload: the 4 bytes of the f32, little-endian
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.append_primitive_header(VariantPrimitiveType::Double);
+        self.append_slice(&value.to_le_bytes()); // payload: the 8 bytes of the f64, little-endian
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.append_primitive_header(VariantPrimitiveType::Date);
+        let days = value.signed_duration_since(UNIX_EPOCH_DATE).num_days(); // since 1970-01-01
+        self.append_slice(&(days as i32).to_le_bytes()); // narrowed to 4 little-endian bytes
+    }
+
+    fn append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) {
+        // payload: `value.timestamp_micros()` as 8 little-endian bytes
+        self.append_primitive_header(VariantPrimitiveType::TimestampMicros);
+        self.append_slice(&value.timestamp_micros().to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        // payload: the naive value read as UTC, in microseconds, as 8 little-endian bytes
+        self.append_primitive_header(VariantPrimitiveType::TimestampNtzMicros);
+        self.append_slice(&value.and_utc().timestamp_micros().to_le_bytes());
+    }
+
+    fn append_time_micros(&mut self, value: chrono::NaiveTime) {
+        self.append_primitive_header(VariantPrimitiveType::Time);
+        let whole_seconds = u64::from(value.num_seconds_from_midnight());
+        let sub_second_micros = u64::from(value.nanosecond()) / 1_000;
+        self.append_slice(&(whole_seconds * 1_000_000 + sub_second_micros).to_le_bytes());
+    }
+
+    fn append_timestamp_nanos(&mut self, value: chrono::DateTime<chrono::Utc>) {
+        self.append_primitive_header(VariantPrimitiveType::TimestampNanos);
+        // `timestamp_nanos_opt` is `None` when the value does not fit in an i64; that case panics
+        self.append_slice(&value.timestamp_nanos_opt().unwrap().to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_nanos(&mut self, value: chrono::NaiveDateTime) {
+        self.append_primitive_header(VariantPrimitiveType::TimestampNtzNanos);
+        // `timestamp_nanos_opt` is `None` when the value does not fit in an i64; that case panics
+        self.append_slice(&value.and_utc().timestamp_nanos_opt().unwrap().to_le_bytes());
+    }
+
+    fn append_uuid(&mut self, value: Uuid) {
+        self.append_primitive_header(VariantPrimitiveType::Uuid);
+        self.append_slice(&value.into_bytes()); // payload: the bytes returned by `into_bytes`
+    }
+
+    fn append_decimal4(&mut self, decimal4: VariantDecimal4) {
+        self.append_primitive_header(VariantPrimitiveType::Decimal4);
+        self.append_u8(decimal4.scale()); // one byte holding `scale()`
+        self.append_slice(&decimal4.integer().to_le_bytes()); // then `integer()`, little-endian
+    }
+
+    fn append_decimal8(&mut self, decimal8: VariantDecimal8) {
+        self.append_primitive_header(VariantPrimitiveType::Decimal8);
+        self.append_u8(decimal8.scale()); // one byte holding `scale()`
+        self.append_slice(&decimal8.integer().to_le_bytes()); // then `integer()`, little-endian
+    }
+
+    fn append_decimal16(&mut self, decimal16: VariantDecimal16) {
+        self.append_primitive_header(VariantPrimitiveType::Decimal16);
+        self.append_u8(decimal16.scale());
+        self.append_slice(&decimal16.integer().to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.append_primitive_header(VariantPrimitiveType::Binary);
+        self.append_slice(&(value.len() as u32).to_le_bytes());
+        self.append_slice(value);
+    }
+
+    fn append_short_string(&mut self, value: ShortString) {
+        let inner = value.0;
+        self.append_u8(short_string_header(inner.len()));
+        self.append_slice(inner.as_bytes());
+    }
+
+    fn append_string(&mut self, value: &str) {
+        self.append_primitive_header(VariantPrimitiveType::String);
+        self.append_slice(&(value.len() as u32).to_le_bytes());
+        self.append_slice(value.as_bytes());
+    }
+
+    fn append_object<S: BuilderSpecificState>(state: ParentState<'_, S>, obj: VariantObject) {
+        let mut object_builder = ObjectBuilder::new(state, false);
+        object_builder.extend(obj.iter());
+        object_builder.finish();
+    }
+
+    fn try_append_object<S: BuilderSpecificState>(
+        state: ParentState<'_, S>,
+        obj: VariantObject,
+    ) -> Result<(), ArrowError> {
+        let mut object_builder = ObjectBuilder::new(state, false);
+
+        for res in obj.iter_try() {
+            let (field_name, value) = res?;
+            object_builder.try_insert(field_name, value)?;
+        }
+
+        object_builder.finish();
+        Ok(())
+    }
+
+    fn append_list<S: BuilderSpecificState>(state: ParentState<'_, S>, list: VariantList) {
+        let mut list_builder = ListBuilder::new(state, false);
+        list_builder.extend(list.iter());
+        list_builder.finish();
+    }
+
+    fn try_append_list<S: BuilderSpecificState>(
+        state: ParentState<'_, S>,
+        list: VariantList,
+    ) -> Result<(), ArrowError> {
+        let mut list_builder = ListBuilder::new(state, false);
+        for res in list.iter_try() {
+            let value = res?;
+            list_builder.try_append_value(value)?;
+        }
+
+        list_builder.finish();
+
+        Ok(())
+    }
+
+    /// Returns the current size of the underlying buffer
+    pub fn offset(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Appends a variant to the builder.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the variant contains duplicate field names in objects
+    /// when validation is enabled. For a fallible version, use [`ValueBuilder::try_append_variant`]
+    pub fn append_variant<S: BuilderSpecificState>(
+        mut state: ParentState<'_, S>,
+        variant: Variant<'_, '_>,
+    ) {
+        variant_append_value!(
+            state.value_builder(),
+            variant,
+            Variant::Object(obj) => return Self::append_object(state, obj),
+            Variant::List(list) => return Self::append_list(state, list)
+        );
+        state.finish();
+    }
+
+    /// Tries to append a variant to the provided [`ParentState`] instance.
+    ///
+    /// The attempt fails if the variant contains duplicate field names in objects when validation
+    /// is enabled.
+    pub fn try_append_variant<S: BuilderSpecificState>(
+        mut state: ParentState<'_, S>,
+        variant: Variant<'_, '_>,
+    ) -> Result<(), ArrowError> {
+        variant_append_value!(
+            state.value_builder(),
+            variant,
+            Variant::Object(obj) => return Self::try_append_object(state, obj),
+            Variant::List(list) => return Self::try_append_list(state, list)
+        );
+        state.finish();
+        Ok(())
+    }
+
+    /// Appends a variant to the buffer by copying raw bytes when possible.
+    ///
+    /// For objects and lists, this directly copies their underlying byte representation instead of
+    /// performing a logical copy and without touching the metadata builder. For other variant
+    /// types, this falls back to the standard append behavior.
+    ///
+    /// The caller must ensure that the metadata dictionary is already built and correct for
+    /// any objects or lists being appended.
+    pub fn append_variant_bytes<S: BuilderSpecificState>(
+        mut state: ParentState<'_, S>,
+        variant: Variant<'_, '_>,
+    ) {
+        let builder = state.value_builder();
+        variant_append_value!(
+            builder,
+            variant,
+            Variant::Object(obj) => builder.append_slice(obj.value),
+            Variant::List(list) => builder.append_slice(list.value)
+        );
+        state.finish();
     }
 }
 
-/// Write little-endian integer to buffer
-fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
-    for i in 0..nbytes {
-        buf[i as usize] = (value >> (i * 8)) as u8;
+/// A trait for managing state specific to different builder types.
+pub trait BuilderSpecificState: std::fmt::Debug {
+    /// Called by [`ParentState::finish`] to apply any pending builder-specific changes.
+    ///
+    /// The provided implementation does nothing by default.
+    ///
+    /// Parameters:
+    /// - `metadata_builder`: The metadata builder that was used
+    /// - `value_builder`: The value builder that was used
+    fn finish(
+        &mut self,
+        _metadata_builder: &mut dyn MetadataBuilder,
+        _value_builder: &mut ValueBuilder,
+    ) {
     }
+
+    /// Called by [`ParentState::drop`] to revert any changes that were eagerly applied, if
+    /// [`ParentState::finish`] was never invoked.
+    ///
+    /// The provided implementation does nothing by default.
+    ///
+    /// The base [`ParentState`] will handle rolling back the value and metadata builders,
+    /// but builder-specific state may need to revert its own changes.
+    fn rollback(&mut self) {}
+}
+
+/// Empty no-op implementation for top-level variant building
+impl BuilderSpecificState for () {}
+
+/// Tracks information needed to correctly finalize a nested builder.
+///
+/// A child builder has no effect on its parent unless/until its `finish` method is called, at
+/// which point the child appends the new value to the parent. As a (desirable) side effect,
+/// creating a parent state instance captures mutable references to a subset of the parent's fields,
+/// rendering the parent object completely unusable until the parent state goes out of scope. This
+/// ensures that at most one child builder can exist at a time.
+///
+/// The redundancy in `value_builder` and `metadata_builder` is because all the references come from
+/// the parent, and we cannot "split" a mutable reference across two objects (parent state and the
+/// child builder that uses it). So everything has to be here.
+#[derive(Debug)]
+pub struct ParentState<'a, S: BuilderSpecificState> {
+    pub(crate) value_builder: &'a mut ValueBuilder,
+    pub(crate) saved_value_builder_offset: usize,
+    pub(crate) metadata_builder: &'a mut dyn MetadataBuilder,
+    pub(crate) saved_metadata_builder_dict_size: usize,
+    pub(crate) builder_state: S,
+    pub(crate) finished: bool,
 }
 
-/// Helper to make room for header by moving data
-fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usize) {
-    let current_len = buffer.len();
-    buffer.resize(current_len + header_size, 0);
+impl<'a, S: BuilderSpecificState> ParentState<'a, S> {
+    /// Creates a new ParentState instance. The value and metadata builder
+    /// state is checkpointed and will roll back on drop, unless [`Self::finish`] is called. The
+    /// builder-specific state is governed by its own `finish` and `rollback` calls.
+    pub fn new(
+        value_builder: &'a mut ValueBuilder,
+        metadata_builder: &'a mut dyn MetadataBuilder,
+        builder_state: S,
+    ) -> Self {
+        Self {
+            saved_value_builder_offset: value_builder.offset(),
+            value_builder,
+            saved_metadata_builder_dict_size: metadata_builder.num_field_names(),
+            metadata_builder,
+            builder_state,
+            finished: false,
+        }
+    }
+
+    /// Marks the insertion as having succeeded and invokes
+    /// [`BuilderSpecificState::finish`]. Internal state will no longer roll back on drop.
+    pub fn finish(&mut self) {
+        self.builder_state
+            .finish(self.metadata_builder, self.value_builder);
+        self.finished = true
+    }
+
+    // Rolls back value and metadata builder changes and invokes [`BuilderSpecificState::rollback`].
+    fn rollback(&mut self) {
+        if self.finished {
+            return;
+        }
+
+        self.value_builder
+            .inner_mut()
+            .truncate(self.saved_value_builder_offset);
+        self.metadata_builder
+            .truncate_field_names(self.saved_metadata_builder_dict_size);
+        self.builder_state.rollback();
+    }
+
+    // Useful because e.g. `let b = self.value_builder;` fails compilation.
+    pub(crate) fn value_builder(&mut self) -> &mut ValueBuilder {
+        self.value_builder
+    }
+
+    // Useful because e.g. `let b = self.metadata_builder;` fails compilation.
+    pub(crate) fn metadata_builder(&mut self) -> &mut dyn MetadataBuilder {
+        self.metadata_builder
+    }
+}
 
-    let src_start = start_pos;
-    let src_end = current_len;
-    let dst_start = start_pos + header_size;
+impl<'a> ParentState<'a, ()> {
+    /// Creates a new instance suitable for a top-level variant builder
+    /// (e.g. [`VariantBuilder`]). The value and metadata builder state is checkpointed and will
+    /// roll back on drop, unless [`Self::finish`] is called.
+    pub fn variant(
+        value_builder: &'a mut ValueBuilder,
+        metadata_builder: &'a mut dyn MetadataBuilder,
+    ) -> Self {
+        Self::new(value_builder, metadata_builder, ())
+    }
+}
 
-    buffer.copy_within(src_start..src_end, dst_start);
+/// Automatically rolls back any unfinished `ParentState`.
+impl<S: BuilderSpecificState> Drop for ParentState<'_, S> {
+    fn drop(&mut self) {
+        self.rollback()
+    }
 }
 
-/// Builder for [`Variant`] values
+/// Top level builder for [`Variant`] values
 ///
 /// # Example: create a Primitive Int8
 /// ```
@@ -101,26 +504,47 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usi
 /// let mut builder = VariantBuilder::new();
 /// // Create an object builder that will write fields to the object
 /// let mut object_builder = builder.new_object();
-/// object_builder.append_value("first_name", "Jiaying");
-/// object_builder.append_value("last_name", "Li");
-/// object_builder.finish();
+/// object_builder.insert("first_name", "Jiaying");
+/// object_builder.insert("last_name", "Li");
+/// object_builder.finish(); // call finish to finalize the object
 /// // Finish the builder to get the metadata and value
 /// let (metadata, value) = builder.finish();
 /// // use the Variant API to verify the result
 /// let variant = Variant::try_new(&metadata, &value).unwrap();
-/// let Variant::Object(variant_object) = variant else {
-///   panic!("unexpected variant type")
-/// };
+/// let variant_object = variant.as_object().unwrap();
 /// assert_eq!(
-///   variant_object.field_by_name("first_name").unwrap(),
+///   variant_object.get("first_name"),
 ///   Some(Variant::from("Jiaying"))
 /// );
 /// assert_eq!(
-///   variant_object.field_by_name("last_name").unwrap(),
+///   variant_object.get("last_name"),
 ///   Some(Variant::from("Li"))
 /// );
 /// ```
 ///
+///
+/// You can also use the [`ObjectBuilder::with_field`] to add fields to the
+/// object
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder};
+/// // build the same object as above
+/// let mut builder = VariantBuilder::new();
+/// builder.new_object()
+///   .with_field("first_name", "Jiaying")
+///   .with_field("last_name", "Li")
+///   .finish();
+/// let (metadata, value) = builder.finish();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let variant_object = variant.as_object().unwrap();
+/// assert_eq!(
+///   variant_object.get("first_name"),
+///   Some(Variant::from("Jiaying"))
+/// );
+/// assert_eq!(
+///   variant_object.get("last_name"),
+///   Some(Variant::from("Li"))
+/// );
+/// ```
 /// # Example: Create a [`Variant::List`] (an Array)
 ///
 /// This example shows how to create an array of integers: `[1, 2, 3]`.
@@ -132,609 +556,1183 @@ fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: usi
 ///  list_builder.append_value(1i8);
 ///  list_builder.append_value(2i8);
 ///  list_builder.append_value(3i8);
+/// // call finish to finalize the list
 ///  list_builder.finish();
 /// // Finish the builder to get the metadata and value
 /// let (metadata, value) = builder.finish();
 /// // use the Variant API to verify the result
 /// let variant = Variant::try_new(&metadata, &value).unwrap();
-/// let Variant::List(variant_list) = variant else {
-///   panic!("unexpected variant type")
-/// };
+/// let variant_list = variant.as_list().unwrap();
 /// // Verify the list contents
 /// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
 /// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
 /// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
 /// ```
 ///
+/// You can also use the [`ListBuilder::with_value`] to append values to the
+/// list.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder};
+///  let mut builder = VariantBuilder::new();
+///  builder.new_list()
+///      .with_value(1i8)
+///      .with_value(2i8)
+///      .with_value(3i8)
+///      .finish();
+/// let (metadata, value) = builder.finish();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let variant_list = variant.as_list().unwrap();
+/// assert_eq!(variant_list.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_list.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_list.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
 /// # Example: [`Variant::List`] of  [`Variant::Object`]s
 ///
-/// THis example shows how to create an list  of objects:
+/// This example shows how to create a list of objects:
 /// ```json
 /// [
-///  {
-///   "first_name": "Jiaying",
-///  "last_name": "Li"
-/// },
 ///   {
-///    "first_name": "Malthe",
-///    "last_name": "Karbo"
-/// }
+///      "id": 1,
+///      "type": "Cauliflower"
+///   },
+///   {
+///      "id": 2,
+///      "type": "Beets"
+///   }
 /// ]
 /// ```
+/// ```
+/// use parquet_variant::{Variant, VariantBuilder};
+/// let mut builder = VariantBuilder::new();
 ///
-/// TODO
+/// // Create a builder that will write elements to the list
+/// let mut list_builder = builder.new_list();
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.insert("id", 1);
+///     object_builder.insert("type", "Cauliflower");
+///     object_builder.finish();
+/// }
+///
+/// {
+///     let mut object_builder = list_builder.new_object();
+///     object_builder.insert("id", 2);
+///     object_builder.insert("type", "Beets");
+///     object_builder.finish();
+/// }
+///
+/// list_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let variant_list = variant.as_list().unwrap();
+///
+///
+/// let obj1_variant = variant_list.get(0).unwrap();
+/// let obj1 = obj1_variant.as_object().unwrap();
+/// assert_eq!(
+///     obj1.get("id"),
+///     Some(Variant::from(1))
+/// );
+/// assert_eq!(
+///     obj1.get("type"),
+///     Some(Variant::from("Cauliflower"))
+/// );
 ///
+/// let obj2_variant = variant_list.get(1).unwrap();
+/// let obj2 = obj2_variant.as_object().unwrap();
+///
+/// assert_eq!(
+///     obj2.get("id"),
+///     Some(Variant::from(2))
+/// );
+/// assert_eq!(
+///     obj2.get("type"),
+///     Some(Variant::from("Beets"))
+/// );
+///
+/// ```
+/// # Example: Unique Field Validation
+///
+/// This example shows how enabling unique field validation will cause an error
+/// if the same field is inserted more than once.
+/// ```
+/// # use parquet_variant::VariantBuilder;
+/// #
+/// let mut builder = VariantBuilder::new().with_validate_unique_fields(true);
+///
+/// // When validation is enabled, try_with_field will return an error
+/// let result = builder
+///     .new_object()
+///     .with_field("a", 1)
+///     .try_with_field("a", 2);
+/// assert!(result.is_err());
+/// ```
+///
+/// # Example: Sorted dictionaries
+///
+/// This example shows how to create a [`VariantBuilder`] with a pre-sorted field dictionary
+/// to improve field access performance when reading [`Variant`] objects.
+///
+/// You can use [`VariantBuilder::with_field_names`] to add multiple field names at once:
+/// ```
+/// use parquet_variant::{Variant, VariantBuilder};
+/// let mut builder = VariantBuilder::new()
+///     .with_field_names(["age", "name", "score"].into_iter());
+///
+/// let mut obj = builder.new_object();
+/// obj.insert("name", "Alice");
+/// obj.insert("age", 30);
+/// obj.insert("score", 95.5);
+/// obj.finish();
+///
+/// let (metadata, value) = builder.finish();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// ```
+///
+/// Alternatively, you can use [`VariantBuilder::add_field_name`] to add field names one by one:
+/// ```
+/// use parquet_variant::{Variant, VariantBuilder};
+/// let mut builder = VariantBuilder::new();
+/// builder.add_field_name("age"); // field id = 0
+/// builder.add_field_name("name"); // field id = 1
+/// builder.add_field_name("score"); // field id = 2
+///
+/// let mut obj = builder.new_object();
+/// obj.insert("name", "Bob"); // reuses the pre-registered field id = 1
+/// obj.insert("age", 25);
+/// obj.insert("score", 88.0);
+/// obj.finish();
+///
+/// let (metadata, value) = builder.finish();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// ```
+#[derive(Default, Debug)]
 pub struct VariantBuilder {
-    buffer: Vec<u8>,
-    dict: HashMap<String, u32>,
-    dict_keys: Vec<String>,
+    value_builder: ValueBuilder,
+    metadata_builder: WritableMetadataBuilder,
+    validate_unique_fields: bool,
 }
 
 impl VariantBuilder {
+    /// Create a new VariantBuilder with new underlying buffers
     pub fn new() -> Self {
         Self {
-            buffer: Vec::new(),
-            dict: HashMap::new(),
-            dict_keys: Vec::new(),
+            value_builder: ValueBuilder::new(),
+            metadata_builder: WritableMetadataBuilder::default(),
+            validate_unique_fields: false,
         }
     }
 
-    fn append_null(&mut self) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Null));
+    /// Create a new VariantBuilder with pre-existing [`VariantMetadata`].
+    pub fn with_metadata(mut self, metadata: VariantMetadata) -> Self {
+        self.metadata_builder.extend(metadata.iter());
+
+        self
     }
 
-    fn append_bool(&mut self, value: bool) {
-        let primitive_type = if value {
-            VariantPrimitiveType::BooleanTrue
-        } else {
-            VariantPrimitiveType::BooleanFalse
-        };
-        self.buffer.push(primitive_header(primitive_type));
+    /// Enables validation of unique field keys in nested objects.
+    ///
+    /// This setting is propagated to all [`ObjectBuilder`]s created through this [`VariantBuilder`]
+    /// (including via any [`ListBuilder`]), and causes fallible insertions such as
+    /// [`ObjectBuilder::try_insert`] to return an error if a duplicate key is inserted.
+    pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
+        self.validate_unique_fields = validate_unique_fields;
+        self
     }
 
-    fn append_int8(&mut self, value: i8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int8));
-        self.buffer.push(value as u8);
+    /// This method pre-populates the field name dictionary in the Variant metadata with
+    /// the specified field names, in order.
+    ///
+    /// You can use this to pre-populate a [`VariantBuilder`] with a sorted dictionary if you
+    /// know the field names beforehand. Sorted dictionaries can accelerate field access when
+    /// reading [`Variant`]s.
+    pub fn with_field_names<'a>(mut self, field_names: impl IntoIterator<Item = &'a str>) -> Self {
+        self.metadata_builder.extend(field_names);
+
+        self
     }
 
-    fn append_int16(&mut self, value: i16) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int16));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
+    /// Builder-style API for appending a value and returning self to enable method chaining.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the variant contains duplicate field names in objects
+    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_with_value`].
+    pub fn with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, value: T) -> Self {
+        self.append_value(value);
+        self
     }
 
-    fn append_int32(&mut self, value: i32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int32));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
+    /// Builder-style API for appending a value and returning self to enable method chaining.
+    ///
+    /// This is the fallible version of [`VariantBuilder::with_value`].
+    pub fn try_with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        mut self,
+        value: T,
+    ) -> Result<Self, ArrowError> {
+        self.try_append_value(value)?;
+        Ok(self)
     }
 
-    fn append_int64(&mut self, value: i64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Int64));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
+    /// This method reserves capacity for field names in the Variant metadata,
+    /// which can improve performance when you know the approximate number of unique field
+    /// names that will be used across all objects in the [`Variant`].
+    pub fn reserve(&mut self, capacity: usize) {
+        self.metadata_builder.field_names.reserve(capacity);
     }
 
-    fn append_float(&mut self, value: f32) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Float));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
+    /// Adds a single field name to the field name dictionary in the Variant metadata.
+    ///
+    /// This method does the same thing as [`VariantBuilder::with_field_names`] but adds one field name at a time.
+    pub fn add_field_name(&mut self, field_name: &str) {
+        self.metadata_builder.upsert_field_name(field_name);
     }
 
-    fn append_double(&mut self, value: f64) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Double));
-        self.buffer.extend_from_slice(&value.to_le_bytes());
+    /// Create a [`ListBuilder`] for creating [`Variant::List`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_list(&mut self) -> ListBuilder<'_, ()> {
+        let parent_state =
+            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
+        ListBuilder::new(parent_state, self.validate_unique_fields)
     }
 
-    fn append_date(&mut self, value: chrono::NaiveDate) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Date));
-        let days_since_epoch = value.signed_duration_since(UNIX_EPOCH_DATE).num_days() as i32;
-        self.buffer
-            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> {
+        let parent_state =
+            ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
+        ObjectBuilder::new(parent_state, self.validate_unique_fields)
     }
 
-    fn append_timestamp_micros(&mut self, value: chrono::DateTime<chrono::Utc>) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
-        let micros = value.timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    /// Append a value to the builder.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the variant contains duplicate field names in objects
+    /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_append_value`]
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder};
+    /// let mut builder = VariantBuilder::new();
+    /// // most primitive types can be appended directly as they implement `Into<Variant>`
+    /// builder.append_value(42i8);
+    /// ```
+    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
+        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
+        ValueBuilder::append_variant(state, value.into())
     }
 
-    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
-        let micros = value.and_utc().timestamp_micros();
-        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    /// Append a value to the builder, returning an error rather than panicking on failure.
+    pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        &mut self,
+        value: T,
+    ) -> Result<(), ArrowError> {
+        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
+        ValueBuilder::try_append_variant(state, value.into())
     }
 
-    fn append_decimal4(&mut self, integer: i32, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal4));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    /// Appends a variant value to the builder by copying raw bytes when possible.
+    ///
+    /// For objects and lists, this directly copies their underlying byte representation instead of
+    /// performing a logical copy and without touching the metadata builder. For other variant
+    /// types, this falls back to the standard append behavior.
+    ///
+    /// The caller must ensure that the metadata dictionary entries are already built and correct for
+    /// any objects or lists being appended.
+    pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
+        let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder);
+        ValueBuilder::append_variant_bytes(state, value.into());
     }
 
-    fn append_decimal8(&mut self, integer: i64, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal8));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    /// Finish the builder and return the metadata and value buffers.
+    pub fn finish(mut self) -> (Vec<u8>, Vec<u8>) {
+        self.metadata_builder.finish();
+        (
+            self.metadata_builder.into_inner(),
+            self.value_builder.into_inner(),
+        )
     }
+}
 
-    fn append_decimal16(&mut self, integer: i128, scale: u8) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Decimal16));
-        self.buffer.push(scale);
-        self.buffer.extend_from_slice(&integer.to_le_bytes());
+/// Extends [`VariantBuilder`] to help building nested [`Variant`]s
+///
+/// Allows users to append values to a [`VariantBuilder`], [`ListBuilder`] or
+/// [`ObjectBuilder`] using the same interface.
+pub trait VariantBuilderExt {
+    /// The builder specific state used by nested builders
+    type State<'a>: BuilderSpecificState + 'a
+    where
+        Self: 'a;
+
+    /// Appends a NULL value to this builder. The semantics depend on the implementation, but will
+    /// often translate to appending a [`Variant::Null`] value.
+    fn append_null(&mut self);
+
+    /// Appends a new variant value to this builder. See e.g. [`VariantBuilder::append_value`].
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>);
+
+    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Panics if the nested
+    /// builder cannot be created, see e.g. [`ObjectBuilder::new_list`].
+    fn new_list(&mut self) -> ListBuilder<'_, Self::State<'_>> {
+        self.try_new_list().unwrap()
     }
 
-    fn append_binary(&mut self, value: &[u8]) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::Binary));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value);
+    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Panics if the
+    /// nested builder cannot be created, see e.g. [`ObjectBuilder::new_object`].
+    fn new_object(&mut self) -> ObjectBuilder<'_, Self::State<'_>> {
+        self.try_new_object().unwrap()
     }
 
-    fn append_short_string(&mut self, value: ShortString) {
-        let inner = value.0;
-        self.buffer.push(short_string_header(inner.len()));
-        self.buffer.extend_from_slice(inner.as_bytes());
+    /// Creates a nested list builder. See e.g. [`VariantBuilder::new_list`]. Returns an error if
+    /// the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_list`].
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError>;
+
+    /// Creates a nested object builder. See e.g. [`VariantBuilder::new_object`]. Returns an error
+    /// if the nested builder cannot be created, see e.g. [`ObjectBuilder::try_new_object`].
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError>;
+}
+
+impl VariantBuilderExt for VariantBuilder {
+    type State<'a>
+        = ()
+    where
+        Self: 'a;
+
+    /// Variant values cannot encode NULL, only [`Variant::Null`]. This is different from the column
+    /// that holds variant values being NULL at some positions.
+    fn append_null(&mut self) {
+        self.append_value(Variant::Null);
+    }
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.append_value(value);
     }
 
-    fn append_string(&mut self, value: &str) {
-        self.buffer
-            .push(primitive_header(VariantPrimitiveType::String));
-        self.buffer
-            .extend_from_slice(&(value.len() as u32).to_le_bytes());
-        self.buffer.extend_from_slice(value.as_bytes());
-    }
-
-    /// Add key to dictionary, return its ID
-    fn add_key(&mut self, key: &str) -> u32 {
-        use std::collections::hash_map::Entry;
-        match self.dict.entry(key.to_string()) {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
-                let id = self.dict_keys.len() as u32;
-                entry.insert(id);
-                self.dict_keys.push(key.to_string());
-                id
-            }
-        }
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(self.new_list())
     }
 
-    fn offset(&self) -> usize {
-        self.buffer.len()
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(self.new_object())
     }
+}
 
-    /// Create an [`ListBuilder`] for creating [`Variant::List`] values.
-    ///
-    /// See the examples on [`VariantBuilder`] for usage.
-    pub fn new_list(&mut self) -> ListBuilder {
-        ListBuilder::new(self)
+#[cfg(test)]
+mod tests {
+    use crate::{VariantMetadata, builder::metadata::ReadOnlyMetadataBuilder};
+
+    use super::*;
+    #[test]
+    fn test_simple_usage() {
+        test_variant_roundtrip((), Variant::Null);
+        test_variant_roundtrip(true, Variant::BooleanTrue);
+        test_variant_roundtrip(false, Variant::BooleanFalse);
+        test_variant_roundtrip(42i8, Variant::Int8(42));
+        test_variant_roundtrip(1234i16, Variant::Int16(1234));
+        test_variant_roundtrip(123456i32, Variant::Int32(123456));
+        test_variant_roundtrip(123456789i64, Variant::Int64(123456789));
+        test_variant_roundtrip(1.5f32, Variant::Float(1.5));
+        test_variant_roundtrip(2.5f64, Variant::Double(2.5));
+        test_variant_roundtrip("hello", Variant::ShortString(ShortString("hello")));
+
+        // Test long string (> 63 bytes)
+        let long_string = "This is a very long string that exceeds the short string limit of 63 bytes and should be encoded as a regular string type instead of a short string";
+        test_variant_roundtrip(long_string, Variant::String(long_string));
+
+        // Test binary data
+        let binary_data = b"binary data";
+        test_variant_roundtrip(
+            binary_data.as_slice(),
+            Variant::Binary(binary_data.as_slice()),
+        );
     }
 
-    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
-    ///
-    /// See the examples on [`VariantBuilder`] for usage.
-    pub fn new_object(&mut self) -> ObjectBuilder {
-        ObjectBuilder::new(self)
+    /// Helper function to test that a value can be built and reconstructed correctly
+    fn test_variant_roundtrip<'m, 'd, T: Into<Variant<'m, 'd>>>(input: T, expected: Variant) {
+        let mut builder = VariantBuilder::new();
+        builder.append_value(input);
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap_or_else(|_| {
+            panic!("Failed to create variant from metadata and value: {metadata:?}, {value:?}")
+        });
+        assert_eq!(variant, expected);
     }
 
-    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
-        let nkeys = self.dict_keys.len();
+    #[test]
+    fn test_nested_object_with_lists() {
+        /*
+        {
+            "door 1": {
+                "items": ["apple", false ]
+            }
+        }
 
-        // Calculate metadata size
-        let total_dict_size: usize = self.dict_keys.iter().map(|k| k.len()).sum();
+        */
 
-        // Determine appropriate offset size based on the larger of dict size or total string size
-        let max_offset = std::cmp::max(total_dict_size, nkeys);
-        let offset_size = int_size(max_offset);
+        let mut builder = VariantBuilder::new();
+        {
+            let mut outer_object_builder = builder.new_object();
+            {
+                let mut inner_object_builder = outer_object_builder.new_object("door 1");
+
+                // create inner_object_list
+                inner_object_builder
+                    .new_list("items")
+                    .with_value("apple")
+                    .with_value(false)
+                    .finish();
+
+                inner_object_builder.finish();
+            }
 
-        let offset_start = 1 + offset_size as usize;
-        let string_start = offset_start + (nkeys + 1) * offset_size as usize;
-        let metadata_size = string_start + total_dict_size;
+            outer_object_builder.finish();
+        }
 
-        // Pre-allocate exact size to avoid reallocations
-        let mut metadata = vec![0u8; metadata_size];
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_object = variant.as_object().unwrap();
 
-        // Write header: version=1, not sorted, with calculated offset_size
-        metadata[0] = 0x01 | ((offset_size - 1) << 6);
+        assert_eq!(outer_object.len(), 1);
+        assert_eq!(outer_object.field_name(0).unwrap(), "door 1");
 
-        // Write dictionary size
-        write_offset(&mut metadata[1..], nkeys, offset_size);
+        let inner_object_variant = outer_object.field(0).unwrap();
+        let inner_object = inner_object_variant.as_object().unwrap();
 
-        // Write offsets and string data
-        let mut cur_offset = 0;
-        for (i, key) in self.dict_keys.iter().enumerate() {
-            write_offset(
-                &mut metadata[offset_start + i * offset_size as usize..],
-                cur_offset,
-                offset_size,
-            );
-            let start = string_start + cur_offset;
-            metadata[start..start + key.len()].copy_from_slice(key.as_bytes());
-            cur_offset += key.len();
-        }
-        // Write final offset
-        write_offset(
-            &mut metadata[offset_start + nkeys * offset_size as usize..],
-            cur_offset,
-            offset_size,
-        );
+        assert_eq!(inner_object.len(), 1);
+        assert_eq!(inner_object.field_name(0).unwrap(), "items");
 
-        (metadata, self.buffer)
+        let items_variant = inner_object.field(0).unwrap();
+        let items_list = items_variant.as_list().unwrap();
+
+        assert_eq!(items_list.len(), 2);
+        assert_eq!(items_list.get(0).unwrap(), Variant::from("apple"));
+        assert_eq!(items_list.get(1).unwrap(), Variant::from(false));
     }
 
-    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
-        let variant = value.into();
-        match variant {
-            Variant::Null => self.append_null(),
-            Variant::BooleanTrue => self.append_bool(true),
-            Variant::BooleanFalse => self.append_bool(false),
-            Variant::Int8(v) => self.append_int8(v),
-            Variant::Int16(v) => self.append_int16(v),
-            Variant::Int32(v) => self.append_int32(v),
-            Variant::Int64(v) => self.append_int64(v),
-            Variant::Date(v) => self.append_date(v),
-            Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
-            Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v),
-            Variant::Decimal4 { integer, scale } => self.append_decimal4(integer, scale),
-            Variant::Decimal8 { integer, scale } => self.append_decimal8(integer, scale),
-            Variant::Decimal16 { integer, scale } => self.append_decimal16(integer, scale),
-            Variant::Float(v) => self.append_float(v),
-            Variant::Double(v) => self.append_double(v),
-            Variant::Binary(v) => self.append_binary(v),
-            Variant::String(s) => self.append_string(s),
-            Variant::ShortString(s) => self.append_short_string(s),
-            Variant::Object(_) | Variant::List(_) => {
-                unreachable!("Object and List variants cannot be created through Into<Variant>")
-            }
+    #[test]
+    fn test_sorted_dictionary() {
+        // both construction paths must register the same field names in the metadata builder
+        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
+
+        let mut variant2 = {
+            let mut builder = VariantBuilder::new();
+
+            builder.add_field_name("b");
+            builder.add_field_name("c");
+            builder.add_field_name("d");
+
+            builder
+        };
+
+        assert_eq!(
+            variant1.metadata_builder.field_names,
+            variant2.metadata_builder.field_names
+        );
+
+        // both metadata builders must report their field names as sorted
+        assert!(variant1.metadata_builder.is_sorted);
+        assert!(variant2.metadata_builder.is_sorted);
+
+        {
+            // bad case: adding "a" after "b", "c", "d" breaks the sort order
+            variant2.add_field_name("a");
+            assert!(!variant2.metadata_builder.is_sorted);
+
+            // per the spec, a variant must fail to build when only metadata (and no value) is provided
+            let (m, v) = variant2.finish();
+            let res = Variant::try_new(&m, &v);
+            assert!(res.is_err());
+
+            // the field names are not sorted, so the encoded metadata header must say so
+            let header = VariantMetadata::try_new(&m).unwrap();
+            assert!(!header.is_sorted());
         }
+
+        // write out variant1 and make sure the sorted flag is properly encoded
+        variant1.append_value(false);
+
+        let (m, v) = variant1.finish();
+        let res = Variant::try_new(&m, &v);
+        assert!(res.is_ok());
+
+        let header = VariantMetadata::try_new(&m).unwrap();
+        assert!(header.is_sorted());
     }
-}
 
-impl Default for VariantBuilder {
-    fn default() -> Self {
-        Self::new()
+    #[test]
+    fn test_object_sorted_dictionary() {
+        // predefine the list of field names, in sorted order
+        let mut variant1 = VariantBuilder::new().with_field_names(["a", "b", "c"]);
+        let mut obj = variant1.new_object();
+
+        obj.insert("c", true);
+        obj.insert("a", false);
+        obj.insert("b", ());
+
+        // verify the field ids, listed in insertion order, match the predefined names' positions
+        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
+        assert_eq!(field_ids_by_insert_order, vec![2, 0, 1]);
+
+        // add a field name that wasn't predefined but doesn't break the sort order
+        obj.insert("d", 2);
+        obj.finish();
+
+        let (metadata, value) = variant1.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_sorted());
+
+        // verify the object's fields are iterated in field-name order
+        let object = variant.as_object().unwrap();
+        let field_names = object
+            .iter()
+            .map(|(field_name, _)| field_name)
+            .collect::<Vec<_>>();
+
+        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
     }
-}
 
-/// A builder for creating [`Variant::List`] values.
-///
-/// See the examples on [`VariantBuilder`] for usage.
-pub struct ListBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
-    offsets: Vec<usize>,
-}
+    #[test]
+    fn test_object_not_sorted_dictionary() {
+        // predefine the list of field names
+        let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"]);
+        let mut obj = variant1.new_object();
 
-impl<'a> ListBuilder<'a> {
-    fn new(parent: &'a mut VariantBuilder) -> Self {
-        let start_pos = parent.offset();
-        Self {
-            parent,
-            start_pos,
-            offsets: vec![0],
-        }
+        obj.insert("c", true);
+        obj.insert("d", false);
+        obj.insert("b", ());
+
+        // verify the field ids are correct
+        let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>();
+        assert_eq!(field_ids_by_insert_order, vec![1, 2, 0]);
+
+        // add a field name that wasn't pre-defined but breaks the sort order
+        obj.insert("a", 2);
+        obj.finish();
+
+        let (metadata, value) = variant1.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(!metadata.is_sorted());
+
+        // verify object field names are sorted by field name order
+        let object = variant.as_object().unwrap();
+        let field_names = object
+            .iter()
+            .map(|(field_name, _)| field_name)
+            .collect::<Vec<_>>();
+
+        assert_eq!(field_names, vec!["a", "b", "c", "d"]);
     }
 
-    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
-        self.parent.append_value(value);
-        let element_end = self.parent.offset() - self.start_pos;
-        self.offsets.push(element_end);
+    #[test]
+    fn test_building_sorted_dictionary() {
+        let mut builder = VariantBuilder::new();
+        assert!(!builder.metadata_builder.is_sorted);
+        assert_eq!(builder.metadata_builder.num_field_names(), 0);
+
+        builder.add_field_name("a");
+
+        assert!(builder.metadata_builder.is_sorted);
+        assert_eq!(builder.metadata_builder.num_field_names(), 1);
+
+        let builder = builder.with_field_names(["b", "c", "d"]);
+
+        assert!(builder.metadata_builder.is_sorted);
+        assert_eq!(builder.metadata_builder.num_field_names(), 4);
+
+        let builder = builder.with_field_names(["z", "y"]);
+        assert!(!builder.metadata_builder.is_sorted);
+        assert_eq!(builder.metadata_builder.num_field_names(), 6);
     }
 
-    pub fn finish(self) {
-        let data_size = self.parent.offset() - self.start_pos;
-        let num_elements = self.offsets.len() - 1;
-        let is_large = num_elements > u8::MAX as usize;
-        let size_bytes = if is_large { 4 } else { 1 };
-        let offset_size = int_size(data_size);
-        let header_size = 1 + size_bytes + (num_elements + 1) * offset_size as usize;
+    #[test]
+    fn test_variant_builder_to_list_builder_no_finish() {
+        // Create a list builder but never finish it
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+        list_builder.append_value("hi");
+        drop(list_builder);
 
-        make_room_for_header(&mut self.parent.buffer, self.start_pos, header_size);
+        builder.append_value(42i8);
 
-        // Write header
-        let mut pos = self.start_pos;
-        self.parent.buffer[pos] = array_header(is_large, offset_size);
-        pos += 1;
+        // The original builder should be unchanged
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty());
 
-        if is_large {
-            self.parent.buffer[pos..pos + 4].copy_from_slice(&(num_elements as u32).to_le_bytes());
-            pos += 4;
-        } else {
-            self.parent.buffer[pos] = num_elements as u8;
-            pos += 1;
-        }
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(42));
+    }
 
-        // Write offsets
-        for offset in &self.offsets {
-            write_offset(
-                &mut self.parent.buffer[pos..pos + offset_size as usize],
-                *offset,
-                offset_size,
-            );
-            pos += offset_size as usize;
-        }
+    #[test]
+    fn test_variant_builder_to_object_builder_no_finish() {
+        // Create an object builder but never finish it
+        let mut builder = VariantBuilder::new();
+        let mut object_builder = builder.new_object();
+        object_builder.insert("name", "unknown");
+        drop(object_builder);
+
+        builder.append_value(42i8);
+
+        // The original builder should be unchanged
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty()); // rolled back
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(42));
     }
-}
 
-/// A builder for creating [`Variant::Object`] values.
-///
-/// See the examples on [`VariantBuilder`] for usage.
-pub struct ObjectBuilder<'a> {
-    parent: &'a mut VariantBuilder,
-    start_pos: usize,
-    fields: Vec<(u32, usize)>, // (field_id, offset)
-}
+    #[test]
+    fn test_list_builder_to_list_builder_inner_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+        list_builder.append_value(1i8);
 
-impl<'a> ObjectBuilder<'a> {
-    fn new(parent: &'a mut VariantBuilder) -> Self {
-        let start_pos = parent.offset();
-        Self {
-            parent,
-            start_pos,
-            fields: Vec::new(),
-        }
+        // Create a nested list builder but never finish it
+        let mut nested_list_builder = list_builder.new_list();
+        nested_list_builder.append_value("hi");
+        drop(nested_list_builder);
+
+        list_builder.append_value(2i8);
+
+        // The parent list should only contain the original values
+        list_builder.finish();
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty());
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 2);
+        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
+        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
     }
 
-    /// Add a field with key and value to the object
-    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, key: &str, value: T) {
-        let id = self.parent.add_key(key);
-        let field_start = self.parent.offset() - self.start_pos;
-        self.parent.append_value(value);
-        self.fields.push((id, field_start));
+    #[test]
+    fn test_list_builder_to_list_builder_outer_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+        list_builder.append_value(1i8);
+
+        // Create a nested list builder and finish it
+        let mut nested_list_builder = list_builder.new_list();
+        nested_list_builder.append_value("hi");
+        nested_list_builder.finish();
+
+        // Drop the outer list builder without finishing it
+        drop(list_builder);
+
+        builder.append_value(2i8);
+
+        // Only the second attempt should appear in the final variant
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty());
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(2));
     }
 
-    /// Finalize object with sorted fields
-    pub fn finish(mut self) {
-        // Sort fields by key name
-        self.fields.sort_by(|a, b| {
-            let key_a = &self.parent.dict_keys[a.0 as usize];
-            let key_b = &self.parent.dict_keys[b.0 as usize];
-            key_a.cmp(key_b)
-        });
+    #[test]
+    fn test_list_builder_to_object_builder_inner_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+        list_builder.append_value(1i8);
+
+        // Create a nested object builder but never finish it
+        let mut nested_object_builder = list_builder.new_object();
+        nested_object_builder.insert("name", "unknown");
+        drop(nested_object_builder);
 
-        let data_size = self.parent.offset() - self.start_pos;
-        let num_fields = self.fields.len();
-        let is_large = num_fields > u8::MAX as usize;
-        let size_bytes = if is_large { 4 } else { 1 };
+        list_builder.append_value(2i8);
 
-        let max_id = self.fields.iter().map(|&(id, _)| id).max().unwrap_or(0);
-        let id_size = int_size(max_id as usize);
-        let offset_size = int_size(data_size);
+        // The parent list should only contain the original values
+        list_builder.finish();
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty());
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 2);
+        assert_eq!(list.get(0).unwrap(), Variant::Int8(1));
+        assert_eq!(list.get(1).unwrap(), Variant::Int8(2));
+    }
 
-        let header_size = 1
-            + size_bytes
-            + num_fields * id_size as usize
-            + (num_fields + 1) * offset_size as usize;
+    #[test]
+    fn test_list_builder_to_object_builder_outer_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+        list_builder.append_value(1i8);
 
-        make_room_for_header(&mut self.parent.buffer, self.start_pos, header_size);
+        // Create a nested object builder and finish it
+        let mut nested_object_builder = list_builder.new_object();
+        nested_object_builder.insert("name", "unknown");
+        nested_object_builder.finish();
 
-        // Write header
-        let mut pos = self.start_pos;
-        self.parent.buffer[pos] = object_header(is_large, id_size, offset_size);
-        pos += 1;
+        // Drop the outer list builder without finishing it
+        drop(list_builder);
 
-        if is_large {
-            self.parent.buffer[pos..pos + 4].copy_from_slice(&(num_fields as u32).to_le_bytes());
-            pos += 4;
-        } else {
-            self.parent.buffer[pos] = num_fields as u8;
-            pos += 1;
-        }
+        builder.append_value(2i8);
 
-        // Write field IDs (sorted order)
-        for &(id, _) in &self.fields {
-            write_offset(
-                &mut self.parent.buffer[pos..pos + id_size as usize],
-                id as usize,
-                id_size,
-            );
-            pos += id_size as usize;
-        }
+        // Only the second attempt should appear in the final variant
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty()); // rolled back
 
-        // Write field offsets
-        for &(_, offset) in &self.fields {
-            write_offset(
-                &mut self.parent.buffer[pos..pos + offset_size as usize],
-                offset,
-                offset_size,
-            );
-            pos += offset_size as usize;
-        }
-        write_offset(
-            &mut self.parent.buffer[pos..pos + offset_size as usize],
-            data_size,
-            offset_size,
-        );
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(2));
     }
-}
 
-#[cfg(test)]
-mod tests {
-    use super::*;
+    #[test]
+    fn test_object_builder_to_list_builder_inner_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut object_builder = builder.new_object();
+        object_builder.insert("first", 1i8);
+
+        // Create a nested list builder but never finish it
+        let mut nested_list_builder = object_builder.new_list("nested");
+        nested_list_builder.append_value("hi");
+        drop(nested_list_builder);
+
+        object_builder.insert("second", 2i8);
+
+        // The parent object should only contain the original fields
+        object_builder.finish();
+        let (metadata, value) = builder.finish();
+
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert_eq!(metadata.len(), 2);
+        assert_eq!(&metadata[0], "first");
+        assert_eq!(&metadata[1], "second");
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        let obj = variant.as_object().unwrap();
+        assert_eq!(obj.len(), 2);
+        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
+        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
+    }
 
     #[test]
-    fn test_simple_usage() {
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(());
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Null);
-        }
+    fn test_object_builder_to_list_builder_outer_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut object_builder = builder.new_object();
+        object_builder.insert("first", 1i8);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(true);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::BooleanTrue);
-        }
+        // Create a nested list builder and finish it
+        let mut nested_list_builder = object_builder.new_list("nested");
+        nested_list_builder.append_value("hi");
+        nested_list_builder.finish();
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(false);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::BooleanFalse);
-        }
+        // Drop the outer object builder without finishing it
+        drop(object_builder);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(42i8);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Int8(42));
-        }
+        builder.append_value(2i8);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(1234i16);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Int16(1234));
-        }
+        // Only the second attempt should appear in the final variant
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert!(metadata.is_empty()); // rolled back
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(123456i32);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Int32(123456));
-        }
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(2));
+    }
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(123456789i64);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Int64(123456789));
-        }
+    #[test]
+    fn test_object_builder_to_object_builder_inner_no_finish() {
+        let mut builder = VariantBuilder::new();
+        let mut object_builder = builder.new_object();
+        object_builder.insert("first", 1i8);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(1.5f32);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Float(1.5));
-        }
+        // Create a nested object builder but never finish it
+        let mut nested_object_builder = object_builder.new_object("nested");
+        nested_object_builder.insert("name", "unknown");
+        drop(nested_object_builder);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value(2.5f64);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Double(2.5));
-        }
+        object_builder.insert("second", 2i8);
 
-        {
-            let mut builder = VariantBuilder::new();
-            builder.append_value("hello");
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::ShortString(ShortString("hello")));
-        }
+        // The parent object should only contain the original fields
+        object_builder.finish();
+        let (metadata, value) = builder.finish();
 
-        {
-            let mut builder = VariantBuilder::new();
-            let long_string = "This is a very long string that exceeds the short string limit of 63 bytes and should be encoded as a regular string type instead of a short string";
-            builder.append_value(long_string);
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::String(long_string));
-        }
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert_eq!(metadata.len(), 2); // the fields of nested_object_builder have been rolled back
+        assert_eq!(&metadata[0], "first");
+        assert_eq!(&metadata[1], "second");
 
-        {
-            let mut builder = VariantBuilder::new();
-            let binary_data = b"binary data";
-            builder.append_value(binary_data.as_slice());
-            let (metadata, value) = builder.finish();
-            let variant = Variant::try_new(&metadata, &value).unwrap();
-            assert_eq!(variant, Variant::Binary(binary_data.as_slice()));
-        }
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        let obj = variant.as_object().unwrap();
+        assert_eq!(obj.len(), 2);
+        assert_eq!(obj.get("first"), Some(Variant::Int8(1)));
+        assert_eq!(obj.get("second"), Some(Variant::Int8(2)));
     }
 
     #[test]
-    fn test_list() {
+    fn test_object_builder_to_object_builder_outer_no_finish() {
         let mut builder = VariantBuilder::new();
+        let mut object_builder = builder.new_object();
+        object_builder.insert("first", 1i8);
+
+        // Create a nested object builder and finish it
+        let mut nested_object_builder = object_builder.new_object("nested");
+        nested_object_builder.insert("name", "unknown");
+        nested_object_builder.finish();
+
+        // Drop the outer object builder without finishing it
+        drop(object_builder);
 
+        builder.append_value(2i8);
+
+        // Only the second attempt should appear in the final variant
+        let (metadata, value) = builder.finish();
+        let metadata = VariantMetadata::try_new(&metadata).unwrap();
+        assert_eq!(metadata.len(), 0); // rolled back
+
+        let variant = Variant::try_new_with_metadata(metadata, &value).unwrap();
+        assert_eq!(variant, Variant::Int8(2));
+    }
+
+    // Make sure that we can correctly build deeply nested objects even when some of the nested
+    // builders don't finish.
+    #[test]
+    fn test_append_list_object_list_object() {
+        // An infinite counter
+        let mut counter = 0..;
+        let mut take = move |i| (&mut counter).take(i).collect::<Vec<_>>(); // next `i` counter values
+        let mut builder = VariantBuilder::new();
+        let skip = 5; // values divisible by `skip` mark fields to omit and builders to leave unfinished
         {
             let mut list = builder.new_list();
-            list.append_value(1i8);
-            list.append_value(2i8);
-            list.append_value("test");
+            for i in take(4) {
+                let mut object = list.new_object();
+                for i in take(4) {
+                    let field_name = format!("field{i}");
+                    let mut list = object.new_list(&field_name);
+                    for i in take(3) {
+                        let mut object = list.new_object();
+                        for i in take(3) {
+                            if i % skip != 0 {
+                                object.insert(&format!("field{i}"), i);
+                            }
+                        }
+                        if i % skip != 0 { // `i` is the enclosing `take(3)` loop's value again
+                            object.finish(); // otherwise the builder is dropped unfinished
+                        }
+                    }
+                    if i % skip != 0 { // `i` of the `take(4)` field loop
+                        list.finish();
+                    }
+                }
+                if i % skip != 0 { // `i` of the outermost loop
+                    object.finish();
+                }
+            }
             list.finish();
         }
-
         let (metadata, value) = builder.finish();
-        assert!(!metadata.is_empty());
-        assert!(!value.is_empty());
-
-        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let v1 = Variant::try_new(&metadata, &value).unwrap();
 
-        match variant {
-            Variant::List(list) => {
-                let val0 = list.get(0).unwrap();
-                assert_eq!(val0, Variant::Int8(1));
+        let (metadata, value) = VariantBuilder::new().with_value(v1.clone()).finish();
+        let v2 = Variant::try_new(&metadata, &value).unwrap();
 
-                let val1 = list.get(1).unwrap();
-                assert_eq!(val1, Variant::Int8(2));
+        assert_eq!(format!("{v1:?}"), format!("{v2:?}")); // re-appending `v1` must reproduce the same tree
+    }
 
-                let val2 = list.get(2).unwrap();
-                assert_eq!(val2, Variant::ShortString(ShortString("test")));
+    #[test]
+    fn test_append_variant_bytes_round_trip() {
+        // Build a nested variant (an object holding a list and another object) with the normal builder
+        let mut builder = VariantBuilder::new();
+        {
+            let mut obj = builder.new_object();
+            obj.insert("name", "Alice");
+            obj.insert("age", 30i32);
+            {
+                let mut scores_list = obj.new_list("scores");
+                scores_list.append_value(95i32);
+                scores_list.append_value(87i32);
+                scores_list.append_value(92i32);
+                scores_list.finish();
             }
-            _ => panic!("Expected an array variant, got: {:?}", variant),
+            {
+                let mut address = obj.new_object("address");
+                address.insert("street", "123 Main St");
+                address.insert("city", "Anytown");
+                address.finish();
+            }
+            obj.finish();
         }
+        let (metadata, value1) = builder.finish();
+        let variant1 = Variant::try_new(&metadata, &value1).unwrap();
+
+        // Copy the value with `append_variant_bytes`, wrapping the original metadata read-only
+        let metadata = VariantMetadata::new(&metadata);
+        let mut metadata = ReadOnlyMetadataBuilder::new(&metadata);
+        let mut builder2 = ValueBuilder::new();
+        let state = ParentState::variant(&mut builder2, &mut metadata);
+        ValueBuilder::append_variant_bytes(state, variant1);
+        let value2 = builder2.into_inner();
+
+        // The bytes should be identical, we merely copied them across.
+        assert_eq!(value1, value2);
     }
 
     #[test]
-    fn test_object() {
-        let mut builder = VariantBuilder::new();
-
+    fn test_object_insert_bytes_subset() {
+        // Create the original object, pre-registering the field names that are only added later.
+        let mut builder = VariantBuilder::new().with_field_names(["new_field", "another_field"]);
         {
             let mut obj = builder.new_object();
-            obj.append_value("name", "John");
-            obj.append_value("age", 42i8);
+            obj.insert("field1", "value1");
+            obj.insert("field2", 42i32);
+            obj.insert("field3", true);
+            obj.insert("field4", "value4");
             obj.finish();
         }
+        let (metadata1, value1) = builder.finish();
+        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
+        let original_obj = original_variant.as_object().unwrap();
+
+        // Create a new object that copies a subset of the fields, interleaved with new ones
+        let metadata2 = VariantMetadata::new(&metadata1);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
+        let mut builder2 = ValueBuilder::new();
+        let state = ParentState::variant(&mut builder2, &mut metadata2);
+        {
+            let mut obj = ObjectBuilder::new(state, true);
 
-        let (metadata, value) = builder.finish();
-        assert!(!metadata.is_empty());
-        assert!(!value.is_empty());
+            // Copy field1 using bytes API
+            obj.insert_bytes("field1", original_obj.get("field1").unwrap());
+
+            // Add new field
+            obj.insert("new_field", "new_value");
+
+            // Copy field3 using bytes API
+            obj.insert_bytes("field3", original_obj.get("field3").unwrap());
+
+            // Add another new field
+            obj.insert("another_field", 99i32);
+
+            // Copy field2 using bytes API
+            obj.insert_bytes("field2", original_obj.get("field2").unwrap());
+
+            obj.finish();
+        }
+        let value2 = builder2.into_inner();
+        let result_variant = Variant::try_new(&metadata1, &value2).unwrap(); // decoded against the original metadata
+        let result_obj = result_variant.as_object().unwrap();
+
+        // Verify the object contains the expected fields (`field4` was deliberately not copied)
+        assert_eq!(result_obj.len(), 5);
+        assert_eq!(
+            result_obj.get("field1").unwrap().as_string().unwrap(),
+            "value1"
+        );
+        assert_eq!(result_obj.get("field2").unwrap().as_int32().unwrap(), 42);
+        assert!(result_obj.get("field3").unwrap().as_boolean().unwrap());
+        assert_eq!(
+            result_obj.get("new_field").unwrap().as_string().unwrap(),
+            "new_value"
+        );
+        assert_eq!(
+            result_obj.get("another_field").unwrap().as_int32().unwrap(),
+            99
+        );
     }
 
     #[test]
-    fn test_object_field_ordering() {
-        let mut builder = VariantBuilder::new();
+    fn test_complex_nested_filtering_injection() {
+        // Create a complex nested structure: object -> list -> objects. Make sure to pre-register
+        // the extra field names we'll need later while manipulating variant bytes.
+        let mut builder = VariantBuilder::new().with_field_names([
+            "active_count",
+            "active_users",
+            "computed_score",
+            "processed_at",
+            "status",
+        ]);
 
         {
-            let mut obj = builder.new_object();
-            obj.append_value("zebra", "stripes"); // ID = 0
-            obj.append_value("apple", "red"); // ID = 1
-            obj.append_value("banana", "yellow"); // ID = 2
-            obj.finish();
-        }
+            let mut root_obj = builder.new_object();
+            root_obj.insert("metadata", "original");
+
+            {
+                let mut users_list = root_obj.new_list("users");
+
+                // User 1
+                {
+                    let mut user1 = users_list.new_object();
+                    user1.insert("id", 1i32);
+                    user1.insert("name", "Alice");
+                    user1.insert("active", true);
+                    user1.finish();
+                }
+
+                // User 2
+                {
+                    let mut user2 = users_list.new_object();
+                    user2.insert("id", 2i32);
+                    user2.insert("name", "Bob");
+                    user2.insert("active", false);
+                    user2.finish();
+                }
+
+                // User 3
+                {
+                    let mut user3 = users_list.new_object();
+                    user3.insert("id", 3i32);
+                    user3.insert("name", "Charlie");
+                    user3.insert("active", true);
+                    user3.finish();
+                }
+
+                users_list.finish();
+            }
 
-        let (_, value) = builder.finish();
+            root_obj.insert("total_count", 3i32);
+            root_obj.finish();
+        }
+        let (metadata1, value1) = builder.finish();
+        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
+        let original_obj = original_variant.as_object().unwrap();
+        let original_users = original_obj.get("users").unwrap();
+        let original_users = original_users.as_list().unwrap();
+
+        // Create a filtered/modified version: copy only the active users and inject new data
+        let metadata2 = VariantMetadata::new(&metadata1);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
+        let mut builder2 = ValueBuilder::new();
+        let state = ParentState::variant(&mut builder2, &mut metadata2);
+        {
+            let mut root_obj = ObjectBuilder::new(state, true);
+
+            // Copy the "metadata" field (an ordinary string field, not the variant metadata) using bytes API
+            root_obj.insert_bytes("metadata", original_obj.get("metadata").unwrap());
+
+            // Add processing timestamp
+            root_obj.insert("processed_at", "2024-01-01T00:00:00Z");
+
+            {
+                let mut filtered_users = root_obj.new_list("active_users");
+
+                // Copy only active users and inject additional data
+                for i in 0..original_users.len() {
+                    let user = original_users.get(i).unwrap();
+                    let user = user.as_object().unwrap();
+                    if user.get("active").unwrap().as_boolean().unwrap() {
+                        {
+                            let mut new_user = filtered_users.new_object();
+
+                            // Copy existing fields using bytes API
+                            new_user.insert_bytes("id", user.get("id").unwrap());
+                            new_user.insert_bytes("name", user.get("name").unwrap());
+
+                            // Inject new computed field
+                            let user_id = user.get("id").unwrap().as_int32().unwrap();
+                            new_user.insert("computed_score", user_id * 10);
+
+                            // Add status transformation (don't copy the 'active' field)
+                            new_user.insert("status", "verified");
+
+                            new_user.finish();
+                        }
+                    }
+                }
+
+                // Inject a completely new user
+                {
+                    let mut new_user = filtered_users.new_object();
+                    new_user.insert("id", 999i32);
+                    new_user.insert("name", "System User");
+                    new_user.insert("computed_score", 0i32);
+                    new_user.insert("status", "system");
+                    new_user.finish();
+                }
+
+                filtered_users.finish();
+            }
 
-        let header = value[0];
-        assert_eq!(header & 0x03, VariantBasicType::Object as u8);
+            // Record the new count (replaces the original `total_count` field, which is not copied)
+            root_obj.insert("active_count", 3i32); // 2 active + 1 new
 
-        let field_count = value[1] as usize;
-        assert_eq!(field_count, 3);
+            root_obj.finish();
+        }
+        let value2 = builder2.into_inner();
+        let result_variant = Variant::try_new(&metadata1, &value2).unwrap(); // decoded against the original metadata
+        let result_obj = result_variant.as_object().unwrap();
+
+        // Verify the filtered/modified structure
+        assert_eq!(
+            result_obj.get("metadata").unwrap().as_string().unwrap(),
+            "original"
+        );
+        assert_eq!(
+            result_obj.get("processed_at").unwrap().as_string().unwrap(),
+            "2024-01-01T00:00:00Z"
+        );
+        assert_eq!(
+            result_obj.get("active_count").unwrap().as_int32().unwrap(),
+            3
+        );
 
-        // Get field IDs from the object header
-        let field_ids: Vec<u8> = value[2..5].to_vec();
+        let active_users = result_obj.get("active_users").unwrap();
+        let active_users = active_users.as_list().unwrap();
+        assert_eq!(active_users.len(), 3);
+
+        // Verify Alice (id=1, was active)
+        let alice = active_users.get(0).unwrap();
+        let alice = alice.as_object().unwrap();
+        assert_eq!(alice.get("id").unwrap().as_int32().unwrap(), 1);
+        assert_eq!(alice.get("name").unwrap().as_string().unwrap(), "Alice");
+        assert_eq!(alice.get("computed_score").unwrap().as_int32().unwrap(), 10);
+        assert_eq!(
+            alice.get("status").unwrap().as_string().unwrap(),
+            "verified"
+        );
+        assert!(alice.get("active").is_none()); // This field was not copied
+
+        // Verify Charlie (id=3, was active) - Bob (id=2) was not active so not included
+        let charlie = active_users.get(1).unwrap();
+        let charlie = charlie.as_object().unwrap();
+        assert_eq!(charlie.get("id").unwrap().as_int32().unwrap(), 3);
+        assert_eq!(charlie.get("name").unwrap().as_string().unwrap(), "Charlie");
+        assert_eq!(
+            charlie.get("computed_score").unwrap().as_int32().unwrap(),
+            30
+        );
+        assert_eq!(
+            charlie.get("status").unwrap().as_string().unwrap(),
+            "verified"
+        );
 
-        // apple(1), banana(2), zebra(0)
-        assert_eq!(field_ids, vec![1, 2, 0]);
+        // Verify injected system user
+        let system_user = active_users.get(2).unwrap();
+        let system_user = system_user.as_object().unwrap();
+        assert_eq!(system_user.get("id").unwrap().as_int32().unwrap(), 999);
+        assert_eq!(
+            system_user.get("name").unwrap().as_string().unwrap(),
+            "System User"
+        );
+        assert_eq!(
+            system_user
+                .get("computed_score")
+                .unwrap()
+                .as_int32()
+                .unwrap(),
+            0
+        );
+        assert_eq!(
+            system_user.get("status").unwrap().as_string().unwrap(),
+            "system"
+        );
     }
 }
diff --git a/parquet-variant/src/builder/list.rs b/parquet-variant/src/builder/list.rs
new file mode 100644
index 000000000000..5064904ca7de
--- /dev/null
+++ b/parquet-variant/src/builder/list.rs
@@ -0,0 +1,764 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    BASIC_TYPE_BITS, BuilderSpecificState, ParentState, ValueBuilder, Variant, VariantBuilderExt,
+    builder::{metadata::MetadataBuilder, object::ObjectBuilder},
+    decoder::VariantBasicType,
+    int_size,
+};
+use arrow_schema::ArrowError;
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let is_large_bit = u8::from(large); // single flag bit, placed above the offset-size field
+    let offset_size_bits = (offset_size - 1) << BASIC_TYPE_BITS; // offset width is stored minus one
+    let basic_type = VariantBasicType::Array as u8;
+    (is_large_bit << (BASIC_TYPE_BITS + 2)) | offset_size_bits | basic_type
+}
+
+/// Appends the `value_size` least-significant bytes of `value` to `dest`, in little-endian order.
+fn append_packed_u32(dest: &mut Vec<u8>, value: u32, value_size: usize) {
+    let n = dest.len() + value_size; // length `dest` should have once the value is appended
+    dest.extend(value.to_le_bytes()); // always writes all four bytes first
+    dest.truncate(n); // drops the unwanted high bytes (no-op when `value_size >= 4`)
+}
+
+/// A builder for creating [`Variant::List`] values.
+///
+/// See the examples on [`VariantBuilder`] for usage.
+///
+/// [`VariantBuilder`]: crate::VariantBuilder
+#[derive(Debug)]
+pub struct ListBuilder<'a, S: BuilderSpecificState> {
+    parent_state: ParentState<'a, S>, // gives access to the parent's value and metadata builders
+    offsets: Vec<usize>, // start of each element, relative to this builder's saved value offset
+    validate_unique_fields: bool, // handed on to every nested object/list builder
+}
+
+impl<'a, S: BuilderSpecificState> ListBuilder<'a, S> {
+    /// Creates a new list builder, nested on top of the given parent state.
+    pub fn new(parent_state: ParentState<'a, S>, validate_unique_fields: bool) -> Self {
+        Self {
+            parent_state,
+            offsets: vec![],
+            validate_unique_fields,
+        }
+    }
+
+    /// Sets whether unique field key validation is enabled for objects created within this list.
+    ///
+    /// Propagates the validation flag to any [`ObjectBuilder`]s created using
+    /// [`ListBuilder::new_object`].
+    pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
+        self.validate_unique_fields = validate_unique_fields;
+        self
+    }
+
+    // Returns validate_unique_fields because we can no longer reference self once this method returns.
+    fn parent_state(&mut self) -> (ParentState<'_, ListState<'_>>, bool) {
+        let state = ParentState::list(
+            self.parent_state.value_builder,
+            self.parent_state.metadata_builder,
+            &mut self.offsets,
+            self.parent_state.saved_value_builder_offset,
+        );
+        (state, self.validate_unique_fields)
+    }
+
+    /// Returns an object builder that can be used to append a new (nested) object to this list.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ObjectBuilder::finish`] is called.
+    pub fn new_object(&mut self) -> ObjectBuilder<'_, ListState<'_>> {
+        let (parent_state, validate_unique_fields) = self.parent_state();
+        ObjectBuilder::new(parent_state, validate_unique_fields)
+    }
+
+    /// Returns a list builder that can be used to append a new (nested) list to this list.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ListBuilder::finish`] is called.
+    pub fn new_list(&mut self) -> ListBuilder<'_, ListState<'_>> {
+        let (parent_state, validate_unique_fields) = self.parent_state();
+        ListBuilder::new(parent_state, validate_unique_fields)
+    }
+
+    /// Appends a variant to the list.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the variant contains duplicate field names in objects
+    /// when validation is enabled. For a fallible version, use [`ListBuilder::try_append_value`].
+    pub fn append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, value: T) {
+        let (state, _) = self.parent_state();
+        ValueBuilder::append_variant(state, value.into())
+    }
+
+    /// Appends a variant to the list. This is the fallible version of [`ListBuilder::append_value`].
+    pub fn try_append_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        &mut self,
+        value: T,
+    ) -> Result<(), ArrowError> {
+        let (state, _) = self.parent_state();
+        ValueBuilder::try_append_variant(state, value.into())
+    }
+
+    /// Appends a variant value to this list by copying raw bytes when possible.
+    ///
+    /// For objects and lists, this directly copies their underlying byte representation instead of
+    /// performing a logical copy. For other variant types, this falls back to the standard append
+    /// behavior.
+    ///
+    /// The caller must ensure that the metadata dictionary is already built and correct for
+    /// any objects or lists being appended.
+    pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into<Variant<'m, 'd>>) {
+        let (state, _) = self.parent_state();
+        ValueBuilder::append_variant_bytes(state, value.into())
+    }
+
+    /// Builder-style API for appending a value to the list and returning self to enable method chaining.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the variant contains duplicate field names in objects
+    /// when validation is enabled. For a fallible version, use [`ListBuilder::try_with_value`].
+    pub fn with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, value: T) -> Self {
+        self.append_value(value);
+        self
+    }
+
+    /// Builder-style API for appending a value to the list and returns self for method chaining.
+    ///
+    /// This is the fallible version of [`ListBuilder::with_value`].
+    pub fn try_with_value<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        mut self,
+        value: T,
+    ) -> Result<Self, ArrowError> {
+        self.try_append_value(value)?;
+        Ok(self)
+    }
+
+    /// Finalizes this list and appends it to its parent, which otherwise remains unmodified.
+    pub fn finish(mut self) {
+        let starting_offset = self.parent_state.saved_value_builder_offset;
+        let value_builder = self.parent_state.value_builder();
+
+        let data_size = value_builder
+            .offset()
+            .checked_sub(starting_offset)
+            .expect("Data size overflowed usize");
+
+        let num_elements = self.offsets.len();
+        let is_large = num_elements > u8::MAX as usize;
+        let offset_size = int_size(data_size);
+
+        let num_elements_size = if is_large { 4 } else { 1 }; // is_large: 4 bytes, else 1 byte.
+        // Number of bytes that precede the element data in the encoded list:
+        let header_size = 1 +      // header (i.e., `array_header`)
+            num_elements_size +  // num_element_size
+            (num_elements + 1) * offset_size as usize; // offsets and data size
+
+        // Calculated header size becomes a hint; being wrong only risks extra allocations.
+        // Make sure to reserve enough capacity to handle the extra bytes we'll truncate.
+        let mut bytes_to_splice = Vec::with_capacity(header_size + 3);
+        // Write header
+        let header = array_header(is_large, offset_size as _);
+        bytes_to_splice.push(header);
+
+        append_packed_u32(&mut bytes_to_splice, num_elements as u32, num_elements_size);
+
+        for offset in &self.offsets {
+            append_packed_u32(&mut bytes_to_splice, *offset as u32, offset_size as usize);
+        }
+
+        append_packed_u32(&mut bytes_to_splice, data_size as u32, offset_size as usize); // final offset = total data size
+
+        value_builder
+            .inner_mut()
+            .splice(starting_offset..starting_offset, bytes_to_splice); // empty range: pure insertion before the data
+
+        self.parent_state.finish();
+    }
+}
+
+impl<'a, S: BuilderSpecificState> VariantBuilderExt for ListBuilder<'a, S> {
+    type State<'s>
+        = ListState<'s>
+    where
+        Self: 's;
+
+    /// Variant arrays cannot encode NULL values, only `Variant::Null`.
+    fn append_null(&mut self) {
+        self.append_value(Variant::Null);
+    }
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.append_value(value); // resolves to the inherent `ListBuilder::append_value`, so this does not recurse
+    }
+
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(self.new_list()) // always `Ok`: `new_list` itself is infallible
+    }
+
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
+        Ok(self.new_object()) // always `Ok`: `new_object` itself is infallible
+    }
+}
+
+impl<'a, 'm, 'v, S, V> Extend<V> for ListBuilder<'a, S>
+where
+    S: BuilderSpecificState,
+    V: Into<Variant<'m, 'v>>,
+{
+    fn extend<T: IntoIterator<Item = V>>(&mut self, iter: T) {
+        // Append every item, in iteration order, as a new list element.
+        iter.into_iter()
+            .for_each(|element| self.append_value(element));
+    }
+}
+
+/// Internal state for list building
+#[derive(Debug)]
+pub struct ListState<'a> {
+    offsets: &'a mut Vec<usize>, // the owning list builder's element offsets
+    saved_offsets_size: usize, // `offsets.len()` before this element's offset was pushed
+}
+
+// `ParentState::list` eagerly pushes the new element's offset, which we should rollback on failure.
+impl BuilderSpecificState for ListState<'_> {
+    fn rollback(&mut self) {
+        self.offsets.truncate(self.saved_offsets_size); // discard the eagerly pushed offset
+    }
+}
+
+impl<'a> ParentState<'a, ListState<'a>> {
+    /// Creates a new instance suitable for a [`ListBuilder`]. The value and metadata builder state
+    /// is checkpointed and will roll back on drop, unless [`Self::finish`] is called. The new
+    /// element's offset is also captured eagerly and will also roll back if not finished.
+    pub fn list(
+        value_builder: &'a mut ValueBuilder,
+        metadata_builder: &'a mut dyn MetadataBuilder,
+        offsets: &'a mut Vec<usize>,
+        saved_parent_value_builder_offset: usize,
+    ) -> Self {
+        // `saved_parent_value_builder_offset` is the buffer size as of when the parent builder was
+        // constructed. `saved_value_builder_offset` is the buffer size as of now (when a child
+        // builder is created). The offset entry for this list element is their difference.
+        let saved_value_builder_offset = value_builder.offset();
+        let saved_offsets_size = offsets.len(); // remembered so `rollback` can truncate back to it
+        offsets.push(saved_value_builder_offset - saved_parent_value_builder_offset);
+
+        let builder_state = ListState {
+            offsets,
+            saved_offsets_size,
+        };
+        Self {
+            saved_metadata_builder_dict_size: metadata_builder.num_field_names(),
+            saved_value_builder_offset,
+            metadata_builder,
+            value_builder,
+            builder_state,
+            finished: false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        ShortString, ValueBuilder, VariantBuilder, VariantMetadata,
+        builder::metadata::ReadOnlyMetadataBuilder,
+    };
+
+    use super::*;
+
+    #[test]
+    fn test_list() {
+        let mut builder = VariantBuilder::new();
+
+        builder
+            .new_list()
+            .with_value(1i8)
+            .with_value(2i8)
+            .with_value("test")
+            .finish(); // finishes the list builder; the variant builder is finished below
+
+        let (metadata, value) = builder.finish();
+        assert!(!metadata.is_empty());
+        assert!(!value.is_empty());
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        match variant {
+            Variant::List(list) => {
+                let val0 = list.get(0).unwrap();
+                assert_eq!(val0, Variant::Int8(1));
+
+                let val1 = list.get(1).unwrap();
+                assert_eq!(val1, Variant::Int8(2));
+
+                let val2 = list.get(2).unwrap();
+                assert_eq!(val2, Variant::ShortString(ShortString("test"))); // "test" reads back as a short string
+            }
+            _ => panic!("Expected an array variant, got: {variant:?}"),
+        }
+    }
+
+    #[test]
+    fn test_nested_list() {
+        let mut builder = VariantBuilder::new();
+
+        let mut outer_list_builder = builder.new_list();
+
+        // Create the inner list and finish it before finishing the outer list
+        outer_list_builder
+            .new_list()
+            .with_value("a")
+            .with_value("b")
+            .with_value("c")
+            .with_value("d")
+            .finish();
+
+        outer_list_builder.finish();
+
+        let (metadata, value) = builder.finish();
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_list = variant.as_list().unwrap();
+
+        assert_eq!(outer_list.len(), 1); // the inner list is the outer list's only element
+
+        let inner_variant = outer_list.get(0).unwrap();
+        let inner_list = inner_variant.as_list().unwrap();
+
+        assert_eq!(
+            vec![
+                Variant::from("a"),
+                Variant::from("b"),
+                Variant::from("c"),
+                Variant::from("d"),
+            ],
+            inner_list.iter().collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_super_nested_list() {
+        /*
+        [[[[[1]]]]]
+        */
+
+        let mut builder = VariantBuilder::new();
+        {
+            let mut list_builder1 = builder.new_list();
+            {
+                let mut list_builder2 = list_builder1.new_list();
+                {
+                    let mut list_builder3 = list_builder2.new_list();
+                    {
+                        let mut list_builder4 = list_builder3.new_list();
+                        {
+                            let mut list_builder5 = list_builder4.new_list();
+                            list_builder5.append_value(1);
+                            list_builder5.finish();
+                        }
+                        list_builder4.finish();
+                    }
+                    list_builder3.finish();
+                }
+                list_builder2.finish();
+            }
+            list_builder1.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let list1 = variant.as_list().unwrap();
+        assert_eq!(list1.len(), 1);
+
+        let list2_variant = list1.get(0).unwrap();
+        let list2 = list2_variant.as_list().unwrap();
+        assert_eq!(list2.len(), 1);
+
+        let list3_variant = list2.get(0).unwrap();
+        let list3 = list3_variant.as_list().unwrap();
+        assert_eq!(list3.len(), 1);
+
+        let list4_variant = list3.get(0).unwrap();
+        let list4 = list4_variant.as_list().unwrap();
+        assert_eq!(list4.len(), 1);
+
+        let list5_variant = list4.get(0).unwrap();
+        let list5 = list5_variant.as_list().unwrap();
+        assert_eq!(list5.len(), 1);
+
+        // The innermost list holds the single value appended above.
+
+        assert_eq!(list5.get(0).unwrap(), Variant::from(1));
+    }
+
+    #[test]
+    fn test_list_append_bytes_subset() {
+        // Create an original list
+        let mut builder = VariantBuilder::new();
+        {
+            let mut list = builder.new_list();
+            list.append_value("item1");
+            list.append_value(42i32);
+            list.append_value(true);
+            list.append_value("item4");
+            list.append_value(1.234f64);
+            list.finish();
+        }
+        let (metadata1, value1) = builder.finish();
+        let original_variant = Variant::try_new(&metadata1, &value1).unwrap();
+        let original_list = original_variant.as_list().unwrap();
+
+        // Create a new list that copies a subset of the elements, interleaved with new ones
+        let metadata2 = VariantMetadata::new(&metadata1);
+        let mut metadata2 = ReadOnlyMetadataBuilder::new(&metadata2);
+        let mut builder2 = ValueBuilder::new();
+        let state = ParentState::variant(&mut builder2, &mut metadata2);
+        {
+            let mut list = ListBuilder::new(state, true);
+
+            // Copy first element using bytes API
+            list.append_value_bytes(original_list.get(0).unwrap());
+
+            // Add new element
+            list.append_value("new_item");
+
+            // Copy third element using bytes API
+            list.append_value_bytes(original_list.get(2).unwrap());
+
+            // Add another new element
+            list.append_value(99i32);
+
+            // Copy last element using bytes API
+            list.append_value_bytes(original_list.get(4).unwrap());
+
+            list.finish();
+        }
+        let value2 = builder2.into_inner();
+        let result_variant = Variant::try_new(&metadata1, &value2).unwrap(); // decoded against the original metadata
+        let result_list = result_variant.as_list().unwrap();
+
+        // Verify the list contains the expected elements (original elements 1 and 3 were not copied)
+        assert_eq!(result_list.len(), 5);
+        assert_eq!(result_list.get(0).unwrap().as_string().unwrap(), "item1");
+        assert_eq!(result_list.get(1).unwrap().as_string().unwrap(), "new_item");
+        assert!(result_list.get(2).unwrap().as_boolean().unwrap());
+        assert_eq!(result_list.get(3).unwrap().as_int32().unwrap(), 99);
+        assert_eq!(result_list.get(4).unwrap().as_f64().unwrap(), 1.234);
+    }
+
+    #[test]
+    fn test_append_list() {
+        let (m1, v1) = make_list();
+        let variant = Variant::new(&m1, &v1);
+        let mut builder = VariantBuilder::new();
+        builder.append_value(variant.clone()); // append an existing list variant as a whole value
+        let (metadata, value) = builder.finish();
+        assert_eq!(variant, Variant::new(&metadata, &value)); // the rebuilt variant must compare equal
+    }
+
+    /// Builds a simple list variant (`[1234, "a string value"]`) and returns its `(metadata, value)` bytes.
+    fn make_list() -> (Vec<u8>, Vec<u8>) {
+        let mut builder = VariantBuilder::new();
+
+        builder
+            .new_list()
+            .with_value(1234)
+            .with_value("a string value")
+            .finish();
+
+        builder.finish()
+    }
+
+    #[test]
+    fn test_append_nested_list() {
+        // Round trip: a nested list variant appended to a fresh builder must compare equal.
+        let (source_metadata, source_value) = make_nested_list();
+        let original = Variant::new(&source_metadata, &source_value);
+
+        let mut builder = VariantBuilder::new();
+        builder.append_value(original.clone());
+        let (metadata, value) = builder.finish();
+
+        let rebuilt = Variant::new(&metadata, &value);
+        assert_eq!(original, rebuilt);
+    }
+
+    /// Builds the metadata and value bytes of a list variant holding one inner list:
+    /// `[["the dog licked the oil", 4.3]]`.
+    fn make_nested_list() -> (Vec<u8>, Vec<u8>) {
+        let mut builder = VariantBuilder::new();
+        let mut outer = builder.new_list();
+
+        // The only element of the outer list is itself a list.
+        {
+            let mut inner = outer.new_list();
+            inner.append_value("the dog licked the oil");
+            inner.append_value(4.3);
+            inner.finish();
+        }
+
+        outer.finish();
+
+        builder.finish()
+    }
+
+    #[test]
+    fn test_object_list() {
+        // Builds a list of two objects that share the same field names:
+        // [{"id": 1, "type": "Cauliflower"}, {"id": 2, "type": "Beets"}]
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+
+        {
+            let mut first = list_builder.new_object();
+            first.insert("id", 1);
+            first.insert("type", "Cauliflower");
+            first.finish();
+        }
+
+        {
+            let mut second = list_builder.new_object();
+            second.insert("id", 2);
+            second.insert("type", "Beets");
+            second.finish();
+        }
+
+        list_builder.finish();
+        let (metadata, value) = builder.finish();
+
+        // Read the encoded bytes back and check both objects field by field.
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 2);
+
+        let obj1_variant = list.get(0).unwrap();
+        let obj1 = obj1_variant.as_object().unwrap();
+        let obj1_fields: Vec<_> = obj1.iter().collect();
+        assert_eq!(
+            vec![
+                ("id", Variant::from(1)),
+                ("type", Variant::from("Cauliflower")),
+            ],
+            obj1_fields
+        );
+
+        let obj2_variant = list.get(1).unwrap();
+        let obj2 = obj2_variant.as_object().unwrap();
+        let obj2_fields: Vec<_> = obj2.iter().collect();
+        assert_eq!(
+            vec![("id", Variant::from(2)), ("type", Variant::from("Beets"))],
+            obj2_fields
+        );
+    }
+
+    #[test]
+    fn test_object_list2() {
+        // Builds `[{"a": 1}, {"b": 2}]`: two single-field objects with different field names.
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+
+        {
+            let mut first = list_builder.new_object();
+            first.insert("a", 1);
+            first.finish();
+        }
+
+        {
+            let mut second = list_builder.new_object();
+            second.insert("b", 2);
+            second.finish();
+        }
+
+        list_builder.finish();
+        let (metadata, value) = builder.finish();
+
+        // Read the encoded bytes back and check each object's only field.
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 2);
+
+        let obj1_variant = list.get(0).unwrap();
+        let obj1 = obj1_variant.as_object().unwrap();
+        let obj1_fields: Vec<_> = obj1.iter().collect();
+        assert_eq!(vec![("a", Variant::from(1))], obj1_fields);
+
+        let obj2_variant = list.get(1).unwrap();
+        let obj2 = obj2_variant.as_object().unwrap();
+        let obj2_fields: Vec<_> = obj2.iter().collect();
+        assert_eq!(vec![("b", Variant::from(2))], obj2_fields);
+    }
+
+    #[test]
+    fn test_hetergenous_list() {
+        // Target value, alternating primitives and single-field objects:
+        //
+        // [
+        //     1,
+        //     { "a": 1 },
+        //     2,
+        //     { "b": 2 },
+        //     3
+        // ]
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+
+        list_builder.append_value(1);
+        list_builder.new_object().with_field("a", 1).finish();
+        list_builder.append_value(2);
+        list_builder.new_object().with_field("b", 2).finish();
+        list_builder.append_value(3);
+
+        list_builder.finish();
+        let (metadata, value) = builder.finish();
+
+        // Read the encoded bytes back and check the five elements in order.
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let list = variant.as_list().unwrap();
+        assert_eq!(list.len(), 5);
+
+        assert_eq!(list.get(0).unwrap(), Variant::from(1));
+
+        let obj1_variant = list.get(1).unwrap();
+        let obj1 = obj1_variant.as_object().unwrap();
+        let obj1_fields: Vec<_> = obj1.iter().collect();
+        assert_eq!(vec![("a", Variant::from(1))], obj1_fields);
+
+        assert_eq!(list.get(2).unwrap(), Variant::from(2));
+
+        let obj2_variant = list.get(3).unwrap();
+        let obj2 = obj2_variant.as_object().unwrap();
+        let obj2_fields: Vec<_> = obj2.iter().collect();
+        assert_eq!(vec![("b", Variant::from(2))], obj2_fields);
+
+        assert_eq!(list.get(4).unwrap(), Variant::from(3));
+    }
+
+    // This test covers the logic for reusing the parent buffer in the list builder.
+    // The variant being built looks like:
+    // [ "apple", false, [{"a": "b", "b": "c"}, {"c":"d", "d":"e"}], [[1, true], ["tree", false]], 1]
+    #[test]
+    fn test_nested_list_with_heterogeneous_fields_for_buffer_reuse() {
+        let mut builder = VariantBuilder::new();
+
+        {
+            let mut outer_list_builder = builder.new_list();
+
+            outer_list_builder.append_value("apple");
+            outer_list_builder.append_value(false);
+
+            {
+                // this list covers the case of an object builder nested inside a list builder
+                let mut inner_list_builder = outer_list_builder.new_list();
+
+                {
+                    let mut inner_object_builder = inner_list_builder.new_object();
+                    inner_object_builder.insert("a", "b");
+                    inner_object_builder.insert("b", "c");
+                    inner_object_builder.finish();
+                }
+
+                {
+                    // the second object builder here covers the logic for
+                    // the list builder reusing the parent buffer.
+                    let mut inner_object_builder = inner_list_builder.new_object();
+                    inner_object_builder.insert("c", "d");
+                    inner_object_builder.insert("d", "e");
+                    inner_object_builder.finish();
+                }
+
+                inner_list_builder.finish();
+            }
+
+            {
+                // this list covers the case of a list builder nested inside a list builder
+                let mut inner_list_builder = outer_list_builder.new_list();
+
+                {
+                    let mut double_inner_list_builder = inner_list_builder.new_list();
+                    double_inner_list_builder.append_value(1);
+                    double_inner_list_builder.append_value(true);
+
+                    double_inner_list_builder.finish();
+                }
+
+                {
+                    let mut double_inner_list_builder = inner_list_builder.new_list();
+                    double_inner_list_builder.append_value("tree");
+                    double_inner_list_builder.append_value(false);
+
+                    double_inner_list_builder.finish();
+                }
+                inner_list_builder.finish();
+            }
+
+            outer_list_builder.append_value(1);
+
+            outer_list_builder.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_list = variant.as_list().unwrap();
+
+        assert_eq!(5, outer_list.len());
+
+        // Primitive values at indices 0, 1 and 4 of the outer list
+        assert_eq!(Variant::from("apple"), outer_list.get(0).unwrap());
+        assert_eq!(Variant::from(false), outer_list.get(1).unwrap());
+        assert_eq!(Variant::from(1), outer_list.get(4).unwrap());
+
+        // The first inner list [{"a": "b", "b": "c"}, {"c":"d", "d":"e"}]
+        let list1_variant = outer_list.get(2).unwrap();
+        let list1 = list1_variant.as_list().unwrap();
+        assert_eq!(2, list1.len());
+
+        let list1_obj1_variant = list1.get(0).unwrap();
+        let list1_obj1 = list1_obj1_variant.as_object().unwrap();
+        assert_eq!("a", list1_obj1.field_name(0).unwrap());
+        assert_eq!(Variant::from("b"), list1_obj1.field(0).unwrap());
+
+        assert_eq!("b", list1_obj1.field_name(1).unwrap());
+        assert_eq!(Variant::from("c"), list1_obj1.field(1).unwrap());
+
+        // The second inner list [[1, true], ["tree", false]]
+        let list2_variant = outer_list.get(3).unwrap();
+        let list2 = list2_variant.as_list().unwrap();
+        assert_eq!(2, list2.len());
+
+        // The list [1, true]
+        let list2_list1_variant = list2.get(0).unwrap();
+        let list2_list1 = list2_list1_variant.as_list().unwrap();
+        assert_eq!(2, list2_list1.len());
+        assert_eq!(Variant::from(1), list2_list1.get(0).unwrap());
+        assert_eq!(Variant::from(true), list2_list1.get(1).unwrap());
+
+        // The list ["tree", false]
+        let list2_list2_variant = list2.get(1).unwrap();
+        let list2_list2 = list2_list2_variant.as_list().unwrap();
+        assert_eq!(2, list2_list2.len());
+        assert_eq!(Variant::from("tree"), list2_list2.get(0).unwrap());
+        assert_eq!(Variant::from(false), list2_list2.get(1).unwrap());
+    }
+}
diff --git a/parquet-variant/src/builder/metadata.rs b/parquet-variant/src/builder/metadata.rs
new file mode 100644
index 000000000000..efccc2e4c63e
--- /dev/null
+++ b/parquet-variant/src/builder/metadata.rs
@@ -0,0 +1,424 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::HashMap;
+
+use arrow_schema::ArrowError;
+use indexmap::IndexSet;
+
+use crate::{VariantMetadata, int_size};
+
+/// Appends the `nbytes` least-significant bytes of `value` to `buf` in little-endian order.
+///
+/// # Panics
+///
+/// Panics if `nbytes` is larger than the byte width of `usize`.
+fn write_offset(buf: &mut Vec<u8>, value: usize, nbytes: u8) {
+    let width = usize::from(nbytes);
+    let le_bytes = value.to_le_bytes();
+    buf.extend_from_slice(&le_bytes[..width]);
+}
+
+/// A trait for building variant metadata dictionaries, to be used in conjunction with a
+/// [`ValueBuilder`]. The trait provides methods for managing field names and their IDs, as well as
+/// rolling back a failed builder operation that might have created new field ids.
+///
+/// [`ValueBuilder`]: crate::builder::ValueBuilder
+pub trait MetadataBuilder: std::fmt::Debug {
+    /// Attempts to register a field name, returning the corresponding (possibly newly-created)
+    /// field id on success. Attempting to register the same field name twice will _generally_
+    /// produce the same field id both times, but the variant spec does not actually require it.
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError>;
+
+    /// Retrieves the field name for a given field id, which must be less than
+    /// [`Self::num_field_names`]. Panics if the field id is out of bounds.
+    fn field_name(&self, field_id: usize) -> &str;
+
+    /// Returns the number of field names stored in this metadata builder. Any number less than this
+    /// is a valid field id. The builder can be reverted back to this size later on (discarding any
+    /// newer/higher field ids) by calling [`Self::truncate_field_names`].
+    fn num_field_names(&self) -> usize;
+
+    /// Reverts the field names to a previous size, discarding any field ids now out of bounds.
+    fn truncate_field_names(&mut self, new_size: usize);
+
+    /// Finishes the current metadata dictionary, returning the underlying buffer's new byte length.
+    fn finish(&mut self) -> usize;
+}
+
+// Every method except `truncate_field_names` forwards to the inherent method of the same (or a
+// similar) name on `WritableMetadataBuilder`; inherent methods take precedence over the trait's.
+impl MetadataBuilder for WritableMetadataBuilder {
+    /// Registering a name on a writable builder cannot fail, so this always returns `Ok`.
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError> {
+        let field_id = Self::upsert_field_name(self, field_name);
+        Ok(field_id)
+    }
+
+    fn field_name(&self, field_id: usize) -> &str {
+        Self::field_name(self, field_id)
+    }
+
+    fn num_field_names(&self) -> usize {
+        Self::num_field_names(self)
+    }
+
+    /// Drops every field name whose index is `new_size` or greater.
+    ///
+    /// NOTE(review): only `field_names` is truncated; `is_sorted` is left as it was. Confirm that
+    /// keeping the old flag after a rollback is intended.
+    fn truncate_field_names(&mut self, new_size: usize) {
+        self.field_names.truncate(new_size);
+    }
+
+    fn finish(&mut self) -> usize {
+        Self::finish(self)
+    }
+}
+
+/// A metadata builder that cannot register new field names, and merely returns the field id
+/// associated with a known field name. This is useful for variant unshredding operations, where the
+/// metadata column is fixed and -- per variant shredding spec -- already contains all field names
+/// from the typed_value column. It is also useful when projecting a subset of fields from a variant
+/// object value, since the bytes can be copied across directly without re-encoding their field ids.
+///
+/// NOTE: [`Self::finish`] writes nothing; it only reports the existing metadata length. If the
+/// intent is to copy the underlying bytes on each `finish` call, a different trait impl is needed.
+#[derive(Debug)]
+pub struct ReadOnlyMetadataBuilder<'m> {
+    metadata: &'m VariantMetadata<'m>,
+    // A cache that tracks field names this builder has already seen, because finding the field id
+    // for a given field name is expensive -- O(n) for a large and unsorted metadata dictionary.
+    known_field_names: HashMap<&'m str, u32>,
+}
+
+impl<'m> ReadOnlyMetadataBuilder<'m> {
+    /// Wraps an existing metadata dictionary in a read-only builder.
+    ///
+    /// The field name cache starts out empty and is filled as names are looked up.
+    pub fn new(metadata: &'m VariantMetadata<'m>) -> Self {
+        let known_field_names = HashMap::new();
+        Self {
+            metadata,
+            known_field_names,
+        }
+    }
+}
+
+impl MetadataBuilder for ReadOnlyMetadataBuilder<'_> {
+    /// Looks up the field id of `field_name` without ever adding to the dictionary.
+    ///
+    /// Returns [`ArrowError::InvalidArgumentError`] if the name is not in the wrapped metadata.
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, ArrowError> {
+        // Fast path: this builder has already resolved the name once.
+        if let Some(&cached_id) = self.known_field_names.get(field_name) {
+            return Ok(cached_id);
+        }
+
+        match self.metadata.get_entry(field_name) {
+            Some((field_id, dictionary_name)) => {
+                // Cache under the dictionary's own copy of the name, which lives as long as the
+                // metadata rather than as long as the caller's `field_name`.
+                self.known_field_names.insert(dictionary_name, field_id);
+                Ok(field_id)
+            }
+            None => Err(ArrowError::InvalidArgumentError(format!(
+                "Field name '{field_name}' not found in metadata dictionary"
+            ))),
+        }
+    }
+
+    fn field_name(&self, field_id: usize) -> &str {
+        &self.metadata[field_id]
+    }
+
+    fn num_field_names(&self) -> usize {
+        self.metadata.len()
+    }
+
+    /// Nothing can be truncated here; debug builds check that the requested size is the
+    /// dictionary's current size.
+    fn truncate_field_names(&mut self, new_size: usize) {
+        debug_assert_eq!(self.metadata.len(), new_size);
+    }
+
+    /// Writes nothing and returns the byte length of the wrapped metadata.
+    fn finish(&mut self) -> usize {
+        let metadata_bytes = self.metadata.bytes;
+        metadata_bytes.len()
+    }
+}
+
+/// Builder for constructing metadata for [`Variant`] values.
+///
+/// This is used internally by the [`VariantBuilder`] to construct the metadata
+///
+/// You can use an existing `Vec<u8>` as the metadata buffer by using the `from` impl.
+///
+/// [`Variant`]: crate::Variant
+/// [`VariantBuilder`]: crate::VariantBuilder
+#[derive(Default, Debug)]
+pub struct WritableMetadataBuilder {
+    pub(crate) field_names: IndexSet<String>, // a name's index in this set is its field id
+    /// Sort-order flag maintained by `upsert_field_name` and written to the header by `finish`
+    pub(crate) is_sorted: bool,
+
+    /// Output buffer. Metadata is written to the end of this buffer
+    metadata_buffer: Vec<u8>,
+}
+
+impl WritableMetadataBuilder {
+    /// Registers `field_name` in the dictionary if it is not already present and returns its
+    /// field id, which is the name's index in the dictionary.
+    pub fn upsert_field_name(&mut self, field_name: &str) -> u32 {
+        let (id, newly_added) = self.field_names.insert_full(field_name.to_string());
+
+        // A name that was already present changes neither the dictionary nor its sort order.
+        if !newly_added {
+            return id as u32;
+        }
+
+        let n = self.num_field_names();
+
+        // Dictionary sort order tracking:
+        // - An empty dictionary is unsorted (ambiguous in spec but required by interop tests)
+        // - A single-entry dictionary is trivially sorted
+        // - Otherwise, an already-sorted dictionary becomes unsorted if the new entry breaks order
+        self.is_sorted = match n {
+            1 => true,
+            _ => self.is_sorted && self.field_names[n - 2] < self.field_names[n - 1],
+        };
+
+        id as u32
+    }
+
+    /// The current length, in bytes, of the underlying metadata buffer
+    pub fn offset(&self) -> usize {
+        self.metadata_buffer.len()
+    }
+
+    /// Returns the number of field names stored in the metadata builder.
+    /// Note: this method should be the only place to call `self.field_names.len()`
+    ///
+    /// # Panics
+    ///
+    /// If the number of field names exceeds the maximum allowed value for `u32`.
+    fn num_field_names(&self) -> usize {
+        let n = self.field_names.len();
+        assert!(n <= u32::MAX as usize);
+        n
+    }
+
+    /// Returns the field name stored at index `i`.
+    fn field_name(&self, i: usize) -> &str {
+        &self.field_names[i]
+    }
+
+    /// Total number of bytes taken by all field name strings.
+    fn metadata_size(&self) -> usize {
+        self.field_names.iter().map(String::len).sum()
+    }
+
+    /// Finalizes the metadata dictionary and appends its serialized bytes to the underlying buffer,
+    /// returning the resulting [`Self::offset`]. The builder state is reset and ready to start
+    /// building a new metadata dictionary.
+    pub fn finish(&mut self) -> usize {
+        let nkeys = self.num_field_names();
+        let total_dict_size = self.metadata_size();
+
+        // Taking the dictionary state out of `self` is what resets the builder for the next
+        // dictionary; the output buffer is kept and only appended to.
+        let field_names = std::mem::take(&mut self.field_names);
+        let is_sorted = std::mem::take(&mut self.is_sorted);
+        let out = &mut self.metadata_buffer;
+
+        // One integer width is used for both the dictionary size and the string offsets, so it
+        // must fit the larger of the two.
+        let offset_size = int_size(nkeys.max(total_dict_size)) as u8;
+        let width = offset_size as usize;
+
+        // Layout: header byte, dictionary size, `nkeys + 1` offsets, then the string bytes.
+        let offset_start = 1 + width;
+        let string_start = offset_start + (nkeys + 1) * width;
+        out.reserve(string_start + total_dict_size);
+
+        // Header: version 1 in the low bits, the sorted flag at bit 4 and `offset_size - 1` in
+        // the top two bits.
+        let sorted_bit = (is_sorted as u8) << 4;
+        let offset_size_bits = (offset_size - 1) << 6;
+        out.push(0x01 | sorted_bit | offset_size_bits);
+
+        // Dictionary size
+        write_offset(out, nkeys, offset_size);
+
+        // Start offset of every string, followed by the end offset of the last one
+        let mut cur_offset = 0;
+        for key in &field_names {
+            write_offset(out, cur_offset, offset_size);
+            cur_offset += key.len();
+        }
+        write_offset(out, cur_offset, offset_size);
+
+        // String data, in dictionary order
+        for key in &field_names {
+            out.extend_from_slice(key.as_bytes());
+        }
+
+        out.len()
+    }
+
+    /// Returns the inner buffer, consuming self without finalizing any in progress metadata.
+    pub fn into_inner(self) -> Vec<u8> {
+        let Self {
+            metadata_buffer, ..
+        } = self;
+        metadata_buffer
+    }
+}
+
+impl<S: AsRef<str>> FromIterator<S> for WritableMetadataBuilder {
+    /// Creates a builder from a default instance and registers the yielded field names in
+    /// iteration order.
+    fn from_iter<T: IntoIterator<Item = S>>(iter: T) -> Self {
+        let mut builder = Self::default();
+        builder.extend(iter);
+        builder
+    }
+}
+
+impl<S: AsRef<str>> Extend<S> for WritableMetadataBuilder {
+    /// Registers every field name yielded by `iter`, in iteration order, via
+    /// `upsert_field_name`.
+    fn extend<T: IntoIterator<Item = S>>(&mut self, iter: T) {
+        let names = iter.into_iter();
+
+        // Pre-size the dictionary using the iterator's lower bound on its length.
+        let (lower_bound, _) = names.size_hint();
+        self.field_names.reserve(lower_bound);
+
+        names.for_each(|name| {
+            self.upsert_field_name(name.as_ref());
+        });
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{
+        ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
+        builder::{
+            metadata::{ReadOnlyMetadataBuilder, WritableMetadataBuilder},
+            object::ObjectBuilder,
+        },
+    };
+
+    #[test]
+    fn test_metadata_builder_from_iter() {
+        // Names supplied in ascending order: the dictionary is flagged as sorted.
+        {
+            let metadata = WritableMetadataBuilder::from_iter(vec!["apple", "banana", "cherry"]);
+            assert_eq!(metadata.num_field_names(), 3);
+            assert_eq!(metadata.field_name(0), "apple");
+            assert_eq!(metadata.field_name(1), "banana");
+            assert_eq!(metadata.field_name(2), "cherry");
+            assert!(metadata.is_sorted);
+        }
+
+        // Names supplied out of order: insertion order is kept and the flag is cleared.
+        {
+            let metadata = WritableMetadataBuilder::from_iter(["zebra", "apple", "banana"]);
+            assert_eq!(metadata.num_field_names(), 3);
+            assert_eq!(metadata.field_name(0), "zebra");
+            assert_eq!(metadata.field_name(1), "apple");
+            assert_eq!(metadata.field_name(2), "banana");
+            assert!(!metadata.is_sorted);
+        }
+
+        // No names at all: an empty dictionary is reported as unsorted.
+        {
+            let metadata = WritableMetadataBuilder::from_iter(Vec::<&str>::new());
+            assert_eq!(metadata.num_field_names(), 0);
+            assert!(!metadata.is_sorted);
+        }
+    }
+
+    #[test]
+    fn test_metadata_builder_extend() {
+        // A default builder starts out empty and unsorted.
+        let mut metadata = WritableMetadataBuilder::default();
+        assert_eq!(metadata.num_field_names(), 0);
+        assert!(!metadata.is_sorted);
+
+        // first batch, in ascending order
+        metadata.extend(["apple", "cherry"]);
+        assert_eq!(metadata.num_field_names(), 2);
+        assert_eq!(metadata.field_name(0), "apple");
+        assert_eq!(metadata.field_name(1), "cherry");
+        assert!(metadata.is_sorted);
+
+        // second batch keeps the sort order intact
+        metadata.extend(vec!["dinosaur", "monkey"]);
+        assert_eq!(metadata.num_field_names(), 4);
+        assert_eq!(metadata.field_name(2), "dinosaur");
+        assert_eq!(metadata.field_name(3), "monkey");
+        assert!(metadata.is_sorted);
+
+        // names that are already registered must not add new entries
+        let count_before = metadata.num_field_names();
+        metadata.extend(["apple", "monkey"]);
+        assert_eq!(metadata.num_field_names(), count_before); // No new fields added
+    }
+
+    #[test]
+    fn test_metadata_builder_extend_sort_order() {
+        let mut metadata = WritableMetadataBuilder::default();
+
+        // a single entry is trivially sorted
+        metadata.extend(["middle"]);
+        assert!(metadata.is_sorted);
+
+        // "zebra" sorts after "middle", so the order still holds
+        metadata.extend(["zebra"]);
+        assert!(metadata.is_sorted);
+
+        // "apple" sorts before "zebra" and breaks the order
+        metadata.extend(["apple"]);
+        assert!(!metadata.is_sorted);
+    }
+
+    #[test]
+    fn test_metadata_builder_from_iter_with_string_types() {
+        // borrowed string slices
+        {
+            let metadata = WritableMetadataBuilder::from_iter(["a", "b", "c"]);
+            assert_eq!(metadata.num_field_names(), 3);
+        }
+
+        // owned `String`s
+        {
+            let metadata = WritableMetadataBuilder::from_iter(vec![
+                "a".to_string(),
+                "b".to_string(),
+                "c".to_string(),
+            ]);
+            assert_eq!(metadata.num_field_names(), 3);
+        }
+
+        // any other type that implements AsRef<str>, here `Box<str>`
+        {
+            let field_names: Vec<Box<str>> = vec!["a".into(), "b".into(), "c".into()];
+            let metadata = WritableMetadataBuilder::from_iter(field_names);
+            assert_eq!(metadata.num_field_names(), 3);
+        }
+    }
+
+    #[test]
+    fn test_read_only_metadata_builder() {
+        // Produce a metadata dictionary that contains three field names.
+        let mut seed_builder = VariantBuilder::new();
+        seed_builder.add_field_name("name");
+        seed_builder.add_field_name("age");
+        seed_builder.add_field_name("active");
+        let (metadata_bytes, _) = seed_builder.finish();
+
+        // Build a new value against that fixed dictionary.
+        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
+        let mut value_builder = ValueBuilder::new();
+
+        {
+            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
+            let mut obj = ObjectBuilder::new(state, false);
+
+            // All three names exist in the dictionary, so every insert succeeds.
+            obj.insert("name", "Alice");
+            obj.insert("age", 30i8);
+            obj.insert("active", true);
+            obj.finish();
+        }
+
+        let value = value_builder.into_inner();
+
+        // The value bytes must decode correctly against the original metadata bytes.
+        let variant = Variant::try_new(&metadata_bytes, &value).unwrap();
+        let obj = variant.as_object().unwrap();
+        assert_eq!(obj.get("name"), Some(Variant::from("Alice")));
+        assert_eq!(obj.get("age"), Some(Variant::Int8(30)));
+        assert_eq!(obj.get("active"), Some(Variant::from(true)));
+    }
+
+    #[test]
+    fn test_read_only_metadata_builder_fails_on_unknown_field() {
+        // Produce a metadata dictionary that contains a single field name.
+        let mut seed_builder = VariantBuilder::new();
+        seed_builder.add_field_name("known_field");
+        let (metadata_bytes, _) = seed_builder.finish();
+
+        // Build a new value against that fixed dictionary.
+        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
+        let mut value_builder = ValueBuilder::new();
+
+        {
+            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
+            let mut obj = ObjectBuilder::new(state, false);
+
+            // The known name is accepted.
+            obj.insert("known_field", "value");
+
+            // "unknown_field" is not in the dictionary, so the fallible insert must report it.
+            let result = obj.try_insert("unknown_field", "value");
+            assert!(result.is_err());
+            assert!(
+                result
+                    .unwrap_err()
+                    .to_string()
+                    .contains("Field name 'unknown_field' not found")
+            );
+        }
+    }
+}
diff --git a/parquet-variant/src/builder/object.rs b/parquet-variant/src/builder/object.rs
new file mode 100644
index 000000000000..876c2e2d4c7c
--- /dev/null
+++ b/parquet-variant/src/builder/object.rs
@@ -0,0 +1,911 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::builder::list::ListBuilder;
+use crate::builder::metadata::MetadataBuilder;
+use crate::decoder::VariantBasicType;
+use crate::{
+    BASIC_TYPE_BITS, BuilderSpecificState, ParentState, ValueBuilder, Variant, VariantBuilderExt,
+    int_size,
+};
+use arrow_schema::ArrowError;
+use indexmap::IndexMap;
+
+/// Packs the large flag, `ID_SIZE - 1`, `OFFSET_SIZE - 1` and the object basic type into one byte.
+fn object_header<const LARGE_BIT: u8, const ID_SIZE: u8, const OFFSET_SIZE: u8>() -> u8 {
+    let large_flag = LARGE_BIT << (BASIC_TYPE_BITS + 4);
+    let widths = ((ID_SIZE - 1) << (BASIC_TYPE_BITS + 2)) | ((OFFSET_SIZE - 1) << BASIC_TYPE_BITS);
+    large_flag | widths | VariantBasicType::Object as u8
+}
+
+struct ObjectHeaderWriter<const OFFSET_SIZE: u8, const ID_SIZE: u8>();
+
+impl<const OFFSET_SIZE: u8, const ID_SIZE: u8> ObjectHeaderWriter<OFFSET_SIZE, ID_SIZE> {
+    /// Appends an object header to `dst`: the header byte, the field count (4 bytes when it
+    /// exceeds `u8::MAX`, otherwise 1 byte), each field id, each field offset and `data_size`.
+    ///
+    /// Field ids are written with `ID_SIZE` bytes each; offsets and `data_size` with
+    /// `OFFSET_SIZE` bytes each.
+    fn write(
+        dst: &mut Vec<u8>,
+        num_fields: usize,
+        field_ids: impl Iterator<Item = u32>,
+        offsets: impl Iterator<Item = usize>,
+        data_size: usize,
+    ) {
+        if num_fields > u8::MAX as usize {
+            dst.push(object_header::<1, { ID_SIZE }, { OFFSET_SIZE }>());
+            append_packed_u32::<4>(dst, num_fields);
+        } else {
+            dst.push(object_header::<0, { ID_SIZE }, { OFFSET_SIZE }>());
+            append_packed_u32::<1>(dst, num_fields);
+        }
+
+        field_ids.for_each(|id| append_packed_u32::<ID_SIZE>(dst, id as usize));
+
+        // `data_size` goes last, encoded like one more offset.
+        offsets
+            .chain(std::iter::once(data_size))
+            .for_each(|offset| append_packed_u32::<OFFSET_SIZE>(dst, offset));
+    }
+}
+
+#[inline(always)]
+fn append_packed_u32<const SIZE: u8>(dest: &mut Vec<u8>, value: usize) {
+    dest.extend_from_slice(&value.to_le_bytes()[..usize::from(SIZE)]); // low `SIZE` LE bytes
+}
+
+/// A builder for creating [`Variant::Object`] values.
+///
+/// See the examples on [`VariantBuilder`] for usage.
+///
+/// [`VariantBuilder`]: crate::VariantBuilder
+#[derive(Debug)]
+pub struct ObjectBuilder<'a, S: BuilderSpecificState> {
+    parent_state: ParentState<'a, S>,
+    pub(crate) fields: IndexMap<u32, usize>, // field id -> value offset from the object's start
+    validate_unique_fields: bool, // when set, inserting a duplicate key is an error
+}
+
+impl<'a, S: BuilderSpecificState> ObjectBuilder<'a, S> {
+    /// Creates an empty object builder layered on top of `parent_state`.
+    pub fn new(parent_state: ParentState<'a, S>, validate_unique_fields: bool) -> Self {
+        Self {
+            fields: IndexMap::new(),
+            validate_unique_fields,
+            parent_state,
+        }
+    }
+
+    /// Inserts `value` under `key` into this object.
+    ///
+    /// # See Also
+    /// - [`ObjectBuilder::try_insert`] for a fallible version.
+    /// - [`ObjectBuilder::with_field`] for a builder-style API.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `key` is rejected, e.g. a duplicate field name while validation is enabled.
+    pub fn insert<'m, 'd, T: Into<Variant<'m, 'd>>>(&mut self, key: &str, value: T) {
+        let (state, _) = self.parent_state(key).unwrap();
+        let variant: Variant<'m, 'd> = value.into();
+        ValueBuilder::append_variant(state, variant);
+    }
+
+    /// Fallible counterpart of [`ObjectBuilder::insert`]: inserts `value` under `key`.
+    ///
+    /// # See Also
+    /// - [`ObjectBuilder::insert`] for an infallible version that panics
+    /// - [`ObjectBuilder::try_with_field`] for a builder-style API.
+    ///
+    /// # Note
+    /// Attempting to insert a duplicate field name produces an error if unique field
+    /// validation is enabled. Otherwise, the new value overwrites the previous field mapping
+    /// without erasing the old value, resulting in a larger variant
+    pub fn try_insert<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        &mut self,
+        key: &str,
+        value: T,
+    ) -> Result<(), ArrowError> {
+        self.parent_state(key)
+            .and_then(|(state, _)| ValueBuilder::try_append_variant(state, value.into()))
+    }
+
+    /// Add a field with key and value to the object by copying raw bytes when possible.
+    ///
+    /// For objects and lists, this directly copies their underlying byte representation instead of
+    /// performing a logical copy, and without touching the metadata builder. For other variant
+    /// types, this falls back to the standard append behavior.
+    ///
+    /// The caller must ensure that the metadata dictionary is already built and correct for
+    /// any objects or lists being appended, but the value's new field name is handled normally.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `key` is rejected, e.g. a duplicate field name while validation is enabled.
+    /// For a fallible version, use [`ObjectBuilder::try_insert_bytes`]
+    pub fn insert_bytes<'m, 'd>(&mut self, key: &str, value: impl Into<Variant<'m, 'd>>) {
+        self.try_insert_bytes(key, value).unwrap()
+    }
+
+    /// Add a field with key and value to the object by copying raw bytes when possible.
+    ///
+    /// For objects and lists, this directly copies their underlying byte representation instead of
+    /// performing a logical copy, and without touching the metadata builder. For other variant
+    /// types, this falls back to the standard append behavior.
+    ///
+    /// The caller must ensure that the metadata dictionary is already built and correct for
+    /// any objects or lists being appended, but the value's new field name is handled normally.
+    ///
+    /// # Note
+    /// A duplicate key is an error when unique field validation is enabled. Otherwise the new
+    /// value overwrites the previous mapping, but the old value stays in the buffer.
+    pub fn try_insert_bytes<'m, 'd>(
+        &mut self,
+        key: &str,
+        value: impl Into<Variant<'m, 'd>>,
+    ) -> Result<(), ArrowError> {
+        self.parent_state(key).map(|(state, _)| {
+            ValueBuilder::append_variant_bytes(state, value.into());
+        })
+    }
+
+    /// Builder style API for adding a field with key and value to the object
+    ///
+    /// Same as [`ObjectBuilder::insert`] (including its panics), but returns `self` for chaining.
+    pub fn with_field<'m, 'd, T: Into<Variant<'m, 'd>>>(mut self, key: &str, value: T) -> Self {
+        self.insert(key, value);
+        self
+    }
+
+    /// Builder style API for adding a field with key and value to the object
+    ///
+    /// Same as [`ObjectBuilder::try_insert`], but hands `self` back on success so calls can be
+    /// chained; on failure the error is returned and the builder is dropped.
+    pub fn try_with_field<'m, 'd, T: Into<Variant<'m, 'd>>>(
+        mut self,
+        key: &str,
+        value: T,
+    ) -> Result<Self, ArrowError> {
+        self.try_insert(key, value).map(|()| self)
+    }
+
+    /// Enables validation for unique field keys when inserting into this object.
+    ///
+    /// When this is enabled, adding a duplicate field key makes [`ObjectBuilder::try_insert`]
+    /// return an error and [`ObjectBuilder::insert`] panic; `finish` itself reports nothing.
+    pub fn with_validate_unique_fields(mut self, validate_unique_fields: bool) -> Self {
+        self.validate_unique_fields = validate_unique_fields;
+        self
+    }
+
+    // Also returns `validate_unique_fields`, since `self` stays mutably borrowed by the state.
+    fn parent_state<'b>(
+        &'b mut self,
+        field_name: &str,
+    ) -> Result<(ParentState<'b, ObjectState<'b>>, bool), ArrowError> {
+        let validate = self.validate_unique_fields;
+        ParentState::try_object(
+            self.parent_state.value_builder,
+            self.parent_state.metadata_builder,
+            &mut self.fields,
+            self.parent_state.saved_value_builder_offset,
+            field_name,
+            validate,
+        )
+        .map(|state| (state, validate))
+    }
+
+    /// Returns an object builder that can be used to append a new (nested) object to this object.
+    ///
+    /// Panics where [`ObjectBuilder::try_new_object`] would fail, e.g. on a rejected duplicate key.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ObjectBuilder::finish`] is called.
+    pub fn new_object<'b>(&'b mut self, key: &str) -> ObjectBuilder<'b, ObjectState<'b>> {
+        self.try_new_object(key).unwrap()
+    }
+
+    /// Returns an object builder that can be used to append a new (nested) object to this object.
+    ///
+    /// Fails if `key` is rejected, e.g. a duplicate field name while validation is enabled.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ObjectBuilder::finish`] is called.
+    pub fn try_new_object<'b>(
+        &'b mut self,
+        key: &str,
+    ) -> Result<ObjectBuilder<'b, ObjectState<'b>>, ArrowError> {
+        self.parent_state(key)
+            .map(|(state, validate)| ObjectBuilder::new(state, validate))
+    }
+
+    /// Returns a list builder that can be used to append a new (nested) list to this object.
+    ///
+    /// Panics where [`ObjectBuilder::try_new_list`] would fail, e.g. on a rejected duplicate key.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ListBuilder::finish`] is called.
+    pub fn new_list<'b>(&'b mut self, key: &str) -> ListBuilder<'b, ObjectState<'b>> {
+        self.try_new_list(key).unwrap()
+    }
+
+    /// Returns a list builder that can be used to append a new (nested) list to this object.
+    ///
+    /// Fails if `key` is rejected, e.g. a duplicate field name while validation is enabled.
+    ///
+    /// WARNING: The builder will have no effect unless/until [`ListBuilder::finish`] is called.
+    pub fn try_new_list<'b>(
+        &'b mut self,
+        key: &str,
+    ) -> Result<ListBuilder<'b, ObjectState<'b>>, ArrowError> {
+        self.parent_state(key)
+            .map(|(state, validate)| ListBuilder::new(state, validate))
+    }
+
+    /// Finalizes this object and appends it to its parent, which otherwise remains unmodified.
+    pub fn finish(mut self) {
+        let metadata_builder = self.parent_state.metadata_builder();
+        // Order the fields by field name (not by field id) before the header is written.
+        self.fields.sort_by(|&field_a_id, _, &field_b_id, _| {
+            let field_a_name = metadata_builder.field_name(field_a_id as usize);
+            let field_b_name = metadata_builder.field_name(field_b_id as usize);
+            field_a_name.cmp(field_b_name)
+        });
+
+        let max_id = self.fields.iter().map(|(i, _)| *i).max().unwrap_or(0);
+        let id_size = int_size(max_id as usize);
+
+        let starting_offset = self.parent_state.saved_value_builder_offset;
+        let value_builder = self.parent_state.value_builder();
+        let current_offset = value_builder.offset();
+        // `data_size` is how far the value builder has advanced past `starting_offset`
+        let data_size = current_offset - starting_offset;
+        let offset_size = int_size(data_size);
+
+        let num_fields = self.fields.len();
+        let is_large = num_fields > u8::MAX as usize;
+
+        let header_size = 1 + // header byte
+            (if is_large { 4 } else { 1 }) + // num_fields
+            (num_fields * id_size as usize) + // field IDs
+            ((num_fields + 1) * offset_size as usize); // field offsets + data_size
+
+        let mut bytes_to_splice = Vec::with_capacity(header_size);
+
+        macro_rules! write_header {
+            ($offset_size:expr, $id_size:expr) => {
+                ObjectHeaderWriter::<{ $offset_size as u8 }, { $id_size as u8 }>::write(
+                    &mut bytes_to_splice,
+                    num_fields,
+                    self.fields.keys().copied(),
+                    self.fields.values().copied(),
+                    data_size,
+                )
+            };
+        }
+
+        use crate::decoder::OffsetSizeBytes::*;
+        match (offset_size, id_size) {
+            (One, One) => write_header!(One, One),
+            (One, Two) => write_header!(One, Two),
+            (One, Three) => write_header!(One, Three),
+            (One, Four) => write_header!(One, Four),
+            (Two, One) => write_header!(Two, One),
+            (Two, Two) => write_header!(Two, Two),
+            (Two, Three) => write_header!(Two, Three),
+            (Two, Four) => write_header!(Two, Four),
+            (Three, One) => write_header!(Three, One),
+            (Three, Two) => write_header!(Three, Two),
+            (Three, Three) => write_header!(Three, Three),
+            (Three, Four) => write_header!(Three, Four),
+            (Four, One) => write_header!(Four, One),
+            (Four, Two) => write_header!(Four, Two),
+            (Four, Three) => write_header!(Four, Three),
+            (Four, Four) => write_header!(Four, Four),
+        }
+
+        // Insert the header at `starting_offset`, shifting the existing data to make room
+        value_builder
+            .inner_mut()
+            .splice(starting_offset..starting_offset, bytes_to_splice);
+
+        self.parent_state.finish();
+    }
+}
+
+/// Inserts every `(key, value)` pair via [`ObjectBuilder::insert`] (and panics where it does).
+impl<'a, 'm, 'v, S, K, V> Extend<(K, V)> for ObjectBuilder<'a, S>
+where
+    S: BuilderSpecificState,
+    K: AsRef<str>,
+    V: Into<Variant<'m, 'v>>,
+{
+    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
+        iter.into_iter()
+            .for_each(|(key, value)| self.insert(key.as_ref(), value));
+    }
+}
+
+/// Internal state for object building
+#[derive(Debug)]
+pub struct ObjectState<'a> {
+    fields: &'a mut IndexMap<u32, usize>, // field map of the object receiving the new field
+    saved_fields_size: usize, // `fields.len()` captured before the new field was inserted
+}
+
+// `ParentState::try_object` eagerly records the new field's offset; undo that if not finished.
+impl BuilderSpecificState for ObjectState<'_> {
+    fn rollback(&mut self) {
+        self.fields.truncate(self.saved_fields_size);
+    }
+}
+
+impl<'a> ParentState<'a, ObjectState<'a>> {
+    /// Creates a new instance suitable for an [`ObjectBuilder`]. The value and metadata builder state
+    /// is checkpointed and will roll back on drop, unless [`Self::finish`] is called. The new
+    /// field's name and offset are also captured eagerly and will also roll back if not finished.
+    ///
+    /// Fails if the field name is invalid, e.g. a duplicate while `validate_unique_fields` is set.
+    pub fn try_object(
+        value_builder: &'a mut ValueBuilder,
+        metadata_builder: &'a mut dyn MetadataBuilder,
+        fields: &'a mut IndexMap<u32, usize>,
+        saved_parent_value_builder_offset: usize,
+        field_name: &str,
+        validate_unique_fields: bool,
+    ) -> Result<Self, ArrowError> {
+        // Field offset = current buffer size - buffer size when the parent builder was created.
+        let saved_value_builder_offset = value_builder.offset();
+        let saved_fields_size = fields.len();
+        let saved_metadata_builder_dict_size = metadata_builder.num_field_names();
+        let field_id = metadata_builder.try_upsert_field_name(field_name)?;
+        let field_start = saved_value_builder_offset - saved_parent_value_builder_offset;
+        // Reject a duplicate before touching `fields`, so a failed call leaves the map intact.
+        if validate_unique_fields && fields.contains_key(&field_id) {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Duplicate field name: {field_name}"
+            )));
+        }
+        fields.insert(field_id, field_start);
+
+        let builder_state = ObjectState {
+            fields,
+            saved_fields_size,
+        };
+        Ok(Self {
+            saved_metadata_builder_dict_size,
+            saved_value_builder_offset,
+            value_builder,
+            metadata_builder,
+            builder_state,
+            finished: false,
+        })
+    }
+}
+
+/// A [`VariantBuilderExt`] that inserts a new field into a variant object.
+pub struct ObjectFieldBuilder<'o, 'v, 's, S: BuilderSpecificState> {
+    key: &'s str, // name of the field that appended values are stored under
+    builder: &'o mut ObjectBuilder<'v, S>, // object builder that receives the field
+}
+
+impl<'o, 'v, 's, S: BuilderSpecificState> ObjectFieldBuilder<'o, 'v, 's, S> {
+    pub fn new(key: &'s str, builder: &'o mut ObjectBuilder<'v, S>) -> Self {
+        Self { key, builder }
+    }
+}
+
+impl<S: BuilderSpecificState> VariantBuilderExt for ObjectFieldBuilder<'_, '_, '_, S> {
+    type State<'a>
+        = ObjectState<'a>
+    where
+        Self: 'a;
+
+    /// A NULL object field is interpreted as missing, so nothing gets inserted at all.
+    fn append_null(&mut self) {}
+    fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
+        self.builder.insert(self.key, value);
+    }
+    /// Starts a nested list that will be stored under this builder's key.
+    fn try_new_list(&mut self) -> Result<ListBuilder<'_, Self::State<'_>>, ArrowError> {
+        self.builder.try_new_list(self.key)
+    }
+    /// Starts a nested object that will be stored under this builder's key.
+    fn try_new_object(&mut self) -> Result<ObjectBuilder<'_, Self::State<'_>>, ArrowError> {
+        self.builder.try_new_object(self.key)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{
+        ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata,
+        builder::{metadata::ReadOnlyMetadataBuilder, object::ObjectBuilder},
+        decoder::VariantBasicType,
+    };
+
+    #[test]
+    fn test_object() {
+        let mut builder = VariantBuilder::new();
+
+        builder
+            .new_object()
+            .with_field("name", "John")
+            .with_field("age", 42i8)
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        assert!(!metadata.is_empty());
+        assert!(!value.is_empty());
+    }
+
+    #[test]
+    fn test_object_field_ordering() {
+        let mut builder = VariantBuilder::new();
+
+        builder
+            .new_object()
+            .with_field("zebra", "stripes")
+            .with_field("apple", "red")
+            .with_field("banana", "yellow")
+            .finish();
+
+        let (_, value) = builder.finish();
+
+        let header = value[0];
+        assert_eq!(header & 0x03, VariantBasicType::Object as u8);
+
+        let field_count = value[1] as usize;
+        assert_eq!(field_count, 3);
+
+        // Get field IDs from the object header
+        let field_ids: Vec<u8> = value[2..5].to_vec();
+
+        // apple(1), banana(2), zebra(0)
+        assert_eq!(field_ids, vec![1, 2, 0]);
+    }
+
+    #[test]
+    fn test_duplicate_fields_in_object() {
+        let mut builder = VariantBuilder::new();
+        builder
+            .new_object()
+            .with_field("name", "Ron Artest")
+            .with_field("name", "Metta World Peace") // Duplicate field
+            .finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        let obj = variant.as_object().unwrap();
+        assert_eq!(obj.len(), 1);
+        assert_eq!(obj.field(0).unwrap(), Variant::from("Metta World Peace"));
+
+        assert_eq!(
+            vec![("name", Variant::from("Metta World Peace"))],
+            obj.iter().collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn test_read_only_metadata_builder() {
+        // First create some metadata with a few field names
+        let mut default_builder = VariantBuilder::new();
+        default_builder.add_field_name("name");
+        default_builder.add_field_name("age");
+        default_builder.add_field_name("active");
+        let (metadata_bytes, _) = default_builder.finish();
+
+        // Use the metadata to build new variant values
+        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+        let mut metadata_builder = ReadOnlyMetadataBuilder::new(&metadata);
+        let mut value_builder = ValueBuilder::new();
+
+        {
+            let state = ParentState::variant(&mut value_builder, &mut metadata_builder);
+            let mut obj = ObjectBuilder::new(state, false);
+
+            // These should succeed because the fields exist in the metadata
+            obj.insert("name", "Alice");
+            obj.insert("age", 30i8);
+            obj.insert("active", true);
+            obj.finish();
+        }
+
+        let value = value_builder.into_inner();
+
+        // Verify the variant was built correctly
+        let variant = Variant::try_new(&metadata_bytes, &value).unwrap();
+        let obj = variant.as_object().unwrap();
+        assert_eq!(obj.get("name"), Some(Variant::from("Alice")));
+        assert_eq!(obj.get("age"), Some(Variant::Int8(30)));
+        assert_eq!(obj.get("active"), Some(Variant::from(true)));
+    }
+
+    // Appending an existing object to a builder seeded with its metadata round-trips it
+    #[test]
+    fn test_append_object() {
+        let (m1, v1) = make_object();
+        let variant = Variant::new(&m1, &v1);
+
+        let mut builder = VariantBuilder::new().with_metadata(VariantMetadata::new(&m1));
+
+        builder.append_value(variant.clone());
+
+        let (metadata, value) = builder.finish();
+        assert_eq!(variant, Variant::new(&metadata, &value));
+    }
+
+    /// make an object variant with field names in reverse lexicographical order
+    fn make_object() -> (Vec<u8>, Vec<u8>) {
+        let mut builder = VariantBuilder::new();
+
+        let mut obj = builder.new_object();
+
+        obj.insert("b", true);
+        obj.insert("a", false);
+        obj.finish();
+        builder.finish()
+    }
+
+    #[test]
+    fn test_append_nested_object() {
+        let (m1, v1) = make_nested_object();
+        let variant = Variant::new(&m1, &v1);
+
+        // because we can guarantee metadata is validated through the builder
+        let mut builder = VariantBuilder::new().with_metadata(VariantMetadata::new(&m1));
+        builder.append_value(variant.clone());
+
+        let (metadata, value) = builder.finish();
+        let result_variant = Variant::new(&metadata, &value);
+
+        assert_eq!(variant, result_variant);
+    }
+
+    /// make a nested object variant
+    fn make_nested_object() -> (Vec<u8>, Vec<u8>) {
+        let mut builder = VariantBuilder::new();
+
+        {
+            let mut outer_obj = builder.new_object();
+
+            {
+                let mut inner_obj = outer_obj.new_object("b");
+                inner_obj.insert("a", "inner_value");
+                inner_obj.finish();
+            }
+
+            outer_obj.finish();
+        }
+
+        builder.finish()
+    }
+
+    #[test]
+    fn test_nested_object() {
+        /*
+        {
+            "c": {
+                "b": "a"
+            }
+        }
+
+        */
+
+        let mut builder = VariantBuilder::new();
+        {
+            let mut outer_object_builder = builder.new_object();
+            {
+                let mut inner_object_builder = outer_object_builder.new_object("c");
+                inner_object_builder.insert("b", "a");
+                inner_object_builder.finish();
+            }
+
+            outer_object_builder.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_object = variant.as_object().unwrap();
+
+        assert_eq!(outer_object.len(), 1);
+        assert_eq!(outer_object.field_name(0).unwrap(), "c");
+
+        let inner_object_variant = outer_object.field(0).unwrap();
+        let inner_object = inner_object_variant.as_object().unwrap();
+
+        assert_eq!(inner_object.len(), 1);
+        assert_eq!(inner_object.field_name(0).unwrap(), "b");
+        assert_eq!(inner_object.field(0).unwrap(), Variant::from("a"));
+    }
+
+    #[test]
+    fn test_nested_object_with_duplicate_field_names_per_object() {
+        /*
+        {
+            "c": {
+                "b": false,
+                "c": "a"
+            },
+            "b": false,
+        }
+
+        */
+
+        let mut builder = VariantBuilder::new();
+        {
+            let mut outer_object_builder = builder.new_object();
+            {
+                let mut inner_object_builder = outer_object_builder.new_object("c");
+                inner_object_builder.insert("b", false);
+                inner_object_builder.insert("c", "a");
+
+                inner_object_builder.finish();
+            }
+
+            outer_object_builder.insert("b", false);
+            outer_object_builder.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_object = variant.as_object().unwrap();
+
+        assert_eq!(outer_object.len(), 2);
+        assert_eq!(outer_object.field_name(0).unwrap(), "b");
+
+        let inner_object_variant = outer_object.field(1).unwrap();
+        let inner_object = inner_object_variant.as_object().unwrap();
+
+        assert_eq!(inner_object.len(), 2);
+        assert_eq!(inner_object.field_name(0).unwrap(), "b");
+        assert_eq!(inner_object.field(0).unwrap(), Variant::from(false));
+        assert_eq!(inner_object.field_name(1).unwrap(), "c");
+        assert_eq!(inner_object.field(1).unwrap(), Variant::from("a"));
+    }
+
+    #[test]
+    fn test_nested_object_with_heterogeneous_fields() {
+        /*
+        {
+            "a": false,
+            "c": {
+                "b": "a",
+                "c": {
+                   "aa": "bb",
+                },
+                "d": {
+                    "cc": "dd"
+                }
+            },
+            "b": true,
+            "d": {
+               "e": 1,
+               "f": [1, true],
+               "g": ["tree", false],
+            }
+        }
+        */
+
+        let mut builder = VariantBuilder::new();
+        {
+            let mut outer_object_builder = builder.new_object();
+
+            outer_object_builder.insert("a", false);
+
+            {
+                let mut inner_object_builder = outer_object_builder.new_object("c");
+                inner_object_builder.insert("b", "a");
+
+                {
+                    let mut inner_inner_object_builder = inner_object_builder.new_object("c");
+                    inner_inner_object_builder.insert("aa", "bb");
+                    inner_inner_object_builder.finish();
+                }
+
+                {
+                    let mut inner_inner_object_builder = inner_object_builder.new_object("d");
+                    inner_inner_object_builder.insert("cc", "dd");
+                    inner_inner_object_builder.finish();
+                }
+                inner_object_builder.finish();
+            }
+
+            outer_object_builder.insert("b", true);
+
+            {
+                let mut inner_object_builder = outer_object_builder.new_object("d");
+                inner_object_builder.insert("e", 1);
+                {
+                    let mut inner_list_builder = inner_object_builder.new_list("f");
+                    inner_list_builder.append_value(1);
+                    inner_list_builder.append_value(true);
+
+                    inner_list_builder.finish();
+                }
+
+                {
+                    let mut inner_list_builder = inner_object_builder.new_list("g");
+                    inner_list_builder.append_value("tree");
+                    inner_list_builder.append_value(false);
+
+                    inner_list_builder.finish();
+                }
+
+                inner_object_builder.finish();
+            }
+
+            outer_object_builder.finish();
+        }
+
+        let (metadata, value) = builder.finish();
+
+        // note, object fields are now sorted lexicographically by field name
+        /*
+         {
+            "a": false,
+            "b": true,
+            "c": {
+                "b": "a",
+                "c": {
+                   "aa": "bb",
+                },
+                "d": {
+                    "cc": "dd"
+                }
+            },
+            "d": {
+               "e": 1,
+               "f": [1, true],
+               "g": ["tree", false],
+            }
+        }
+        */
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_object = variant.as_object().unwrap();
+
+        assert_eq!(outer_object.len(), 4);
+
+        assert_eq!(outer_object.field_name(0).unwrap(), "a");
+        assert_eq!(outer_object.field(0).unwrap(), Variant::from(false));
+
+        assert_eq!(outer_object.field_name(2).unwrap(), "c");
+
+        let inner_object_variant = outer_object.field(2).unwrap();
+        let inner_object = inner_object_variant.as_object().unwrap();
+
+        assert_eq!(inner_object.len(), 3);
+        assert_eq!(inner_object.field_name(0).unwrap(), "b");
+        assert_eq!(inner_object.field(0).unwrap(), Variant::from("a"));
+
+        let inner_iner_object_variant_c = inner_object.field(1).unwrap();
+        let inner_inner_object_c = inner_iner_object_variant_c.as_object().unwrap();
+        assert_eq!(inner_inner_object_c.len(), 1);
+        assert_eq!(inner_inner_object_c.field_name(0).unwrap(), "aa");
+        assert_eq!(inner_inner_object_c.field(0).unwrap(), Variant::from("bb"));
+
+        let inner_iner_object_variant_d = inner_object.field(2).unwrap();
+        let inner_inner_object_d = inner_iner_object_variant_d.as_object().unwrap();
+        assert_eq!(inner_inner_object_d.len(), 1);
+        assert_eq!(inner_inner_object_d.field_name(0).unwrap(), "cc");
+        assert_eq!(inner_inner_object_d.field(0).unwrap(), Variant::from("dd"));
+
+        assert_eq!(outer_object.field_name(1).unwrap(), "b");
+        assert_eq!(outer_object.field(1).unwrap(), Variant::from(true));
+
+        let out_object_variant_d = outer_object.field(3).unwrap();
+        let out_object_d = out_object_variant_d.as_object().unwrap();
+        assert_eq!(out_object_d.len(), 3);
+        assert_eq!("e", out_object_d.field_name(0).unwrap());
+        assert_eq!(Variant::from(1), out_object_d.field(0).unwrap());
+        assert_eq!("f", out_object_d.field_name(1).unwrap());
+
+        let first_inner_list_variant_f = out_object_d.field(1).unwrap();
+        let first_inner_list_f = first_inner_list_variant_f.as_list().unwrap();
+        assert_eq!(2, first_inner_list_f.len());
+        assert_eq!(Variant::from(1), first_inner_list_f.get(0).unwrap());
+        assert_eq!(Variant::from(true), first_inner_list_f.get(1).unwrap());
+
+        let second_inner_list_variant_g = out_object_d.field(2).unwrap();
+        let second_inner_list_g = second_inner_list_variant_g.as_list().unwrap();
+        assert_eq!(2, second_inner_list_g.len());
+        assert_eq!(Variant::from("tree"), second_inner_list_g.get(0).unwrap());
+        assert_eq!(Variant::from(false), second_inner_list_g.get(1).unwrap());
+    }
+
+    #[test]
+    fn test_object_without_unique_field_validation() {
+        let mut builder = VariantBuilder::new();
+
+        // Root object with duplicates
+        let mut obj = builder.new_object();
+        obj.insert("a", 1);
+        obj.insert("a", 2);
+        obj.finish();
+
+        // Deeply nested list structure with duplicates
+        let mut builder = VariantBuilder::new();
+        let mut outer_list = builder.new_list();
+        let mut inner_list = outer_list.new_list();
+        let mut nested_obj = inner_list.new_object();
+        nested_obj.insert("x", 1);
+        nested_obj.insert("x", 2);
+        nested_obj.new_list("x").with_value(3).finish();
+        nested_obj.new_object("x").with_field("y", 4).finish();
+        nested_obj.finish();
+        inner_list.finish();
+        outer_list.finish();
+
+        // Verify the nested object is built correctly -- the nested object "x" should have "won"
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let outer_element = variant.get_list_element(0).unwrap();
+        let inner_element = outer_element.get_list_element(0).unwrap();
+        let outer_field = inner_element.get_object_field("x").unwrap();
+        let inner_field = outer_field.get_object_field("y").unwrap();
+        assert_eq!(inner_field, Variant::from(4));
+    }
+
+    #[test]
+    fn test_object_with_unique_field_validation() {
+        let mut builder = VariantBuilder::new().with_validate_unique_fields(true);
+
+        // Root-level object with duplicates
+        let result = builder
+            .new_object()
+            .with_field("a", 1)
+            .with_field("b", 2)
+            .try_with_field("a", 3);
+        assert_eq!(
+            result.unwrap_err().to_string(),
+            "Invalid argument error: Duplicate field name: a"
+        );
+
+        // Deeply nested list -> list -> object with duplicate
+        let mut outer_list = builder.new_list();
+        let mut inner_list = outer_list.new_list();
+        let mut object = inner_list.new_object().with_field("x", 1);
+        let nested_result = object.try_insert("x", 2);
+        assert_eq!(
+            nested_result.unwrap_err().to_string(),
+            "Invalid argument error: Duplicate field name: x"
+        );
+        let nested_result = object.try_new_list("x");
+        assert_eq!(
+            nested_result.unwrap_err().to_string(),
+            "Invalid argument error: Duplicate field name: x"
+        );
+
+        let nested_result = object.try_new_object("x");
+        assert_eq!(
+            nested_result.unwrap_err().to_string(),
+            "Invalid argument error: Duplicate field name: x"
+        );
+
+        drop(object);
+        inner_list.finish();
+        outer_list.finish();
+
+        // Valid object should succeed
+        let mut list = builder.new_list();
+        let mut valid_obj = list.new_object();
+        valid_obj.insert("m", 1);
+        valid_obj.insert("n", 2);
+
+        valid_obj.finish();
+        list.finish();
+    }
+}
diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs
index 7096b0a08631..8cf3cec1129e 100644
--- a/parquet-variant/src/decoder.rs
+++ b/parquet-variant/src/decoder.rs
@@ -14,15 +14,22 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::utils::{array_from_slice, slice_from_slice, string_from_slice};
 use crate::ShortString;
+use crate::utils::{
+    array_from_slice, overflow_error, slice_from_slice_at_offset, string_from_slice,
+};
 
 use arrow_schema::ArrowError;
-use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
-
-use std::array::TryFromSliceError;
-use std::num::TryFromIntError;
-
+use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc};
+use uuid::Uuid;
+
+/// The basic type of a [`Variant`] value, encoded in the first two bits of the
+/// header byte.
+///
+/// See the [Variant Encoding specification] for details
+///
+/// [`Variant`]: crate::Variant
+/// [Variant Encoding specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#encoding-types
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum VariantBasicType {
     Primitive = 0,
@@ -31,6 +38,13 @@ pub enum VariantBasicType {
     Array = 3,
 }
 
+/// The type of [`VariantBasicType::Primitive`], for a primitive [`Variant`]
+/// value.
+///
+/// See the [Variant Encoding specification] for details
+///
+/// [`Variant`]: crate::Variant
+/// [Variant Encoding specification]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#encoding-types
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum VariantPrimitiveType {
     Null = 0,
@@ -50,6 +64,10 @@ pub enum VariantPrimitiveType {
     Float = 14,
     Binary = 15,
     String = 16,
+    Time = 17,
+    TimestampNanos = 18,
+    TimestampNtzNanos = 19,
+    Uuid = 20,
 }
 
 /// Extracts the basic type from a header byte
@@ -91,9 +109,12 @@ impl TryFrom<u8> for VariantPrimitiveType {
             14 => Ok(VariantPrimitiveType::Float),
             15 => Ok(VariantPrimitiveType::Binary),
             16 => Ok(VariantPrimitiveType::String),
+            17 => Ok(VariantPrimitiveType::Time),
+            18 => Ok(VariantPrimitiveType::TimestampNanos),
+            19 => Ok(VariantPrimitiveType::TimestampNtzNanos),
+            20 => Ok(VariantPrimitiveType::Uuid),
             _ => Err(ArrowError::InvalidArgumentError(format!(
-                "unknown primitive type: {}",
-                value
+                "unknown primitive type: {value}",
             ))),
         }
     }
@@ -102,7 +123,7 @@ impl TryFrom<u8> for VariantPrimitiveType {
 /// Used to unpack offset array entries such as metadata dictionary offsets or object/array value
 /// offsets. Also used to unpack object field ids. These are always derived from a two-bit
 /// `XXX_size_minus_one` field in the corresponding header byte.
-#[derive(Clone, Debug, Copy, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub(crate) enum OffsetSizeBytes {
     One = 1,
     Two = 2,
@@ -122,7 +143,7 @@ impl OffsetSizeBytes {
             _ => {
                 return Err(ArrowError::InvalidArgumentError(
                     "offset_size_minus_one must be 0–3".to_string(),
-                ))
+                ));
             }
         };
         Ok(result)
@@ -130,24 +151,38 @@ impl OffsetSizeBytes {
 
     /// Return one unsigned little-endian value from `bytes`.
     ///
-    /// * `bytes` – the Variant-metadata buffer.
+    /// * `bytes` – the byte buffer to index
+    /// * `index` – 0-based index into the buffer
+    ///
+    /// Each value is `self as u32` bytes wide (1, 2, 3 or 4), zero-extended to 32 bits as needed.
+    pub(crate) fn unpack_u32(&self, bytes: &[u8], index: usize) -> Result<u32, ArrowError> {
+        self.unpack_u32_at_offset(bytes, 0, index)
+    }
+
+    /// Return one unsigned little-endian value from `bytes`.
+    ///
+    /// * `bytes` – the byte buffer to index
     /// * `byte_offset` – number of bytes to skip **before** reading the first
-    ///   value (usually `1` to move past the header byte).
-    /// * `offset_index` – 0-based index **after** the skip
+    ///   value (e.g. `1` to move past a header byte).
+    /// * `offset_index` – 0-based index **after** the skipped bytes
     ///   (`0` is the first value, `1` the next, …).
     ///
-    /// Each value is `self as usize` bytes wide (1, 2, 3 or 4).
-    /// Three-byte values are zero-extended to 32 bits before the final
-    /// fallible cast to `usize`.
-    pub(crate) fn unpack_usize(
+    /// Each value is `self as u32` bytes wide (1, 2, 3 or 4), zero-extended to 32 bits as needed.
+    pub(crate) fn unpack_u32_at_offset(
         &self,
         bytes: &[u8],
         byte_offset: usize,  // how many bytes to skip
         offset_index: usize, // which offset in an array of offsets
-    ) -> Result<usize, ArrowError> {
+    ) -> Result<u32, ArrowError> {
         use OffsetSizeBytes::*;
-        let offset = byte_offset + (*self as usize) * offset_index;
-        let result = match self {
+
+        // Index into the byte array:
+        // byte_offset + (*self as usize) * offset_index
+        let offset = offset_index
+            .checked_mul(*self as usize)
+            .and_then(|n| n.checked_add(byte_offset))
+            .ok_or_else(|| overflow_error("unpacking offset array value"))?;
+        let value = match self {
             One => u8::from_le_bytes(array_from_slice(bytes, offset)?).into(),
             Two => u16::from_le_bytes(array_from_slice(bytes, offset)?).into(),
             Three => {
@@ -157,28 +192,38 @@ impl OffsetSizeBytes {
                 let mut buf = [0u8; 4];
                 buf[..3].copy_from_slice(&b3_chunks);
                 u32::from_le_bytes(buf)
-                    .try_into()
-                    .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?
             }
-            Four => u32::from_le_bytes(array_from_slice(bytes, offset)?)
-                .try_into()
-                .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string()))?,
+            Four => u32::from_le_bytes(array_from_slice(bytes, offset)?),
         };
-        Ok(result)
+        Ok(value)
     }
 }
 
+/// Converts a byte buffer to offset values based on the specific offset size
+pub(crate) fn map_bytes_to_offsets(
+    buffer: &[u8],
+    offset_size: OffsetSizeBytes,
+) -> impl Iterator<Item = usize> + use<'_> {
+    buffer
+        .chunks_exact(offset_size as usize)
+        .map(move |chunk| match offset_size {
+            OffsetSizeBytes::One => chunk[0] as usize,
+            OffsetSizeBytes::Two => u16::from_le_bytes([chunk[0], chunk[1]]) as usize,
+            OffsetSizeBytes::Three => {
+                u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]) as usize
+            }
+            OffsetSizeBytes::Four => {
+                u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]) as usize
+            }
+        })
+}
+
 /// Extract the primitive type from a Variant value-metadata byte
 pub(crate) fn get_primitive_type(metadata: u8) -> Result<VariantPrimitiveType, ArrowError> {
     // last 6 bits contain the primitive-type, see spec
     VariantPrimitiveType::try_from(metadata >> 2)
 }
 
-/// To be used in `map_err` when unpacking an integer from a slice of bytes.
-fn map_try_from_slice_error(e: TryFromSliceError) -> ArrowError {
-    ArrowError::InvalidArgumentError(e.to_string())
-}
-
 /// Decodes an Int8 from the value section of a variant.
 pub(crate) fn decode_int8(data: &[u8]) -> Result<i8, ArrowError> {
     Ok(i8::from_le_bytes(array_from_slice(data, 0)?))
@@ -259,181 +304,319 @@ pub(crate) fn decode_timestampntz_micros(data: &[u8]) -> Result<NaiveDateTime, A
         .map(|v| v.naive_utc())
 }
 
+/// Decodes a Time (microseconds since midnight) from the value section of a variant.
+pub(crate) fn decode_time_ntz(data: &[u8]) -> Result<NaiveTime, ArrowError> {
+    let micros_since_midnight = u64::from_le_bytes(array_from_slice(data, 0)?);
+
+    // Built lazily so that the success path does not allocate an error message.
+    let cast_error = || {
+        ArrowError::CastError(format!(
+            "Could not cast {micros_since_midnight} microseconds into a NaiveTime"
+        ))
+    };
+    // Reject anything at or beyond one full day (86_400_000_000 microseconds).
+    if micros_since_midnight >= 86_400_000_000 {
+        return Err(cast_error());
+    }
+    let (secs, micros) = (micros_since_midnight / 1_000_000, micros_since_midnight % 1_000_000);
+    NaiveTime::from_num_seconds_from_midnight_opt(secs as u32, (micros * 1_000) as u32)
+        .ok_or_else(cast_error)
+}
+
+/// Decodes a TimestampNanos from the value section of a variant.
+pub(crate) fn decode_timestamp_nanos(data: &[u8]) -> Result<DateTime<Utc>, ArrowError> {
+    let nanos_since_epoch = i64::from_le_bytes(array_from_slice(data, 0)?);
+
+    // DateTime::from_timestamp_nanos never fails: every i64 nanosecond count is representable
+    Ok(DateTime::from_timestamp_nanos(nanos_since_epoch))
+}
+
+/// Decodes a TimestampNtzNanos from the value section of a variant.
+pub(crate) fn decode_timestampntz_nanos(data: &[u8]) -> Result<NaiveDateTime, ArrowError> {
+    decode_timestamp_nanos(data).map(|v| v.naive_utc())
+}
+
+/// Decodes a UUID from the first 16 bytes of the value section of a variant.
+pub(crate) fn decode_uuid(data: &[u8]) -> Result<Uuid, ArrowError> {
+    // Bounds-checked read: short input returns an error instead of panicking.
+    array_from_slice::<16>(data, 0).map(Uuid::from_bytes)
+}
+
 /// Decodes a Binary from the value section of a variant.
 pub(crate) fn decode_binary(data: &[u8]) -> Result<&[u8], ArrowError> {
     let len = u32::from_le_bytes(array_from_slice(data, 0)?) as usize;
-    let value = slice_from_slice(data, 4..4 + len)?;
-    Ok(value)
+    slice_from_slice_at_offset(data, 4, 0..len)
 }
 
 /// Decodes a long string from the value section of a variant.
 pub(crate) fn decode_long_string(data: &[u8]) -> Result<&str, ArrowError> {
     let len = u32::from_le_bytes(array_from_slice(data, 0)?) as usize;
-    let string = string_from_slice(data, 4..4 + len)?;
-    Ok(string)
+    string_from_slice(data, 4, 0..len)
 }
 
 /// Decodes a short string from the value section of a variant.
-pub(crate) fn decode_short_string(metadata: u8, data: &[u8]) -> Result<ShortString, ArrowError> {
+pub(crate) fn decode_short_string(
+    metadata: u8,
+    data: &[u8],
+) -> Result<ShortString<'_>, ArrowError> {
     let len = (metadata >> 2) as usize;
-    let string = string_from_slice(data, 0..len)?;
+    let string = string_from_slice(data, 0, 0..len)?;
     ShortString::try_new(string)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-
-    #[test]
-    fn test_i8() -> Result<(), ArrowError> {
-        let data = [0x2a];
-        let result = decode_int8(&data)?;
-        assert_eq!(result, 42);
-        Ok(())
+    use paste::paste;
+
+    macro_rules! test_decoder_bounds {
+        ($test_name:ident, $data:expr, $decode_fn:ident, $expected:expr) => {
+            paste! {
+                #[test]
+                fn [<$test_name _exact_length>]() {
+                    let result = $decode_fn(&$data).unwrap();
+                    assert_eq!(result, $expected);
+                }
+
+                #[test]
+                fn [<$test_name _truncated_length>]() {
+                    // Remove the last byte of data so that there is not enough to decode
+                    let truncated_data = &$data[.. $data.len() - 1];
+                    let result = $decode_fn(truncated_data);
+                    assert!(matches!(result, Err(ArrowError::InvalidArgumentError(_))));
+                }
+            }
+        };
     }
 
-    #[test]
-    fn test_i16() -> Result<(), ArrowError> {
-        let data = [0xd2, 0x04];
-        let result = decode_int16(&data)?;
-        assert_eq!(result, 1234);
-        Ok(())
+    mod integer {
+        use super::*;
+
+        test_decoder_bounds!(test_i8, [0x2a], decode_int8, 42);
+        test_decoder_bounds!(test_i16, [0xd2, 0x04], decode_int16, 1234);
+        test_decoder_bounds!(test_i32, [0x40, 0xe2, 0x01, 0x00], decode_int32, 123456);
+        test_decoder_bounds!(
+            test_i64,
+            [0x15, 0x81, 0xe9, 0x7d, 0xf4, 0x10, 0x22, 0x11],
+            decode_int64,
+            1234567890123456789
+        );
     }
 
-    #[test]
-    fn test_i32() -> Result<(), ArrowError> {
-        let data = [0x40, 0xe2, 0x01, 0x00];
-        let result = decode_int32(&data)?;
-        assert_eq!(result, 123456);
-        Ok(())
-    }
+    mod decimal {
+        use super::*;
+
+        test_decoder_bounds!(
+            test_decimal4,
+            [
+                0x02, // Scale
+                0xd2, 0x04, 0x00, 0x00, // Unscaled Value
+            ],
+            decode_decimal4,
+            (1234, 2)
+        );
 
-    #[test]
-    fn test_i64() -> Result<(), ArrowError> {
-        let data = [0x15, 0x81, 0xe9, 0x7d, 0xf4, 0x10, 0x22, 0x11];
-        let result = decode_int64(&data)?;
-        assert_eq!(result, 1234567890123456789);
-        Ok(())
-    }
+        test_decoder_bounds!(
+            test_decimal8,
+            [
+                0x02, // Scale
+                0xd2, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00, // Unscaled Value
+            ],
+            decode_decimal8,
+            (1234567890, 2)
+        );
 
-    #[test]
-    fn test_decimal4() -> Result<(), ArrowError> {
-        let data = [
-            0x02, // Scale
-            0xd2, 0x04, 0x00, 0x00, // Integer
-        ];
-        let result = decode_decimal4(&data)?;
-        assert_eq!(result, (1234, 2));
-        Ok(())
+        test_decoder_bounds!(
+            test_decimal16,
+            [
+                0x02, // Scale
+                0xd2, 0xb6, 0x23, 0xc0, 0xf4, 0x10, 0x22, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                0x00, 0x00, // Unscaled Value
+            ],
+            decode_decimal16,
+            (1234567891234567890, 2)
+        );
     }
 
-    #[test]
-    fn test_decimal8() -> Result<(), ArrowError> {
-        let data = [
-            0x02, // Scale
-            0xd2, 0x02, 0x96, 0x49, 0x00, 0x00, 0x00, 0x00, // Integer
-        ];
-        let result = decode_decimal8(&data)?;
-        assert_eq!(result, (1234567890, 2));
-        Ok(())
-    }
+    mod float {
+        use super::*;
 
-    #[test]
-    fn test_decimal16() -> Result<(), ArrowError> {
-        let data = [
-            0x02, // Scale
-            0xd2, 0xb6, 0x23, 0xc0, 0xf4, 0x10, 0x22, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, // Integer
-        ];
-        let result = decode_decimal16(&data)?;
-        assert_eq!(result, (1234567891234567890, 2));
-        Ok(())
-    }
+        test_decoder_bounds!(
+            test_float,
+            [0x06, 0x2c, 0x93, 0x4e],
+            decode_float,
+            1234567890.1234
+        );
 
-    #[test]
-    fn test_float() -> Result<(), ArrowError> {
-        let data = [0x06, 0x2c, 0x93, 0x4e];
-        let result = decode_float(&data)?;
-        assert_eq!(result, 1234567890.1234);
-        Ok(())
+        test_decoder_bounds!(
+            test_double,
+            [0xc9, 0xe5, 0x87, 0xb4, 0x80, 0x65, 0xd2, 0x41],
+            decode_double,
+            1234567890.1234
+        );
     }
 
-    #[test]
-    fn test_double() -> Result<(), ArrowError> {
-        let data = [0xc9, 0xe5, 0x87, 0xb4, 0x80, 0x65, 0xd2, 0x41];
-        let result = decode_double(&data)?;
-        assert_eq!(result, 1234567890.1234);
-        Ok(())
-    }
+    mod datetime {
+        use super::*;
 
-    #[test]
-    fn test_date() -> Result<(), ArrowError> {
-        let data = [0xe2, 0x4e, 0x0, 0x0];
-        let result = decode_date(&data)?;
-        assert_eq!(result, NaiveDate::from_ymd_opt(2025, 4, 16).unwrap());
-        Ok(())
-    }
+        test_decoder_bounds!(
+            test_date,
+            [0xe2, 0x4e, 0x0, 0x0],
+            decode_date,
+            NaiveDate::from_ymd_opt(2025, 4, 16).unwrap()
+        );
 
-    #[test]
-    fn test_timestamp_micros() -> Result<(), ArrowError> {
-        let data = [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00];
-        let result = decode_timestamp_micros(&data)?;
-        assert_eq!(
-            result,
+        test_decoder_bounds!(
+            test_timestamp_micros,
+            [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00],
+            decode_timestamp_micros,
             NaiveDate::from_ymd_opt(2025, 4, 16)
                 .unwrap()
                 .and_hms_milli_opt(16, 34, 56, 780)
                 .unwrap()
                 .and_utc()
         );
-        Ok(())
-    }
 
-    #[test]
-    fn test_timestampntz_micros() -> Result<(), ArrowError> {
-        let data = [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00];
-        let result = decode_timestampntz_micros(&data)?;
-        assert_eq!(
-            result,
+        test_decoder_bounds!(
+            test_timestampntz_micros,
+            [0xe0, 0x52, 0x97, 0xdd, 0xe7, 0x32, 0x06, 0x00],
+            decode_timestampntz_micros,
             NaiveDate::from_ymd_opt(2025, 4, 16)
                 .unwrap()
                 .and_hms_milli_opt(16, 34, 56, 780)
                 .unwrap()
         );
-        Ok(())
+
+        test_decoder_bounds!(
+            test_timestamp_nanos,
+            [0x15, 0x41, 0xa2, 0x5a, 0x36, 0xa2, 0x5b, 0x18],
+            decode_timestamp_nanos,
+            NaiveDate::from_ymd_opt(2025, 8, 14)
+                .unwrap()
+                .and_hms_nano_opt(12, 33, 54, 123456789)
+                .unwrap()
+                .and_utc()
+        );
+
+        test_decoder_bounds!(
+            test_timestamp_nanos_before_epoch,
+            [0x15, 0x41, 0x52, 0xd4, 0x94, 0xe5, 0xad, 0xfa],
+            decode_timestamp_nanos,
+            NaiveDate::from_ymd_opt(1957, 11, 7)
+                .unwrap()
+                .and_hms_nano_opt(12, 33, 54, 123456789)
+                .unwrap()
+                .and_utc()
+        );
+
+        test_decoder_bounds!(
+            test_timestampntz_nanos,
+            [0x15, 0x41, 0xa2, 0x5a, 0x36, 0xa2, 0x5b, 0x18],
+            decode_timestampntz_nanos,
+            NaiveDate::from_ymd_opt(2025, 8, 14)
+                .unwrap()
+                .and_hms_nano_opt(12, 33, 54, 123456789)
+                .unwrap()
+        );
+
+        test_decoder_bounds!(
+            test_timestampntz_nanos_before_epoch,
+            [0x15, 0x41, 0x52, 0xd4, 0x94, 0xe5, 0xad, 0xfa],
+            decode_timestampntz_nanos,
+            NaiveDate::from_ymd_opt(1957, 11, 7)
+                .unwrap()
+                .and_hms_nano_opt(12, 33, 54, 123456789)
+                .unwrap()
+        );
     }
 
     #[test]
-    fn test_binary() -> Result<(), ArrowError> {
+    fn test_uuid() {
+        let data = [
+            0xf2, 0x4f, 0x9b, 0x64, 0x81, 0xfa, 0x49, 0xd1, 0xb7, 0x4e, 0x8c, 0x09, 0xa6, 0xe3,
+            0x1c, 0x56,
+        ];
+        let result = decode_uuid(&data).unwrap();
+        assert_eq!(
+            Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap(),
+            result
+        );
+    }
+
+    mod time {
+        use super::*;
+
+        test_decoder_bounds!(
+            test_timentz,
+            [0x53, 0x1f, 0x8e, 0xdf, 0x2, 0, 0, 0],
+            decode_time_ntz,
+            NaiveTime::from_num_seconds_from_midnight_opt(12340, 567_891_000).unwrap()
+        );
+
+        #[test]
+        fn test_decode_time_ntz_invalid() {
+            let invalid_second = u64::MAX;
+            let data = invalid_second.to_le_bytes();
+            let result = decode_time_ntz(&data);
+            assert!(matches!(result, Err(ArrowError::CastError(_))));
+        }
+    }
+
+    #[test]
+    fn test_binary_exact_length() {
         let data = [
             0x09, 0, 0, 0, // Length of binary data, 4-byte little-endian
             0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe,
         ];
-        let result = decode_binary(&data)?;
+        let result = decode_binary(&data).unwrap();
         assert_eq!(
             result,
             [0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe]
         );
-        Ok(())
     }
 
     #[test]
-    fn test_short_string() -> Result<(), ArrowError> {
+    fn test_binary_truncated_length() {
+        let data = [
+            0x09, 0, 0, 0, // Length of binary data, 4-byte little-endian
+            0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca,
+        ];
+        let result = decode_binary(&data);
+        assert!(matches!(result, Err(ArrowError::InvalidArgumentError(_))));
+    }
+
+    #[test]
+    fn test_short_string_exact_length() {
         let data = [b'H', b'e', b'l', b'l', b'o', b'o'];
-        let result = decode_short_string(1 | 5 << 2, &data)?;
+        let result = decode_short_string(1 | 5 << 2, &data).unwrap();
         assert_eq!(result.0, "Hello");
-        Ok(())
     }
 
     #[test]
-    fn test_string() -> Result<(), ArrowError> {
+    fn test_short_string_truncated_length() {
+        let data = [b'H', b'e', b'l'];
+        let result = decode_short_string(1 | 5 << 2, &data);
+        assert!(matches!(result, Err(ArrowError::InvalidArgumentError(_))));
+    }
+
+    #[test]
+    fn test_string_exact_length() {
         let data = [
             0x05, 0, 0, 0, // Length of string, 4-byte little-endian
             b'H', b'e', b'l', b'l', b'o', b'o',
         ];
-        let result = decode_long_string(&data)?;
+        let result = decode_long_string(&data).unwrap();
         assert_eq!(result, "Hello");
-        Ok(())
+    }
+
+    #[test]
+    fn test_string_truncated_length() {
+        let data = [
+            0x05, 0, 0, 0, // Length of string, 4-byte little-endian
+            b'H', b'e', b'l',
+        ];
+        let result = decode_long_string(&data);
+        assert!(matches!(result, Err(ArrowError::InvalidArgumentError(_))));
     }
 
     #[test]
@@ -449,61 +632,51 @@ mod tests {
     }
 
     #[test]
-    fn unpack_usize_all_widths() {
+    fn unpack_u32_all_widths() {
         // One-byte offsets
         let buf_one = [0x01u8, 0xAB, 0xCD];
-        assert_eq!(
-            OffsetSizeBytes::One.unpack_usize(&buf_one, 0, 0).unwrap(),
-            0x01
-        );
-        assert_eq!(
-            OffsetSizeBytes::One.unpack_usize(&buf_one, 0, 2).unwrap(),
-            0xCD
-        );
+        assert_eq!(OffsetSizeBytes::One.unpack_u32(&buf_one, 0).unwrap(), 0x01);
+        assert_eq!(OffsetSizeBytes::One.unpack_u32(&buf_one, 2).unwrap(), 0xCD);
 
         // Two-byte offsets (little-endian 0x1234, 0x5678)
         let buf_two = [0x34, 0x12, 0x78, 0x56];
         assert_eq!(
-            OffsetSizeBytes::Two.unpack_usize(&buf_two, 0, 0).unwrap(),
+            OffsetSizeBytes::Two.unpack_u32(&buf_two, 0).unwrap(),
             0x1234
         );
         assert_eq!(
-            OffsetSizeBytes::Two.unpack_usize(&buf_two, 0, 1).unwrap(),
+            OffsetSizeBytes::Two.unpack_u32(&buf_two, 1).unwrap(),
             0x5678
         );
 
         // Three-byte offsets (0x030201 and 0x0000FF)
         let buf_three = [0x01, 0x02, 0x03, 0xFF, 0x00, 0x00];
         assert_eq!(
-            OffsetSizeBytes::Three
-                .unpack_usize(&buf_three, 0, 0)
-                .unwrap(),
+            OffsetSizeBytes::Three.unpack_u32(&buf_three, 0).unwrap(),
             0x030201
         );
         assert_eq!(
-            OffsetSizeBytes::Three
-                .unpack_usize(&buf_three, 0, 1)
-                .unwrap(),
+            OffsetSizeBytes::Three.unpack_u32(&buf_three, 1).unwrap(),
             0x0000FF
         );
 
         // Four-byte offsets (0x12345678, 0x90ABCDEF)
         let buf_four = [0x78, 0x56, 0x34, 0x12, 0xEF, 0xCD, 0xAB, 0x90];
         assert_eq!(
-            OffsetSizeBytes::Four.unpack_usize(&buf_four, 0, 0).unwrap(),
+            OffsetSizeBytes::Four.unpack_u32(&buf_four, 0).unwrap(),
             0x1234_5678
         );
         assert_eq!(
-            OffsetSizeBytes::Four.unpack_usize(&buf_four, 0, 1).unwrap(),
+            OffsetSizeBytes::Four.unpack_u32(&buf_four, 1).unwrap(),
             0x90AB_CDEF
         );
     }
 
     #[test]
-    fn unpack_usize_out_of_bounds() {
+    fn unpack_u32_out_of_bounds() {
         let tiny = [0x00u8]; // deliberately too short
-        assert!(OffsetSizeBytes::Two.unpack_usize(&tiny, 0, 0).is_err());
-        assert!(OffsetSizeBytes::Three.unpack_usize(&tiny, 0, 0).is_err());
+        assert!(OffsetSizeBytes::Two.unpack_u32(&tiny, 0).is_err());
+        assert!(OffsetSizeBytes::Three.unpack_u32(&tiny, 0).is_err());
     }
 
     #[test]
@@ -518,20 +691,21 @@ mod tests {
 
         let width = OffsetSizeBytes::Two;
 
-        // dictionary_size starts immediately after the header
-        let dict_size = width.unpack_usize(&buf, 1, 0).unwrap();
+        // dictionary_size starts immediately after the header byte
+        let dict_size = width.unpack_u32_at_offset(&buf, 1, 0).unwrap();
         assert_eq!(dict_size, 2);
 
-        let first = width.unpack_usize(&buf, 1, 1).unwrap();
+        // offset array immediately follows the dictionary size
+        let first = width.unpack_u32_at_offset(&buf, 1, 1).unwrap();
         assert_eq!(first, 0);
 
-        let second = width.unpack_usize(&buf, 1, 2).unwrap();
+        let second = width.unpack_u32_at_offset(&buf, 1, 2).unwrap();
         assert_eq!(second, 5);
 
-        let third = width.unpack_usize(&buf, 1, 3).unwrap();
+        let third = width.unpack_u32_at_offset(&buf, 1, 3).unwrap();
         assert_eq!(third, 9);
 
-        let err = width.unpack_usize(&buf, 1, 4);
+        let err = width.unpack_u32_at_offset(&buf, 1, 4);
         assert!(err.is_err())
     }
 }
diff --git a/parquet-variant/src/lib.rs b/parquet-variant/src/lib.rs
index 00a8a69aff99..a57b4709799d 100644
--- a/parquet-variant/src/lib.rs
+++ b/parquet-variant/src/lib.rs
@@ -20,6 +20,10 @@
 //! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
 //! [Apache Parquet]: https://parquet.apache.org/
 //!
+//! ## Main APIs
+//! - [`Variant`]: Represents a variant value, which can be an object, list, or primitive.
+//! - [`VariantBuilder`]: For building `Variant` values.
+//!
 //! ## 🚧 Work In Progress
 //!
 //! This crate is under active development and is not yet ready for production use.
@@ -27,14 +31,12 @@
 //!
 //! [Variant issue]: https://github.com/apache/arrow-rs/issues/6736
 
-// TODO: dead code removal
-#[allow(dead_code)]
-mod decoder;
-mod variant;
-// TODO: dead code removal
 mod builder;
-#[allow(dead_code)]
+mod decoder;
+mod path;
 mod utils;
+mod variant;
 
 pub use builder::*;
+pub use path::{VariantPath, VariantPathElement};
 pub use variant::*;
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
new file mode 100644
index 000000000000..2aeb9df97d82
--- /dev/null
+++ b/parquet-variant/src/path.rs
@@ -0,0 +1,255 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::{borrow::Cow, ops::Deref};
+
+use crate::utils::parse_path;
+
+/// Represents a qualified path to a potential subfield or index of a variant
+/// value.
+///
+/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
+/// a variant value.
+///
+/// [`Variant::get_path`]: crate::Variant::get_path
+///
+/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
+/// from a single field name or index.
+///
+/// # Example: Simple paths
+/// ```rust
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // access the field "foo" in a variant object value
+/// let path = VariantPath::from("foo");
+/// // access the first element in a variant list value
+/// let path = VariantPath::from(0);
+/// ```
+///
+/// # Example: Compound paths
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // You can also create a path by joining elements together:
+/// // access the field "foo" and then the first element in a variant list value
+/// let path = VariantPath::from("foo").join(0);
+/// // this is the same as the previous one
+/// let path2 = VariantPath::from_iter(["foo".into(), 0.into()]);
+/// assert_eq!(path, path2);
+/// // you can also create a path from a vector of `VariantPathElement` directly
+/// let path3 = [
+///   VariantPathElement::field("foo"),
+///   VariantPathElement::index(0)
+/// ].into_iter().collect::<VariantPath>();
+/// assert_eq!(path, path3);
+/// ```
+///
+/// # Example: From Dot notation strings
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // You can also convert strings directly into paths using dot notation
+/// let path = VariantPath::from("foo.bar.baz");
+/// let expected = VariantPath::from("foo").join("bar").join("baz");
+/// assert_eq!(path, expected);
+/// ```
+///
+/// # Example: Accessing Compound paths
+/// ```
+/// # use parquet_variant::{VariantPath, VariantPathElement};
+/// // You can access individual path elements by index, as with a slice
+/// // build the path "foo" -> "bar" -> "baz", then inspect its second element
+/// let path = VariantPath::from("foo")
+///   .join("bar")
+///   .join("baz");
+/// assert_eq!(path[1], VariantPathElement::field("bar"));
+/// ```
+#[derive(Debug, Clone, PartialEq, Default)]
+pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
+
+impl<'a> VariantPath<'a> {
+    /// Create a new `VariantPath` from a vector of `VariantPathElement`.
+    pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
+        Self(path)
+    }
+
+    /// Return the inner path elements.
+    pub fn path(&self) -> &Vec<VariantPathElement<'_>> {
+        &self.0
+    }
+
+    /// Return a new `VariantPath` with element appended
+    pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
+        self.push(element);
+        self
+    }
+
+    /// Append a new element to the path
+    pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
+        self.0.push(element.into());
+    }
+
+    /// Returns whether [`VariantPath`] has no path elements
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
+    fn from(value: Vec<VariantPathElement<'a>>) -> Self {
+        Self::new(value)
+    }
+}
+
+/// Create from &str with support for dot notation
+impl<'a> From<&'a str> for VariantPath<'a> {
+    fn from(path: &'a str) -> Self {
+        VariantPath::new(path.split(".").flat_map(parse_path).collect())
+    }
+}
+
+/// Create from usize
+impl<'a> From<usize> for VariantPath<'a> {
+    fn from(index: usize) -> Self {
+        VariantPath::new(vec![VariantPathElement::index(index)])
+    }
+}
+
+impl<'a> From<&[VariantPathElement<'a>]> for VariantPath<'a> {
+    fn from(elements: &[VariantPathElement<'a>]) -> Self {
+        VariantPath::new(elements.to_vec())
+    }
+}
+
+/// Create from iter
+impl<'a> FromIterator<VariantPathElement<'a>> for VariantPath<'a> {
+    fn from_iter<T: IntoIterator<Item = VariantPathElement<'a>>>(iter: T) -> Self {
+        VariantPath::new(Vec::from_iter(iter))
+    }
+}
+
+impl<'a> Deref for VariantPath<'a> {
+    type Target = [VariantPathElement<'a>];
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+/// Element of a [`VariantPath`] that can be a field name or an index.
+///
+/// See [`VariantPath`] for more details and examples.
+#[derive(Debug, Clone, PartialEq)]
+pub enum VariantPathElement<'a> {
+    /// Access field with name `name`
+    Field { name: Cow<'a, str> },
+    /// Access the list element at `index`
+    Index { index: usize },
+}
+
+impl<'a> VariantPathElement<'a> {
+    /// Create an element that accesses the object field named `name`.
+    pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
+        VariantPathElement::Field { name: name.into() }
+    }
+    /// Create an element that accesses the list element at position `index`.
+    pub fn index(index: usize) -> VariantPathElement<'a> {
+        VariantPathElement::Index { index }
+    }
+}
+
+// Conversion utilities for `VariantPathElement` from string types
+impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
+    fn from(name: Cow<'a, str>) -> Self {
+        VariantPathElement::field(name)
+    }
+}
+
+impl<'a> From<&'a str> for VariantPathElement<'a> {
+    fn from(name: &'a str) -> Self {
+        VariantPathElement::field(Cow::Borrowed(name))
+    }
+}
+
+impl<'a> From<String> for VariantPathElement<'a> {
+    fn from(name: String) -> Self {
+        VariantPathElement::field(Cow::Owned(name))
+    }
+}
+
+impl<'a> From<&'a String> for VariantPathElement<'a> {
+    fn from(name: &'a String) -> Self {
+        VariantPathElement::field(Cow::Borrowed(name.as_str()))
+    }
+}
+
+impl<'a> From<usize> for VariantPathElement<'a> {
+    fn from(index: usize) -> Self {
+        VariantPathElement::index(index)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_variant_path_empty() {
+        let path = VariantPath::from_iter([]);
+        assert!(path.is_empty());
+    }
+
+    #[test]
+    fn test_variant_path_empty_str() {
+        let path = VariantPath::from("");
+        assert!(path.is_empty());
+    }
+
+    #[test]
+    fn test_variant_path_non_empty() {
+        let p = VariantPathElement::from("a");
+        let path = VariantPath::from_iter([p]);
+        assert!(!path.is_empty());
+    }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_array_index() {
+        let path = VariantPath::from("city.store.books[3].title");
+
+        let expected = VariantPath::from("city")
+            .join("store")
+            .join("books")
+            .join(3)
+            .join("title");
+
+        assert_eq!(path, expected);
+    }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_only_array_index() {
+        let path = VariantPath::from("[3]");
+
+        let expected = VariantPath::from(3);
+
+        assert_eq!(path, expected);
+    }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_starting_array_index() {
+        let path = VariantPath::from("[3].title");
+
+        let expected = VariantPath::from(3).join("title");
+
+        assert_eq!(path, expected);
+    }
+}
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index 7a1b9f039937..6accbcb36649 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -16,11 +16,18 @@
 // under the License.
 use std::{array::TryFromSliceError, ops::Range, str};
 
+use crate::VariantPathElement;
 use arrow_schema::ArrowError;
 
+use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::slice::SliceIndex;
 
+/// Builds the error reported when an integer overflows while computing `msg`.
+pub(crate) fn overflow_error(msg: &str) -> ArrowError {
+    ArrowError::InvalidArgumentError(["Integer overflow computing ", msg].concat())
+}
+
 #[inline]
 pub(crate) fn slice_from_slice<I: SliceIndex<[u8]> + Clone + Debug>(
     bytes: &[u8],
@@ -33,17 +40,33 @@ pub(crate) fn slice_from_slice<I: SliceIndex<[u8]> + Clone + Debug>(
         ))
     })
 }
+
+/// Slices `bytes` over `range` after shifting both of its bounds forward by `base_offset`.
+///
+/// Same result as `slice_from_slice(bytes, (base_offset + range.start)..(base_offset + range.end))`,
+/// except both additions are checked, so an overflow is returned as an error instead of panicking.
+#[inline]
+pub(crate) fn slice_from_slice_at_offset(
+    bytes: &[u8],
+    base_offset: usize,
+    range: Range<usize>,
+) -> Result<&[u8], ArrowError> {
+    let Some(lo) = base_offset.checked_add(range.start) else {
+        return Err(overflow_error("slice start"));
+    };
+    let Some(hi) = base_offset.checked_add(range.end) else {
+        return Err(overflow_error("slice end"));
+    };
+    slice_from_slice(bytes, lo..hi)
+}
+
 pub(crate) fn array_from_slice<const N: usize>(
     bytes: &[u8],
     offset: usize,
 ) -> Result<[u8; N], ArrowError> {
-    let bytes = slice_from_slice(bytes, offset..offset + N)?;
-    bytes.try_into().map_err(map_try_from_slice_error)
-}
-
-/// To be used in `map_err` when unpacking an integer from a slice of bytes.
-pub(crate) fn map_try_from_slice_error(e: TryFromSliceError) -> ArrowError {
-    ArrowError::InvalidArgumentError(e.to_string())
+    slice_from_slice_at_offset(bytes, offset, 0..N)?
+        .try_into()
+        .map_err(|e: TryFromSliceError| ArrowError::InvalidArgumentError(e.to_string()))
 }
 
 pub(crate) fn first_byte_from_slice(slice: &[u8]) -> Result<u8, ArrowError> {
@@ -53,9 +76,28 @@ pub(crate) fn first_byte_from_slice(slice: &[u8]) -> Result<u8, ArrowError> {
         .ok_or_else(|| ArrowError::InvalidArgumentError("Received empty bytes".to_string()))
 }
 
-/// Helper to get a &str from a slice based on range, if it's valid or an error otherwise
-pub(crate) fn string_from_slice(slice: &[u8], range: Range<usize>) -> Result<&str, ArrowError> {
-    str::from_utf8(slice_from_slice(slice, range)?)
+/// Helper to get a &str from a slice at the given offset and range, or an error if it contains invalid UTF-8 data.
+#[inline]
+pub(crate) fn string_from_slice(
+    slice: &[u8],
+    offset: usize,
+    range: Range<usize>,
+) -> Result<&str, ArrowError> {
+    let offset_buffer = slice_from_slice_at_offset(slice, offset, range)?;
+
+    // Use simdutf8 when the `simdutf8` feature is enabled
+    #[cfg(feature = "simdutf8")]
+    {
+        simdutf8::basic::from_utf8(offset_buffer).map_err(|_| {
+            // Use simdutf8::compat to return details about the decoding error
+            let e = simdutf8::compat::from_utf8(offset_buffer).unwrap_err();
+            ArrowError::InvalidArgumentError(format!("encountered non UTF-8 data: {e}"))
+        })
+    }
+
+    // Fall back to std::str when the `simdutf8` feature is not enabled
+    #[cfg(not(feature = "simdutf8"))]
+    str::from_utf8(offset_buffer)
         .map_err(|_| ArrowError::InvalidArgumentError("invalid UTF-8 string".to_string()))
 }
 
@@ -69,39 +111,86 @@ pub(crate) fn string_from_slice(slice: &[u8], range: Range<usize>) -> Result<&st
 /// * `range` - The range to search in
 /// * `target` - The target value to search for
 /// * `key_extractor` - A function that extracts a comparable key from slice elements.
-///   This function can fail and return an error.
+///   This function can fail and return None.
 ///
 /// # Returns
-/// * `Ok(Ok(index))` - Element found at the given index
-/// * `Ok(Err(index))` - Element not found, but would be inserted at the given index
-/// * `Err(e)` - Key extraction failed with error `e`
-pub(crate) fn try_binary_search_range_by<K, E, F>(
+/// * `Some(Ok(index))` - Element found at the given index
+/// * `Some(Err(index))` - Element not found, but would be inserted at the given index
+/// * `None` - Key extraction failed
+pub(crate) fn try_binary_search_range_by<F>(
     range: Range<usize>,
-    target: &K,
-    mut key_extractor: F,
-) -> Result<Result<usize, usize>, E>
+    cmp: F,
+) -> Option<Result<usize, usize>>
 where
-    K: Ord,
-    F: FnMut(usize) -> Result<K, E>,
+    F: Fn(usize) -> Option<Ordering>,
 {
     let Range { mut start, mut end } = range;
     while start < end {
         let mid = start + (end - start) / 2;
-        let key = key_extractor(mid)?;
-        match key.cmp(target) {
-            std::cmp::Ordering::Equal => return Ok(Ok(mid)),
-            std::cmp::Ordering::Greater => end = mid,
-            std::cmp::Ordering::Less => start = mid + 1,
+        match cmp(mid)? {
+            Ordering::Equal => return Some(Ok(mid)),
+            Ordering::Greater => end = mid,
+            Ordering::Less => start = mid + 1,
         }
     }
 
-    Ok(Err(start))
+    Some(Err(start))
+}
+
+/// Checks that `size_of::<T>()` equals `expected`, for a type that should only grow if absolutely necessary.
+#[allow(unused)]
+pub(crate) const fn expect_size_of<T>(expected: usize) {
+    let size = std::mem::size_of::<T>();
+    if size != expected {
+        let _ = [""; 0][size]; // always out of bounds: a mismatch panics (a build error in a `const` item)
+    }
+}
+
+pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
+    i64::BITS - n.into().unsigned_abs().leading_zeros() <= N // bit width of |n|; cannot underflow
+}
+
+/// Splits one path segment such as `foo[0]` or `foo[0][1]` into a field name plus indexes.
+///
+/// An empty segment yields no elements; a malformed bracket suffix yields one literal field.
+#[inline]
+pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec<VariantPathElement<'a>> {
+    // Trailing `[n]` groups are peeled off right-to-left, so they are collected reversed.
+    let mut reversed = Vec::new();
+    let mut rest = segment;
+
+    while let Some(inner) = rest.strip_suffix(']') {
+        // A `]` without a matching `[`, or an index that does not parse as `usize`, means the
+        // segment is not in `name[index]` form: the entire segment becomes one field name.
+        let parsed = inner
+            .rfind('[')
+            .and_then(|open| Some((open, inner[open + 1..].parse::<usize>().ok()?)));
+        let Some((open, index)) = parsed else {
+            return vec![VariantPathElement::field(segment)];
+        };
+
+        reversed.push(VariantPathElement::index(index));
+        rest = &inner[..open];
+    }
+
+    // Whatever precedes the first `[` is the field name (absent for inputs like `[3]`).
+    if !rest.is_empty() {
+        reversed.push(VariantPathElement::field(rest));
+    }
+
+    reversed.reverse();
+    reversed
+}
 
-/// Attempts to prove a fallible iterator is actually infallible in practice, by consuming every
-/// element and returning the first error (if any).
-pub(crate) fn validate_fallible_iterator<T, E>(
-    mut it: impl Iterator<Item = Result<T, E>>,
-) -> Result<(), E> {
-    it.find(Result::is_err).transpose().map(|_| ())
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_fits_precision() {
+        // Ten bits hold magnitudes up to 1023, regardless of sign.
+        for (value, fits) in [(1023, true), (1024, false), (-1023, true), (-1024, false)] {
+            assert_eq!(fits_precision::<10>(value), fits, "value = {value}");
+        }
+    }
+}
diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs
index 2e042b6074cb..819c20d554ce 100644
--- a/parquet-variant/src/variant.rs
+++ b/parquet-variant/src/variant.rs
@@ -1,5 +1,3 @@
-use std::ops::Deref;
-
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -16,17 +14,27 @@ use std::ops::Deref;
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+
+pub use self::decimal::{VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType};
 pub use self::list::VariantList;
-pub use self::metadata::VariantMetadata;
+pub use self::metadata::{EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES, VariantMetadata};
 pub use self::object::VariantObject;
+
+// Publicly export types used in the API
+pub use half::f16;
+pub use uuid::Uuid;
+
 use crate::decoder::{
-    self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType,
+    self, VariantBasicType, VariantPrimitiveType, get_basic_type, get_primitive_type,
 };
-use crate::utils::{first_byte_from_slice, slice_from_slice};
+use crate::path::{VariantPath, VariantPathElement};
+use crate::utils::{first_byte_from_slice, fits_precision, slice_from_slice};
+use std::ops::Deref;
 
 use arrow_schema::ArrowError;
-use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
+use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc};
 
+mod decimal;
 mod list;
 mod metadata;
 mod object;
@@ -41,11 +49,12 @@ const MAX_SHORT_STRING_BYTES: usize = 0x3F;
 pub struct ShortString<'a>(pub(crate) &'a str);
 
 impl<'a> ShortString<'a> {
-    /// Attempts to interpret `value` as a variant short string value.  
+    /// Attempts to interpret `value` as a variant short string value.
     ///
-    /// # Validation
+    /// # Errors
     ///
-    /// This constructor verifies that `value` is shorter than or equal to `MAX_SHORT_STRING_BYTES`
+    /// Returns an error if `value` is longer than the maximum allowed length
+    /// of a Variant short string (63 bytes).
     pub fn try_new(value: &'a str) -> Result<Self, ArrowError> {
         if value.len() > MAX_SHORT_STRING_BYTES {
             return Err(ArrowError::InvalidArgumentError(format!(
@@ -76,13 +85,13 @@ impl<'a> TryFrom<&'a str> for ShortString<'a> {
     }
 }
 
-impl<'a> AsRef<str> for ShortString<'a> {
+impl AsRef<str> for ShortString<'_> {
     fn as_ref(&self) -> &str {
         self.0
     }
 }
 
-impl<'a> Deref for ShortString<'a> {
+impl Deref for ShortString<'_> {
     type Target = str;
 
     fn deref(&self) -> &Self::Target {
@@ -160,7 +169,7 @@ impl<'a> Deref for ShortString<'a> {
 /// // parse the header metadata
 /// assert_eq!(
 ///   Variant::from("HI"),
-///   Variant::try_new(&metadata, &value).unwrap()
+///   Variant::new(&metadata, &value)
 /// );
 /// ```
 ///
@@ -175,7 +184,39 @@ impl<'a> Deref for ShortString<'a> {
 ///   _ => println!("Other variant"),
 /// }
 /// ```
-#[derive(Clone, Debug, PartialEq)]
+///
+/// # Validation
+///
+/// Every instance of variant is either _valid_ or _invalid_, depending on whether the
+/// underlying bytes are a valid encoding of a variant value (see below).
+///
+/// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`]
+/// are fully _validated_. They always contain _valid_ data, and infallible accesses such as
+/// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and
+/// `v` are the number of bytes in the metadata and value buffers, respectively.
+///
+/// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so
+/// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and
+/// arrays, such as iteration and indexing, will panic if the underlying bytes are _invalid_;
+/// fallible counterparts are provided as panic-free alternatives. [`Self::with_full_validation`]
+/// can also be used to _validate_ an _unvalidated_ instance, if desired.
+///
+/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
+/// knows the underlying bytes were already validated previously, or if the caller intends to
+/// perform a small number of (fallible) accesses to a large variant value.
+///
+/// A _validated_ variant value guarantees that the associated [metadata] and all nested [object]
+/// and [array] values are _valid_. Primitive variant subtypes are always _valid_ by construction.
+///
+/// # Safety
+///
+/// Even an _invalid_ variant value is still _safe_ to use in the Rust sense. Accessing it with
+/// infallible methods may cause panics but will never lead to undefined behavior.
+///
+/// [metadata]: VariantMetadata#Validation
+/// [object]: VariantObject#Validation
+/// [array]: VariantList#Validation
+#[derive(Clone, PartialEq)]
 pub enum Variant<'m, 'v> {
     /// Primitive type: Null
     Null,
@@ -193,12 +234,16 @@ pub enum Variant<'m, 'v> {
     TimestampMicros(DateTime<Utc>),
     /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, MICROS)
     TimestampNtzMicros(NaiveDateTime),
+    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=true, NANOS)
+    TimestampNanos(DateTime<Utc>),
+    /// Primitive (type_id=1): TIMESTAMP(isAdjustedToUTC=false, NANOS)
+    TimestampNtzNanos(NaiveDateTime),
     /// Primitive (type_id=1): DECIMAL(precision, scale) 32-bits
-    Decimal4 { integer: i32, scale: u8 },
+    Decimal4(VariantDecimal4),
     /// Primitive (type_id=1): DECIMAL(precision, scale) 64-bits
-    Decimal8 { integer: i64, scale: u8 },
+    Decimal8(VariantDecimal8),
     /// Primitive (type_id=1): DECIMAL(precision, scale) 128-bits
-    Decimal16 { integer: i128, scale: u8 },
+    Decimal16(VariantDecimal16),
     /// Primitive (type_id=1): FLOAT
     Float(f32),
     /// Primitive (type_id=1): DOUBLE
@@ -212,6 +257,10 @@ pub enum Variant<'m, 'v> {
     Binary(&'v [u8]),
     /// Primitive (type_id=1): STRING
     String(&'v str),
+    /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS)
+    Time(NaiveTime),
+    /// Primitive (type_id=1): UUID
+    Uuid(Uuid),
     /// Short String (type_id=2): STRING
     ShortString(ShortString<'v>),
     // need both metadata & value
@@ -221,8 +270,13 @@ pub enum Variant<'m, 'v> {
     List(VariantList<'m, 'v>),
 }
 
+// We don't want this to grow because it could hurt performance of a frequently-created type.
+const _: () = crate::utils::expect_size_of::<Variant>(80);
+
 impl<'m, 'v> Variant<'m, 'v> {
-    /// Create a new `Variant` from metadata and value.
+    /// Attempts to interpret a metadata and value buffer pair as a new `Variant`.
+    ///
+    /// The instance is fully [validated].
     ///
     /// # Example
     /// ```
@@ -235,12 +289,40 @@ impl<'m, 'v> Variant<'m, 'v> {
     ///   Variant::try_new(&metadata, &value).unwrap()
     /// );
     /// ```
+    ///
+    /// [validated]: Self#Validation
     pub fn try_new(metadata: &'m [u8], value: &'v [u8]) -> Result<Self, ArrowError> {
         let metadata = VariantMetadata::try_new(metadata)?;
         Self::try_new_with_metadata(metadata, value)
     }
 
-    /// Create a new variant with existing metadata
+    /// Interprets a metadata and value buffer pair as a new `Variant`.
+    ///
+    /// The instance is [unvalidated]; panics if either buffer fails shallow validation.
+    ///
+    /// # Example
+    /// ```
+    /// use parquet_variant::Variant;
+    /// let metadata = [0x01, 0x00, 0x00];
+    /// let value = [0x09, 0x48, 0x49];
+    /// // construct directly from the raw buffers, without full validation
+    /// assert_eq!(
+    ///   Variant::from("HI"),
+    ///   Variant::new(&metadata, &value)
+    /// );
+    /// ```
+    ///
+    /// [unvalidated]: Self#Validation
+    pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self {
+        let header = VariantMetadata::try_new_with_shallow_validation(metadata);
+        let header = header.expect("Invalid variant metadata");
+        let variant = Self::try_new_with_metadata_and_shallow_validation(header, value);
+        variant.expect("Invalid variant data")
+    }
+
+    /// Create a new variant with existing metadata.
+    ///
+    /// The instance is fully [validated].
     ///
     /// # Example
     /// ```
@@ -248,15 +330,33 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let metadata = [0x01, 0x00, 0x00];
     /// let value = [0x09, 0x48, 0x49];
     /// // parse the header metadata first
-    /// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+    /// let metadata = VariantMetadata::new(&metadata);
     /// assert_eq!(
     ///   Variant::from("HI"),
     ///   Variant::try_new_with_metadata(metadata, &value).unwrap()
     /// );
     /// ```
+    ///
+    /// [validated]: Self#Validation
     pub fn try_new_with_metadata(
         metadata: VariantMetadata<'m>,
         value: &'v [u8],
+    ) -> Result<Self, ArrowError> {
+        Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation()
+    }
+
+    /// Like [`Self::try_new_with_metadata`], but [unvalidated]; panics if shallow validation fails.
+    ///
+    /// [unvalidated]: Self#Validation
+    pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
+        let shallow = Self::try_new_with_metadata_and_shallow_validation(metadata, value);
+        shallow.expect("Invalid variant")
+    }
+
+    // The actual constructor, which only performs shallow (constant-time) validation.
+    fn try_new_with_metadata_and_shallow_validation(
+        metadata: VariantMetadata<'m>,
+        value: &'v [u8],
     ) -> Result<Self, ArrowError> {
         let value_metadata = first_byte_from_slice(value)?;
         let value_data = slice_from_slice(value, 1..)?;
@@ -269,15 +369,15 @@ impl<'m, 'v> Variant<'m, 'v> {
                 VariantPrimitiveType::Int64 => Variant::Int64(decoder::decode_int64(value_data)?),
                 VariantPrimitiveType::Decimal4 => {
                     let (integer, scale) = decoder::decode_decimal4(value_data)?;
-                    Variant::Decimal4 { integer, scale }
+                    Variant::Decimal4(VariantDecimal4::try_new(integer, scale)?)
                 }
                 VariantPrimitiveType::Decimal8 => {
                     let (integer, scale) = decoder::decode_decimal8(value_data)?;
-                    Variant::Decimal8 { integer, scale }
+                    Variant::Decimal8(VariantDecimal8::try_new(integer, scale)?)
                 }
                 VariantPrimitiveType::Decimal16 => {
                     let (integer, scale) = decoder::decode_decimal16(value_data)?;
-                    Variant::Decimal16 { integer, scale }
+                    Variant::Decimal16(VariantDecimal16::try_new(integer, scale)?)
                 }
                 VariantPrimitiveType::Float => Variant::Float(decoder::decode_float(value_data)?),
                 VariantPrimitiveType::Double => {
@@ -292,22 +392,65 @@ impl<'m, 'v> Variant<'m, 'v> {
                 VariantPrimitiveType::TimestampNtzMicros => {
                     Variant::TimestampNtzMicros(decoder::decode_timestampntz_micros(value_data)?)
                 }
+                VariantPrimitiveType::TimestampNanos => {
+                    Variant::TimestampNanos(decoder::decode_timestamp_nanos(value_data)?)
+                }
+                VariantPrimitiveType::TimestampNtzNanos => {
+                    Variant::TimestampNtzNanos(decoder::decode_timestampntz_nanos(value_data)?)
+                }
+                VariantPrimitiveType::Uuid => Variant::Uuid(decoder::decode_uuid(value_data)?),
                 VariantPrimitiveType::Binary => {
                     Variant::Binary(decoder::decode_binary(value_data)?)
                 }
                 VariantPrimitiveType::String => {
                     Variant::String(decoder::decode_long_string(value_data)?)
                 }
+                VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?),
             },
             VariantBasicType::ShortString => {
                 Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?)
             }
-            VariantBasicType::Object => Variant::Object(VariantObject::try_new(metadata, value)?),
-            VariantBasicType::Array => Variant::List(VariantList::try_new(metadata, value)?),
+            VariantBasicType::Object => Variant::Object(
+                VariantObject::try_new_with_shallow_validation(metadata, value)?,
+            ),
+            VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation(
+                metadata, value,
+            )?),
         };
         Ok(new_self)
     }
 
+    /// Reports whether this variant instance has already been fully [validated].
+    ///
+    /// [validated]: Self#Validation
+    pub fn is_fully_validated(&self) -> bool {
+        match self {
+            Self::Object(fields) => fields.is_fully_validated(),
+            Self::List(items) => items.is_fully_validated(),
+            _ => true, // only objects and lists track validation state
+        }
+    }
+
+    /// Validates this variant value recursively, so that later infallible accesses will not
+    /// panic due to invalid bytes.
+    ///
+    /// Leaf values are always valid by construction; only [objects] and [arrays] can be
+    /// constructed in an unvalidated (and potentially invalid) state.
+    ///
+    /// This is a no-op when [`Self::is_fully_validated`] is already true. Otherwise the cost is
+    /// `O(m + v)`, with `m` and `v` the sizes of the metadata and value buffers, respectively.
+    ///
+    /// [objects]: VariantObject#Validation
+    /// [arrays]: VariantList#Validation
+    pub fn with_full_validation(self) -> Result<Self, ArrowError> {
+        // Only the two container variants delegate; every other variant is returned untouched.
+        match self {
+            Self::Object(obj) => Ok(Self::Object(obj.with_full_validation()?)),
+            Self::List(list) => Ok(Self::List(list.with_full_validation()?)),
+            other => Ok(other),
+        }
+    }
+
     /// Converts this variant to `()` if it is null.
     ///
     /// Returns `Some(())` for null variants,
@@ -390,8 +533,8 @@ impl<'m, 'v> Variant<'m, 'v> {
 
     /// Converts this variant to a `DateTime<Utc>` if possible.
     ///
-    /// Returns `Some(DateTime<Utc>)` for timestamp variants,
-    /// `None` for non-timestamp variants.
+    /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
+    /// `None` for other variants.
     ///
     /// # Examples
     ///
@@ -400,23 +543,101 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// use chrono::NaiveDate;
     ///
     /// // you can extract a DateTime<Utc> from a UTC-adjusted variant
-    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
+    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+    ///     .unwrap()
+    ///     .and_hms_milli_opt(12, 34, 56, 780)
+    ///     .unwrap()
+    ///     .and_utc();
     /// let v1 = Variant::from(datetime);
-    /// assert_eq!(v1.as_datetime_utc(), Some(datetime));
+    /// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
+    ///
+    /// // but not for other variants.
+    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
+    ///     .unwrap()
+    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
+    ///     .unwrap()
+    ///     .and_utc();
+    /// let v2 = Variant::from(datetime_nanos);
+    /// assert_eq!(v2.as_timestamp_micros(), None);
+    /// ```
+    pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
+        match *self {
+            Variant::TimestampMicros(d) => Some(d),
+            _ => None,
+        }
+    }
+
+    /// Converts this variant to a `NaiveDateTime` if possible.
     ///
-    /// // or a non-UTC-adjusted variant
-    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
-    /// let v2 = Variant::from(datetime);
-    /// assert_eq!(v2.as_datetime_utc(), Some(datetime.and_utc()));
+    /// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
+    /// `None` for other variants.
     ///
-    /// // but not from other variants
+    /// # Examples
+    ///
+    /// ```
+    /// use parquet_variant::Variant;
+    /// use chrono::NaiveDate;
+    ///
+    /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
+    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+    ///     .unwrap()
+    ///     .and_hms_milli_opt(12, 34, 56, 780)
+    ///     .unwrap();
+    /// let v1 = Variant::from(datetime);
+    /// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
+    ///
+    /// // but not for other variants.
+    /// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14)
+    ///     .unwrap()
+    ///     .and_hms_nano_opt(12, 33, 54, 123456789)
+    ///     .unwrap();
+    /// let v2 = Variant::from(datetime_nanos);
+    /// assert_eq!(v2.as_timestamp_ntz_micros(), None);
+    /// ```
+    pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
+        match *self {
+            Variant::TimestampNtzMicros(d) => Some(d),
+            _ => None,
+        }
+    }
+
+    /// Converts this variant to a `DateTime<Utc>` if possible.
+    ///
+    /// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampNanos`] and
+    /// [`Variant::TimestampMicros`] variants, `None` for other variants.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use parquet_variant::Variant;
+    /// use chrono::NaiveDate;
+    ///
+    /// // you can extract a DateTime<Utc> from a UTC-adjusted nanosecond-precision variant
+    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+    ///     .unwrap()
+    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
+    ///     .unwrap()
+    ///     .and_utc();
+    /// let v1 = Variant::from(datetime);
+    /// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
+    ///
+    /// // or from UTC-adjusted microsecond-precision variant
+    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
+    ///     .unwrap()
+    ///     .and_hms_milli_opt(12, 33, 54, 123)
+    ///     .unwrap()
+    ///     .and_utc();
+    /// // this will convert to `Variant::TimestampMicros`.
+    /// let v2 = Variant::from(datetime_micros);
+    /// assert_eq!(v2.as_timestamp_nanos(), Some(datetime_micros));
+    ///
+    /// // but not for other variants.
     /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_datetime_utc(), None);
+    /// assert_eq!(v3.as_timestamp_nanos(), None);
     /// ```
-    pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
+    pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
         match *self {
-            Variant::TimestampMicros(d) => Some(d),
-            Variant::TimestampNtzMicros(d) => Some(d.and_utc()),
+            Variant::TimestampNanos(d) | Variant::TimestampMicros(d) => Some(d),
             _ => None,
         }
     }
@@ -424,7 +645,7 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// Converts this variant to a `NaiveDateTime` if possible.
     ///
     /// Returns `Some(NaiveDateTime)` for timestamp variants,
-    /// `None` for non-timestamp variants.
+    /// `None` for other variants.
     ///
     /// # Examples
     ///
@@ -433,23 +654,29 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// use chrono::NaiveDate;
     ///
     /// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
-    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
+    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16)
+    ///     .unwrap()
+    ///     .and_hms_nano_opt(12, 34, 56, 789123456)
+    ///     .unwrap();
     /// let v1 = Variant::from(datetime);
-    /// assert_eq!(v1.as_naive_datetime(), Some(datetime));
-    ///
-    /// // or a UTC-adjusted variant
-    /// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
-    /// let v2 = Variant::from(datetime);
-    /// assert_eq!(v2.as_naive_datetime(), Some(datetime.naive_utc()));
-    ///
-    /// // but not from other variants
+    /// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
+    ///
+    /// // or from a microsecond-precision non-UTC-adjusted variant
+    /// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14)
+    ///     .unwrap()
+    ///     .and_hms_milli_opt(12, 33, 54, 123)
+    ///     .unwrap();
+    /// // this will convert to `Variant::TimestampNtzMicros`.
+    /// let v2 = Variant::from(datetime_micros);
+    /// assert_eq!(v2.as_timestamp_ntz_nanos(), Some(datetime_micros));
+    ///
+    /// // but not for other variants.
     /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_naive_datetime(), None);
+    /// assert_eq!(v3.as_timestamp_ntz_nanos(), None);
     /// ```
-    pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
+    pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
         match *self {
-            Variant::TimestampNtzMicros(d) => Some(d),
-            Variant::TimestampMicros(d) => Some(d.naive_utc()),
+            Variant::TimestampNtzNanos(d) | Variant::TimestampNtzMicros(d) => Some(d),
             _ => None,
         }
     }
@@ -507,6 +734,32 @@ impl<'m, 'v> Variant<'m, 'v> {
         }
     }
 
+    /// Converts this variant to a [`Uuid`] if possible.
+    ///
+    /// Returns `Some(Uuid)` for UUID variants, `None` for non-UUID variants.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use parquet_variant::Variant;
+    ///
+    /// // You can extract a UUID from a UUID variant
+    /// let id = uuid::Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
+    /// let v1 = Variant::Uuid(id);
+    /// assert_eq!(id, v1.as_uuid().unwrap());
+    /// assert_eq!("67e55044-10b1-426f-9247-bb680e5fe0c8", v1.as_uuid().unwrap().to_string());
+    ///
+    /// // but not from other variants
+    /// let v2 = Variant::from(1234);
+    /// assert_eq!(None, v2.as_uuid());
+    /// ```
+    pub fn as_uuid(&self) -> Option<Uuid> {
+        if let Variant::Uuid(id) = self {
+            return Some(*id);
+        }
+        None
+    }
+
     /// Converts this variant to an `i8` if possible.
     ///
     /// Returns `Some(i8)` for integer variants that fit in `i8` range,
@@ -535,6 +788,9 @@ impl<'m, 'v> Variant<'m, 'v> {
             Variant::Int16(i) => i.try_into().ok(),
             Variant::Int32(i) => i.try_into().ok(),
             Variant::Int64(i) => i.try_into().ok(),
+            Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
             _ => None,
         }
     }
@@ -567,6 +823,9 @@ impl<'m, 'v> Variant<'m, 'v> {
             Variant::Int16(i) => Some(i),
             Variant::Int32(i) => i.try_into().ok(),
             Variant::Int64(i) => i.try_into().ok(),
+            Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
             _ => None,
         }
     }
@@ -599,6 +858,9 @@ impl<'m, 'v> Variant<'m, 'v> {
             Variant::Int16(i) => Some(i.into()),
             Variant::Int32(i) => Some(i),
             Variant::Int64(i) => i.try_into().ok(),
+            Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer()),
+            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
             _ => None,
         }
     }
@@ -627,10 +889,173 @@ impl<'m, 'v> Variant<'m, 'v> {
             Variant::Int16(i) => Some(i.into()),
             Variant::Int32(i) => Some(i.into()),
             Variant::Int64(i) => Some(i),
+            Variant::Decimal4(d) if d.scale() == 0 => Some(d.integer().into()),
+            Variant::Decimal8(d) if d.scale() == 0 => Some(d.integer()),
+            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
             _ => None,
         }
     }
 
+    fn generic_convert_unsigned_primitive<T>(&self) -> Option<T>
+    where
+        T: TryFrom<i8> + TryFrom<i16> + TryFrom<i32> + TryFrom<i64> + TryFrom<i128>,
+    {
+        match *self {
+            Variant::Int8(i) => i.try_into().ok(),
+            Variant::Int16(i) => i.try_into().ok(),
+            Variant::Int32(i) => i.try_into().ok(),
+            Variant::Int64(i) => i.try_into().ok(),
+            Variant::Decimal4(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal8(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            Variant::Decimal16(d) if d.scale() == 0 => d.integer().try_into().ok(),
+            _ => None,
+        }
+    }
+
+    /// Converts this variant to a `u8` if possible.
+    ///
+    /// Returns `Some(u8)` for integer variants and scale-0 decimals that fit in `u8`,
+    /// `None` for other variants or values outside the `u8` range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///  use parquet_variant::{Variant, VariantDecimal4};
+    ///
+    ///  // you can read an int64 variant into an u8
+    ///  let v1 = Variant::from(123i64);
+    ///  assert_eq!(v1.as_u8(), Some(123u8));
+    ///
+    ///  // or a Decimal4 with scale 0 into u8
+    ///  let d = VariantDecimal4::try_new(26, 0).unwrap();
+    ///  let v2 = Variant::from(d);
+    ///  assert_eq!(v2.as_u8(), Some(26u8));
+    ///
+    ///  // but not a variant that can't fit into the range
+    ///  let v3 = Variant::from(-1);
+    ///  assert_eq!(v3.as_u8(), None);
+    ///
+    ///  // nor a decimal variant whose scale is not zero
+    ///  let d = VariantDecimal4::try_new(1, 2).unwrap();
+    ///  let v4 = Variant::from(d);
+    ///  assert_eq!(v4.as_u8(), None);
+    ///
+    ///  // or not a variant that cannot be cast into an integer
+    ///  let v5 = Variant::from("hello!");
+    ///  assert_eq!(v5.as_u8(), None);
+    /// ```
+    pub fn as_u8(&self) -> Option<u8> {
+        self.generic_convert_unsigned_primitive::<u8>()
+    }
+
+    /// Converts this variant to a `u16` if possible.
+    ///
+    /// Returns `Some(u16)` for integer variants and scale-0 decimals that fit in `u16`,
+    /// `None` for other variants or values outside the `u16` range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///  use parquet_variant::{Variant, VariantDecimal4};
+    ///
+    ///  // you can read an int64 variant into an u16
+    ///  let v1 = Variant::from(123i64);
+    ///  assert_eq!(v1.as_u16(), Some(123u16));
+    ///
+    ///  // or a Decimal4 with scale 0 into u16
+    ///  let d = VariantDecimal4::try_new(u16::MAX as i32, 0).unwrap();
+    ///  let v2 = Variant::from(d);
+    ///  assert_eq!(v2.as_u16(), Some(u16::MAX));
+    ///
+    ///  // but not a variant that can't fit into the range
+    ///  let v3 = Variant::from(-1);
+    ///  assert_eq!(v3.as_u16(), None);
+    ///
+    ///  // nor a decimal variant whose scale is not zero
+    ///  let d = VariantDecimal4::try_new(1, 2).unwrap();
+    ///  let v4 = Variant::from(d);
+    ///  assert_eq!(v4.as_u16(), None);
+    ///
+    ///  // or not a variant that cannot be cast into an integer
+    ///  let v5 = Variant::from("hello!");
+    ///  assert_eq!(v5.as_u16(), None);
+    /// ```
+    pub fn as_u16(&self) -> Option<u16> {
+        self.generic_convert_unsigned_primitive::<u16>()
+    }
+
+    /// Converts this variant to a `u32` if possible.
+    ///
+    /// Returns `Some(u32)` for integer variants and scale-0 decimals that fit in `u32`,
+    /// `None` for other variants or values outside the `u32` range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///  use parquet_variant::{Variant, VariantDecimal8};
+    ///
+    ///  // you can read an int64 variant into an u32
+    ///  let v1 = Variant::from(123i64);
+    ///  assert_eq!(v1.as_u32(), Some(123u32));
+    ///
+    ///  // or a Decimal8 with scale 0 into u32
+    ///  let d = VariantDecimal8::try_new(u32::MAX as i64, 0).unwrap();
+    ///  let v2 = Variant::from(d);
+    ///  assert_eq!(v2.as_u32(), Some(u32::MAX));
+    ///
+    ///  // but not a variant that can't fit into the range
+    ///  let v3 = Variant::from(-1);
+    ///  assert_eq!(v3.as_u32(), None);
+    ///
+    ///  // nor a decimal variant whose scale is not zero
+    ///  let d = VariantDecimal8::try_new(1, 2).unwrap();
+    ///  let v4 = Variant::from(d);
+    ///  assert_eq!(v4.as_u32(), None);
+    ///
+    ///  // or not a variant that cannot be cast into an integer
+    ///  let v5 = Variant::from("hello!");
+    ///  assert_eq!(v5.as_u32(), None);
+    /// ```
+    pub fn as_u32(&self) -> Option<u32> {
+        self.generic_convert_unsigned_primitive::<u32>()
+    }
+
+    /// Converts this variant to a `u64` if possible.
+    ///
+    /// Returns `Some(u64)` for integer variants and scale-0 decimals that fit in `u64`,
+    /// `None` for other variants or values outside the `u64` range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///  use parquet_variant::{Variant, VariantDecimal16};
+    ///
+    ///  // you can read an int64 variant into an u64
+    ///  let v1 = Variant::from(123i64);
+    ///  assert_eq!(v1.as_u64(), Some(123u64));
+    ///
+    ///  // or a Decimal16 with scale 0 into u64
+    ///  let d = VariantDecimal16::try_new(u64::MAX as i128, 0).unwrap();
+    ///  let v2 = Variant::from(d);
+    ///  assert_eq!(v2.as_u64(), Some(u64::MAX));
+    ///
+    ///  // but not a variant that can't fit into the range
+    ///  let v3 = Variant::from(-1);
+    ///  assert_eq!(v3.as_u64(), None);
+    ///
+    ///  // nor a decimal variant whose scale is not zero
+    ///  let d = VariantDecimal16::try_new(1, 2).unwrap();
+    ///  let v4 = Variant::from(d);
+    ///  assert_eq!(v4.as_u64(), None);
+    ///
+    ///  // or not a variant that cannot be cast into an integer
+    ///  let v5 = Variant::from("hello!");
+    ///  assert_eq!(v5.as_u64(), None);
+    /// ```
+    pub fn as_u64(&self) -> Option<u64> {
+        self.generic_convert_unsigned_primitive::<u64>()
+    }
+
     /// Converts this variant to tuple with a 4-byte unscaled value if possible.
     ///
     /// Returns `Some((i32, u8))` for decimal variants where the unscaled value
@@ -640,41 +1065,33 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// # Examples
     ///
     /// ```
-    /// use parquet_variant::Variant;
+    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8};
     ///
     /// // you can extract decimal parts from smaller or equally-sized decimal variants
-    /// let v1 = Variant::from((1234_i32, 2));
-    /// assert_eq!(v1.as_decimal_int32(), Some((1234_i32, 2)));
+    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
+    /// assert_eq!(v1.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
     ///
     /// // and from larger decimal variants if they fit
-    /// let v2 = Variant::from((1234_i64, 2));
-    /// assert_eq!(v2.as_decimal_int32(), Some((1234_i32, 2)));
+    /// let v2 = Variant::from(VariantDecimal8::try_new(1234_i64, 2).unwrap());
+    /// assert_eq!(v2.as_decimal4(), VariantDecimal4::try_new(1234_i32, 2).ok());
     ///
     /// // but not if the value would overflow i32
-    /// let v3 = Variant::from((12345678901i64, 2));
-    /// assert_eq!(v3.as_decimal_int32(), None);
+    /// let v3 = Variant::from(VariantDecimal8::try_new(12345678901i64, 2).unwrap());
+    /// assert_eq!(v3.as_decimal4(), None);
     ///
     /// // or if the variant is not a decimal
     /// let v4 = Variant::from("hello!");
-    /// assert_eq!(v4.as_decimal_int32(), None);
+    /// assert_eq!(v4.as_decimal4(), None);
     /// ```
-    pub fn as_decimal_int32(&self) -> Option<(i32, u8)> {
+    pub fn as_decimal4(&self) -> Option<VariantDecimal4> {
         match *self {
-            Variant::Decimal4 { integer, scale } => Some((integer, scale)),
-            Variant::Decimal8 { integer, scale } => {
-                if let Ok(converted_integer) = integer.try_into() {
-                    Some((converted_integer, scale))
-                } else {
-                    None
-                }
-            }
-            Variant::Decimal16 { integer, scale } => {
-                if let Ok(converted_integer) = integer.try_into() {
-                    Some((converted_integer, scale))
-                } else {
-                    None
-                }
-            }
+            Variant::Int8(i) => i32::from(i).try_into().ok(),
+            Variant::Int16(i) => i32::from(i).try_into().ok(),
+            Variant::Int32(i) => i.try_into().ok(),
+            Variant::Int64(i) => i32::try_from(i).ok()?.try_into().ok(),
+            Variant::Decimal4(decimal4) => Some(decimal4),
+            Variant::Decimal8(decimal8) => decimal8.try_into().ok(),
+            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
             _ => None,
         }
     }
@@ -688,35 +1105,33 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// # Examples
     ///
     /// ```
-    /// use parquet_variant::Variant;
+    /// use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
     ///
     /// // you can extract decimal parts from smaller or equally-sized decimal variants
-    /// let v1 = Variant::from((1234_i64, 2));
-    /// assert_eq!(v1.as_decimal_int64(), Some((1234_i64, 2)));
+    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
+    /// assert_eq!(v1.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
     ///
     /// // and from larger decimal variants if they fit
-    /// let v2 = Variant::from((1234_i128, 2));
-    /// assert_eq!(v2.as_decimal_int64(), Some((1234_i64, 2)));
+    /// let v2 = Variant::from(VariantDecimal16::try_new(1234_i128, 2).unwrap());
+    /// assert_eq!(v2.as_decimal8(), VariantDecimal8::try_new(1234_i64, 2).ok());
     ///
     /// // but not if the value would overflow i64
-    /// let v3 = Variant::from((2e19 as i128, 2));
-    /// assert_eq!(v3.as_decimal_int64(), None);
+    /// let v3 = Variant::from(VariantDecimal16::try_new(2e19 as i128, 2).unwrap());
+    /// assert_eq!(v3.as_decimal8(), None);
     ///
     /// // or if the variant is not a decimal
     /// let v4 = Variant::from("hello!");
-    /// assert_eq!(v4.as_decimal_int64(), None);
+    /// assert_eq!(v4.as_decimal8(), None);
     /// ```
-    pub fn as_decimal_int64(&self) -> Option<(i64, u8)> {
+    pub fn as_decimal8(&self) -> Option<VariantDecimal8> {
         match *self {
-            Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
-            Variant::Decimal8 { integer, scale } => Some((integer, scale)),
-            Variant::Decimal16 { integer, scale } => {
-                if let Ok(converted_integer) = integer.try_into() {
-                    Some((converted_integer, scale))
-                } else {
-                    None
-                }
-            }
+            Variant::Int8(i) => i64::from(i).try_into().ok(),
+            Variant::Int16(i) => i64::from(i).try_into().ok(),
+            Variant::Int32(i) => i64::from(i).try_into().ok(),
+            Variant::Int64(i) => i.try_into().ok(),
+            Variant::Decimal4(decimal4) => Some(decimal4.into()),
+            Variant::Decimal8(decimal8) => Some(decimal8),
+            Variant::Decimal16(decimal16) => decimal16.try_into().ok(),
             _ => None,
         }
     }
@@ -730,28 +1145,71 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// # Examples
     ///
     /// ```
-    /// use parquet_variant::Variant;
+    /// use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4};
     ///
     /// // you can extract decimal parts from smaller or equally-sized decimal variants
-    /// let v1 = Variant::from((1234_i128, 2));
-    /// assert_eq!(v1.as_decimal_int128(), Some((1234_i128, 2)));
+    /// let v1 = Variant::from(VariantDecimal4::try_new(1234_i32, 2).unwrap());
+    /// assert_eq!(v1.as_decimal16(), VariantDecimal16::try_new(1234_i128, 2).ok());
     ///
     /// // but not if the variant is not a decimal
     /// let v2 = Variant::from("hello!");
-    /// assert_eq!(v2.as_decimal_int128(), None);
+    /// assert_eq!(v2.as_decimal16(), None);
     /// ```
-    pub fn as_decimal_int128(&self) -> Option<(i128, u8)> {
+    pub fn as_decimal16(&self) -> Option<VariantDecimal16> {
         match *self {
-            Variant::Decimal4 { integer, scale } => Some((integer.into(), scale)),
-            Variant::Decimal8 { integer, scale } => Some((integer.into(), scale)),
-            Variant::Decimal16 { integer, scale } => Some((integer, scale)),
+            Variant::Int8(i) => i128::from(i).try_into().ok(),
+            Variant::Int16(i) => i128::from(i).try_into().ok(),
+            Variant::Int32(i) => i128::from(i).try_into().ok(),
+            Variant::Int64(i) => i128::from(i).try_into().ok(),
+            Variant::Decimal4(decimal4) => Some(decimal4.into()),
+            Variant::Decimal8(decimal8) => Some(decimal8.into()),
+            Variant::Decimal16(decimal16) => Some(decimal16),
             _ => None,
         }
     }
+
+    /// Converts this variant to an `f16` if possible.
+    ///
+    /// Returns `Some(f16)` for floating point values, and integers with up to 11 bits of
+    /// precision. `None` otherwise.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use parquet_variant::Variant;
+    /// use half::f16;
+    ///
+    /// // you can extract an f16 from a float variant
+    /// let v1 = Variant::from(std::f32::consts::PI);
+    /// assert_eq!(v1.as_f16(), Some(f16::from_f32(std::f32::consts::PI)));
+    ///
+    /// // and from a double variant (with loss of precision to nearest f16)
+    /// let v2 = Variant::from(std::f64::consts::PI);
+    /// assert_eq!(v2.as_f16(), Some(f16::from_f64(std::f64::consts::PI)));
+    ///
+    /// // and from integers with no more than 11 bits of precision
+    /// let v3 = Variant::from(2047);
+    /// assert_eq!(v3.as_f16(), Some(f16::from_f32(2047.0)));
+    ///
+    /// // but not from other variants
+    /// assert_eq!(Variant::from("hello!").as_f16(), None);
+    /// ```
+    pub fn as_f16(&self) -> Option<f16> {
+        match *self {
+            Variant::Float(i) => Some(f16::from_f32(i)),
+            Variant::Double(i) => Some(f16::from_f64(i)),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
+            Variant::Int32(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
+            Variant::Int64(i) if fits_precision::<11>(i) => Some(f16::from_f32(i as _)),
+            _ => None,
+        }
+    }
+
     /// Converts this variant to an `f32` if possible.
     ///
-    /// Returns `Some(f32)` for float and double variants,
-    /// `None` for non-floating-point variants.
+    /// Returns `Some(f32)` for floating point values, and integer values with up to 24 bits of
+    /// precision.  `None` otherwise.
     ///
     /// # Examples
     ///
@@ -766,23 +1224,31 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v2 = Variant::from(std::f64::consts::PI);
     /// assert_eq!(v2.as_f32(), Some(std::f32::consts::PI));
     ///
+    /// // and from integers with no more than 24 bits of precision
+    /// let v3 = Variant::from(16777215i64);
+    /// assert_eq!(v3.as_f32(), Some(16777215.0));
+    ///
     /// // but not from other variants
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_f32(), None);
+    /// let v4 = Variant::from("hello!");
+    /// assert_eq!(v4.as_f32(), None);
     /// ```
     #[allow(clippy::cast_possible_truncation)]
     pub fn as_f32(&self) -> Option<f32> {
         match *self {
             Variant::Float(i) => Some(i),
             Variant::Double(i) => Some(i as f32),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) => Some(i.into()),
+            Variant::Int32(i) if fits_precision::<24>(i) => Some(i as _),
+            Variant::Int64(i) if fits_precision::<24>(i) => Some(i as _),
             _ => None,
         }
     }
 
     /// Converts this variant to an `f64` if possible.
     ///
-    /// Returns `Some(f64)` for float and double variants,
-    /// `None` for non-floating-point variants.
+    /// Returns `Some(f64)` for floating point values, and integer values with up to 53 bits of
+    /// precision.  `None` otherwise.
     ///
     /// # Examples
     ///
@@ -797,25 +1263,223 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v2 = Variant::from(std::f64::consts::PI);
     /// assert_eq!(v2.as_f64(), Some(std::f64::consts::PI));
     ///
+    /// // and from integers with no more than 53 bits of precision
+    /// let v3 = Variant::from(9007199254740991i64);
+    /// assert_eq!(v3.as_f64(), Some(9007199254740991.0));
+    ///
     /// // but not from other variants
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_f64(), None);
+    /// let v4 = Variant::from("hello!");
+    /// assert_eq!(v4.as_f64(), None);
     /// ```
     pub fn as_f64(&self) -> Option<f64> {
         match *self {
             Variant::Float(i) => Some(i.into()),
             Variant::Double(i) => Some(i),
+            Variant::Int8(i) => Some(i.into()),
+            Variant::Int16(i) => Some(i.into()),
+            Variant::Int32(i) => Some(i.into()),
+            Variant::Int64(i) if fits_precision::<53>(i) => Some(i as _),
             _ => None,
         }
     }
 
-    pub fn metadata(&self) -> Option<&'m VariantMetadata> {
+    /// Converts this variant to an `Object` if it is a [`VariantObject`].
+    ///
+    /// Returns `Some(&VariantObject)` for object variants,
+    /// `None` for non-object variants.
+    ///
+    /// See [`Self::get_path`] to dynamically traverse objects
+    ///
+    /// # Examples
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
+    /// # let (metadata, value) = {
+    /// # let mut builder = VariantBuilder::new();
+    /// #   let mut obj = builder.new_object();
+    /// #   obj.insert("name", "John");
+    /// #   obj.finish();
+    /// #   builder.finish()
+    /// # };
+    /// // object that is {"name": "John"}
+    ///  let variant = Variant::new(&metadata, &value);
+    /// // use the `as_object` method to access the object
+    /// let obj = variant.as_object().expect("variant should be an object");
+    /// assert_eq!(obj.get("name"), Some(Variant::from("John")));
+    /// ```
+    pub fn as_object(&'m self) -> Option<&'m VariantObject<'m, 'v>> {
+        // Only the `Object` arm carries a `VariantObject` to borrow.
+        match self {
+            Variant::Object(object) => Some(object),
+            _ => None,
+        }
+    }
+
+    /// If this is an object and the requested field name exists, retrieves the corresponding field
+    /// value. Otherwise, returns None.
+    ///
+    /// This is shorthand for [`Self::as_object`] followed by [`VariantObject::get`].
+    ///
+    /// # Examples
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
+    /// # let mut builder = VariantBuilder::new();
+    /// # let mut obj = builder.new_object();
+    /// # obj.insert("name", "John");
+    /// # obj.finish();
+    /// # let (metadata, value) = builder.finish();
+    /// // object that is {"name": "John"}
+    ///  let variant = Variant::new(&metadata, &value);
+    /// // use the `get_object_field` method to access the object
+    /// let obj = variant.get_object_field("name");
+    /// assert_eq!(obj, Some(Variant::from("John")));
+    /// let obj = variant.get_object_field("foo");
+    /// assert!(obj.is_none());
+    /// ```
+    pub fn get_object_field(&self, field_name: &str) -> Option<Self> {
         match self {
-            Variant::Object(VariantObject { metadata, .. })
-            | Variant::List(VariantList { metadata, .. }) => Some(metadata),
+            Variant::Object(object) => object.get(field_name),
+            _ => None,
+        }
+    }
+
+    /// Converts this variant to a `List` if it is a [`VariantList`].
+    ///
+    /// Returns `Some(&VariantList)` for list variants,
+    /// `None` for non-list variants.
+    ///
+    /// See [`Self::get_path`] to dynamically traverse lists
+    ///
+    /// # Examples
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
+    /// # let (metadata, value) = {
+    /// # let mut builder = VariantBuilder::new();
+    /// #   let mut list = builder.new_list();
+    /// #   list.append_value("John");
+    /// #   list.append_value("Doe");
+    /// #   list.finish();
+    /// #   builder.finish()
+    /// # };
+    /// // list that is ["John", "Doe"]
+    /// let variant = Variant::new(&metadata, &value);
+    /// // use the `as_list` method to access the list
+    /// let list = variant.as_list().expect("variant should be a list");
+    /// assert_eq!(list.len(), 2);
+    /// assert_eq!(list.get(0).unwrap(), Variant::from("John"));
+    /// assert_eq!(list.get(1).unwrap(), Variant::from("Doe"));
+    /// ```
+    pub fn as_list(&'m self) -> Option<&'m VariantList<'m, 'v>> {
+        // Only the `List` arm carries a `VariantList` to borrow.
+        match self {
+            Variant::List(elements) => Some(elements),
+            _ => None,
+        }
+    }
+
+    /// Converts this variant to a `NaiveTime` if possible.
+    ///
+    /// Returns `Some(NaiveTime)` for `Variant::Time`,
+    /// `None` for non-Time variants.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use chrono::NaiveTime;
+    /// use parquet_variant::Variant;
+    ///
+    /// // you can extract a `NaiveTime` from a `Variant::Time`
+    /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
+    /// let v1 = Variant::from(time);
+    /// assert_eq!(Some(time), v1.as_time_utc());
+    ///
+    /// // but not from other variants.
+    /// let v2 = Variant::from("Hello");
+    /// assert_eq!(None, v2.as_time_utc());
+    /// ```
+    pub fn as_time_utc(&'m self) -> Option<NaiveTime> {
+        // The time is copied out of the borrowed `Time` arm via `*`.
+        match self {
+            Variant::Time(time_of_day) => Some(*time_of_day),
+            _ => None,
+        }
+    }
+
+    /// If this is a list and the requested index is in bounds, retrieves the corresponding
+    /// element. Otherwise, returns None.
+    ///
+    /// This is shorthand for [`Self::as_list`] followed by [`VariantList::get`].
+    ///
+    /// # Examples
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantList};
+    /// # let mut builder = VariantBuilder::new();
+    /// # let mut list = builder.new_list();
+    /// # list.append_value("John");
+    /// # list.append_value("Doe");
+    /// # list.finish();
+    /// # let (metadata, value) = builder.finish();
+    /// // list that is ["John", "Doe"]
+    /// let variant = Variant::new(&metadata, &value);
+    /// // use the `get_list_element` method to access the list
+    /// assert_eq!(variant.get_list_element(0), Some(Variant::from("John")));
+    /// assert_eq!(variant.get_list_element(1), Some(Variant::from("Doe")));
+    /// assert!(variant.get_list_element(2).is_none());
+    /// ```
+    pub fn get_list_element(&self, index: usize) -> Option<Self> {
+        match self {
+            Variant::List(list) => list.get(index),
             _ => None,
         }
     }
+
+    /// Return the metadata dictionary of an object or list variant, or `EMPTY_VARIANT_METADATA` otherwise.
+    pub fn metadata(&self) -> &VariantMetadata<'m> {
+        match self {
+            Variant::Object(VariantObject { metadata, .. })
+            | Variant::List(VariantList { metadata, .. }) => metadata,
+            _ => &EMPTY_VARIANT_METADATA,
+        }
+    }
+
+    /// Return a new Variant with the path followed.
+    ///
+    /// If the path is not found, `None` is returned.
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
+    /// # let mut builder = VariantBuilder::new();
+    /// # let mut obj = builder.new_object();
+    /// # let mut list = obj.new_list("foo");
+    /// # list.append_value("bar");
+    /// # list.append_value("baz");
+    /// # list.finish();
+    /// # obj.finish();
+    /// # let (metadata, value) = builder.finish();
+    /// // given a variant like `{"foo": ["bar", "baz"]}`
+    /// let variant = Variant::new(&metadata, &value);
+    /// // Accessing a non existent path returns None
+    /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
+    /// // Access obj["foo"]
+    /// let path = VariantPath::from("foo");
+    /// let foo = variant.get_path(&path).expect("field `foo` should exist");
+    /// assert!(foo.as_list().is_some(), "field `foo` should be a list");
+    /// // Access foo[0]
+    /// let path = VariantPath::from(0);
+    /// let bar = foo.get_path(&path).expect("element 0 should exist");
+    /// // bar is a string
+    /// assert_eq!(bar.as_string(), Some("bar"));
+    /// // You can also access nested paths
+    /// let path = VariantPath::from("foo").join(0);
+    /// assert_eq!(variant.get_path(&path).unwrap(), bar);
+    /// ```
+    pub fn get_path(&self, path: &VariantPath) -> Option<Variant<'_, '_>> {
+        path.iter()
+            .try_fold(self.clone(), |output, element| match element {
+                VariantPathElement::Field { name } => output.get_object_field(name),
+                VariantPathElement::Index { index } => output.get_list_element(*index),
+            })
+    }
 }
 
 impl From<()> for Variant<'_, '_> {
@@ -824,6 +1488,15 @@ impl From<()> for Variant<'_, '_> {
     }
 }
 
+impl From<bool> for Variant<'_, '_> {
+    fn from(value: bool) -> Self {
+        match value {
+            true => Variant::BooleanTrue,
+            false => Variant::BooleanFalse,
+        }
+    }
+}
+
 impl From<i8> for Variant<'_, '_> {
     fn from(value: i8) -> Self {
         Variant::Int8(value)
@@ -848,33 +1521,74 @@ impl From<i64> for Variant<'_, '_> {
     }
 }
 
-impl From<(i32, u8)> for Variant<'_, '_> {
-    fn from(value: (i32, u8)) -> Self {
-        Variant::Decimal4 {
-            integer: value.0,
-            scale: value.1,
+impl From<u8> for Variant<'_, '_> {
+    fn from(value: u8) -> Self {
+        // if it fits in i8, use that, otherwise use i16
+        if let Ok(value) = i8::try_from(value) {
+            Variant::Int8(value)
+        } else {
+            Variant::Int16(i16::from(value))
         }
     }
 }
 
-impl From<(i64, u8)> for Variant<'_, '_> {
-    fn from(value: (i64, u8)) -> Self {
-        Variant::Decimal8 {
-            integer: value.0,
-            scale: value.1,
+impl From<u16> for Variant<'_, '_> {
+    fn from(value: u16) -> Self {
+        // if it fits in i16, use that, otherwise use i32
+        if let Ok(value) = i16::try_from(value) {
+            Variant::Int16(value)
+        } else {
+            Variant::Int32(i32::from(value))
+        }
+    }
+}
+impl From<u32> for Variant<'_, '_> {
+    fn from(value: u32) -> Self {
+        // if it fits in i32, use that, otherwise use i64
+        if let Ok(value) = i32::try_from(value) {
+            Variant::Int32(value)
+        } else {
+            Variant::Int64(i64::from(value))
         }
     }
 }
 
-impl From<(i128, u8)> for Variant<'_, '_> {
-    fn from(value: (i128, u8)) -> Self {
-        Variant::Decimal16 {
-            integer: value.0,
-            scale: value.1,
+impl From<u64> for Variant<'_, '_> {
+    fn from(value: u64) -> Self {
+        // if it fits in i64, use that, otherwise use Decimal16
+        if let Ok(value) = i64::try_from(value) {
+            Variant::Int64(value)
+        } else {
+            // u64 max is 18446744073709551615, which fits in i128
+            Variant::Decimal16(VariantDecimal16::try_new(i128::from(value), 0).unwrap())
         }
     }
 }
 
+impl From<VariantDecimal4> for Variant<'_, '_> {
+    fn from(value: VariantDecimal4) -> Self {
+        Variant::Decimal4(value)
+    }
+}
+
+impl From<VariantDecimal8> for Variant<'_, '_> {
+    fn from(value: VariantDecimal8) -> Self {
+        Variant::Decimal8(value)
+    }
+}
+
+impl From<VariantDecimal16> for Variant<'_, '_> {
+    fn from(value: VariantDecimal16) -> Self {
+        Variant::Decimal16(value)
+    }
+}
+
+impl From<half::f16> for Variant<'_, '_> {
+    fn from(value: half::f16) -> Self {
+        Variant::Float(value.into())
+    }
+}
+
 impl From<f32> for Variant<'_, '_> {
     fn from(value: f32) -> Self {
         Variant::Float(value)
@@ -887,16 +1601,6 @@ impl From<f64> for Variant<'_, '_> {
     }
 }
 
-impl From<bool> for Variant<'_, '_> {
-    fn from(value: bool) -> Self {
-        if value {
-            Variant::BooleanTrue
-        } else {
-            Variant::BooleanFalse
-        }
-    }
-}
-
 impl From<NaiveDate> for Variant<'_, '_> {
     fn from(value: NaiveDate) -> Self {
         Variant::Date(value)
@@ -905,12 +1609,21 @@ impl From<NaiveDate> for Variant<'_, '_> {
 
 impl From<DateTime<Utc>> for Variant<'_, '_> {
     fn from(value: DateTime<Utc>) -> Self {
-        Variant::TimestampMicros(value)
+        if value.nanosecond() % 1000 > 0 {
+            Variant::TimestampNanos(value)
+        } else {
+            Variant::TimestampMicros(value)
+        }
     }
 }
+
 impl From<NaiveDateTime> for Variant<'_, '_> {
     fn from(value: NaiveDateTime) -> Self {
-        Variant::TimestampNtzMicros(value)
+        if value.nanosecond() % 1000 > 0 {
+            Variant::TimestampNtzNanos(value)
+        } else {
+            Variant::TimestampNtzMicros(value)
+        }
     }
 }
 
@@ -920,6 +1633,18 @@ impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
     }
 }
 
+impl From<NaiveTime> for Variant<'_, '_> {
+    fn from(value: NaiveTime) -> Self {
+        Variant::Time(value)
+    }
+}
+
+impl From<Uuid> for Variant<'_, '_> {
+    fn from(value: Uuid) -> Self {
+        Variant::Uuid(value)
+    }
+}
+
 impl<'v> From<&'v str> for Variant<'_, 'v> {
     fn from(value: &'v str) -> Self {
         if value.len() > MAX_SHORT_STRING_BYTES {
@@ -930,10 +1655,127 @@ impl<'v> From<&'v str> for Variant<'_, 'v> {
     }
 }
 
+impl TryFrom<(i32, u8)> for Variant<'_, '_> {
+    type Error = ArrowError;
+
+    /// Converts an `(unscaled integer, scale)` pair into a `Decimal4` variant, returning
+    /// whatever error `VariantDecimal4::try_new` reports for the pair.
+    fn try_from((integer, scale): (i32, u8)) -> Result<Self, Self::Error> {
+        VariantDecimal4::try_new(integer, scale).map(Variant::Decimal4)
+    }
+}
+
+impl TryFrom<(i64, u8)> for Variant<'_, '_> {
+    type Error = ArrowError;
+
+    /// Converts an `(unscaled integer, scale)` pair into a `Decimal8` variant, returning
+    /// whatever error `VariantDecimal8::try_new` reports for the pair.
+    fn try_from((integer, scale): (i64, u8)) -> Result<Self, Self::Error> {
+        VariantDecimal8::try_new(integer, scale).map(Variant::Decimal8)
+    }
+}
+
+impl TryFrom<(i128, u8)> for Variant<'_, '_> {
+    type Error = ArrowError;
+
+    /// Converts an `(unscaled integer, scale)` pair into a `Decimal16` variant, returning
+    /// whatever error `VariantDecimal16::try_new` reports for the pair.
+    fn try_from((integer, scale): (i128, u8)) -> Result<Self, Self::Error> {
+        VariantDecimal16::try_new(integer, scale).map(Variant::Decimal16)
+    }
+}
+
+// Debug helper: prints a bare <invalid> (unquoted) for VariantObject/VariantList entries that fail to decode.
+struct InvalidVariant;
+
+impl std::fmt::Debug for InvalidVariant {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("<invalid>")
+    }
+}
+
+// Debug helper that renders a byte slice as space-separated, two-digit lowercase hex values.
+struct HexString<'a>(&'a [u8]);
+
+impl std::fmt::Debug for HexString<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Every byte except the first is preceded by a single space, so an empty
+        // slice prints nothing and no trailing separator is ever written.
+        for (idx, byte) in self.0.iter().enumerate() {
+            let sep = if idx == 0 { "" } else { " " };
+            write!(f, "{}{:02x}", sep, byte)?;
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Debug for Variant<'_, '_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Variant::Null => write!(f, "Null"),
+            Variant::BooleanTrue => write!(f, "BooleanTrue"),
+            Variant::BooleanFalse => write!(f, "BooleanFalse"),
+            Variant::Int8(v) => f.debug_tuple("Int8").field(v).finish(),
+            Variant::Int16(v) => f.debug_tuple("Int16").field(v).finish(),
+            Variant::Int32(v) => f.debug_tuple("Int32").field(v).finish(),
+            Variant::Int64(v) => f.debug_tuple("Int64").field(v).finish(),
+            Variant::Float(v) => f.debug_tuple("Float").field(v).finish(),
+            Variant::Double(v) => f.debug_tuple("Double").field(v).finish(),
+            Variant::Decimal4(d) => f.debug_tuple("Decimal4").field(d).finish(),
+            Variant::Decimal8(d) => f.debug_tuple("Decimal8").field(d).finish(),
+            Variant::Decimal16(d) => f.debug_tuple("Decimal16").field(d).finish(),
+            Variant::Date(d) => f.debug_tuple("Date").field(d).finish(),
+            Variant::TimestampMicros(ts) => f.debug_tuple("TimestampMicros").field(ts).finish(),
+            Variant::TimestampNtzMicros(ts) => {
+                f.debug_tuple("TimestampNtzMicros").field(ts).finish()
+            }
+            Variant::TimestampNanos(ts) => f.debug_tuple("TimestampNanos").field(ts).finish(),
+            Variant::TimestampNtzNanos(ts) => f.debug_tuple("TimestampNtzNanos").field(ts).finish(),
+            Variant::Binary(bytes) => write!(f, "Binary({:?})", HexString(bytes)),
+            Variant::String(s) => f.debug_tuple("String").field(s).finish(),
+            Variant::Time(s) => f.debug_tuple("Time").field(s).finish(),
+            Variant::ShortString(s) => f.debug_tuple("ShortString").field(s).finish(),
+            Variant::Uuid(uuid) => f.debug_tuple("Uuid").field(&uuid).finish(),
+            Variant::Object(obj) => {
+                let mut map = f.debug_map();
+                for res in obj.iter_try() {
+                    match res {
+                        Ok((k, v)) => map.entry(&k, &v),
+                        Err(_) => map.entry(&InvalidVariant, &InvalidVariant),
+                    };
+                }
+                map.finish()
+            }
+            Variant::List(arr) => {
+                let mut list = f.debug_list();
+                for res in arr.iter_try() {
+                    match res {
+                        Ok(v) => list.entry(&v),
+                        Err(_) => list.entry(&InvalidVariant),
+                    };
+                }
+                list.finish()
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
+
     use super::*;
 
+    #[test]
+    fn test_empty_variant_will_fail() {
+        let metadata = VariantMetadata::try_new(&[1, 0, 0]).unwrap();
+
+        let err = Variant::try_new_with_metadata(metadata, &[]).unwrap_err();
+
+        assert!(matches!(
+            err,
+            ArrowError::InvalidArgumentError(ref msg) if msg == "Received empty bytes"));
+    }
+
     #[test]
     fn test_construct_short_string() {
         let short_string = ShortString::try_new("norm").expect("should fit in short string");
@@ -943,4 +1785,273 @@ mod tests {
         let res = ShortString::try_new(&long_string);
         assert!(res.is_err());
     }
+
+    #[test]
+    fn test_variant_decimal_conversion() {
+        let decimal4 = VariantDecimal4::try_new(1234_i32, 2).unwrap();
+        let variant = Variant::from(decimal4);
+        assert_eq!(variant.as_decimal4(), Some(decimal4));
+
+        let decimal8 = VariantDecimal8::try_new(12345678901_i64, 2).unwrap();
+        let variant = Variant::from(decimal8);
+        assert_eq!(variant.as_decimal8(), Some(decimal8));
+
+        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890_i128, 2).unwrap();
+        let variant = Variant::from(decimal16);
+        assert_eq!(variant.as_decimal16(), Some(decimal16));
+    }
+
+    #[test]
+    fn test_variant_all_subtypes_debug() {
+        use crate::VariantBuilder;
+
+        let mut builder = VariantBuilder::new();
+
+        // Create a root object that contains one of every variant subtype
+        let mut root_obj = builder.new_object();
+
+        // Add primitive types
+        root_obj.insert("null", ());
+        root_obj.insert("boolean_true", true);
+        root_obj.insert("boolean_false", false);
+        root_obj.insert("int8", 42i8);
+        root_obj.insert("int16", 1234i16);
+        root_obj.insert("int32", 123456i32);
+        root_obj.insert("int64", 1234567890123456789i64);
+        root_obj.insert("float", 1.234f32);
+        root_obj.insert("double", 1.23456789f64);
+
+        // Add date and timestamp types
+        let date = chrono::NaiveDate::from_ymd_opt(2024, 12, 25).unwrap();
+        root_obj.insert("date", date);
+
+        let timestamp_utc = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
+            .unwrap()
+            .and_hms_milli_opt(15, 30, 45, 123)
+            .unwrap()
+            .and_utc();
+        root_obj.insert("timestamp_micros", Variant::TimestampMicros(timestamp_utc));
+
+        let timestamp_ntz = chrono::NaiveDate::from_ymd_opt(2024, 12, 25)
+            .unwrap()
+            .and_hms_milli_opt(15, 30, 45, 123)
+            .unwrap();
+        root_obj.insert(
+            "timestamp_ntz_micros",
+            Variant::TimestampNtzMicros(timestamp_ntz),
+        );
+
+        let timestamp_nanos_utc = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
+            .unwrap()
+            .and_hms_nano_opt(12, 3, 4, 123456789)
+            .unwrap()
+            .and_utc();
+        root_obj.insert(
+            "timestamp_nanos",
+            Variant::TimestampNanos(timestamp_nanos_utc),
+        );
+
+        let timestamp_ntz_nanos = chrono::NaiveDate::from_ymd_opt(2025, 8, 15)
+            .unwrap()
+            .and_hms_nano_opt(12, 3, 4, 123456789)
+            .unwrap();
+        root_obj.insert(
+            "timestamp_ntz_nanos",
+            Variant::TimestampNtzNanos(timestamp_ntz_nanos),
+        );
+
+        // Add decimal types
+        let decimal4 = VariantDecimal4::try_new(1234i32, 2).unwrap();
+        root_obj.insert("decimal4", decimal4);
+
+        let decimal8 = VariantDecimal8::try_new(123456789i64, 3).unwrap();
+        root_obj.insert("decimal8", decimal8);
+
+        let decimal16 = VariantDecimal16::try_new(123456789012345678901234567890i128, 4).unwrap();
+        root_obj.insert("decimal16", decimal16);
+
+        // Add binary and string types
+        let binary_data = b"\x01\x02\x03\x04\xde\xad\xbe\xef";
+        root_obj.insert("binary", binary_data.as_slice());
+
+        let long_string =
+            "This is a long string that exceeds the short string limit and contains emoji 🦀";
+        root_obj.insert("string", long_string);
+        root_obj.insert("short_string", "Short string with emoji 🎉");
+        let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap();
+        root_obj.insert("time", time);
+
+        // Add uuid
+        let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
+        root_obj.insert("uuid", Variant::Uuid(uuid));
+
+        // Add nested object
+        let mut nested_obj = root_obj.new_object("nested_object");
+        nested_obj.insert("inner_key1", "inner_value1");
+        nested_obj.insert("inner_key2", 999i32);
+        nested_obj.finish();
+
+        // Add list with mixed types
+        let mut mixed_list = root_obj.new_list("mixed_list");
+        mixed_list.append_value(1i32);
+        mixed_list.append_value("two");
+        mixed_list.append_value(true);
+        mixed_list.append_value(4.0f32);
+        mixed_list.append_value(());
+
+        // Add nested list inside the mixed list
+        let mut nested_list = mixed_list.new_list();
+        nested_list.append_value("nested");
+        nested_list.append_value(10i8);
+        nested_list.finish();
+
+        mixed_list.finish();
+
+        root_obj.finish();
+
+        let (metadata, value) = builder.finish();
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        // Test Debug formatter (?)
+        let debug_output = format!("{:?}", variant);
+
+        // Verify that the debug output contains all the expected types
+        assert!(debug_output.contains("\"null\": Null"));
+        assert!(debug_output.contains("\"boolean_true\": BooleanTrue"));
+        assert!(debug_output.contains("\"boolean_false\": BooleanFalse"));
+        assert!(debug_output.contains("\"int8\": Int8(42)"));
+        assert!(debug_output.contains("\"int16\": Int16(1234)"));
+        assert!(debug_output.contains("\"int32\": Int32(123456)"));
+        assert!(debug_output.contains("\"int64\": Int64(1234567890123456789)"));
+        assert!(debug_output.contains("\"float\": Float(1.234)"));
+        assert!(debug_output.contains("\"double\": Double(1.23456789"));
+        assert!(debug_output.contains("\"date\": Date(2024-12-25)"));
+        assert!(debug_output.contains("\"timestamp_micros\": TimestampMicros("));
+        assert!(debug_output.contains("\"timestamp_ntz_micros\": TimestampNtzMicros("));
+        assert!(debug_output.contains("\"timestamp_nanos\": TimestampNanos("));
+        assert!(debug_output.contains("\"timestamp_ntz_nanos\": TimestampNtzNanos("));
+        assert!(debug_output.contains("\"decimal4\": Decimal4("));
+        assert!(debug_output.contains("\"decimal8\": Decimal8("));
+        assert!(debug_output.contains("\"decimal16\": Decimal16("));
+        assert!(debug_output.contains("\"binary\": Binary(01 02 03 04 de ad be ef)"));
+        assert!(debug_output.contains("\"string\": String("));
+        assert!(debug_output.contains("\"short_string\": ShortString("));
+        assert!(debug_output.contains("\"uuid\": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)"));
+        assert!(debug_output.contains("\"time\": Time(01:02:03.000004)"));
+        assert!(debug_output.contains("\"nested_object\":"));
+        assert!(debug_output.contains("\"mixed_list\":"));
+
+        let expected = r#"{"binary": Binary(01 02 03 04 de ad be ef), "boolean_false": BooleanFalse, "boolean_true": BooleanTrue, "date": Date(2024-12-25), "decimal16": Decimal16(VariantDecimal16 { integer: 123456789012345678901234567890, scale: 4 }), "decimal4": Decimal4(VariantDecimal4 { integer: 1234, scale: 2 }), "decimal8": Decimal8(VariantDecimal8 { integer: 123456789, scale: 3 }), "double": Double(1.23456789), "float": Float(1.234), "int16": Int16(1234), "int32": Int32(123456), "int64": Int64(1234567890123456789), "int8": Int8(42), "mixed_list": [Int32(1), ShortString(ShortString("two")), BooleanTrue, Float(4.0), Null, [ShortString(ShortString("nested")), Int8(10)]], "nested_object": {"inner_key1": ShortString(ShortString("inner_value1")), "inner_key2": Int32(999)}, "null": Null, "short_string": ShortString(ShortString("Short string with emoji 🎉")), "string": String("This is a long string that exceeds the short string limit and contains emoji 🦀"), "time": Time(01:02:03.000004), "timestamp_micros": TimestampMicros(2024-12-25T15:30:45.123Z), "timestamp_nanos": TimestampNanos(2025-08-15T12:03:04.123456789Z), "timestamp_ntz_micros": TimestampNtzMicros(2024-12-25T15:30:45.123), "timestamp_ntz_nanos": TimestampNtzNanos(2025-08-15T12:03:04.123456789), "uuid": Uuid(67e55044-10b1-426f-9247-bb680e5fe0c8)}"#;
+        assert_eq!(debug_output, expected);
+
+        // Test alternate Debug formatter (#?)
+        let alt_debug_output = format!("{:#?}", variant);
+        let expected = r#"{
+    "binary": Binary(01 02 03 04 de ad be ef),
+    "boolean_false": BooleanFalse,
+    "boolean_true": BooleanTrue,
+    "date": Date(
+        2024-12-25,
+    ),
+    "decimal16": Decimal16(
+        VariantDecimal16 {
+            integer: 123456789012345678901234567890,
+            scale: 4,
+        },
+    ),
+    "decimal4": Decimal4(
+        VariantDecimal4 {
+            integer: 1234,
+            scale: 2,
+        },
+    ),
+    "decimal8": Decimal8(
+        VariantDecimal8 {
+            integer: 123456789,
+            scale: 3,
+        },
+    ),
+    "double": Double(
+        1.23456789,
+    ),
+    "float": Float(
+        1.234,
+    ),
+    "int16": Int16(
+        1234,
+    ),
+    "int32": Int32(
+        123456,
+    ),
+    "int64": Int64(
+        1234567890123456789,
+    ),
+    "int8": Int8(
+        42,
+    ),
+    "mixed_list": [
+        Int32(
+            1,
+        ),
+        ShortString(
+            ShortString(
+                "two",
+            ),
+        ),
+        BooleanTrue,
+        Float(
+            4.0,
+        ),
+        Null,
+        [
+            ShortString(
+                ShortString(
+                    "nested",
+                ),
+            ),
+            Int8(
+                10,
+            ),
+        ],
+    ],
+    "nested_object": {
+        "inner_key1": ShortString(
+            ShortString(
+                "inner_value1",
+            ),
+        ),
+        "inner_key2": Int32(
+            999,
+        ),
+    },
+    "null": Null,
+    "short_string": ShortString(
+        ShortString(
+            "Short string with emoji 🎉",
+        ),
+    ),
+    "string": String(
+        "This is a long string that exceeds the short string limit and contains emoji 🦀",
+    ),
+    "time": Time(
+        01:02:03.000004,
+    ),
+    "timestamp_micros": TimestampMicros(
+        2024-12-25T15:30:45.123Z,
+    ),
+    "timestamp_nanos": TimestampNanos(
+        2025-08-15T12:03:04.123456789Z,
+    ),
+    "timestamp_ntz_micros": TimestampNtzMicros(
+        2024-12-25T15:30:45.123,
+    ),
+    "timestamp_ntz_nanos": TimestampNtzNanos(
+        2025-08-15T12:03:04.123456789,
+    ),
+    "uuid": Uuid(
+        67e55044-10b1-426f-9247-bb680e5fe0c8,
+    ),
+}"#;
+        assert_eq!(alt_debug_output, expected);
+    }
 }
diff --git a/parquet-variant/src/variant/decimal.rs b/parquet-variant/src/variant/decimal.rs
new file mode 100644
index 000000000000..c7849a381af9
--- /dev/null
+++ b/parquet-variant/src/variant/decimal.rs
@@ -0,0 +1,758 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_schema::ArrowError;
+use std::fmt;
+
+/// Trait for variant decimal types, enabling generic code across Decimal4/8/16
+///
+/// This trait provides a common interface for the three variant decimal types,
+/// allowing generic functions and data structures to work with any decimal width.
+/// It is modeled after Arrow's `DecimalType` trait but adapted for variant semantics.
+///
+/// # Example
+///
+/// ```
+/// # use parquet_variant::{VariantDecimal4, VariantDecimal8, VariantDecimalType};
+/// #
+/// fn extract_scale<D: VariantDecimalType>(decimal: D) -> u8 {
+///     decimal.scale()
+/// }
+///
+/// let dec4 = VariantDecimal4::try_new(12345, 2).unwrap();
+/// let dec8 = VariantDecimal8::try_new(67890, 3).unwrap();
+///
+/// assert_eq!(extract_scale(dec4), 2);
+/// assert_eq!(extract_scale(dec8), 3);
+/// ```
+pub trait VariantDecimalType: Into<super::Variant<'static, 'static>> {
+    /// The underlying signed integer type (i32, i64, or i128)
+    type Native;
+
+    /// Maximum number of significant digits this decimal type can represent (9, 18, or 38)
+    const MAX_PRECISION: u8;
+    /// The largest positive unscaled value that fits in [`Self::MAX_PRECISION`] digits.
+    const MAX_UNSCALED_VALUE: Self::Native;
+
+    /// True if the given precision and scale are valid for this variant decimal type.
+    ///
+    /// NOTE: By a strict reading of the "decimal table" in the [variant spec], one might conclude that
+    /// each decimal type has both lower and upper bounds on precision (i.e. Decimal16 with precision 5
+    /// is invalid because Decimal4 "covers" it). But the variant shredding integration tests
+    /// specifically expect such cases to succeed, so we only enforce the upper bound here.
+    ///
+    /// [variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#encoding-types
+    ///
+    /// # Example
+    /// ```
+    /// # use parquet_variant::{VariantDecimal4, VariantDecimalType};
+    /// #
+    /// assert!(VariantDecimal4::is_valid_precision_and_scale(&5, &2));
+    /// assert!(!VariantDecimal4::is_valid_precision_and_scale(&10, &2)); // too wide
+    /// assert!(!VariantDecimal4::is_valid_precision_and_scale(&5, &-1)); // negative scale
+    /// assert!(!VariantDecimal4::is_valid_precision_and_scale(&5, &7)); // scale too big
+    /// ```
+    fn is_valid_precision_and_scale(precision: &u8, scale: &i8) -> bool {
+        (1..=Self::MAX_PRECISION).contains(precision) && (0..=*precision as i8).contains(scale)
+    }
+
+    /// Creates a new decimal value from the given unscaled integer and scale, failing if the
+    /// integer's width, or the requested scale, exceeds `MAX_PRECISION`.
+    ///
+    /// NOTE: The scale is unsigned here. For compatibility with arrow decimal types, negative
+    /// scales are handled by [`Self::try_new_with_signed_scale`], which rescales to scale 0.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use parquet_variant::{VariantDecimal4, VariantDecimalType};
+    /// #
+    /// // Valid: 123.45 (5 digits, scale 2)
+    /// let d = VariantDecimal4::try_new(12345, 2).unwrap();
+    /// assert_eq!(d.integer(), 12345);
+    /// assert_eq!(d.scale(), 2);
+    ///
+    /// VariantDecimal4::try_new(123, 10).expect_err("scale exceeds MAX_PRECISION");
+    /// VariantDecimal4::try_new(1234567890, 2).expect_err("value's width exceeds MAX_PRECISION");
+    /// ```
+    fn try_new(integer: Self::Native, scale: u8) -> Result<Self, ArrowError>;
+
+    /// Attempts to convert an unscaled arrow decimal value to the indicated variant decimal type.
+    ///
+    /// Unlike [`Self::try_new`], this function accepts a signed scale, and attempts to rescale
+    /// negative-scale values to their equivalent (larger) scale-0 values. For example, a decimal
+    /// value of 123 with scale -2 becomes 12300 with scale 0.
+    ///
+    /// Fails if rescaling fails, or for any of the reasons [`Self::try_new`] could fail.
+    fn try_new_with_signed_scale(integer: Self::Native, scale: i8) -> Result<Self, ArrowError>;
+
+    /// Returns the unscaled integer value
+    fn integer(&self) -> Self::Native;
+
+    /// Returns the scale (number of digits after the decimal point)
+    fn scale(&self) -> u8;
+}
+
+/// Implements the complete variant decimal type: methods, Display, and VariantDecimalType trait
+macro_rules! impl_variant_decimal {
+    ($struct_name:ident, $native:ty) => {
+        impl $struct_name {
+            /// Attempts to create a new instance of this decimal type, failing if the value is too
+            /// wide or the scale is too large.
+            pub fn try_new(integer: $native, scale: u8) -> Result<Self, ArrowError> {
+                let max_precision = Self::MAX_PRECISION;
+                if scale > max_precision {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Scale {scale} is larger than max precision {max_precision}",
+                    )));
+                }
+                if !(-Self::MAX_UNSCALED_VALUE..=Self::MAX_UNSCALED_VALUE).contains(&integer) {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "{integer} is wider than max precision {max_precision}",
+                    )));
+                }
+
+                Ok(Self { integer, scale })
+            }
+
+            /// Returns the unscaled integer value of the decimal.
+            ///
+            /// For example, if the decimal is `123.45`, this will return `12345`.
+            pub fn integer(&self) -> $native {
+                self.integer
+            }
+
+            /// Returns the scale of the decimal (how many digits after the decimal point).
+            ///
+            /// For example, if the decimal is `123.45`, this will return `2`.
+            pub fn scale(&self) -> u8 {
+                self.scale
+            }
+        }
+
+        impl VariantDecimalType for $struct_name {
+            type Native = $native;
+            const MAX_PRECISION: u8 = Self::MAX_PRECISION;
+            const MAX_UNSCALED_VALUE: $native = <$native>::pow(10, Self::MAX_PRECISION as u32) - 1;
+
+            fn try_new(integer: $native, scale: u8) -> Result<Self, ArrowError> {
+                Self::try_new(integer, scale)
+            }
+
+            fn try_new_with_signed_scale(integer: $native, scale: i8) -> Result<Self, ArrowError> {
+                if scale >= 0 {
+                    return Self::try_new(integer, scale as u8);
+                }
+                // Negative scale: multiply by 10^|scale| to obtain the equivalent scale-0 value.
+                // `unsigned_abs` is used because `-scale` overflows (panics) for `i8::MIN`.
+                let multiplier = <$native>::checked_pow(10, u32::from(scale.unsigned_abs()));
+                let Some(rescaled) = multiplier.and_then(|m| integer.checked_mul(m)) else {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Overflow when rescaling {integer} with scale {scale}"
+                    )));
+                };
+                Self::try_new(rescaled, 0)
+            }
+
+            fn integer(&self) -> $native {
+                self.integer()
+            }
+
+            fn scale(&self) -> u8 {
+                self.scale()
+            }
+        }
+
+        impl fmt::Display for $struct_name {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                let integer = if self.scale == 0 {
+                    self.integer
+                } else {
+                    let divisor = <$native>::pow(10, self.scale as u32);
+                    let remainder = self.integer % divisor;
+                    if remainder != 0 {
+                        // Track the sign explicitly, in case the quotient is zero
+                        let sign = if self.integer < 0 { "-" } else { "" };
+                        // Format an unsigned remainder with leading zeros and strip trailing zeros
+                        let remainder =
+                            format!("{:0width$}", remainder.abs(), width = self.scale as usize);
+                        let remainder = remainder.trim_end_matches('0');
+                        let quotient = (self.integer / divisor).abs();
+                        return write!(f, "{sign}{quotient}.{remainder}");
+                    }
+                    self.integer / divisor
+                };
+                write!(f, "{integer}")
+            }
+        }
+    };
+}
+
+/// Represents a 4-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 32-bit signed integer for the coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is limited to 9 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+/// # Example: Create a VariantDecimal4
+/// ```
+/// # use parquet_variant::VariantDecimal4;
+/// // Create a value representing the decimal 123.4567
+/// let decimal = VariantDecimal4::try_new(1234567, 4).expect("Failed to create decimal");
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal4 {
+    integer: i32,
+    scale: u8,
+}
+
+impl VariantDecimal4 {
+    /// Maximum number of significant digits (9 for 4-byte decimals)
+    pub const MAX_PRECISION: u8 = arrow_schema::DECIMAL32_MAX_PRECISION;
+}
+
+impl_variant_decimal!(VariantDecimal4, i32);
+
+/// Represents an 8-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 64-bit signed integer for the coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is limited to 18 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+///
+/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+/// # Example: Create a VariantDecimal8
+/// ```
+/// # use parquet_variant::VariantDecimal8;
+/// // Create a value representing the decimal 123456.78
+/// let decimal = VariantDecimal8::try_new(12345678, 2).expect("Failed to create decimal");
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal8 {
+    integer: i64,
+    scale: u8,
+}
+
+impl VariantDecimal8 {
+    /// Maximum number of significant digits (18 for 8-byte decimals)
+    pub const MAX_PRECISION: u8 = arrow_schema::DECIMAL64_MAX_PRECISION;
+}
+
+impl_variant_decimal!(VariantDecimal8, i64);
+
+/// Represents a 16-byte decimal value in the Variant format.
+///
+/// This struct stores a decimal number using a 128-bit signed integer for the coefficient
+/// and an 8-bit unsigned integer for the scale (number of decimal places). Its precision is limited to 38 digits.
+///
+/// For valid precision and scale values, see the Variant specification:
+///
+/// <https://github.com/apache/parquet-format/blob/87f2c8bf77eefb4c43d0ebaeea1778bd28ac3609/VariantEncoding.md?plain=1#L418-L420>
+///
+/// # Example: Create a VariantDecimal16
+/// ```
+/// # use parquet_variant::VariantDecimal16;
+/// // Create a value representing the decimal 12345678901234567.890
+/// let decimal = VariantDecimal16::try_new(12345678901234567890, 3).unwrap();
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct VariantDecimal16 {
+    integer: i128,
+    scale: u8,
+}
+
+impl VariantDecimal16 {
+    /// Maximum number of significant digits (38 for 16-byte decimals)
+    pub const MAX_PRECISION: u8 = arrow_schema::DECIMAL128_MAX_PRECISION;
+}
+
+impl_variant_decimal!(VariantDecimal16, i128);
+
+// Infallible conversion from a narrower decimal type to a wider one
+macro_rules! impl_from_decimal_for_decimal {
+    ($from_ty:ty, $for_ty:ty) => {
+        impl From<$from_ty> for $for_ty {
+            fn from(decimal: $from_ty) -> Self {
+                Self {
+                    integer: decimal.integer.into(),
+                    scale: decimal.scale,
+                }
+            }
+        }
+    };
+}
+
+impl_from_decimal_for_decimal!(VariantDecimal4, VariantDecimal8);
+impl_from_decimal_for_decimal!(VariantDecimal4, VariantDecimal16);
+impl_from_decimal_for_decimal!(VariantDecimal8, VariantDecimal16);
+
+// Fallible conversion from a wider decimal type to a narrower one
+macro_rules! impl_try_from_decimal_for_decimal {
+    ($from_ty:ty, $for_ty:ty) => {
+        impl TryFrom<$from_ty> for $for_ty {
+            type Error = ArrowError;
+
+            fn try_from(decimal: $from_ty) -> Result<Self, ArrowError> {
+                let Ok(integer) = decimal.integer.try_into() else {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Value {} is wider than max precision {}",
+                        decimal.integer,
+                        Self::MAX_PRECISION
+                    )));
+                };
+                Self::try_new(integer, decimal.scale)
+            }
+        }
+    };
+}
+
+impl_try_from_decimal_for_decimal!(VariantDecimal8, VariantDecimal4);
+impl_try_from_decimal_for_decimal!(VariantDecimal16, VariantDecimal4);
+impl_try_from_decimal_for_decimal!(VariantDecimal16, VariantDecimal8);
+
+// Fallible conversion from a decimal's underlying integer type
+macro_rules! impl_try_from_int_for_decimal {
+    ($from_ty:ty, $for_ty:ty) => {
+        impl TryFrom<$from_ty> for $for_ty {
+            type Error = ArrowError;
+
+            fn try_from(integer: $from_ty) -> Result<Self, ArrowError> {
+                Self::try_new(integer, 0)
+            }
+        }
+    };
+}
+
+impl_try_from_int_for_decimal!(i32, VariantDecimal4);
+impl_try_from_int_for_decimal!(i64, VariantDecimal8);
+impl_try_from_int_for_decimal!(i128, VariantDecimal16);
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_variant_decimal_invalid_precision() {
+        // Test precision validation for Decimal4
+        let decimal4_too_large = VariantDecimal4::try_new(1_000_000_000_i32, 2);
+        assert!(
+            decimal4_too_large.is_err(),
+            "Decimal4 precision overflow should fail"
+        );
+        assert!(
+            decimal4_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        let decimal4_too_small = VariantDecimal4::try_new(-1_000_000_000_i32, 2);
+        assert!(
+            decimal4_too_small.is_err(),
+            "Decimal4 precision underflow should fail"
+        );
+        assert!(
+            decimal4_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        // Test valid edge cases for Decimal4
+        let decimal4_max_valid = VariantDecimal4::try_new(999_999_999_i32, 2);
+        assert!(
+            decimal4_max_valid.is_ok(),
+            "Decimal4 max valid value should succeed"
+        );
+
+        let decimal4_min_valid = VariantDecimal4::try_new(-999_999_999_i32, 2);
+        assert!(
+            decimal4_min_valid.is_ok(),
+            "Decimal4 min valid value should succeed"
+        );
+
+        // Test precision validation for Decimal8
+        let decimal8_too_large = VariantDecimal8::try_new(1_000_000_000_000_000_000_i64, 2);
+        assert!(
+            decimal8_too_large.is_err(),
+            "Decimal8 precision overflow should fail"
+        );
+        assert!(
+            decimal8_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        let decimal8_too_small = VariantDecimal8::try_new(-1_000_000_000_000_000_000_i64, 2);
+        assert!(
+            decimal8_too_small.is_err(),
+            "Decimal8 precision underflow should fail"
+        );
+        assert!(
+            decimal8_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        // Test valid edge cases for Decimal8
+        let decimal8_max_valid = VariantDecimal8::try_new(999_999_999_999_999_999_i64, 2);
+        assert!(
+            decimal8_max_valid.is_ok(),
+            "Decimal8 max valid value should succeed"
+        );
+
+        let decimal8_min_valid = VariantDecimal8::try_new(-999_999_999_999_999_999_i64, 2);
+        assert!(
+            decimal8_min_valid.is_ok(),
+            "Decimal8 min valid value should succeed"
+        );
+
+        // Test precision validation for Decimal16
+        let decimal16_too_large =
+            VariantDecimal16::try_new(100000000000000000000000000000000000000_i128, 2);
+        assert!(
+            decimal16_too_large.is_err(),
+            "Decimal16 precision overflow should fail"
+        );
+        assert!(
+            decimal16_too_large
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        let decimal16_too_small =
+            VariantDecimal16::try_new(-100000000000000000000000000000000000000_i128, 2);
+        assert!(
+            decimal16_too_small.is_err(),
+            "Decimal16 precision underflow should fail"
+        );
+        assert!(
+            decimal16_too_small
+                .unwrap_err()
+                .to_string()
+                .contains("wider than max precision")
+        );
+
+        // Test valid edge cases for Decimal16
+        let decimal16_max_valid =
+            VariantDecimal16::try_new(99999999999999999999999999999999999999_i128, 2);
+        assert!(
+            decimal16_max_valid.is_ok(),
+            "Decimal16 max valid value should succeed"
+        );
+
+        let decimal16_min_valid =
+            VariantDecimal16::try_new(-99999999999999999999999999999999999999_i128, 2);
+        assert!(
+            decimal16_min_valid.is_ok(),
+            "Decimal16 min valid value should succeed"
+        );
+    }
+
+    #[test]
+    fn test_variant_decimal_invalid_scale() {
+        // Test invalid scale for Decimal4 (scale > 9)
+        let decimal4_invalid_scale = VariantDecimal4::try_new(123_i32, 10);
+        assert!(
+            decimal4_invalid_scale.is_err(),
+            "Decimal4 with scale > 9 should fail"
+        );
+        assert!(
+            decimal4_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
+
+        let decimal4_invalid_scale_large = VariantDecimal4::try_new(123_i32, 20);
+        assert!(
+            decimal4_invalid_scale_large.is_err(),
+            "Decimal4 with scale > 9 should fail"
+        );
+
+        // Test valid scale edge case for Decimal4
+        let decimal4_valid_scale = VariantDecimal4::try_new(123_i32, 9);
+        assert!(
+            decimal4_valid_scale.is_ok(),
+            "Decimal4 with scale = 9 should succeed"
+        );
+
+        // Test invalid scale for Decimal8 (scale > 18)
+        let decimal8_invalid_scale = VariantDecimal8::try_new(123_i64, 19);
+        assert!(
+            decimal8_invalid_scale.is_err(),
+            "Decimal8 with scale > 18 should fail"
+        );
+        assert!(
+            decimal8_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
+
+        let decimal8_invalid_scale_large = VariantDecimal8::try_new(123_i64, 25);
+        assert!(
+            decimal8_invalid_scale_large.is_err(),
+            "Decimal8 with scale > 18 should fail"
+        );
+
+        // Test valid scale edge case for Decimal8
+        let decimal8_valid_scale = VariantDecimal8::try_new(123_i64, 18);
+        assert!(
+            decimal8_valid_scale.is_ok(),
+            "Decimal8 with scale = 18 should succeed"
+        );
+
+        // Test invalid scale for Decimal16 (scale > 38)
+        let decimal16_invalid_scale = VariantDecimal16::try_new(123_i128, 39);
+        assert!(
+            decimal16_invalid_scale.is_err(),
+            "Decimal16 with scale > 38 should fail"
+        );
+        assert!(
+            decimal16_invalid_scale
+                .unwrap_err()
+                .to_string()
+                .contains("larger than max precision")
+        );
+
+        let decimal16_invalid_scale_large = VariantDecimal16::try_new(123_i128, 50);
+        assert!(
+            decimal16_invalid_scale_large.is_err(),
+            "Decimal16 with scale > 38 should fail"
+        );
+
+        // Test valid scale edge case for Decimal16
+        let decimal16_valid_scale = VariantDecimal16::try_new(123_i128, 38);
+        assert!(
+            decimal16_valid_scale.is_ok(),
+            "Decimal16 with scale = 38 should succeed"
+        );
+    }
+
+    #[test]
+    fn test_variant_decimal4_display() {
+        // Test zero scale (integers)
+        let d = VariantDecimal4::try_new(42, 0).unwrap();
+        assert_eq!(d.to_string(), "42");
+
+        let d = VariantDecimal4::try_new(-42, 0).unwrap();
+        assert_eq!(d.to_string(), "-42");
+
+        // Test basic decimal formatting
+        let d = VariantDecimal4::try_new(12345, 2).unwrap();
+        assert_eq!(d.to_string(), "123.45");
+
+        let d = VariantDecimal4::try_new(-12345, 2).unwrap();
+        assert_eq!(d.to_string(), "-123.45");
+
+        // Test trailing zeros are trimmed
+        let d = VariantDecimal4::try_new(12300, 2).unwrap();
+        assert_eq!(d.to_string(), "123");
+
+        let d = VariantDecimal4::try_new(-12300, 2).unwrap();
+        assert_eq!(d.to_string(), "-123");
+
+        // Test leading zeros in decimal part
+        let d = VariantDecimal4::try_new(1005, 3).unwrap();
+        assert_eq!(d.to_string(), "1.005");
+
+        let d = VariantDecimal4::try_new(-1005, 3).unwrap();
+        assert_eq!(d.to_string(), "-1.005");
+
+        // Test number smaller than scale (leading zero before decimal)
+        let d = VariantDecimal4::try_new(123, 4).unwrap();
+        assert_eq!(d.to_string(), "0.0123");
+
+        let d = VariantDecimal4::try_new(-123, 4).unwrap();
+        assert_eq!(d.to_string(), "-0.0123");
+
+        // Test zero
+        let d = VariantDecimal4::try_new(0, 0).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        let d = VariantDecimal4::try_new(0, 3).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        // Test max scale
+        let d = VariantDecimal4::try_new(123456789, 9).unwrap();
+        assert_eq!(d.to_string(), "0.123456789");
+
+        let d = VariantDecimal4::try_new(-123456789, 9).unwrap();
+        assert_eq!(d.to_string(), "-0.123456789");
+
+        // Test max precision
+        let d = VariantDecimal4::try_new(999999999, 0).unwrap();
+        assert_eq!(d.to_string(), "999999999");
+
+        let d = VariantDecimal4::try_new(-999999999, 0).unwrap();
+        assert_eq!(d.to_string(), "-999999999");
+
+        // Test trailing zeros with mixed decimal places
+        let d = VariantDecimal4::try_new(120050, 4).unwrap();
+        assert_eq!(d.to_string(), "12.005");
+
+        let d = VariantDecimal4::try_new(-120050, 4).unwrap();
+        assert_eq!(d.to_string(), "-12.005");
+    }
+
+    #[test]
+    fn test_variant_decimal8_display() {
+        // Test zero scale (integers)
+        let d = VariantDecimal8::try_new(42, 0).unwrap();
+        assert_eq!(d.to_string(), "42");
+
+        let d = VariantDecimal8::try_new(-42, 0).unwrap();
+        assert_eq!(d.to_string(), "-42");
+
+        // Test basic decimal formatting
+        let d = VariantDecimal8::try_new(1234567890, 3).unwrap();
+        assert_eq!(d.to_string(), "1234567.89");
+
+        let d = VariantDecimal8::try_new(-1234567890, 3).unwrap();
+        assert_eq!(d.to_string(), "-1234567.89");
+
+        // Test trailing zeros are trimmed
+        let d = VariantDecimal8::try_new(123000000, 6).unwrap();
+        assert_eq!(d.to_string(), "123");
+
+        let d = VariantDecimal8::try_new(-123000000, 6).unwrap();
+        assert_eq!(d.to_string(), "-123");
+
+        // Test leading zeros in decimal part
+        let d = VariantDecimal8::try_new(100005, 6).unwrap();
+        assert_eq!(d.to_string(), "0.100005");
+
+        let d = VariantDecimal8::try_new(-100005, 6).unwrap();
+        assert_eq!(d.to_string(), "-0.100005");
+
+        // Test number smaller than scale
+        let d = VariantDecimal8::try_new(123, 10).unwrap();
+        assert_eq!(d.to_string(), "0.0000000123");
+
+        let d = VariantDecimal8::try_new(-123, 10).unwrap();
+        assert_eq!(d.to_string(), "-0.0000000123");
+
+        // Test zero
+        let d = VariantDecimal8::try_new(0, 0).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        let d = VariantDecimal8::try_new(0, 10).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        // Test max scale
+        let d = VariantDecimal8::try_new(123456789012345678, 18).unwrap();
+        assert_eq!(d.to_string(), "0.123456789012345678");
+
+        let d = VariantDecimal8::try_new(-123456789012345678, 18).unwrap();
+        assert_eq!(d.to_string(), "-0.123456789012345678");
+
+        // Test max precision
+        let d = VariantDecimal8::try_new(999999999999999999, 0).unwrap();
+        assert_eq!(d.to_string(), "999999999999999999");
+
+        let d = VariantDecimal8::try_new(-999999999999999999, 0).unwrap();
+        assert_eq!(d.to_string(), "-999999999999999999");
+
+        // Test complex trailing zeros
+        let d = VariantDecimal8::try_new(1200000050000, 10).unwrap();
+        assert_eq!(d.to_string(), "120.000005");
+
+        let d = VariantDecimal8::try_new(-1200000050000, 10).unwrap();
+        assert_eq!(d.to_string(), "-120.000005");
+    }
+
+    #[test]
+    fn test_variant_decimal16_display() {
+        // Test zero scale (integers)
+        let d = VariantDecimal16::try_new(42, 0).unwrap();
+        assert_eq!(d.to_string(), "42");
+
+        let d = VariantDecimal16::try_new(-42, 0).unwrap();
+        assert_eq!(d.to_string(), "-42");
+
+        // Test basic decimal formatting
+        let d = VariantDecimal16::try_new(123456789012345, 4).unwrap();
+        assert_eq!(d.to_string(), "12345678901.2345");
+
+        let d = VariantDecimal16::try_new(-123456789012345, 4).unwrap();
+        assert_eq!(d.to_string(), "-12345678901.2345");
+
+        // Test trailing zeros are trimmed
+        let d = VariantDecimal16::try_new(12300000000, 8).unwrap();
+        assert_eq!(d.to_string(), "123");
+
+        let d = VariantDecimal16::try_new(-12300000000, 8).unwrap();
+        assert_eq!(d.to_string(), "-123");
+
+        // Test leading zeros in decimal part
+        let d = VariantDecimal16::try_new(10000005, 8).unwrap();
+        assert_eq!(d.to_string(), "0.10000005");
+
+        let d = VariantDecimal16::try_new(-10000005, 8).unwrap();
+        assert_eq!(d.to_string(), "-0.10000005");
+
+        // Test number smaller than scale
+        let d = VariantDecimal16::try_new(123, 20).unwrap();
+        assert_eq!(d.to_string(), "0.00000000000000000123");
+
+        let d = VariantDecimal16::try_new(-123, 20).unwrap();
+        assert_eq!(d.to_string(), "-0.00000000000000000123");
+
+        // Test zero
+        let d = VariantDecimal16::try_new(0, 0).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        let d = VariantDecimal16::try_new(0, 20).unwrap();
+        assert_eq!(d.to_string(), "0");
+
+        // Test max scale
+        let d = VariantDecimal16::try_new(12345678901234567890123456789012345678_i128, 38).unwrap();
+        assert_eq!(d.to_string(), "0.12345678901234567890123456789012345678");
+
+        let d =
+            VariantDecimal16::try_new(-12345678901234567890123456789012345678_i128, 38).unwrap();
+        assert_eq!(d.to_string(), "-0.12345678901234567890123456789012345678");
+
+        // Test max precision integer
+        let d = VariantDecimal16::try_new(99999999999999999999999999999999999999_i128, 0).unwrap();
+        assert_eq!(d.to_string(), "99999999999999999999999999999999999999");
+
+        let d = VariantDecimal16::try_new(-99999999999999999999999999999999999999_i128, 0).unwrap();
+        assert_eq!(d.to_string(), "-99999999999999999999999999999999999999");
+
+        // Test complex trailing zeros
+        let d = VariantDecimal16::try_new(12000000000000050000000000000_i128, 25).unwrap();
+        assert_eq!(d.to_string(), "1200.000000000005");
+
+        let d = VariantDecimal16::try_new(-12000000000000050000000000000_i128, 25).unwrap();
+        assert_eq!(d.to_string(), "-1200.000000000005");
+
+        // Test large integer that would overflow i64 but fits in i128
+        let large_int = 12345678901234567890123456789_i128;
+        let d = VariantDecimal16::try_new(large_int, 0).unwrap();
+        assert_eq!(d.to_string(), "12345678901234567890123456789");
+
+        let d = VariantDecimal16::try_new(-large_int, 0).unwrap();
+        assert_eq!(d.to_string(), "-12345678901234567890123456789");
+    }
+}
diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs
index d9fd20eacc13..fd71afba7342 100644
--- a/parquet-variant/src/variant/list.rs
+++ b/parquet-variant/src/variant/list.rs
@@ -14,108 +14,231 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::decoder::OffsetSizeBytes;
-use crate::utils::{first_byte_from_slice, slice_from_slice, validate_fallible_iterator};
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
+use crate::utils::{
+    first_byte_from_slice, overflow_error, slice_from_slice, slice_from_slice_at_offset,
+};
 use crate::variant::{Variant, VariantMetadata};
 
 use arrow_schema::ArrowError;
 
+// The value header occupies one byte; use a named constant for readability
+const NUM_HEADER_BYTES: u32 = 1;
+
 /// A parsed version of the variant array value header byte.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
 pub(crate) struct VariantListHeader {
+    num_elements_size: OffsetSizeBytes,
     offset_size: OffsetSizeBytes,
-    is_large: bool,
 }
 
 impl VariantListHeader {
+    // Hide the ugly casting
+    const fn num_elements_size(&self) -> u32 {
+        self.num_elements_size as _
+    }
+    const fn offset_size(&self) -> u32 {
+        self.offset_size as _
+    }
+
+    // Avoid materializing this offset, since it's cheaply and safely computable
+    const fn first_offset_byte(&self) -> u32 {
+        NUM_HEADER_BYTES + self.num_elements_size()
+    }
+
     pub(crate) fn try_new(header_byte: u8) -> Result<Self, ArrowError> {
         // The 6 first bits to the left are the value_header and the 2 bits
         // to the right are the basic type, so we shift to get only the value_header
         let value_header = header_byte >> 2;
         let is_large = (value_header & 0x04) != 0; // 3rd bit from the right
         let field_offset_size_minus_one = value_header & 0x03; // Last two bits
+
+        // The size of the num_elements entry in the array value_data is 4 bytes if
+        // is_large is true, otherwise 1 byte.
+        let num_elements_size = match is_large {
+            true => OffsetSizeBytes::Four,
+            false => OffsetSizeBytes::One,
+        };
         let offset_size = OffsetSizeBytes::try_new(field_offset_size_minus_one)?;
 
         Ok(Self {
+            num_elements_size,
             offset_size,
-            is_large,
         })
     }
 }
 
 /// [`Variant`] Array.
 ///
+/// See the [Variant spec] for details.
+///
 /// NOTE: The "list" naming differs from the variant spec -- which calls it "array" -- in order to be
 /// consistent with Parquet and Arrow type naming. Otherwise, the name would conflict with the
 /// `VariantArray : Array` we must eventually define for variant-typed arrow arrays.
-#[derive(Clone, Debug, PartialEq)]
+///
+/// # Validation
+///
+/// Every instance of variant list is either _valid_ or _invalid_, depending on whether the
+/// underlying bytes are a valid encoding of a variant array (see below).
+///
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully _validated_. They always
+/// contain _valid_ data, and infallible accesses such as iteration and indexing are panic-free. The
+/// validation cost is linear in the number of underlying bytes.
+///
+/// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or
+/// _invalid_ data. Infallible accesses such as iteration and indexing will panic if the underlying
+/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`] and [`Self::try_get`] are
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can also be used to _validate_ an
+/// _unvalidated_ instance, if desired.
+///
+/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
+/// knows the underlying bytes were already validated previously, or if the caller intends to
+/// perform a small number of (fallible) accesses to a large list.
+///
+/// A _validated_ variant list instance guarantees that:
+///
+/// - header byte is valid
+/// - num_elements is in bounds
+/// - offset array content is in-bounds
+/// - first offset is zero
+/// - last offset is in-bounds
+/// - all other offsets are in-bounds (*)
+/// - all offsets are monotonically increasing (*)
+/// - all values are (recursively) valid variant objects (*)
+/// - the associated variant metadata is [valid] (*)
+///
+/// NOTE: [`Self::new`] only skips expensive (non-constant cost) validation checks (marked by `(*)`
+/// in the list above); it panics if any of the other checks fails.
+///
+/// # Safety
+///
+/// Even an _invalid_ variant list instance is still _safe_ to use in the Rust sense. Accessing
+/// it with infallible methods may cause panics but will never lead to undefined behavior.
+///
+/// [valid]: VariantMetadata#Validation
+/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-array-basic_type3
+#[derive(Debug, Clone)]
 pub struct VariantList<'m, 'v> {
     pub metadata: VariantMetadata<'m>,
     pub value: &'v [u8],
     header: VariantListHeader,
-    num_elements: usize,
-    first_offset_byte: usize,
-    first_value_byte: usize,
+    num_elements: u32,
+    first_value_byte: u32,
+    validated: bool,
 }
 
+// We don't want this to grow because it could increase the size of `Variant` and hurt performance.
+const _: () = crate::utils::expect_size_of::<VariantList>(64);
+
 impl<'m, 'v> VariantList<'m, 'v> {
     /// Attempts to interpret `value` as a variant array value.
     ///
     /// # Validation
     ///
     /// This constructor verifies that `value` points to a valid variant array value. In particular,
-    /// that all offsets are in-bounds and point to valid objects.
-    // TODO: How to make the validation non-recursive while still making iterators safely infallible??
-    // See https://github.com/apache/arrow-rs/issues/7711
+    /// that all offsets are in-bounds and point to valid (recursively validated) objects.
     pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> {
-        let header_byte = first_byte_from_slice(value)?;
-        let header = VariantListHeader::try_new(header_byte)?;
-
-        // The size of the num_elements entry in the array value_data is 4 bytes if
-        // is_large is true, otherwise 1 byte.
-        let num_elements_size = match header.is_large {
-            true => OffsetSizeBytes::Four,
-            false => OffsetSizeBytes::One,
-        };
-
-        // Skip the header byte to read the num_elements
-        let num_elements = num_elements_size.unpack_usize(value, 1, 0)?;
-        let first_offset_byte = 1 + num_elements_size as usize;
+        Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation()
+    }
 
-        let overflow =
-            || ArrowError::InvalidArgumentError("Variant value_byte_length overflow".into());
+    pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
+        Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant list value")
+    }
 
-        // 1.  num_elements + 1
-        let n_offsets = num_elements.checked_add(1).ok_or_else(overflow)?;
+    /// Attempts to interpret `metadata` and `value` as a variant array, performing only basic
+    /// (constant-cost) [validation].
+    ///
+    /// [validation]: Self#Validation
+    pub(crate) fn try_new_with_shallow_validation(
+        metadata: VariantMetadata<'m>,
+        value: &'v [u8],
+    ) -> Result<Self, ArrowError> {
+        let header_byte = first_byte_from_slice(value)?;
+        let header = VariantListHeader::try_new(header_byte)?;
 
-        // 2.  (num_elements + 1) * offset_size
-        let value_bytes = n_offsets
-            .checked_mul(header.offset_size as usize)
-            .ok_or_else(overflow)?;
+        // Skip the header byte to read the num_elements; the offset array immediately follows
+        let num_elements =
+            header
+                .num_elements_size
+                .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?;
 
-        // 3.  first_offset_byte + ...
-        let first_value_byte = first_offset_byte
-            .checked_add(value_bytes)
-            .ok_or_else(overflow)?;
+        // (num_elements + 1) * offset_size + first_offset_byte
+        let first_value_byte = num_elements
+            .checked_add(1)
+            .and_then(|n| n.checked_mul(header.offset_size()))
+            .and_then(|n| n.checked_add(header.first_offset_byte()))
+            .ok_or_else(|| overflow_error("offset of variant list values"))?;
 
-        let new_self = Self {
+        let mut new_self = Self {
             metadata,
             value,
             header,
             num_elements,
-            first_offset_byte,
             first_value_byte,
+            validated: false,
         };
 
-        // Iterate over all values of this array in order to validate the field_offset array and
-        // prove that the field values are all in bounds. Otherwise, `iter` might panic on `unwrap`.
-        validate_fallible_iterator(new_self.iter_checked())?;
+        // Validate just the first and last offset, ignoring the other offsets and all value bytes.
+        let first_offset = new_self.get_offset(0)?;
+        if first_offset != 0 {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "First offset is not zero: {first_offset}"
+            )));
+        }
+
+        // Use the last offset to upper-bound the value buffer
+        let last_offset = new_self
+            .get_offset(num_elements as _)?
+            .checked_add(first_value_byte)
+            .ok_or_else(|| overflow_error("variant array size"))?;
+        new_self.value = slice_from_slice(value, ..last_offset as _)?;
         Ok(new_self)
     }
 
+    /// True if this instance is fully [validated] for panic-free infallible accesses.
+    ///
+    /// [validated]: Self#Validation
+    pub fn is_fully_validated(&self) -> bool {
+        self.validated
+    }
+
+    /// Performs a full [validation] of this variant array and returns the result.
+    ///
+    /// [validation]: Self#Validation
+    pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
+        if !self.validated {
+            // Validate the metadata dictionary first, if not already validated, because we pass it
+            // by value to all the children (who would otherwise re-validate it repeatedly).
+            self.metadata = self.metadata.with_full_validation()?;
+
+            let offset_buffer = slice_from_slice(
+                self.value,
+                self.header.first_offset_byte() as _..self.first_value_byte as _,
+            )?;
+
+            let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
+
+            // Validate whether values are valid variant objects
+            //
+            // Since we use offsets to slice into the value buffer, this also verifies all offsets are in-bounds
+            // and monotonically increasing
+            let mut offset_iter = map_bytes_to_offsets(offset_buffer, self.header.offset_size);
+            let mut current_offset = offset_iter.next().unwrap_or(0);
+
+            for next_offset in offset_iter {
+                let value_bytes = slice_from_slice(value_buffer, current_offset..next_offset)?;
+                Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?;
+                current_offset = next_offset;
+            }
+
+            self.validated = true;
+        }
+        Ok(self)
+    }
+
     /// Return the length of this array
     pub fn len(&self) -> usize {
-        self.num_elements
+        self.num_elements as _
     }
 
     /// Is the array of zero length
@@ -123,46 +246,82 @@ impl<'m, 'v> VariantList<'m, 'v> {
         self.len() == 0
     }
 
-    pub fn get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
-        if index >= self.num_elements {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Index {} out of bounds for list of length {}",
-                index, self.num_elements,
-            )));
-        }
+    /// Returns element by index in `0..self.len()`, if any. May panic if this list is [invalid].
+    ///
+    /// [invalid]: Self#Validation
+    pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> {
+        (index < self.len()).then(|| {
+            self.try_get_with_shallow_validation(index)
+                .expect("Invalid variant array element")
+        })
+    }
 
-        // Skip header and num_elements bytes to read the offsets
-        let unpack = |i| {
-            self.header
-                .offset_size
-                .unpack_usize(self.value, self.first_offset_byte, i)
-        };
+    /// Fallible version of `get`. Returns element by index, capturing validation errors
+    pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+        self.try_get_with_shallow_validation(index)?
+            .with_full_validation()
+    }
 
-        // Read the value bytes from the offsets
-        let variant_value_bytes = slice_from_slice(
-            self.value,
-            self.first_value_byte + unpack(index)?..self.first_value_byte + unpack(index + 1)?,
-        )?;
-        let variant = Variant::try_new_with_metadata(self.metadata, variant_value_bytes)?;
-        Ok(variant)
+    // Fallible version of `get`, performing only basic (constant-time) validation.
+    fn try_get_with_shallow_validation(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+        // Fetch the value bytes between the two offsets for this index, from the value array region
+        // of the byte buffer
+        let byte_range = self.get_offset(index)? as _..self.get_offset(index + 1)? as _;
+        let value_bytes =
+            slice_from_slice_at_offset(self.value, self.first_value_byte as _, byte_range)?;
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes)
     }
 
-    /// Iterates over the values of this list
+    /// Iterates over the values of this list. When working with [unvalidated] input, consider
+    /// [`Self::iter_try`] to avoid panics due to invalid data.
+    ///
+    /// [unvalidated]: Self#Validation
     pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ {
-        // NOTE: It is safe to unwrap because the constructor already made a successful traversal.
-        self.iter_checked().map(Result::unwrap)
+        self.iter_try_with_shallow_validation()
+            .map(|result| result.expect("Invalid variant list entry"))
+    }
+
+    /// Fallible iteration over the elements of this list.
+    pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+        self.iter_try_with_shallow_validation()
+            .map(|result| result?.with_full_validation())
+    }
+
+    // Fallible iteration that only performs basic (constant-time) validation.
+    fn iter_try_with_shallow_validation(
+        &self,
+    ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
+        (0..self.len()).map(|i| self.try_get_with_shallow_validation(i))
+    }
+
+    // Attempts to retrieve the ith offset from the offset array region of the byte buffer.
+    fn get_offset(&self, index: usize) -> Result<u32, ArrowError> {
+        let byte_range = self.header.first_offset_byte() as _..self.first_value_byte as _;
+        let offset_bytes = slice_from_slice(self.value, byte_range)?;
+        self.header.offset_size.unpack_u32(offset_bytes, index)
     }
+}
+
+// Custom implementation of PartialEq for variant arrays
+//
+// Instead of comparing the raw bytes of 2 variant lists, this implementation recursively
+// checks whether their elements are equal.
+impl<'m, 'v> PartialEq for VariantList<'m, 'v> {
+    fn eq(&self, other: &Self) -> bool {
+        if self.num_elements != other.num_elements {
+            return false;
+        }
 
-    // Fallible iteration over the fields of this dictionary. The constructor traverses the iterator
-    // to prove it has no errors, so that all other use sites can blindly `unwrap` the result.
-    fn iter_checked(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ {
-        (0..self.len()).map(move |i| self.get(i))
+        self.iter().zip(other.iter()).all(|(a, b)| a == b)
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::VariantBuilder;
+    use std::iter::repeat_n;
+    use std::ops::Range;
 
     #[test]
     fn test_variant_list_simple() {
@@ -211,11 +370,7 @@ mod tests {
 
         // Test out of bounds access
         let out_of_bounds = variant_list.get(3);
-        assert!(out_of_bounds.is_err());
-        assert!(matches!(
-            out_of_bounds.unwrap_err(),
-            ArrowError::InvalidArgumentError(ref msg) if msg.contains("out of bounds")
-        ));
+        assert!(out_of_bounds.is_none());
 
         // Test values iterator
         let values: Vec<_> = variant_list.iter().collect();
@@ -251,7 +406,7 @@ mod tests {
 
         // Test out of bounds access on empty list
         let out_of_bounds = variant_list.get(0);
-        assert!(out_of_bounds.is_err());
+        assert!(out_of_bounds.is_none());
 
         // Test values iterator on empty list
         let values: Vec<_> = variant_list.iter().collect();
@@ -294,4 +449,295 @@ mod tests {
         let elem1 = variant_list.get(1).unwrap();
         assert_eq!(elem1.as_boolean(), Some(false));
     }
+
+    #[test]
+    fn test_large_variant_list_with_total_child_length_between_2_pow_8_and_2_pow_16() {
+        // all the tests below will set the total child size to ~500,
+        // which is larger than 2^8 but less than 2^16.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 500;
+
+        // offset size will be OffsetSizeBytes::Two as the total child length is between 2^8 and 2^16
+        let expected_offset_size = OffsetSizeBytes::Two;
+
+        test_large_variant_list_with_child_length(
+            list_size,             // the elements in the list
+            single_child_item_len, // this will control the total child size in the list
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 2;
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    #[test]
+    fn test_large_variant_list_with_total_child_length_between_2_pow_16_and_2_pow_24() {
+        // all the tests below will set the total child size to ~70,000,
+        // which is larger than 2^16 but less than 2^24.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 70000;
+
+        // offset size will be OffsetSizeBytes::Three as the total child length is between 2^16 and 2^24
+        let expected_offset_size = OffsetSizeBytes::Three;
+
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 275;
+        // total child size = 255 * 275 = 70,125
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 274;
+        // total child size = 256 * 274 = 70,144
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 234;
+        // total child size = 300 * 234 = 70,200
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    #[test]
+    fn test_large_variant_list_with_total_child_length_between_2_pow_24_and_2_pow_32() {
+        // all the tests below will set the total child size to ~20,000,000,
+        // which is larger than 2^24 but less than 2^32.
+        // total child size = list_size * single_child_item_len
+
+        let mut list_size: usize = 1;
+        let mut single_child_item_len: usize = 20000000;
+
+        // offset size will be OffsetSizeBytes::Four as the total child length is between 2^24 and 2^32
+        let expected_offset_size = OffsetSizeBytes::Four;
+
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 255;
+        single_child_item_len = 78432;
+        // total child size = 255 * 78,432 = 20,000,160
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::One, // will be OffsetSizeBytes::One as the size of the list is less than 256
+            expected_offset_size,
+        );
+
+        list_size = 256;
+        single_child_item_len = 78125;
+        // total child size = 256 * 78,125 = 20,000,000
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+
+        list_size = 300;
+        single_child_item_len = 66667;
+        // total child size = 300 * 66,667 = 20,000,100
+        test_large_variant_list_with_child_length(
+            list_size,
+            single_child_item_len,
+            OffsetSizeBytes::Four, // will be OffsetSizeBytes::Four as the size of the list is bigger than 255
+            expected_offset_size,
+        );
+    }
+
+    // This helper builds a large variant list with VariantBuilder, containing `list_size`
+    // string elements of `single_child_item_len` characters each, and then verifies the list
+    // header (offset size, num-elements size) as well as the content of every element.
+    fn test_large_variant_list_with_child_length(
+        list_size: usize,
+        single_child_item_len: usize,
+        expected_num_element_size: OffsetSizeBytes,
+        expected_offset_size_bytes: OffsetSizeBytes,
+    ) {
+        let mut builder = VariantBuilder::new();
+        let mut list_builder = builder.new_list();
+
+        let mut expected_list = vec![];
+        for i in 0..list_size {
+            let random_string: String =
+                repeat_n(char::from((i % 256) as u8), single_child_item_len).collect();
+
+            list_builder.append_value(Variant::String(random_string.as_str()));
+            expected_list.push(random_string);
+        }
+
+        list_builder.finish();
+        // Finish the builder to get the metadata and value
+        let (metadata, value) = builder.finish();
+        // use the Variant API to verify the result
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+
+        let variant_list = variant.as_list().unwrap();
+
+        // verify that the header is as expected
+        assert_eq!(expected_offset_size_bytes, variant_list.header.offset_size);
+        assert_eq!(
+            expected_num_element_size,
+            variant_list.header.num_elements_size
+        );
+        assert_eq!(list_size, variant_list.num_elements as usize);
+
+        // verify the data in the variant
+        assert_eq!(list_size, variant_list.len());
+        for i in 0..list_size {
+            let item = variant_list.get(i).unwrap();
+            let item_str = item.as_string().unwrap();
+            assert_eq!(expected_list.get(i).unwrap(), item_str);
+        }
+    }
+
+    #[test]
+    fn test_variant_list_equality() {
+        // Create two lists with the same values (0..10)
+        let (metadata1, value1) = make_listi32(0..10);
+        let list1 = Variant::new(&metadata1, &value1);
+        let (metadata2, value2) = make_listi32(0..10);
+        let list2 = Variant::new(&metadata2, &value2);
+        // They should be equal
+        assert_eq!(list1, list2);
+    }
+
+    #[test]
+    fn test_variant_list_equality_different_length() {
+        // Create two lists with different lengths
+        let (metadata1, value1) = make_listi32(0..10);
+        let list1 = Variant::new(&metadata1, &value1);
+        let (metadata2, value2) = make_listi32(0..5);
+        let list2 = Variant::new(&metadata2, &value2);
+        // They should not be equal
+        assert_ne!(list1, list2);
+    }
+
+    #[test]
+    fn test_variant_list_equality_different_values() {
+        // Create two lists with different values
+        let (metadata1, value1) = make_listi32(0..10);
+        let list1 = Variant::new(&metadata1, &value1);
+        let (metadata2, value2) = make_listi32(5..15);
+        let list2 = Variant::new(&metadata2, &value2);
+        // They should not be equal
+        assert_ne!(list1, list2);
+    }
+
+    #[test]
+    fn test_variant_list_equality_different_types() {
+        // Create two lists with different types
+        let (metadata1, value1) = make_listi32(0i32..10i32);
+        let list1 = Variant::new(&metadata1, &value1);
+        let (metadata2, value2) = make_listi64(0..10);
+        let list2 = Variant::new(&metadata2, &value2);
+        // They should not be equal due to type mismatch
+        assert_ne!(list1, list2);
+    }
+
+    #[test]
+    fn test_variant_list_equality_slices() {
+        // Make an object like this and make sure equality works
+        // when the lists are sub fields
+        //
+        // {
+        //   "list1": [0, 1, 2, ..., 9],
+        //   "list2": [0, 1, 2, ..., 9],
+        //   "list3": [10, 11, 12, ..., 19],
+        //  }
+        let (metadata, value) = {
+            let mut builder = VariantBuilder::new();
+            let mut object_builder = builder.new_object();
+            // list1 (0..10)
+            let (metadata1, value1) = make_listi32(0i32..10i32);
+            object_builder.insert("list1", Variant::new(&metadata1, &value1));
+
+            // list2 (0..10)
+            let (metadata2, value2) = make_listi32(0i32..10i32);
+            object_builder.insert("list2", Variant::new(&metadata2, &value2));
+
+            // list3 (10..20)
+            let (metadata3, value3) = make_listi32(10i32..20i32);
+            object_builder.insert("list3", Variant::new(&metadata3, &value3));
+            object_builder.finish();
+            builder.finish()
+        };
+
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let object = variant.as_object().unwrap();
+        // Check that list1 and list2 are equal
+        assert_eq!(object.get("list1").unwrap(), object.get("list2").unwrap());
+        // Check that list1 and list3 are not equal
+        assert_ne!(object.get("list1").unwrap(), object.get("list3").unwrap());
+    }
+
+    /// return metadata/value for a simple variant list with values in a range
+    fn make_listi32(range: Range<i32>) -> (Vec<u8>, Vec<u8>) {
+        let mut variant_builder = VariantBuilder::new();
+        let mut list_builder = variant_builder.new_list();
+        list_builder.extend(range);
+        list_builder.finish();
+        variant_builder.finish()
+    }
+
+    /// return metadata/value for a simple variant list with values in a range
+    fn make_listi64(range: Range<i64>) -> (Vec<u8>, Vec<u8>) {
+        let mut variant_builder = VariantBuilder::new();
+        let mut list_builder = variant_builder.new_list();
+        list_builder.extend(range);
+        list_builder.finish();
+        variant_builder.finish()
+    }
 }
diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs
index bfefeb506d3d..9f9688acd090 100644
--- a/parquet-variant/src/variant/metadata.rs
+++ b/parquet-variant/src/variant/metadata.rs
@@ -15,15 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::decoder::OffsetSizeBytes;
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
-    first_byte_from_slice, slice_from_slice, string_from_slice, validate_fallible_iterator,
+    first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice,
+    try_binary_search_range_by,
 };
 
 use arrow_schema::ArrowError;
 
 /// Header structure for [`VariantMetadata`]
-#[derive(Clone, Debug, Copy, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq)]
 pub(crate) struct VariantMetadataHeader {
     version: u8,
     is_sorted: bool,
@@ -35,7 +36,20 @@ pub(crate) struct VariantMetadataHeader {
 // purposes and to make that visible.
 const CORRECT_VERSION_VALUE: u8 = 1;
 
+// The metadata header occupies one byte; use a named constant for readability
+const NUM_HEADER_BYTES: u32 = 1;
+
 impl VariantMetadataHeader {
+    // Hide the cast
+    const fn offset_size(&self) -> u32 {
+        self.offset_size as u32
+    }
+
+    // Avoid materializing this offset, since it's cheaply and safely computable
+    const fn first_offset_byte(&self) -> u32 {
+        NUM_HEADER_BYTES + self.offset_size()
+    }
+
     /// Tries to construct the variant metadata header, which has the form
     ///
     /// ```text
@@ -56,8 +70,7 @@ impl VariantMetadataHeader {
         let version = header_byte & 0x0F; // First four bits
         if version != CORRECT_VERSION_VALUE {
             let err_msg = format!(
-                "The version bytes in the header is not {CORRECT_VERSION_VALUE}, got {:b}",
-                version
+                "The version bytes in the header is not {CORRECT_VERSION_VALUE}, got {version:b}",
             );
             return Err(ArrowError::InvalidArgumentError(err_msg));
         }
@@ -75,107 +88,339 @@ impl VariantMetadataHeader {
 ///
 /// See the [Variant Spec] file for more information
 ///
+/// # Validation
+///
+/// Every instance of variant metadata is either _valid_ or _invalid_, depending on whether the
+/// underlying bytes are a valid encoding of variant metadata (see below).
+///
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully _validated_. They always
+/// contain _valid_ data, and infallible accesses such as iteration and indexing are panic-free. The
+/// validation cost is linear in the number of underlying bytes.
+///
+/// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or
+/// _invalid_ data. Infallible accesses such as iteration and indexing will panic if the underlying
+/// bytes are _invalid_, and fallible accessors such as [`Self::iter_try`] and [`Self::get`] are
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can also be used to _validate_ an
+/// _unvalidated_ instance, if desired.
+///
+/// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller
+/// knows the underlying bytes were already validated previously, or if the caller intends to
+/// perform a small number of (fallible) accesses to a large dictionary.
+///
+/// A _validated_ variant metadata instance guarantees that:
+///
+/// - header byte is valid
+/// - dictionary size is in bounds
+/// - offset array content is in-bounds
+/// - first offset is zero
+/// - last offset is in-bounds
+/// - all other offsets are in-bounds (*)
+/// - all offsets are monotonically increasing (*)
+/// - all values are valid utf-8 (*)
+///
+/// NOTE: [`Self::new`] only skips expensive (non-constant cost) validation checks (marked by `(*)`
+/// in the list above); it panics if any of the other checks fails.
+///
+/// # Safety
+///
+/// Even an _invalid_ variant metadata instance is still _safe_ to use in the Rust sense. Accessing
+/// it with infallible methods may cause panics but will never lead to undefined behavior.
+///
 /// [`Variant`]: crate::Variant
 /// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct VariantMetadata<'m> {
-    bytes: &'m [u8],
+    /// (Only) the bytes that make up this metadata instance.
+    pub(crate) bytes: &'m [u8],
     header: VariantMetadataHeader,
-    dict_size: usize,
-    dictionary_key_start_byte: usize,
+    dictionary_size: u32,
+    first_value_byte: u32,
+    validated: bool,
 }
 
+// We don't want this to grow because it increases the size of VariantList and VariantObject, which
+// could increase the size of Variant. All those size increases could hurt performance.
+const _: () = crate::utils::expect_size_of::<VariantMetadata>(32);
+
+/// The canonical byte slice corresponding to an empty metadata dictionary.
+///
+/// ```
+/// # use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, VariantMetadata, WritableMetadataBuilder};
+/// let mut metadata_builder = WritableMetadataBuilder::default();
+/// metadata_builder.finish();
+/// let metadata_bytes = metadata_builder.into_inner();
+/// assert_eq!(&metadata_bytes, EMPTY_VARIANT_METADATA_BYTES);
+/// ```
+pub const EMPTY_VARIANT_METADATA_BYTES: &[u8] = &[1, 0, 0];
+
+/// The empty metadata dictionary.
+///
+/// ```
+/// # use parquet_variant::{EMPTY_VARIANT_METADATA, VariantMetadata, WritableMetadataBuilder};
+/// let mut metadata_builder = WritableMetadataBuilder::default();
+/// metadata_builder.finish();
+/// let metadata_bytes = metadata_builder.into_inner();
+/// let empty_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+/// assert_eq!(empty_metadata, EMPTY_VARIANT_METADATA);
+/// ```
+pub const EMPTY_VARIANT_METADATA: VariantMetadata = VariantMetadata {
+    bytes: EMPTY_VARIANT_METADATA_BYTES,
+    header: VariantMetadataHeader {
+        version: CORRECT_VERSION_VALUE,
+        is_sorted: false,
+        offset_size: OffsetSizeBytes::One,
+    },
+    dictionary_size: 0,
+    first_value_byte: 3,
+    validated: true,
+};
+
 impl<'m> VariantMetadata<'m> {
-    /// View the raw bytes (needed by very low-level decoders)
-    #[inline]
-    pub const fn as_bytes(&self) -> &'m [u8] {
-        self.bytes
+    /// Attempts to interpret `bytes` as a variant metadata instance, with full [validation] of all
+    /// dictionary entries.
+    ///
+    /// [validation]: Self#Validation
+    pub fn try_new(bytes: &'m [u8]) -> Result<Self, ArrowError> {
+        Self::try_new_with_shallow_validation(bytes)?.with_full_validation()
     }
 
-    /// Attempts to interpret `bytes` as a variant metadata instance.
+    /// Interprets `bytes` as a variant metadata instance, without attempting to [validate] dictionary
+    /// entries. Panics if basic sanity checking fails, and subsequent infallible accesses such as
+    /// indexing and iteration could also panic if the underlying bytes are invalid.
     ///
-    /// # Validation
+    /// This constructor can be a useful lightweight alternative to [`Self::try_new`] if the bytes
+    /// were already validated previously by other means, or if the caller expects a small number of
+    /// accesses to a large dictionary (preferring to use a small number of fallible accesses as
+    /// needed, instead of paying expensive full validation up front).
     ///
-    /// This constructor verifies that `bytes` points to a valid variant metadata instance. In
-    /// particular, all offsets are in-bounds and point to valid utf8 strings.
-    pub fn try_new(bytes: &'m [u8]) -> Result<Self, ArrowError> {
+    /// [validate]: Self#Validation
+    pub fn new(bytes: &'m [u8]) -> Self {
+        Self::try_new_with_shallow_validation(bytes).expect("Invalid variant metadata")
+    }
+
+    // The actual constructor, which performs only basic (constant-cost) validation.
+    pub(crate) fn try_new_with_shallow_validation(bytes: &'m [u8]) -> Result<Self, ArrowError> {
         let header_byte = first_byte_from_slice(bytes)?;
         let header = VariantMetadataHeader::try_new(header_byte)?;
 
-        // Offset 1, index 0 because first element after header is dictionary size
-        let dict_size = header.offset_size.unpack_usize(bytes, 1, 0)?;
+        // First element after header is dictionary size; the offset array immediately follows.
+        let dictionary_size =
+            header
+                .offset_size
+                .unpack_u32_at_offset(bytes, NUM_HEADER_BYTES as usize, 0)?;
 
         // Calculate the starting offset of the dictionary string bytes.
         //
-        // Value header, dict_size (offset_size bytes), and dict_size+1 offsets
-        // = 1 + offset_size + (dict_size + 1) * offset_size
-        // = (dict_size + 2) * offset_size + 1
-        let dictionary_key_start_byte = dict_size
-            .checked_add(2)
-            .and_then(|n| n.checked_mul(header.offset_size as usize))
-            .and_then(|n| n.checked_add(1))
-            .ok_or_else(|| ArrowError::InvalidArgumentError("metadata length overflow".into()))?;
-        println!("dictionary_key_start_byte: {dictionary_key_start_byte}");
-        let new_self = Self {
+        // There are dict_size + 1 offsets, and the value bytes immediately follow
+        // = (dict_size + 1) * offset_size + header.first_offset_byte()
+        let first_value_byte = dictionary_size
+            .checked_add(1)
+            .and_then(|n| n.checked_mul(header.offset_size()))
+            .and_then(|n| n.checked_add(header.first_offset_byte()))
+            .ok_or_else(|| overflow_error("offset of variant metadata dictionary"))?;
+
+        let mut new_self = Self {
             bytes,
             header,
-            dict_size,
-            dictionary_key_start_byte,
+            dictionary_size,
+            first_value_byte,
+            validated: false,
         };
 
-        // Iterate over all string keys in this dictionary in order to validate the offset array and
-        // prove that the string bytes are all in bounds. Otherwise, `iter` might panic on `unwrap`.
-        validate_fallible_iterator(new_self.iter_checked())?;
+        // Validate just the first and last offset, ignoring the other offsets and all value bytes.
+        let first_offset = new_self.get_offset(0)?;
+        if first_offset != 0 {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "First offset is not zero: {first_offset}"
+            )));
+        }
+
+        // Use the last offset to upper-bound the byte slice
+        let last_offset = new_self
+            .get_offset(dictionary_size as _)?
+            .checked_add(first_value_byte)
+            .ok_or_else(|| overflow_error("variant metadata size"))?;
+        new_self.bytes = slice_from_slice(bytes, ..last_offset as _)?;
         Ok(new_self)
     }
 
+    /// The number of metadata dictionary entries
+    pub fn len(&self) -> usize {
+        self.dictionary_size as _
+    }
+
+    /// True if this metadata dictionary contains no entries
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// True if this instance is fully [validated] for panic-free infallible accesses.
+    ///
+    /// [validated]: Self#Validation
+    pub fn is_fully_validated(&self) -> bool {
+        self.validated
+    }
+
+    /// Performs a full [validation] of this metadata dictionary and returns the result.
+    ///
+    /// [validation]: Self#Validation
+    pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
+        if !self.validated {
+            let offset_bytes = slice_from_slice(
+                self.bytes,
+                self.header.first_offset_byte() as _..self.first_value_byte as _,
+            )?;
+
+            // Verify the string values in the dictionary are UTF-8 encoded strings.
+            let value_buffer =
+                string_from_slice(self.bytes, 0, self.first_value_byte as _..self.bytes.len())?;
+
+            let mut offsets = map_bytes_to_offsets(offset_bytes, self.header.offset_size);
+
+            if self.header.is_sorted {
+                // Validate the dictionary values are unique and lexicographically sorted
+                //
+                // Since we use the offsets to access dictionary values, this also validates
+                // offsets are in-bounds and monotonically increasing
+                let mut current_offset = offsets.next().unwrap_or(0);
+                let mut prev_value: Option<&str> = None;
+                for next_offset in offsets {
+                    let current_value = value_buffer.get(current_offset..next_offset).ok_or_else(
+                        || {
+                            ArrowError::InvalidArgumentError(format!(
+                                "range {current_offset}..{next_offset} is invalid or out of bounds"
+                            ))
+                        },
+                    )?;
+
+                    if let Some(prev_val) = prev_value {
+                        if current_value <= prev_val {
+                            return Err(ArrowError::InvalidArgumentError(
+                                "dictionary values are not unique and ordered".to_string(),
+                            ));
+                        }
+                    }
+
+                    prev_value = Some(current_value);
+                    current_offset = next_offset;
+                }
+            } else {
+                // Validate offsets are in-bounds and monotonically increasing
+                //
+                // Since shallow validation ensures the first and last offsets are in bounds,
+                // we can also verify all offsets are in-bounds by checking if
+                // offsets are monotonically increasing
+                if !offsets.is_sorted_by(|a, b| a < b) {
+                    return Err(ArrowError::InvalidArgumentError(
+                        "offsets not monotonically increasing".to_string(),
+                    ));
+                }
+            }
+
+            self.validated = true;
+        }
+        Ok(self)
+    }
+
     /// Whether the dictionary keys are sorted and unique
     pub fn is_sorted(&self) -> bool {
         self.header.is_sorted
     }
 
-    /// Get the dictionary size
-    pub fn dictionary_size(&self) -> usize {
-        self.dict_size
-    }
-
     /// The variant protocol version
-    pub fn version(&self) -> u8 {
+    pub const fn version(&self) -> u8 {
         self.header.version
     }
 
-    /// Gets an offset array entry by index.
+    /// Gets an offset into the dictionary entry by index.
     ///
     /// This offset is an index into the dictionary, at the boundary between string `i-1` and string
     /// `i`. See [`Self::get`] to retrieve a specific dictionary entry.
-    fn get_offset(&self, i: usize) -> Result<usize, ArrowError> {
-        // Skipping the header byte (setting byte_offset = 1) and the dictionary_size (setting offset_index +1)
-        let bytes = slice_from_slice(self.bytes, ..self.dictionary_key_start_byte)?;
-        self.header.offset_size.unpack_usize(bytes, 1, i + 1)
+    fn get_offset(&self, i: usize) -> Result<u32, ArrowError> {
+        let offset_byte_range = self.header.first_offset_byte() as _..self.first_value_byte as _;
+        let bytes = slice_from_slice(self.bytes, offset_byte_range)?;
+        self.header.offset_size.unpack_u32(bytes, i)
     }
 
-    /// Gets a dictionary entry by index
+    /// Returns the total size, in bytes, of the metadata.
+    ///
+    /// Note this value may be smaller than what was passed to [`Self::new`] or
+    /// [`Self::try_new`] if the input was larger than necessary to encode the
+    /// metadata dictionary.
+    pub fn size(&self) -> usize {
+        self.bytes.len()
+    }
+
+    /// Attempts to retrieve a dictionary entry by index, failing if out of bounds or if the
+    /// underlying bytes are [invalid].
+    ///
+    /// [invalid]: Self#Validation
     pub fn get(&self, i: usize) -> Result<&'m str, ArrowError> {
-        let dictionary_keys_bytes = slice_from_slice(self.bytes, self.dictionary_key_start_byte..)?;
-        let byte_range = self.get_offset(i)?..self.get_offset(i + 1)?;
-        string_from_slice(dictionary_keys_bytes, byte_range)
+        let byte_range = self.get_offset(i)? as _..self.get_offset(i + 1)? as _;
+        string_from_slice(self.bytes, self.first_value_byte as _, byte_range)
+    }
+
+    // Helper method used by our `impl Index` and also by `get_entry`. Panics if the underlying
+    // bytes are invalid. Needed because the `Index` trait forces the returned result to have the
+    // lifetime of `self` instead of the string's own (longer) lifetime `'m`.
+    fn get_impl(&self, i: usize) -> &'m str {
+        self.get(i).expect("Invalid metadata dictionary entry")
+    }
+
+    /// Attempts to retrieve a dictionary entry and its field id, returning None if the requested field
+    /// name is not present. The search cost is logarithmic if [`Self::is_sorted`] and linear
+    /// otherwise.
+    ///
+    /// WARNING: This method panics if the underlying bytes are [invalid].
+    ///
+    /// [invalid]: Self#Validation
+    pub fn get_entry(&self, field_name: &str) -> Option<(u32, &'m str)> {
+        let field_id = if self.is_sorted() && self.len() > 10 {
+            // Binary search is faster for a not-tiny sorted metadata dictionary
+            let cmp = |i| Some(self.get_impl(i).cmp(field_name));
+            try_binary_search_range_by(0..self.len(), cmp)?.ok()?
+        } else {
+            // Fall back to Linear search for tiny or unsorted dictionary
+            (0..self.len()).find(|i| self.get_impl(*i) == field_name)?
+        };
+        Some((field_id as u32, self.get_impl(field_id)))
+    }
+
+    /// Returns an iterator that attempts to visit all dictionary entries, producing `Err` if the
+    /// iterator encounters [invalid] data.
+    ///
+    /// [invalid]: Self#Validation
+    pub fn iter_try(&self) -> impl Iterator<Item = Result<&'m str, ArrowError>> + '_ {
+        (0..self.len()).map(|i| self.get(i))
     }
 
-    /// Get all dictionary entries as an Iterator of strings
+    /// Iterates over all dictionary entries. When working with [unvalidated] input, consider
+    /// [`Self::iter_try`] to avoid panics due to invalid data.
+    ///
+    /// [unvalidated]: Self#Validation
     pub fn iter(&self) -> impl Iterator<Item = &'m str> + '_ {
-        // NOTE: It is safe to unwrap because the constructor already made a successful traversal.
-        self.iter_checked().map(Result::unwrap)
+        self.iter_try()
+            .map(|result| result.expect("Invalid metadata dictionary entry"))
     }
+}
 
-    // Fallible iteration over the fields of this dictionary. The constructor traverses the iterator
-    // to prove it has no errors, so that all other use sites can blindly `unwrap` the result.
-    fn iter_checked(&self) -> impl Iterator<Item = Result<&'m str, ArrowError>> + '_ {
-        (0..self.dict_size).map(move |i| self.get(i))
+/// Retrieves the ith dictionary entry, panicking if the index is out of bounds. Accessing
+/// [unvalidated] input could also panic if the underlying bytes are invalid.
+///
+/// [unvalidated]: Self#Validation
+impl std::ops::Index<usize> for VariantMetadata<'_> {
+    type Output = str;
+
+    fn index(&self, i: usize) -> &str {
+        self.get_impl(i)
     }
 }
 
 #[cfg(test)]
 mod tests {
+
+    use crate::VariantBuilder;
+
     use super::*;
 
     /// `"cat"`, `"dog"` – valid metadata
@@ -196,10 +441,10 @@ mod tests {
         ];
 
         let md = VariantMetadata::try_new(bytes).expect("should parse");
-        assert_eq!(md.dictionary_size(), 2);
+        assert_eq!(md.len(), 2);
         // Fields
-        assert_eq!(md.get(0).unwrap(), "cat");
-        assert_eq!(md.get(1).unwrap(), "dog");
+        assert_eq!(&md[0], "cat");
+        assert_eq!(&md[1], "dog");
 
         // Offsets
         assert_eq!(md.get_offset(0).unwrap(), 0x00);
@@ -231,9 +476,9 @@ mod tests {
         ];
 
         let working_md = VariantMetadata::try_new(bytes).expect("should parse");
-        assert_eq!(working_md.dictionary_size(), 2);
-        assert_eq!(working_md.get(0).unwrap(), "a");
-        assert_eq!(working_md.get(1).unwrap(), "b");
+        assert_eq!(working_md.len(), 2);
+        assert_eq!(&working_md[0], "a");
+        assert_eq!(&working_md[1], "b");
 
         let truncated = &bytes[..bytes.len() - 1];
 
@@ -273,6 +518,42 @@ mod tests {
         );
     }
 
+    #[test]
+    fn try_new_fails_non_monotonic2() {
+        // this test case checks whether offsets are monotonic in the full validation logic.
+
+        // 'cat', 'dog', 'lamb', "eel"
+        let bytes = &[
+            0b0000_0001, // header, offset_size_minus_one=0 and version=1
+            4,           // dictionary_size
+            0x00,
+            0x02,
+            0x01, // Doesn't increase monotonically
+            0x10,
+            13,
+            b'c',
+            b'a',
+            b't',
+            b'd',
+            b'o',
+            b'g',
+            b'l',
+            b'a',
+            b'm',
+            b'b',
+            b'e',
+            b'e',
+            b'l',
+        ];
+
+        let err = VariantMetadata::try_new(bytes).unwrap_err();
+
+        assert!(
+            matches!(err, ArrowError::InvalidArgumentError(_)),
+            "unexpected error: {err:?}"
+        );
+    }
+
     #[test]
     fn try_new_truncated_offsets_inline() {
         // Missing final offset
@@ -284,4 +565,98 @@ mod tests {
             "unexpected error: {err:?}"
         );
     }
+
+    #[test]
+    fn empty_string_is_valid() {
+        let bytes = &[
+            0b0001_0001, // header: offset_size_minus_one=0, ordered=1, version=1
+            1,
+            0x00,
+            0x00,
+        ];
+        let metadata = VariantMetadata::try_new(bytes).unwrap();
+        assert_eq!(&metadata[0], "");
+
+        let bytes = &[
+            0b0001_0001, // header: offset_size_minus_one=0, ordered=1, version=1
+            2,
+            0x00,
+            0x00,
+            0x02,
+            b'h',
+            b'i',
+        ];
+        let metadata = VariantMetadata::try_new(bytes).unwrap();
+        assert_eq!(&metadata[0], "");
+        assert_eq!(&metadata[1], "hi");
+
+        let bytes = &[
+            0b0001_0001, // header: offset_size_minus_one=0, ordered=1, version=1
+            2,
+            0x00,
+            0x02,
+            0x02, // empty string is allowed, but must be first in a sorted dict
+            b'h',
+            b'i',
+        ];
+        let err = VariantMetadata::try_new(bytes).unwrap_err();
+        assert!(
+            matches!(err, ArrowError::InvalidArgumentError(_)),
+            "unexpected error: {err:?}"
+        );
+    }
+
+    #[test]
+    fn test_compare_sorted_dictionary_with_unsorted_dictionary() {
+        // create a sorted object
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", false);
+        o.insert("b", false);
+
+        o.finish();
+
+        let (m, _) = b.finish();
+
+        let m1 = VariantMetadata::new(&m);
+        assert!(m1.is_sorted());
+
+        // Create metadata with an unsorted dictionary (field names are "a", "a", "b")
+        // Since field names are not unique, it is considered not sorted.
+        let metadata_bytes = vec![
+            0b0000_0001,
+            3, // dictionary size
+            0, // "a"
+            1, // "a"
+            2, // "b"
+            3,
+            b'a',
+            b'a',
+            b'b',
+        ];
+        let m2 = VariantMetadata::try_new(&metadata_bytes).unwrap();
+        assert!(!m2.is_sorted());
+
+        assert_ne!(m1, m2);
+    }
+
+    #[test]
+    fn test_compare_sorted_dictionary_with_sorted_dictionary() {
+        // create a sorted object
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", false);
+        o.insert("b", false);
+
+        o.finish();
+
+        let (m, _) = b.finish();
+
+        let m1 = VariantMetadata::new(&m);
+        let m2 = VariantMetadata::new(&m);
+
+        assert_eq!(m1, m2);
+    }
 }
diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs
index 471b94ccdb0c..52dc2ef42f98 100644
--- a/parquet-variant/src/variant/object.rs
+++ b/parquet-variant/src/variant/object.rs
@@ -14,118 +14,291 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::decoder::OffsetSizeBytes;
+
+use crate::decoder::{OffsetSizeBytes, map_bytes_to_offsets};
 use crate::utils::{
-    first_byte_from_slice, slice_from_slice, try_binary_search_range_by, validate_fallible_iterator,
+    first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
 };
 use crate::variant::{Variant, VariantMetadata};
 
 use arrow_schema::ArrowError;
 
+// The value header occupies one byte; use a named constant for readability
+const NUM_HEADER_BYTES: u32 = 1;
+
 /// Header structure for [`VariantObject`]
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
 pub(crate) struct VariantObjectHeader {
-    field_offset_size: OffsetSizeBytes,
+    num_elements_size: OffsetSizeBytes,
     field_id_size: OffsetSizeBytes,
-    is_large: bool,
+    field_offset_size: OffsetSizeBytes,
 }
 
 impl VariantObjectHeader {
+    // Hide the ugly casting
+    const fn num_elements_size(&self) -> u32 {
+        self.num_elements_size as _
+    }
+    const fn field_id_size(&self) -> u32 {
+        self.field_id_size as _
+    }
+    const fn field_offset_size(&self) -> u32 {
+        self.field_offset_size as _
+    }
+
+    // Avoid materializing this offset, since it's cheaply and safely computable
+    const fn field_ids_start_byte(&self) -> u32 {
+        NUM_HEADER_BYTES + self.num_elements_size()
+    }
+
     pub(crate) fn try_new(header_byte: u8) -> Result<Self, ArrowError> {
         // Parse the header byte to get object parameters
         let value_header = header_byte >> 2;
         let field_offset_size_minus_one = value_header & 0x03; // Last 2 bits
         let field_id_size_minus_one = (value_header >> 2) & 0x03; // Next 2 bits
         let is_large = (value_header & 0x10) != 0; // 5th bit
-
+        let num_elements_size = match is_large {
+            true => OffsetSizeBytes::Four,
+            false => OffsetSizeBytes::One,
+        };
         Ok(Self {
-            field_offset_size: OffsetSizeBytes::try_new(field_offset_size_minus_one)?,
+            num_elements_size,
             field_id_size: OffsetSizeBytes::try_new(field_id_size_minus_one)?,
-            is_large,
+            field_offset_size: OffsetSizeBytes::try_new(field_offset_size_minus_one)?,
         })
     }
 }
 
 /// A [`Variant`] Object (struct with named fields).
-#[derive(Clone, Debug, PartialEq)]
+///
+/// See the [Variant spec] file for more information.
+///
+/// # Validation
+///
+/// Every instance of variant object is either _valid_ or _invalid_, depending on whether the
+/// underlying bytes are a valid encoding of a variant object subtype (see below).
+///
+/// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully (and recursively)
+/// _validated_. They always contain _valid_ data, and infallible accesses such as iteration and
+/// indexing are panic-free. The validation cost is linear in the number of underlying bytes.
+///
+/// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or
+/// _invalid_ data. Infallible accesses such as iteration and indexing will panic if the underlying
+/// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`] and [`Self::get`] are
+/// provided as panic-free alternatives. [`Self::with_full_validation`] can also be used to _validate_ an
+/// _unvalidated_ instance, if desired.
+///
+/// _Unvalidated_ instances can be constructed in constant time. They can be useful if the caller
+/// knows the underlying bytes were already validated previously, or if the caller intends to
+/// perform a small number of (fallible) field accesses against a large object.
+///
+/// A _validated_ instance guarantees that:
+///
+/// - header byte is valid
+/// - num_elements is in bounds
+/// - field id array is in bounds
+/// - field offset array is in bounds
+/// - field value array is in bounds
+/// - all field ids are valid metadata dictionary entries (*)
+/// - field ids are lexically ordered by their corresponding string values (*)
+/// - all field offsets are in bounds (*)
+/// - all field values are (recursively) _valid_ variant values (*)
+/// - the associated variant metadata is [valid] (*)
+///
+/// NOTE: [`Self::new`] only skips expensive (non-constant cost) validation checks (marked by `(*)`
+/// in the list above); it panics if any of the other checks fails.
+///
+/// # Safety
+///
+/// Even an _invalid_ variant object instance is still _safe_ to use in the Rust sense. Accessing it
+/// with infallible methods may cause panics but will never lead to undefined behavior.
+///
+/// [valid]: VariantMetadata#Validation
+/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-object-basic_type2
+#[derive(Debug, Clone)]
 pub struct VariantObject<'m, 'v> {
     pub metadata: VariantMetadata<'m>,
     pub value: &'v [u8],
     header: VariantObjectHeader,
-    num_elements: usize,
-    field_ids_start_byte: usize,
-    field_offsets_start_byte: usize,
-    values_start_byte: usize,
+    num_elements: u32,
+    first_field_offset_byte: u32,
+    first_value_byte: u32,
+    validated: bool,
 }
 
+// We don't want this to grow because it could increase the size of `Variant` and hurt performance.
+const _: () = crate::utils::expect_size_of::<VariantObject>(64);
+
 impl<'m, 'v> VariantObject<'m, 'v> {
-    /// Attempts to interpret `value` as a variant object value.
+    pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self {
+        Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant object")
+    }
+
+    /// Attempts to interpret `metadata` and `value` as a variant object.
     ///
     /// # Validation
     ///
     /// This constructor verifies that `value` points to a valid variant object value. In
     /// particular, that all field ids exist in `metadata`, and all offsets are in-bounds and point
     /// to valid objects.
-    // TODO: How to make the validation non-recursive while still making iterators safely infallible??
-    // See https://github.com/apache/arrow-rs/issues/7711
     pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> {
+        Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation()
+    }
+
+    /// Attempts to interpret `metadata` and `value` as a variant object, performing only basic
+    /// (constant-cost) [validation].
+    ///
+    /// [validation]: Self#Validation
+    pub(crate) fn try_new_with_shallow_validation(
+        metadata: VariantMetadata<'m>,
+        value: &'v [u8],
+    ) -> Result<Self, ArrowError> {
         let header_byte = first_byte_from_slice(value)?;
         let header = VariantObjectHeader::try_new(header_byte)?;
 
-        // Determine num_elements size based on is_large flag
-        let num_elements_size = if header.is_large {
-            OffsetSizeBytes::Four
-        } else {
-            OffsetSizeBytes::One
-        };
-
-        // Parse num_elements
-        let num_elements = num_elements_size.unpack_usize(value, 1, 0)?;
-
-        // Calculate byte offsets for different sections
-        let field_ids_start_byte = 1 + num_elements_size as usize;
-        let field_offsets_start_byte =
-            field_ids_start_byte + num_elements * header.field_id_size as usize;
-        let values_start_byte =
-            field_offsets_start_byte + (num_elements + 1) * header.field_offset_size as usize;
-
-        // Spec says: "The last field_offset points to the byte after the end of the last value"
-        //
-        // Use the last offset as a bounds check. The iterator check below doesn't use it -- offsets
-        // are not monotonic -- so we have to check separately here.
-        let last_field_offset =
+        // Determine num_elements size based on is_large flag and fetch the value
+        let num_elements =
             header
-                .field_offset_size
-                .unpack_usize(value, field_offsets_start_byte, num_elements)?;
-        if values_start_byte + last_field_offset > value.len() {
-            return Err(ArrowError::InvalidArgumentError(format!(
-                "Last field offset value {} at offset {} is outside the value slice of length {}",
-                last_field_offset,
-                values_start_byte,
-                value.len()
-            )));
-        }
-
-        let new_self = Self {
+                .num_elements_size
+                .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?;
+
+        // Calculate byte offsets for field offsets and values with overflow protection, and verify
+        // they're in bounds
+        let first_field_offset_byte = num_elements
+            .checked_mul(header.field_id_size())
+            .and_then(|n| n.checked_add(header.field_ids_start_byte()))
+            .ok_or_else(|| overflow_error("offset of variant object field offsets"))?;
+
+        let first_value_byte = num_elements
+            .checked_add(1)
+            .and_then(|n| n.checked_mul(header.field_offset_size()))
+            .and_then(|n| n.checked_add(first_field_offset_byte))
+            .ok_or_else(|| overflow_error("offset of variant object field values"))?;
+
+        let mut new_self = Self {
             metadata,
             value,
             header,
             num_elements,
-            field_ids_start_byte,
-            field_offsets_start_byte,
-            values_start_byte,
+            first_field_offset_byte,
+            first_value_byte,
+            validated: false,
         };
 
-        // Iterate over all fields of this object in order to validate the field_id and field_offset
-        // arrays, and also to prove the field values are all in bounds. Otherwise, `iter` might
-        // panic on `unwrap`.
-        validate_fallible_iterator(new_self.iter_checked())?;
+        // Spec says: "The last field_offset points to the byte after the end of the last value"
+        //
+        // Use it to upper-bound the value bytes, which also verifies that the field id and field
+        // offset arrays are in bounds.
+        let last_offset = new_self
+            .get_offset(num_elements as _)?
+            .checked_add(first_value_byte)
+            .ok_or_else(|| overflow_error("variant object size"))?;
+        new_self.value = slice_from_slice(value, ..last_offset as _)?;
         Ok(new_self)
     }
 
+    /// True if this instance is fully [validated] for panic-free infallible accesses.
+    ///
+    /// [validated]: Self#Validation
+    pub fn is_fully_validated(&self) -> bool {
+        self.validated
+    }
+
+    /// Performs a full [validation] of this variant object.
+    ///
+    /// [validation]: Self#Validation
+    pub fn with_full_validation(mut self) -> Result<Self, ArrowError> {
+        if !self.validated {
+            // Validate the metadata dictionary first, if not already validated, because we pass it
+            // by value to all the children (who would otherwise re-validate it repeatedly).
+            self.metadata = self.metadata.with_full_validation()?;
+
+            let field_id_buffer = slice_from_slice(
+                self.value,
+                self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _,
+            )?;
+
+            let mut field_ids_iter =
+                map_bytes_to_offsets(field_id_buffer, self.header.field_id_size);
+
+            // Validate all field ids exist in the metadata dictionary and the corresponding field names are lexicographically sorted
+            if self.metadata.is_sorted() {
+                // Since the metadata dictionary has unique and sorted field names, we can also guarantee this object's field names
+                // are lexicographically sorted by their field id ordering
+                let dictionary_size = self.metadata.len();
+
+                if let Some(mut current_id) = field_ids_iter.next() {
+                    for next_id in field_ids_iter {
+                        if current_id >= dictionary_size {
+                            return Err(ArrowError::InvalidArgumentError(
+                                "field id is not valid".to_string(),
+                            ));
+                        }
+
+                        if next_id <= current_id {
+                            return Err(ArrowError::InvalidArgumentError(
+                                "field names not sorted".to_string(),
+                            ));
+                        }
+                        current_id = next_id;
+                    }
+
+                    if current_id >= dictionary_size {
+                        return Err(ArrowError::InvalidArgumentError(
+                            "field id is not valid".to_string(),
+                        ));
+                    }
+                }
+            } else {
+                // The metadata dictionary can't guarantee uniqueness or sortedness, so we have to parse out the corresponding field names
+                // to check lexicographical order
+                //
+                // Since we are probing the metadata dictionary by field id, this also verifies field ids are in-bounds
+                let mut current_field_name = match field_ids_iter.next() {
+                    Some(field_id) => Some(self.metadata.get(field_id)?),
+                    None => None,
+                };
+
+                for field_id in field_ids_iter {
+                    let next_field_name = self.metadata.get(field_id)?;
+
+                    if let Some(current_name) = current_field_name {
+                        if next_field_name < current_name {
+                            return Err(ArrowError::InvalidArgumentError(
+                                "field names not sorted".to_string(),
+                            ));
+                        }
+                    }
+                    current_field_name = Some(next_field_name);
+                }
+            }
+
+            // Validate that all field values are valid variant values
+            let field_offset_buffer = slice_from_slice(
+                self.value,
+                self.first_field_offset_byte as _..self.first_value_byte as _,
+            )?;
+            let num_offsets = field_offset_buffer.len() / self.header.field_offset_size() as usize;
+
+            let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
+
+            map_bytes_to_offsets(field_offset_buffer, self.header.field_offset_size)
+                .take(num_offsets.saturating_sub(1))
+                .try_for_each(|offset| {
+                    let value_bytes = slice_from_slice(value_buffer, offset..)?;
+                    Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?;
+
+                    Ok::<_, ArrowError>(())
+                })?;
+
+            self.validated = true;
+        }
+        Ok(self)
+    }
+
     /// Returns the number of key-value pairs in this object
     pub fn len(&self) -> usize {
-        self.num_elements
+        self.num_elements as _
     }
 
     /// Returns true if the object contains no key-value pairs
@@ -134,64 +307,135 @@ impl<'m, 'v> VariantObject<'m, 'v> {
     }
 
     /// Get a field's value by index in `0..self.len()`
-    pub fn field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
-        let start_offset = self.header.field_offset_size.unpack_usize(
-            self.value,
-            self.field_offsets_start_byte,
-            i,
-        )?;
-        let value_bytes = slice_from_slice(self.value, self.values_start_byte + start_offset..)?;
-        Variant::try_new_with_metadata(self.metadata, value_bytes)
+    ///
+    /// # Panics
+    ///
+    /// If the variant object is corrupted (e.g., invalid offsets or field IDs), which can only
+    /// happen when working with an unvalidated object produced by [`Self::new`]. An index that
+    /// is out of bounds does not panic; `None` is returned instead.
+    pub fn field(&self, i: usize) -> Option<Variant<'m, 'v>> {
+        (i < self.len()).then(|| {
+            self.try_field_with_shallow_validation(i)
+                .expect("Invalid object field value")
+        })
+    }
+
+    /// Fallible version of `field`. Returns field value by index, capturing validation errors
+    pub fn try_field(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+        self.try_field_with_shallow_validation(i)?
+            .with_full_validation()
+    }
+
+    // Attempts to retrieve the ith field value from the value region of the byte buffer; it
+    // performs only basic (constant-cost) validation.
+    fn try_field_with_shallow_validation(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> {
+        let value_bytes = slice_from_slice(self.value, self.first_value_byte as _..)?;
+        let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)? as _..)?;
+        Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes)
+    }
+
+    // Attempts to retrieve the ith offset from the field offset region of the byte buffer.
+    fn get_offset(&self, i: usize) -> Result<u32, ArrowError> {
+        let byte_range = self.first_field_offset_byte as _..self.first_value_byte as _;
+        let field_offsets = slice_from_slice(self.value, byte_range)?;
+        self.header.field_offset_size.unpack_u32(field_offsets, i)
     }
 
     /// Get a field's name by index in `0..self.len()`
-    pub fn field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
-        let field_id =
-            self.header
-                .field_id_size
-                .unpack_usize(self.value, self.field_ids_start_byte, i)?;
-        self.metadata.get(field_id)
+    ///
+    /// # Panics
+    /// If the variant object is corrupted (e.g., invalid offsets or field IDs). This can only
+    /// happen with an unvalidated object produced by [`Self::new`]; out-of-bounds yields `None`.
+    pub fn field_name(&self, i: usize) -> Option<&'m str> {
+        (i < self.len()).then(|| {
+            self.try_field_name(i)
+                .expect("Invalid variant object field name")
+        })
+    }
+
+    /// Fallible version of `field_name`. Returns field name by index, capturing validation errors
+    fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> {
+        let byte_range = self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _;
+        let field_id_bytes = slice_from_slice(self.value, byte_range)?;
+        let field_id = self.header.field_id_size.unpack_u32(field_id_bytes, i)?;
+        self.metadata.get(field_id as _)
     }
 
     /// Returns an iterator of (name, value) pairs over the fields of this object.
     pub fn iter(&self) -> impl Iterator<Item = (&'m str, Variant<'m, 'v>)> + '_ {
-        // NOTE: It is safe to unwrap because the constructor already made a successful traversal.
-        self.iter_checked().map(Result::unwrap)
+        self.iter_try_with_shallow_validation()
+            .map(|result| result.expect("Invalid variant object field value"))
     }
 
-    // Fallible iteration over the fields of this object. The constructor traverses the iterator to
-    // prove it has no errors, so that all other use sites can blindly `unwrap` the result.
-    fn iter_checked(
+    /// Fallible iteration over the fields of this object.
+    pub fn iter_try(
         &self,
     ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
-        (0..self.num_elements).map(move |i| Ok((self.field_name(i)?, self.field(i)?)))
+        self.iter_try_with_shallow_validation().map(|result| {
+            let (name, value) = result?;
+            Ok((name, value.with_full_validation()?))
+        })
+    }
+
+    // Fallible iteration over the fields of this object that performs only shallow (constant-cost)
+    // validation of field values.
+    fn iter_try_with_shallow_validation(
+        &self,
+    ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ {
+        (0..self.len()).map(|i| {
+            let field = self.try_field_with_shallow_validation(i)?;
+            Ok((self.try_field_name(i)?, field))
+        })
     }
 
     /// Returns the value of the field with the specified name, if any.
     ///
-    /// `Ok(None)` means the field does not exist; `Err` means the search encountered an error.
-    pub fn field_by_name(&self, name: &str) -> Result<Option<Variant<'m, 'v>>, ArrowError> {
+    /// Returns `Some(Variant)` if the field exists, or `None` if the field does not exist.
+    pub fn get(&self, name: &str) -> Option<Variant<'m, 'v>> {
         // Binary search through the field IDs of this object to find the requested field name.
         //
         // NOTE: This does not require a sorted metadata dictionary, because the variant spec
         // requires object field ids to be lexically sorted by their corresponding string values,
         // and probing the dictionary for a field id is always O(1) work.
-        let search_result =
-            try_binary_search_range_by(0..self.num_elements, &name, |i| self.field_name(i))?;
+        let cmp = |i| Some(self.field_name(i)?.cmp(name));
+        let i = try_binary_search_range_by(0..self.len(), cmp)?.ok()?;
+        self.field(i)
+    }
+}
+
+// Custom implementation of PartialEq for variant objects
+//
+// According to the spec, field values are not required to be in the same order as the field IDs,
+// to enable flexibility when constructing Variant values
+//
+// Instead of comparing the raw bytes of 2 variant objects, this implementation recursively
+// checks whether the field values are equal -- regardless of their order
+impl<'m, 'v> PartialEq for VariantObject<'m, 'v> {
+    fn eq(&self, other: &Self) -> bool {
+        if self.num_elements != other.num_elements {
+            return false;
+        }
 
-        search_result.ok().map(|i| self.field(i)).transpose()
+        // IFF two objects are valid and logically equal, they will have the same
+        // field names in the same order, because the spec requires the object
+        // fields to be sorted lexicographically.
+        self.iter()
+            .zip(other.iter())
+            .all(|((name_a, value_a), (name_b, value_b))| name_a == name_b && value_a == value_b)
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::VariantBuilder;
+
     use super::*;
 
     #[test]
     fn test_variant_object_simple() {
         // Create metadata with field names: "age", "name", "active" (sorted)
         // Header: version=1, sorted=1, offset_size=1 (offset_size_minus_one=0)
-        // So header byte = 00_0_1_0001 = 0x10
+        // So header byte = 00_0_1_0001 = 0x11
         let metadata_bytes = vec![
             0b0001_0001,
             3, // dictionary size
@@ -245,22 +489,34 @@ mod tests {
         assert!(!variant_obj.is_empty());
 
         // Test field access
-        let active_field = variant_obj.field_by_name("active").unwrap();
+        let active_field = variant_obj.get("active");
         assert!(active_field.is_some());
         assert_eq!(active_field.unwrap().as_boolean(), Some(true));
 
-        let age_field = variant_obj.field_by_name("age").unwrap();
+        let age_field = variant_obj.get("age");
         assert!(age_field.is_some());
         assert_eq!(age_field.unwrap().as_int8(), Some(42));
 
-        let name_field = variant_obj.field_by_name("name").unwrap();
+        let name_field = variant_obj.get("name");
         assert!(name_field.is_some());
         assert_eq!(name_field.unwrap().as_string(), Some("hello"));
 
         // Test non-existent field
-        let missing_field = variant_obj.field_by_name("missing").unwrap();
+        let missing_field = variant_obj.get("missing");
         assert!(missing_field.is_none());
 
+        let missing_field_name = variant_obj.field_name(3);
+        assert!(missing_field_name.is_none());
+
+        let missing_field_name = variant_obj.field_name(300);
+        assert!(missing_field_name.is_none());
+
+        let missing_field_value = variant_obj.field(3);
+        assert!(missing_field_value.is_none());
+
+        let missing_field_value = variant_obj.field(300);
+        assert!(missing_field_value.is_none());
+
         // Test fields iterator
         let fields: Vec<_> = variant_obj.iter().collect();
         assert_eq!(fields.len(), 3);
@@ -274,6 +530,30 @@ mod tests {
 
         assert_eq!(fields[2].0, "name");
         assert_eq!(fields[2].1.as_string(), Some("hello"));
+
+        // Test field access by index
+        // Fields should be in sorted order: active, age, name
+        assert_eq!(variant_obj.field_name(0), Some("active"));
+        assert_eq!(variant_obj.field(0).unwrap().as_boolean(), Some(true));
+
+        assert_eq!(variant_obj.field_name(1), Some("age"));
+        assert_eq!(variant_obj.field(1).unwrap().as_int8(), Some(42));
+
+        assert_eq!(variant_obj.field_name(2), Some("name"));
+        assert_eq!(variant_obj.field(2).unwrap().as_string(), Some("hello"));
+    }
+
+    #[test]
+    fn test_variant_object_empty_fields() {
+        let mut builder = VariantBuilder::new();
+        builder.new_object().with_field("", 42).finish();
+        let (metadata, value) = builder.finish();
+
+        // Resulting object is valid and has a single empty field
+        let variant = Variant::try_new(&metadata, &value).unwrap();
+        let variant_obj = variant.as_object().unwrap();
+        assert_eq!(variant_obj.len(), 1);
+        assert_eq!(variant_obj.get(""), Some(Variant::from(42)));
     }
 
     #[test]
@@ -301,11 +581,413 @@ mod tests {
         assert!(variant_obj.is_empty());
 
         // Test field access on empty object
-        let missing_field = variant_obj.field_by_name("anything").unwrap();
+        let missing_field = variant_obj.get("anything");
         assert!(missing_field.is_none());
 
         // Test fields iterator on empty object
         let fields: Vec<_> = variant_obj.iter().collect();
         assert_eq!(fields.len(), 0);
     }
+
+    #[test]
+    fn test_variant_object_invalid_metadata_end_offset() {
+        // Create metadata with field names: "age", "name" (sorted)
+        let metadata_bytes = vec![
+            0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
+            2,           // dictionary size
+            0,           // "age"
+            3,           // "name"
+            8,           // Invalid end offset (should be 7)
+            b'a',
+            b'g',
+            b'e',
+            b'n',
+            b'a',
+            b'm',
+            b'e',
+        ];
+        let err = VariantMetadata::try_new(&metadata_bytes);
+        let err = err.unwrap_err();
+        assert!(matches!(
+            err,
+            ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..13 from 12-byte buffer")
+        ));
+    }
+
+    #[test]
+    fn test_variant_object_invalid_end_offset() {
+        // Create metadata with field names: "age", "name" (sorted)
+        let metadata_bytes = vec![
+            0b0001_0001, // header: version=1, sorted=1, offset_size_minus_one=0
+            2,           // dictionary size
+            0,           // "age"
+            3,           // "name"
+            7,
+            b'a',
+            b'g',
+            b'e',
+            b'n',
+            b'a',
+            b'm',
+            b'e',
+        ];
+        let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap();
+
+        // Create object value data for: {"age": 42, "name": "hello"}
+        // Field IDs in sorted order: [0, 1] (age, name)
+        // Header: basic_type=2, field_offset_size_minus_one=0, field_id_size_minus_one=0, is_large=0
+        // value_header = 0000_00_00 = 0x00
+        let object_value = vec![
+            0x02, // header: basic_type=2, value_header=0x00
+            2,    // num_elements = 2
+            // Field IDs (1 byte each): age=0, name=1
+            0, 1,
+            // Field offsets (1 byte each): 3 offsets total
+            0, // offset to first value (int8)
+            2, // offset to second value (short string)
+            9, // invalid end offset (correct would be 8)
+            // Values:
+            0x0C,
+            42, // int8: primitive_header=3, basic_type=0 -> (3 << 2) | 0 = 0x0C, then value 42
+            0x15, b'h', b'e', b'l', b'l',
+            b'o', // short string: length=5, basic_type=1 -> (5 << 2) | 1 = 0x15
+        ];
+
+        let err = VariantObject::try_new(metadata, &object_value);
+        let err = err.unwrap_err();
+        assert!(matches!(
+            err,
+            ArrowError::InvalidArgumentError(ref msg) if msg.contains("Tried to extract byte(s) ..16 from 15-byte buffer")
+        ));
+    }
+
+    fn test_variant_object_with_count(count: i32, expected_field_id_size: OffsetSizeBytes) {
+        let field_names: Vec<_> = (0..count).map(|val| val.to_string()).collect();
+        let mut builder =
+            VariantBuilder::new().with_field_names(field_names.iter().map(|s| s.as_str()));
+
+        let mut obj = builder.new_object();
+
+        for i in 0..count {
+            obj.insert(&field_names[i as usize], i);
+        }
+
+        obj.finish();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::new(&metadata, &value);
+
+        if let Variant::Object(obj) = variant {
+            assert_eq!(obj.len(), count as usize);
+
+            assert_eq!(obj.get(&field_names[0]).unwrap(), Variant::Int32(0));
+            assert_eq!(
+                obj.get(&field_names[(count - 1) as usize]).unwrap(),
+                Variant::Int32(count - 1)
+            );
+            assert_eq!(
+                obj.header.field_id_size, expected_field_id_size,
+                "Expected {}-byte field IDs, got {}-byte field IDs",
+                expected_field_id_size as usize, obj.header.field_id_size as usize
+            );
+        } else {
+            panic!("Expected object variant");
+        }
+    }
+
+    #[test]
+    fn test_variant_object_257_elements() {
+        test_variant_object_with_count((1 << 8) + 1, OffsetSizeBytes::Two); // 2^8 + 1, expected 2-byte field IDs
+    }
+
+    #[test]
+    fn test_variant_object_65537_elements() {
+        test_variant_object_with_count((1 << 16) + 1, OffsetSizeBytes::Three);
+        // 2^16 + 1, expected 3-byte field IDs
+    }
+
+    /* Can't run this test now as it takes 45x longer than other tests
+    #[test]
+    fn test_variant_object_16777217_elements() {
+        test_variant_object_with_count((1 << 24) + 1, OffsetSizeBytes::Four);
+        // 2^24 + 1, expected 4-byte field IDs
+    }
+     */
+
+    #[test]
+    fn test_variant_object_small_sizes_255_elements() {
+        test_variant_object_with_count(255, OffsetSizeBytes::One);
+    }
+
+    fn test_variant_object_with_large_data(
+        data_size_per_field: usize,
+        expected_field_offset_size: OffsetSizeBytes,
+    ) {
+        let num_fields = 20;
+        let mut builder = VariantBuilder::new();
+        let mut obj = builder.new_object();
+
+        let str_val = "a".repeat(data_size_per_field);
+
+        for val in 0..num_fields {
+            let key = format!("id_{val}");
+            obj.insert(&key, str_val.as_str());
+        }
+
+        obj.finish();
+        let (metadata, value) = builder.finish();
+        let variant = Variant::new(&metadata, &value);
+
+        if let Variant::Object(obj) = variant {
+            assert_eq!(obj.len(), num_fields);
+            assert_eq!(
+                obj.header.field_offset_size, expected_field_offset_size,
+                "Expected {}-byte field offsets, got {}-byte field offsets",
+                expected_field_offset_size as usize, obj.header.field_offset_size as usize
+            );
+        } else {
+            panic!("Expected object variant");
+        }
+    }
+
+    #[test]
+    fn test_variant_object_child_data_0_byte_offsets_minus_one() {
+        test_variant_object_with_large_data(10, OffsetSizeBytes::One);
+    }
+
+    #[test]
+    fn test_variant_object_256_bytes_child_data_3_byte_offsets() {
+        test_variant_object_with_large_data(256 + 1, OffsetSizeBytes::Two); // 2^8 - 2^16 bytes
+    }
+
+    #[test]
+    fn test_variant_object_16777216_bytes_child_data_4_byte_offsets() {
+        test_variant_object_with_large_data(65536 + 1, OffsetSizeBytes::Three); // 2^16 - 2^24 bytes
+    }
+
+    #[test]
+    fn test_variant_object_65535_bytes_child_data_2_byte_offsets() {
+        test_variant_object_with_large_data(16777216 + 1, OffsetSizeBytes::Four);
+        // 2^24
+    }
+
+    #[test]
+    fn test_objects_with_same_fields_are_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("b", ());
+        o.insert("c", ());
+        o.insert("a", ());
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+        let v2 = Variant::try_new(&m, &v).unwrap();
+
+        assert_eq!(v1, v2);
+    }
+
+    #[test]
+    fn test_same_objects_with_different_builder_are_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", false);
+
+        o.finish();
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", false);
+
+        o.finish();
+        let (m, v) = b.finish();
+
+        let v2 = Variant::try_new(&m, &v).unwrap();
+
+        assert_eq!(v1, v2);
+    }
+
+    #[test]
+    fn test_objects_with_different_values_are_not_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", 4.3);
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+
+        // second object, same field name but different values
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        let mut inner_o = o.new_object("b");
+        inner_o.insert("a", 3.3);
+        inner_o.finish();
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v2 = Variant::try_new(&m, &v).unwrap();
+
+        let m1 = v1.metadata();
+        let m2 = v2.metadata();
+
+        // metadata would be equal since they contain the same keys
+        assert_eq!(m1, m2);
+
+        // but the objects are not equal
+        assert_ne!(v1, v2);
+    }
+
+    #[test]
+    fn test_objects_with_different_field_names_are_not_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", 4.3);
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+
+        // second object, with different field names
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("aardvark", ());
+        o.insert("barracuda", 3.3);
+
+        o.finish();
+
+        let (m, v) = b.finish();
+        let v2 = Variant::try_new(&m, &v).unwrap();
+
+        assert_ne!(v1, v2);
+    }
+
+    #[test]
+    fn test_objects_with_different_insertion_order_are_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("b", false);
+        o.insert("a", ());
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+        assert!(!v1.metadata().is_sorted());
+
+        // create another object pre-filled with field names, b and a
+        // but insert the fields in the order of a, b
+        let mut b = VariantBuilder::new().with_field_names(["b", "a"]);
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", false);
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v2 = Variant::try_new(&m, &v).unwrap();
+
+        // v2 should also have an unsorted dictionary
+        assert!(!v2.metadata().is_sorted());
+
+        assert_eq!(v1, v2);
+    }
+
+    #[test]
+    fn test_objects_with_differing_metadata_are_equal() {
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", ());
+        o.insert("b", 4.3);
+
+        o.finish();
+
+        let (meta1, value1) = b.finish();
+
+        let v1 = Variant::try_new(&meta1, &value1).unwrap();
+        // v1 is sorted
+        assert!(v1.metadata().is_sorted());
+
+        // create a second object with different insertion order
+        let mut b = VariantBuilder::new().with_field_names(["d", "c", "b", "a"]);
+        let mut o = b.new_object();
+
+        o.insert("b", 4.3);
+        o.insert("a", ());
+
+        o.finish();
+
+        let (meta2, value2) = b.finish();
+
+        let v2 = Variant::try_new(&meta2, &value2).unwrap();
+        // v2 is not sorted
+        assert!(!v2.metadata().is_sorted());
+
+        // object metadata are not the same
+        assert_ne!(v1.metadata(), v2.metadata());
+
+        // objects are still logically equal
+        assert_eq!(v1, v2);
+    }
+
+    #[test]
+    fn test_compare_object_with_unsorted_dictionary_vs_sorted_dictionary() {
+        // create a sorted object
+        let mut b = VariantBuilder::new();
+        let mut o = b.new_object();
+
+        o.insert("a", false);
+        o.insert("b", false);
+
+        o.finish();
+
+        let (m, v) = b.finish();
+
+        let v1 = Variant::try_new(&m, &v).unwrap();
+
+        // Create metadata with an unsorted dictionary (field names are "a", "b", "a")
+        // Since field names are not unique, it is considered not sorted.
+        let metadata_bytes = vec![
+            0b0000_0001,
+            3, // dictionary size
+            0, // "a"
+            1, // "b"
+            2, // "a"
+            3,
+            b'a',
+            b'b',
+            b'a',
+        ];
+        let m = VariantMetadata::try_new(&metadata_bytes).unwrap();
+        assert!(!m.is_sorted());
+
+        let v2 = Variant::new_with_metadata(m, &v);
+        assert_eq!(v1, v2);
+    }
 }
diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs
index bfa2ab267c27..70b42e1f3c28 100644
--- a/parquet-variant/tests/variant_interop.rs
+++ b/parquet-variant/tests/variant_interop.rs
@@ -18,19 +18,69 @@
 //! End-to-end check: (almost) every sample from apache/parquet-testing/variant
 //! can be parsed into our `Variant`.
 
-// NOTE: We keep this file separate rather than a test mod inside variant.rs because it should be
-// moved to the test folder later
-use std::fs;
 use std::path::{Path, PathBuf};
+use std::{env, fs};
 
-use chrono::NaiveDate;
-use parquet_variant::{ShortString, Variant, VariantBuilder};
+use chrono::{DateTime, NaiveDate, NaiveTime};
+use parquet_variant::{
+    ShortString, Variant, VariantBuilder, VariantDecimal4, VariantDecimal8, VariantDecimal16,
+};
 
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+use uuid::Uuid;
+
+/// Returns a directory path for the parquet variant test data.
+///
+/// The data lives in the `parquet-testing` git repository:
+/// <https://github.com/apache/parquet-testing>
+///
+/// Normally this is checked out as a git submodule in the root of the `arrow-rs` repository,
+/// so the relative path is
+/// * `CARGO_MANIFEST_DIR/../parquet-testing/variant`.
+///
+/// However, the user can override this by setting the environment variable `PARQUET_TEST_DATA`
+/// to point to a different directory (as is done by the `verify-release-candidate.sh` script).
+///
+/// In this case, the environment variable `PARQUET_TEST_DATA` is expected to point to a directory
+/// `parquet-testing/data`, so the relative path to the `variant` subdirectory is
+/// * `PARQUET_TEST_DATA/../variant`.
 fn cases_dir() -> PathBuf {
-    Path::new(env!("CARGO_MANIFEST_DIR"))
+    // Prefer the `PARQUET_TEST_DATA` env var, which we expect to point at "../parquet-testing/data"
+    let env_name = "PARQUET_TEST_DATA";
+    if let Ok(dir) = env::var(env_name) {
+        let trimmed = dir.trim();
+        if !trimmed.is_empty() {
+            let pb = PathBuf::from(trimmed).join("..").join("variant");
+            if pb.is_dir() {
+                return pb;
+            } else {
+                panic!(
+                    "Can't find variant data at `{pb:?}`. Used value of env `{env_name}`../variant ",
+                )
+            }
+        }
+    }
+
+    // PARQUET_TEST_DATA is undefined or its value is trimmed to empty, let's try default dir.
+
+    // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package",
+    // set by `cargo run` or `cargo test`, see:
+    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
+    let pb = Path::new(env!("CARGO_MANIFEST_DIR"))
         .join("..")
         .join("parquet-testing")
-        .join("variant")
+        .join("variant");
+
+    if pb.is_dir() {
+        pb
+    } else {
+        panic!(
+            "env `{env_name}` is undefined or has empty value, and \
+             `CARGO_MANIFEST_DIR/../parquet-testing/variant` is not a directory: `{pb:?}`\n\
+             HINT: try running `git submodule update --init`",
+        )
+    }
 }
 
 struct Case {
@@ -59,13 +109,29 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> {
     // Cases are commented out
     // Enabling is tracked in  https://github.com/apache/arrow-rs/issues/7630
     vec![
-        ("primitive_binary", Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe])),
+        (
+            "primitive_binary",
+            Variant::Binary(&[0x03, 0x13, 0x37, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe]),
+        ),
         ("primitive_boolean_false", Variant::BooleanFalse),
         ("primitive_boolean_true", Variant::BooleanTrue),
-        ("primitive_date", Variant::Date(NaiveDate::from_ymd_opt(2025, 4 , 16).unwrap())),
-        ("primitive_decimal4", Variant::Decimal4{integer: 1234, scale: 2}),
-        ("primitive_decimal8", Variant::Decimal8{integer: 1234567890, scale: 2}),
-        ("primitive_decimal16", Variant::Decimal16{integer: 1234567891234567890, scale: 2}),
+        (
+            "primitive_date",
+            Variant::Date(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap()),
+        ),
+        (
+            "primitive_decimal4",
+            Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap()),
+        ),
+        // ("primitive_decimal8", Variant::Decimal8{integer: 1234567890, scale: 2}),
+        (
+            "primitive_decimal8",
+            Variant::Decimal8(VariantDecimal8::try_new(1234567890, 2).unwrap()),
+        ),
+        (
+            "primitive_decimal16",
+            Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap()),
+        ),
         ("primitive_float", Variant::Float(1234567890.1234)),
         ("primitive_double", Variant::Double(1234567890.1234)),
         ("primitive_int8", Variant::Int8(42)),
@@ -73,10 +139,64 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> {
         ("primitive_int32", Variant::Int32(123456)),
         ("primitive_int64", Variant::Int64(1234567890123456789)),
         ("primitive_null", Variant::Null),
-        ("primitive_string", Variant::String("This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!")),
-        ("primitive_timestamp", Variant::TimestampMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(16, 34, 56, 780).unwrap().and_utc())),
-        ("primitive_timestampntz", Variant::TimestampNtzMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap())),
-        ("short_string", Variant::ShortString(ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with utf8)").unwrap())),
+        (
+            "primitive_string",
+            Variant::String(
+                "This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes several non ascii characters such as 🐢, 💖, ♥\u{fe0f}, 🎣 and 🤦!!",
+            ),
+        ),
+        (
+            "primitive_timestamp",
+            Variant::TimestampMicros(
+                NaiveDate::from_ymd_opt(2025, 4, 16)
+                    .unwrap()
+                    .and_hms_milli_opt(16, 34, 56, 780)
+                    .unwrap()
+                    .and_utc(),
+            ),
+        ),
+        (
+            "primitive_timestampntz",
+            Variant::TimestampNtzMicros(
+                NaiveDate::from_ymd_opt(2025, 4, 16)
+                    .unwrap()
+                    .and_hms_milli_opt(12, 34, 56, 780)
+                    .unwrap(),
+            ),
+        ),
+        (
+            "primitive_timestamp_nanos",
+            Variant::TimestampNanos(
+                NaiveDate::from_ymd_opt(2024, 11, 7)
+                    .unwrap()
+                    .and_hms_nano_opt(12, 33, 54, 123456789)
+                    .unwrap()
+                    .and_utc(),
+            ),
+        ),
+        (
+            "primitive_timestampntz_nanos",
+            Variant::TimestampNtzNanos(
+                NaiveDate::from_ymd_opt(2024, 11, 7)
+                    .unwrap()
+                    .and_hms_nano_opt(12, 33, 54, 123456789)
+                    .unwrap(),
+            ),
+        ),
+        (
+            "primitive_uuid",
+            Variant::Uuid(Uuid::parse_str("f24f9b64-81fa-49d1-b74e-8c09a6e31c56").unwrap()),
+        ),
+        (
+            "short_string",
+            Variant::ShortString(
+                ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with utf8)").unwrap(),
+            ),
+        ),
+        (
+            "primitive_time",
+            Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54, 123456).unwrap()),
+        ),
     ]
 }
 #[test]
@@ -123,10 +243,7 @@ fn variant_object_primitive() {
         // spark wrote this as a decimal4 (not a double)
         (
             "double_field",
-            Variant::Decimal4 {
-                integer: 123456789,
-                scale: 8,
-            },
+            Variant::Decimal4(VariantDecimal4::try_new(123456789, 8).unwrap()),
         ),
         ("int_field", Variant::Int8(1)),
         ("null_field", Variant::Null),
@@ -206,16 +323,19 @@ fn variant_object_builder() {
     let mut builder = VariantBuilder::new();
 
     let mut obj = builder.new_object();
-    obj.append_value("int_field", 1i8);
+    obj.insert("int_field", 1i8);
 
     // The double field is actually encoded as decimal4 with scale 8
     // Value: 123456789, Scale: 8 -> 1.23456789
-    obj.append_value("double_field", (123456789i32, 8u8));
-    obj.append_value("boolean_true_field", true);
-    obj.append_value("boolean_false_field", false);
-    obj.append_value("string_field", "Apache Parquet");
-    obj.append_value("null_field", ());
-    obj.append_value("timestamp_field", "2025-04-16T12:34:56.78");
+    obj.insert(
+        "double_field",
+        VariantDecimal4::try_new(123456789i32, 8u8).unwrap(),
+    );
+    obj.insert("boolean_true_field", true);
+    obj.insert("boolean_false_field", false);
+    obj.insert("string_field", "Apache Parquet");
+    obj.insert("null_field", ());
+    obj.insert("timestamp_field", "2025-04-16T12:34:56.78");
 
     obj.finish();
 
@@ -228,3 +348,329 @@ fn variant_object_builder() {
 }
 
 // TODO: Add tests for object_nested and array_nested
+
+//
+// Validation Fuzzing Tests
+//
+// 1. Generate valid variants using the builder
+// 2. Randomly corrupt bytes in the serialized data
+// 3. Test both validation pathways:
+//    - If validation succeeds -> verify infallible APIs don't panic
+//    - If validation fails -> verify fallible APIs handle errors gracefully
+//
+
+#[test]
+fn test_validation_fuzz_integration() {
+    let mut rng = StdRng::seed_from_u64(42);
+
+    for _ in 0..1000 {
+        // Generate a random valid variant
+        let (metadata, value) = generate_random_variant(&mut rng);
+
+        // Corrupt it
+        let (corrupted_metadata, corrupted_value) = corrupt_variant_data(&mut rng, metadata, value);
+
+        // Test the validation workflow
+        test_validation_workflow(&corrupted_metadata, &corrupted_value);
+    }
+}
+
+fn generate_random_variant(rng: &mut StdRng) -> (Vec<u8>, Vec<u8>) {
+    let mut builder = VariantBuilder::new();
+    generate_random_value(rng, &mut builder, 3); // Max depth of 3
+    builder.finish()
+}
+
+fn generate_random_value(rng: &mut StdRng, builder: &mut VariantBuilder, max_depth: u32) {
+    if max_depth == 0 {
+        // Force simple values at max depth
+        builder.append_value(rng.random::<i32>());
+        return;
+    }
+
+    match rng.random_range(0..18) {
+        0 => builder.append_value(()),
+        1 => builder.append_value(rng.random::<bool>()),
+        2 => builder.append_value(rng.random::<i8>()),
+        3 => builder.append_value(rng.random::<i16>()),
+        4 => builder.append_value(rng.random::<i32>()),
+        5 => builder.append_value(rng.random::<i64>()),
+        6 => builder.append_value(rng.random::<f32>()),
+        7 => builder.append_value(rng.random::<f64>()),
+        8 => {
+            // String
+            let len = rng.random_range(0..50);
+            let s: String = (0..len).map(|_| rng.random::<char>()).collect();
+            builder.append_value(s.as_str());
+        }
+        9 => {
+            // Binary
+            let len = rng.random_range(0..50);
+            let bytes: Vec<u8> = (0..len).map(|_| rng.random()).collect();
+            builder.append_value(bytes.as_slice());
+        }
+        10 => {
+            if let Ok(decimal) = VariantDecimal4::try_new(rng.random(), rng.random_range(0..10)) {
+                builder.append_value(decimal);
+            } else {
+                builder.append_value(0i32);
+            }
+        }
+        11 => {
+            if let Ok(decimal) = VariantDecimal8::try_new(rng.random(), rng.random_range(0..19)) {
+                builder.append_value(decimal);
+            } else {
+                builder.append_value(0i64);
+            }
+        }
+        12 => {
+            if let Ok(decimal) = VariantDecimal16::try_new(rng.random(), rng.random_range(0..39)) {
+                builder.append_value(decimal);
+            } else {
+                builder.append_value(0i64); // Use i64 instead of i128
+            }
+        }
+        13 => {
+            // Generate a list
+            let mut list_builder = builder.new_list();
+            let list_len = rng.random_range(0..10);
+            list_builder.extend(std::iter::repeat_with(|| rng.random::<i32>()).take(list_len));
+            list_builder.finish();
+        }
+        14 => {
+            // Generate an object
+            let mut object_builder = builder.new_object();
+            let obj_size = rng.random_range(0..10);
+
+            object_builder
+                .extend((0..obj_size).map(|i| (format!("field_{i}"), rng.random::<i32>())));
+
+            object_builder.finish();
+        }
+        15 => {
+            // Time
+            builder.append_value(
+                NaiveTime::from_num_seconds_from_midnight_opt(
+                    // make the argument always valid
+                    rng.random_range(0..86_400),
+                    rng.random_range(0..1_000_000_000),
+                )
+                .unwrap(),
+            )
+        }
+        16 => {
+            let data_time = DateTime::from_timestamp(
+                // make the argument always valid
+                rng.random_range(0..86_400),
+                rng.random_range(0..1_000_000_000),
+            )
+            .unwrap();
+
+            // timestamp w/o timezone
+            builder.append_value(data_time.naive_local());
+
+            // timestamp with timezone
+            builder.append_value(data_time.naive_utc().and_utc());
+        }
+        17 => {
+            builder.append_value(Uuid::new_v4());
+        }
+        _ => unreachable!(),
+    }
+}
+
+fn corrupt_variant_data(
+    rng: &mut StdRng,
+    mut metadata: Vec<u8>,
+    mut value: Vec<u8>,
+) -> (Vec<u8>, Vec<u8>) {
+    // Randomly decide what to corrupt
+    let corrupt_metadata = rng.random_bool(0.3);
+    let corrupt_value = rng.random_bool(0.7);
+
+    if corrupt_metadata && !metadata.is_empty() {
+        let idx = rng.random_range(0..metadata.len());
+        let bit = rng.random_range(0..8);
+        metadata[idx] ^= 1 << bit;
+    }
+
+    if corrupt_value && !value.is_empty() {
+        let idx = rng.random_range(0..value.len());
+        let bit = rng.random_range(0..8);
+        value[idx] ^= 1 << bit;
+    }
+
+    (metadata, value)
+}
+
+fn test_validation_workflow(metadata: &[u8], value: &[u8]) {
+    // Step 1: Try unvalidated construction - should not panic
+    let variant_result = std::panic::catch_unwind(|| Variant::new(metadata, value));
+
+    let variant = match variant_result {
+        Ok(v) => v,
+        Err(_) => return, // Construction failed, which is acceptable for corrupted data
+    };
+
+    // Step 2: Try validation
+    let validation_result = std::panic::catch_unwind(|| variant.clone().with_full_validation());
+
+    match validation_result {
+        Ok(Ok(validated)) => {
+            // Validation succeeded - infallible access should not panic
+            test_infallible_access(&validated);
+        }
+        Ok(Err(_)) => {
+            // Validation failed - fallible access should handle errors gracefully
+            test_fallible_access(&variant);
+        }
+        Err(_) => {
+            // Validation panicked - this may indicate severely corrupted data
+            // For now, we accept this, but it could indicate a validation bug
+        }
+    }
+}
+
+fn test_infallible_access(variant: &Variant) {
+    // All these should not panic on validated variants
+    let _ = variant.as_null();
+    let _ = variant.as_boolean();
+    let _ = variant.as_int32();
+    let _ = variant.as_string();
+
+    if let Some(obj) = variant.as_object() {
+        for (_, _) in obj.iter() {
+            // Should not panic
+        }
+        for i in 0..obj.len() {
+            let _ = obj.field(i);
+        }
+    }
+
+    if let Some(list) = variant.as_list() {
+        for _ in list.iter() {
+            // Should not panic
+        }
+        for i in 0..list.len() {
+            let _ = list.get(i);
+        }
+    }
+}
+
+fn test_fallible_access(variant: &Variant) {
+    // These should handle errors gracefully, never panic
+    if let Some(obj) = variant.as_object() {
+        for result in obj.iter_try() {
+            let _ = result; // May be Ok or Err, but should not panic
+        }
+        for i in 0..obj.len() {
+            let _ = obj.try_field(i); // May be Ok or Err, but should not panic
+        }
+    }
+
+    if let Some(list) = variant.as_list() {
+        for result in list.iter_try() {
+            let _ = result; // May be Ok or Err, but should not panic
+        }
+        for i in 0..list.len() {
+            let _ = list.try_get(i); // May be Ok or Err, but should not panic
+        }
+    }
+}
+
+#[test]
+fn test_specific_validation_error_cases() {
+    // Test specific malformed cases that should trigger validation errors
+
+    // Case 1: Invalid header byte
+    test_validation_workflow_simple(&[0x01, 0x00, 0x00], &[0xFF, 0x42]); // Invalid basic type
+
+    // Case 2: Truncated metadata
+    test_validation_workflow_simple(&[0x01], &[0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F]); // Incomplete metadata
+
+    // Case 3: Truncated value
+    test_validation_workflow_simple(&[0x01, 0x00, 0x00], &[0x09]); // String header but no data
+
+    // Case 4: Invalid object with out-of-bounds field ID
+    test_validation_workflow_simple(&[0x01, 0x00, 0x00], &[0x0F, 0x01, 0xFF, 0x00, 0x00]); // Field ID 255 doesn't exist
+
+    // Case 5: Invalid list with malformed offsets
+    test_validation_workflow_simple(&[0x01, 0x00, 0x00], &[0x13, 0x02, 0xFF, 0x00, 0x00]);
+    // Malformed offset array
+}
+
+fn test_validation_workflow_simple(metadata: &[u8], value: &[u8]) {
+    // Simple version without randomization, always runs regardless of feature flag
+
+    // Step 1: Try unvalidated construction - should not panic
+    let variant_result = std::panic::catch_unwind(|| Variant::new(metadata, value));
+
+    let variant = match variant_result {
+        Ok(v) => v,
+        Err(_) => return, // Construction failed, which is acceptable for corrupted data
+    };
+
+    // Step 2: Try validation
+    let validation_result = std::panic::catch_unwind(|| variant.clone().with_full_validation());
+
+    match validation_result {
+        Ok(Ok(validated)) => {
+            // Validation succeeded - infallible access should not panic
+            test_infallible_access_simple(&validated);
+        }
+        Ok(Err(_)) => {
+            // Validation failed - fallible access should handle errors gracefully
+            test_fallible_access_simple(&variant);
+        }
+        Err(_) => {
+            // Validation panicked - this may indicate severely corrupted data
+        }
+    }
+}
+
+fn test_infallible_access_simple(variant: &Variant) {
+    // All these should not panic on validated variants
+    let _ = variant.as_null();
+    let _ = variant.as_boolean();
+    let _ = variant.as_int32();
+    let _ = variant.as_string();
+
+    if let Some(obj) = variant.as_object() {
+        for (_, _) in obj.iter() {
+            // Should not panic
+        }
+        for i in 0..obj.len() {
+            let _ = obj.field(i);
+        }
+    }
+
+    if let Some(list) = variant.as_list() {
+        for _ in list.iter() {
+            // Should not panic
+        }
+        for i in 0..list.len() {
+            let _ = list.get(i);
+        }
+    }
+}
+
+fn test_fallible_access_simple(variant: &Variant) {
+    // These should handle errors gracefully, never panic
+    if let Some(obj) = variant.as_object() {
+        for result in obj.iter_try() {
+            let _ = result; // May be Ok or Err, but should not panic
+        }
+        for i in 0..obj.len() {
+            let _ = obj.try_field(i); // May be Ok or Err, but should not panic
+        }
+    }
+
+    if let Some(list) = variant.as_list() {
+        for result in list.iter_try() {
+            let _ = result; // May be Ok or Err, but should not panic
+        }
+        for i in 0..list.len() {
+            let _ = list.try_get(i); // May be Ok or Err, but should not panic
+        }
+    }
+}
diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index d277a2cbd202..519fbc903d22 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -39,24 +39,30 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"]
 [dependencies]
 arrow-array = { workspace = true, optional = true }
 arrow-buffer = { workspace = true, optional = true }
-arrow-cast = { workspace = true, optional = true }
 arrow-csv = { workspace = true, optional = true }
 arrow-data = { workspace = true, optional = true }
 arrow-schema = { workspace = true, optional = true }
 arrow-select = { workspace = true, optional = true }
 arrow-ipc = { workspace = true, optional = true }
+parquet-geospatial = { workspace = true, optional = true }
+parquet-variant = { workspace = true, optional = true }
+parquet-variant-json = { workspace = true, optional = true }
+parquet-variant-compute = { workspace = true, optional = true }
+
 object_store = { version = "0.12.0", default-features = false, optional = true }
 
 bytes = { version = "1.1", default-features = false, features = ["std"] }
 thrift = { version = "0.17", default-features = false }
 snap = { version = "1.0", default-features = false, optional = true }
 brotli = { version = "8.0", default-features = false, features = ["std"], optional = true }
-flate2 = { version = "1.1", default-features = false, features = ["zlib-rs"], optional = true }
-lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true }
+# To use `flate2` you must enable either the `flate2-zlib-rs` or `flate2-rust_backened` backends
+flate2 = { version = "1.1", default-features = false, optional = true }
+lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true }
 zstd = { version = "0.13", optional = true, default-features = false }
 chrono = { workspace = true }
-num = { version = "0.4", default-features = false }
 num-bigint = { version = "0.4", default-features = false }
+num-integer = { version = "0.1.46", default-features = false, features = ["std"] }
+num-traits = { version = "0.2.19", default-features = false, features = ["std"] }
 base64 = { version = "0.22", default-features = false, features = ["std", ], optional = true }
 clap = { version = "4.1", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true }
 serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
@@ -64,39 +70,41 @@ serde_json = { version = "1.0", default-features = false, features = ["std"], op
 seq-macro = { version = "0.3", default-features = false }
 futures = { version = "0.3", default-features = false, features = ["std"], optional = true }
 tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] }
-hashbrown = { version = "0.15", default-features = false }
+hashbrown = { version = "0.16", default-features = false }
 twox-hash = { version = "2.0", default-features = false, features = ["xxhash64"] }
 paste = { version = "1.0" }
 half = { version = "2.1", default-features = false, features = ["num-traits"] }
 crc32fast = { version = "1.4.2", optional = true, default-features = false }
-simdutf8 = { version = "0.1.5", optional = true, default-features = false }
+simdutf8 = { workspace = true , optional = true }
 ring = { version = "0.17", default-features = false, features = ["std"], optional = true }
 
 [dev-dependencies]
 base64 = { version = "0.22", default-features = false, features = ["std"] }
-criterion = { version = "0.5", default-features = false, features = ["async_futures"]  }
+criterion = { workspace = true, default-features = false, features = ["async_futures"]  }
 snap = { version = "1.0", default-features = false }
 tempfile = { version = "3.0", default-features = false }
+insta = "1.43.1"
 brotli = { version = "8.0", default-features = false, features = ["std"] }
 flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] }
-lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"] }
+lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"] }
 zstd = { version = "0.13", default-features = false }
 serde_json = { version = "1.0", features = ["std"], default-features = false }
 arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] }
+arrow-cast = { workspace = true }
 tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
 object_store = { version = "0.12.0", default-features = false, features = ["azure", "fs"] }
-sysinfo = { version = "0.35.0", default-features = false, features = ["system"] }
+sysinfo = { version = "0.37.1", default-features = false, features = ["system"] }
 
 [package.metadata.docs.rs]
 all-features = true
 
 [features]
-default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64", "simdutf8"]
+default = ["arrow", "snap", "brotli", "flate2-zlib-rs", "lz4", "zstd", "base64", "simdutf8"]
 # Enable lz4
 lz4 = ["lz4_flex"]
 # Enable arrow reader/writer APIs
-arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-cast", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"]
+arrow = ["base64", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "arrow-select", "arrow-ipc"]
 # Enable support for arrow canonical extension types
 arrow_canonical_extension_types = ["arrow-schema?/canonical_extension_types"]
 # Enable CLI tools
@@ -106,7 +114,7 @@ json = ["serde_json", "base64"]
 # Enable internal testing APIs
 test_common = ["arrow/test_utils"]
 # Experimental, unstable functionality primarily used for testing
-experimental = []
+experimental = ["variant_experimental"]
 # Enable async APIs
 async = ["futures", "tokio"]
 # Enable object_store integration
@@ -119,6 +127,13 @@ crc = ["dep:crc32fast"]
 simdutf8 = ["dep:simdutf8"]
 # Enable Parquet modular encryption support
 encryption = ["dep:ring"]
+# Explicitly enable the rust_backend and zlib-rs features for flate2
+flate2-rust_backened = ["flate2/rust_backend"]
+flate2-zlib-rs = ["flate2/zlib-rs"]
+# Enable parquet variant support
+variant_experimental = ["arrow", "parquet-variant", "parquet-variant-json", "parquet-variant-compute"]
+# Enable geospatial support
+geospatial = ["parquet-geospatial"]
 
 
 [[example]]
@@ -141,6 +156,11 @@ name = "async_read_parquet"
 required-features = ["arrow", "async"]
 path = "./examples/async_read_parquet.rs"
 
+[[example]]
+name = "read_with_row_filter"
+required-features = ["arrow"]
+path = "./examples/read_with_row_filter.rs"
+
 [[example]]
 name = "read_with_rowgroup"
 required-features = ["arrow", "async"]
@@ -160,6 +180,11 @@ name = "encryption"
 required-features = ["arrow"]
 path = "./tests/encryption/mod.rs"
 
+[[test]]
+name = "variant_integration"
+required-features = ["arrow", "variant_experimental", "serde"]
+path = "./tests/variant_integration.rs"
+
 [[bin]]
 name = "parquet-read"
 required-features = ["cli"]
@@ -235,10 +260,20 @@ harness = false
 name = "metadata"
 harness = false
 
+[[bench]]
+name = "parquet_round_trip"
+required-features = ["arrow"]
+harness = false
+
 [[bench]]
 name = "row_selector"
 harness = false
 required-features = ["arrow"]
 
+[[bench]]
+name = "row_selection_cursor"
+harness = false
+required-features = ["arrow"]
+
 [lib]
 bench = false
diff --git a/parquet/README.md b/parquet/README.md
index 8fc72bfbc32a..8317b4dbd4ff 100644
--- a/parquet/README.md
+++ b/parquet/README.md
@@ -64,29 +64,19 @@ The `parquet` crate provides the following features which may be enabled in your
 - `experimental` - Experimental APIs which may change, even between minor releases
 - `simdutf8` (default) - Use the [`simdutf8`] crate for SIMD-accelerated UTF-8 validation
 - `encryption` - support for reading / writing encrypted Parquet files
+- `variant_experimental` - ⚠️ Experimental [Parquet Variant] support, which may change, even between minor releases.
+- `geospatial` - ⚠️ Experimental geospatial support, which may change, even between minor releases.
 
 [`arrow`]: https://crates.io/crates/arrow
 [`simdutf8`]: https://crates.io/crates/simdutf8
+[parquet variant]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
 
 ## Parquet Feature Status
 
-- [x] All encodings supported
-- [x] All compression codecs supported
-- [x] Read support
-  - [x] Primitive column value readers
-  - [x] Row record reader
-  - [x] Arrow record reader
-  - [x] Async support (to Arrow)
-  - [x] Encrypted files
-- [x] Statistics support
-- [x] Write support
-  - [x] Primitive column value writers
-  - [ ] Row record writer
-  - [x] Arrow record writer
-  - [x] Async support
-  - [x] Encrypted files
-- [x] Predicate pushdown
-- [x] Parquet format 4.0.0 support
+Please see the [Implementation Status Page] on the [Apache Parquet] website for
+information on the status of this implementation.
+
+[implementation status page]: https://parquet.apache.org/docs/file-format/implementationstatus/
 
 ## License
 
diff --git a/parquet/THRIFT.md b/parquet/THRIFT.md
new file mode 100644
index 000000000000..599b33f2bce3
--- /dev/null
+++ b/parquet/THRIFT.md
@@ -0,0 +1,478 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# Thrift serialization in the parquet crate
+
+For both performance and flexibility reasons, this crate uses custom Thrift parsers and
+serialization mechanisms. For many of the objects defined by the Parquet specification, macros
+are used to generate the objects as well as the code to serialize them. But in certain instances
+(performance bottlenecks, additions to the spec, etc.), it becomes necessary to implement the
+serialization code manually. This document serves to document both the standard usage of the
+Thrift macros, as well as how to implement custom encoders and decoders.
+
+## Thrift macros
+
+The Parquet specification utilizes Thrift enums, unions, and structs, defined by an Interface
+Description Language (IDL). This IDL is usually parsed by a Thrift code generator to produce
+language specific structures and serialization/deserialization code. This crate, however, uses
+Rust macros to perform the same function. In addition to skipping creation of additional duplicate
+structures, doing so allows for customizations that produce more performant code, as well as the
+ability to pick and choose which fields to process.
+
+### Enums
+
+Thrift enums are the simplest structure, and are logically identical to Rust enums with unit
+variants. The IDL description will look like
+
+```
+enum Type {
+  BOOLEAN = 0;
+  INT32 = 1;
+  INT64 = 2;
+  INT96 = 3;
+  FLOAT = 4;
+  DOUBLE = 5;
+  BYTE_ARRAY = 6;
+  FIXED_LEN_BYTE_ARRAY = 7;
+}
+```
+
+The `thrift_enum` macro can be used in this instance.
+
+```rust
+thrift_enum!(
+enum Type {
+  BOOLEAN = 0;
+  INT32 = 1;
+  INT64 = 2;
+  INT96 = 3;
+  FLOAT = 4;
+  DOUBLE = 5;
+  BYTE_ARRAY = 6;
+  FIXED_LEN_BYTE_ARRAY = 7;
+}
+);
+```
+
+which will produce a public Rust enum
+
+```rust
+pub enum Type {
+  BOOLEAN,
+  INT32,
+  INT64,
+  INT96,
+  FLOAT,
+  DOUBLE,
+  BYTE_ARRAY,
+  FIXED_LEN_BYTE_ARRAY,
+}
+```
+
+All Rust `enum`s produced with this macro will have `pub` visibility.
+
+### Unions
+
+Thrift unions are a special kind of struct in which only a single field is populated. In this
+regard they are much like Rust enums which can have a mix of unit and tuple variants. Because of
+this flexibility, specifying unions is a little bit trickier.
+
+Often a union will be defined for which all the variants are typed with empty structs. For
+example the `TimeUnit` union used for `LogicalType`s.
+
+```
+struct MilliSeconds {}
+struct MicroSeconds {}
+struct NanoSeconds {}
+union TimeUnit {
+  1: MilliSeconds MILLIS
+  2: MicroSeconds MICROS
+  3: NanoSeconds NANOS
+}
+```
+
+When serialized, these empty structs become a single `0` (to mark the end of the struct). As an
+optimization, and to allow for a simpler interface, the `thrift_union_all_empty` macro can be used.
+
+```rust
+thrift_union_all_empty!(
+union TimeUnit {
+  1: MilliSeconds MILLIS
+  2: MicroSeconds MICROS
+  3: NanoSeconds NANOS
+}
+);
+```
+
+This macro will ignore the types specified for each variant, and will produce the following Rust
+`enum`:
+
+```rust
+pub enum TimeUnit {
+    MILLIS,
+    MICROS,
+    NANOS,
+}
+```
+
+For unions with mixed variant types, some modifications to the IDL are necessary. Take the
+definition of `ColumnCryptoMetadata`.
+
+```
+struct EncryptionWithFooterKey {
+}
+
+struct EncryptionWithColumnKey {
+  /** Column path in schema **/
+  1: required list<string> path_in_schema
+
+  /** Retrieval metadata of column encryption key **/
+  2: optional binary key_metadata
+}
+
+union ColumnCryptoMetaData {
+  1: EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY
+  2: EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY
+}
+```
+
+The `ENCRYPTION_WITH_FOOTER_KEY` variant is typed with an empty struct, while
+`ENCRYPTION_WITH_COLUMN_KEY` has the type of a struct with fields. In this case, the `thrift_union`
+macro is used.
+
+```rust
+thrift_union!(
+union ColumnCryptoMetaData {
+  1: ENCRYPTION_WITH_FOOTER_KEY
+  2: (EncryptionWithColumnKey) ENCRYPTION_WITH_COLUMN_KEY
+}
+);
+```
+
+Here, the type has been omitted for `ENCRYPTION_WITH_FOOTER_KEY` to indicate it should be a unit
+variant, while the type for `ENCRYPTION_WITH_COLUMN_KEY` is enclosed in parens. The parens are
+necessary to provide a semantic clue to the macro that the identifier is a type. The above will
+produce the Rust enum
+
+```rust
+pub enum ColumnCryptoMetaData {
+    ENCRYPTION_WITH_FOOTER_KEY,
+    ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey),
+}
+```
+
+All Rust `enum`s produced with either macro will have `pub` visibility. `thrift_union` also allows
+for lifetime annotations, but this capability is not currently utilized.
+
+### Structs
+
+The `thrift_struct` macro is used for structs. This macro is a little more flexible than the others
+because it allows for the visibility to be specified, and also allows for lifetimes to be specified
+for the defined structs as well as their fields. An example of this is the `SchemaElement` struct.
+This is defined in this crate as
+
+```rust
+thrift_struct!(
+pub(crate) struct SchemaElement<'a> {
+  1: optional Type r#type;
+  2: optional i32 type_length;
+  3: optional Repetition repetition_type;
+  4: required string<'a> name;
+  5: optional i32 num_children;
+  6: optional ConvertedType converted_type;
+  7: optional i32 scale
+  8: optional i32 precision
+  9: optional i32 field_id;
+  10: optional LogicalType logical_type
+}
+);
+```
+
+Here the `string` field `name` is given a lifetime annotation, which is then propagated to the
+struct definition. Without this annotation, the resultant field would be a `String` type, rather
+than a string slice. The visibility of this struct (and all fields) will be `pub(crate)`. The
+resultant Rust struct will be
+
+```rust
+pub(crate) struct SchemaElement<'a> {
+    pub(crate) r#type: Type, // here we've changed the name `type` to `r#type` to avoid reserved words
+    pub(crate) type_length: i32,
+    pub(crate) repetition_type: Repetition,
+    pub(crate) name: &'a str,
+    ...
+}
+```
+
+The lifetime annotations can also be added to list elements, as in
+
+```rust
+thrift_struct!(
+struct FileMetaData<'a> {
+  /** Version of this file **/
+  1: required i32 version
+  2: required list<'a><SchemaElement> schema;
+  3: required i64 num_rows
+  4: required list<'a><RowGroup> row_groups
+  5: optional list<KeyValue> key_value_metadata
+  6: optional string created_by
+  7: optional list<ColumnOrder> column_orders;
+  8: optional EncryptionAlgorithm encryption_algorithm
+  9: optional binary footer_signing_key_metadata
+}
+);
+```
+
+Note that the lifetime annotation precedes the element type specification.
+
+## Serialization traits
+
+Serialization is performed via several Rust traits. On the deserialization side, objects
+implement the `ReadThrift` trait. This defines a `read_thrift` function that takes a
+`ThriftCompactInputProtocol` I/O object as an argument. The `read_thrift` function performs
+all steps necessary to deserialize the object from the input stream, and is usually produced by
+one of the macros mentioned above.
+
+On the serialization side, the `WriteThrift` and `WriteThriftField` traits are used in conjunction
+with a `ThriftCompactOutputProtocol` struct. As above, the Thrift macros produce the necessary
+implementations needed to perform serialization.
+
+While the macros can be used in most circumstances, sometimes more control is needed. The following
+sections provide information on how to provide custom implementations for the serialization
+traits.
+
+### ReadThrift Customization
+
+Thrift enums are serialized as a single `i32` value. The process of reading an enum is straightforward:
+read the enum discriminant, and then match on the possible values. For instance, reading the
+`ConvertedType` enum becomes:
+
+```rust
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let val = prot.read_i32()?;
+        Ok(match val {
+            0 => Self::UTF8,
+            1 => Self::MAP,
+            2 => Self::MAP_KEY_VALUE,
+            ...
+            21 => Self::INTERVAL,
+            _ => return Err(general_err!("Unexpected ConvertedType {}", val)),
+        })
+    }
+}
+```
+
+The default behavior is to return an error when an unexpected value is encountered. One could,
+however, provide an `Unknown` variant if forward compatibility is needed in the case of an
+evolving enum.
+
+Deserializing structs is more involved, but still fairly easy. A thrift struct is serialized as
+repeated `(field_id,field_type,field)` tuples. The `field_id` and `field_type` usually occupy a
+single byte, followed by the Thrift encoded field. Because only 4 bits are available for the id,
+encoders usually will instead use deltas from the preceding field. If the delta will exceed 15,
+then the `field_id` nibble will be set to `0`, and the `field_id` will instead be encoded as a
+varint, following the `field_type`. Fields will generally be read in a loop, with the `field_id`
+and `field_type` read first, and then the `field_id` used to determine which field to read.
+When a `field_type` of `Stop` is encountered, this marks the end of the struct and processing ceases.
+Here is an example of the processing loop:
+
+```rust
+    let mut last_field_id = 0i16;
+    loop {
+        // read the field id and field type. break if we encounter `Stop`
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        // match on the field id
+        match field_ident.id {
+            1 => {
+                let val = i32::read_thrift(&mut *prot)?;
+                num_values = Some(val);
+            }
+            2 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                encoding = Some(val);
+            }
+            3 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                definition_level_encoding = Some(val);
+            }
+            4 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                repetition_level_encoding = Some(val);
+            }
+            // Thrift structs are meant to be forward compatible, so do not error
+            // here. Instead, simply skip unknown fields.
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        // set the last seen field id to calculate the next field_id
+        last_field_id = field_ident.id;
+    }
+```
+
+Thrift unions are encoded as structs, but only a single field will be encoded. The loop above
+can be eliminated, and only the `match` on the id performed. A subsequent call to
+`read_field_begin` must return `Stop`, or an error should be returned. Here's an example from
+the decoding of the `LogicalType` union:
+
+```rust
+    // read the discriminant, error if it is `0`
+    let field_ident = prot.read_field_begin(0)?;
+    if field_ident.field_type == FieldType::Stop {
+        return Err(general_err!("received empty union from remote LogicalType"));
+    }
+    let ret = match field_ident.id {
+        1 => {
+            prot.skip_empty_struct()?;
+            Self::String
+        }
+        ...
+        _ => {
+            // LogicalType needs to be forward compatible, so we have defined an `_Unknown`
+            // variant for it. This can return an error if forward compatibility is not desired.
+            prot.skip(field_ident.field_type)?;
+            Self::_Unknown {
+                field_id: field_ident.id,
+            }
+        }
+    };
+    // test to ensure there is only one field present
+    let field_ident = prot.read_field_begin(field_ident.id)?;
+    if field_ident.field_type != FieldType::Stop {
+        return Err(general_err!(
+            "Received multiple fields for union from remote LogicalType"
+        ));
+    }
+```
+
+### WriteThrift Customization
+
+On the serialization side, there are two traits to implement. The first, `WriteThrift`, is used
+for actually serializing the object. The other, `WriteThriftField`, handles serializing objects
+as struct fields.
+
+Serializing enums is as simple as writing the discriminant as an `i32`. For example, here is the
+custom serialization code for `ConvertedType`:
+
+```rust
+impl WriteThrift for ConvertedType {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        // because we've added NONE, the variant values are off by 1, so correct that here
+        writer.write_i32(*self as i32 - 1)
+    }
+}
+```
+
+Structs and unions are serialized by field. When performing the serialization, one needs to keep
+track of the last field that has been written, as this is needed to calculate the delta in the
+Thrift field header. For required fields this is not strictly necessary, but when writing
+optional fields it is. A typical `write_thrift` implementation will look like:
+
+```rust
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        // required field f1
+        self.f1.write_thrift_field(writer, 1, 0)?; // field_id == 1, last_field_id == 0
+        // required field f2
+        self.f2.write_thrift_field(writer, 2, 1)?; // field_id == 2, last_field_id == 1
+        // final required field f3, we now save the last_field_id, which is returned by write_thrift_field
+        let mut last_field_id = self.f3.write_thrift_field(writer, 3, 2)?; // field_id == 3, last_field_id == 2
+
+        // optional field f4
+        if let Some(val) = self.f4.as_ref() {
+            last_field_id = val.write_thrift_field(writer, 4, last_field_id)?;
+        }
+        // optional field f5
+        if let Some(val) = self.f5.as_ref() {
+            last_field_id = val.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        // write end of struct
+        writer.write_struct_end()
+    }
+```
+
+In most instances, the `WriteThriftField` implementation can be handled by the `write_thrift_field`
+macro. The first argument is the unqualified name of an object that implements `WriteThrift`, and
+the second is the field type (which will be `FieldType::Struct` for Thrift structs and unions,
+and `FieldType::I32` for Thrift enums).
+
+```rust
+write_thrift_field!(MyNewStruct, FieldType::Struct);
+```
+
+which expands to:
+
+```rust
+impl WriteThriftField for MyNewStruct {
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+        self.write_thrift(writer)?;
+        Ok(field_id)
+    }
+}
+```
+
+### Handling for lists
+
+Lists of serialized objects can usually be read using `parquet_thrift::read_thrift_vec` and written
+using the `WriteThrift::write_thrift` implementation for vectors of objects that implement
+`WriteThrift`.
+
+When reading a list, one first reads the list header which will provide the number of elements
+that have been encoded, and then read elements one at a time.
+
+```rust
+    // read the list header
+    let list_ident = prot.read_list_begin()?;
+    // allocate vector with enough capacity
+    let mut page_locations = Vec::with_capacity(list_ident.size as usize);
+    // read elements
+    for _ in 0..list_ident.size {
+        page_locations.push(read_page_location(prot)?);
+    }
+```
+
+Writing is simply the reverse: write the list header, and then serialize the elements:
+
+```rust
+    // write the list header
+    writer.write_list_begin(ElementType::Struct, page_locations.len())?;
+    // write the elements
+    for i in 0..page_locations.len() {
+        page_locations[i].write_thrift(writer)?;
+    }
+```
+
+## More examples
+
+For more examples, the easiest thing to do is to [expand](https://github.com/dtolnay/cargo-expand)
+the thrift macros. For instance, to see the implementations generated in the `basic` module, type:
+
+```sh
+% cargo expand -p parquet --lib --all-features basic
+```
diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs
index 321424b8206c..d7f11b8999b4 100644
--- a/parquet/benches/arrow_reader.rs
+++ b/parquet/benches/arrow_reader.rs
@@ -19,13 +19,13 @@ use arrow::array::Array;
 use arrow::datatypes::DataType;
 use arrow_schema::Field;
 use criterion::measurement::WallTime;
-use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion};
+use criterion::{BenchmarkGroup, Criterion, criterion_group, criterion_main};
 use half::f16;
-use num::FromPrimitive;
 use num_bigint::BigInt;
+use num_traits::FromPrimitive;
 use parquet::arrow::array_reader::{
-    make_byte_array_reader, make_byte_view_array_reader, make_fixed_len_byte_array_reader,
-    ListArrayReader,
+    ListArrayReader, make_byte_array_reader, make_byte_view_array_reader,
+    make_fixed_len_byte_array_reader,
 };
 use parquet::basic::Type;
 use parquet::data_type::{ByteArray, FixedLenByteArrayType};
@@ -38,7 +38,7 @@ use parquet::{
     schema::types::{ColumnDescPtr, SchemaDescPtr},
 };
 use rand::distr::uniform::SampleUniform;
-use rand::{rngs::StdRng, Rng, SeedableRng};
+use rand::{Rng, SeedableRng, rngs::StdRng};
 use std::{collections::VecDeque, sync::Arc};
 
 fn build_test_schema() -> SchemaDescPtr {
diff --git a/parquet/benches/arrow_reader_clickbench.rs b/parquet/benches/arrow_reader_clickbench.rs
index 38d5ed9bb84e..e737a4cad1a4 100644
--- a/parquet/benches/arrow_reader_clickbench.rs
+++ b/parquet/benches/arrow_reader_clickbench.rs
@@ -35,7 +35,7 @@ use arrow::compute::{like, nlike, or};
 use arrow_array::types::{Int16Type, Int32Type, Int64Type};
 use arrow_array::{ArrayRef, ArrowPrimitiveType, BooleanArray, PrimitiveArray, StringViewArray};
 use arrow_schema::{ArrowError, DataType, Schema};
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{Criterion, criterion_group, criterion_main};
 use futures::StreamExt;
 use parquet::arrow::arrow_reader::{
     ArrowPredicate, ArrowPredicateFn, ArrowReaderMetadata, ArrowReaderOptions,
@@ -580,14 +580,13 @@ fn hits_1() -> &'static Path {
 
     let current_dir = std::env::current_dir().expect("Failed to get current directory");
     println!(
-        "Looking for ClickBench files starting in current_dir and all parent directories: {:?}",
-        current_dir
+        "Looking for ClickBench files starting in current_dir and all parent directories: {current_dir:?}"
+
     );
 
     let Some(hits_1_path) = find_file_if_exists(current_dir.clone(), "hits_1.parquet") else {
         eprintln!(
-            "Could not find hits_1.parquet in directory or parents: {:?}. Download it via",
-            current_dir
+            "Could not find hits_1.parquet in directory or parents: {current_dir:?}. Download it via",
         );
         eprintln!();
         eprintln!("wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet");
diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index 2e44e5aea0bc..331f5617ca8a 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -57,11 +57,11 @@ use arrow::compute::and;
 use arrow::compute::kernels::cmp::{eq, gt, lt, neq};
 use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
 use arrow::record_batch::RecordBatch;
-use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
 use arrow_array::StringViewArray;
+use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
 use arrow_cast::pretty::pretty_format_batches;
 use bytes::Bytes;
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use futures::future::BoxFuture;
 use futures::{FutureExt, StreamExt};
 use parquet::arrow::arrow_reader::{
@@ -70,9 +70,9 @@ use parquet::arrow::arrow_reader::{
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask};
 use parquet::basic::Compression;
-use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
 use parquet::file::properties::WriterProperties;
-use rand::{rngs::StdRng, Rng, SeedableRng};
+use rand::{Rng, SeedableRng, rngs::StdRng};
 use std::ops::Range;
 use std::sync::Arc;
 
@@ -341,7 +341,7 @@ impl std::fmt::Display for FilterType {
             FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
             FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
         };
-        write!(f, "{}", s)
+        write!(f, "{s}")
     }
 }
 
@@ -461,7 +461,7 @@ fn benchmark_filters_and_projections(c: &mut Criterion) {
             let projection_mask = ProjectionMask::roots(schema_descr, output_projection.clone());
             let pred_mask = ProjectionMask::roots(schema_descr, filter_col.clone());
 
-            let benchmark_name = format!("{filter_type:?}/{proj_case}",);
+            let benchmark_name = format!("{filter_type}/{proj_case}",);
 
             // run the benchmark for the async reader
             let bench_id = BenchmarkId::new(benchmark_name.clone(), "async");
@@ -550,7 +550,8 @@ struct InMemoryReader {
 
 impl InMemoryReader {
     fn try_new(inner: &Bytes) -> parquet::errors::Result<Self> {
-        let mut metadata_reader = ParquetMetaDataReader::new().with_page_indexes(true);
+        let mut metadata_reader =
+            ParquetMetaDataReader::new().with_page_index_policy(PageIndexPolicy::Required);
         metadata_reader.try_parse(inner)?;
         let metadata = metadata_reader.finish().map(Arc::new)?;
 
diff --git a/parquet/benches/arrow_statistics.rs b/parquet/benches/arrow_statistics.rs
index ebc2fb38a7ec..f825883e3264 100644
--- a/parquet/benches/arrow_statistics.rs
+++ b/parquet/benches/arrow_statistics.rs
@@ -23,10 +23,10 @@ use arrow_schema::{
     DataType::{self, *},
     Field, Schema,
 };
-use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use parquet::{arrow::arrow_reader::ArrowReaderOptions, file::properties::WriterProperties};
 use parquet::{
-    arrow::{arrow_reader::ArrowReaderBuilder, ArrowWriter},
+    arrow::{ArrowWriter, arrow_reader::ArrowReaderBuilder},
     file::properties::EnabledStatistics,
 };
 use std::sync::Arc;
diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 4166d962b550..b92f0788b2fc 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -18,21 +18,25 @@
 #[macro_use]
 extern crate criterion;
 
-use criterion::{Criterion, Throughput};
-use std::env;
-use std::fs::File;
+use criterion::{Bencher, Criterion, Throughput};
+use parquet::arrow::arrow_writer::{ArrowRowGroupWriterFactory, compute_leaves};
+use parquet::basic::{Compression, ZstdLevel};
 
 extern crate arrow;
 extern crate parquet;
 
+use std::hint::black_box;
+use std::io::Empty;
 use std::sync::Arc;
 
 use arrow::datatypes::*;
 use arrow::util::bench_util::{create_f16_array, create_f32_array, create_f64_array};
 use arrow::{record_batch::RecordBatch, util::data_gen::*};
 use arrow_array::RecordBatchOptions;
-use parquet::file::properties::WriterProperties;
-use parquet::{arrow::ArrowWriter, errors::Result};
+use parquet::arrow::ArrowSchemaConverter;
+use parquet::errors::Result;
+use parquet::file::properties::{WriterProperties, WriterVersion};
+use parquet::file::writer::SerializedFileWriter;
 
 fn create_primitive_bench_batch(
     size: usize,
@@ -333,197 +337,134 @@ fn _create_nested_bench_batch(
     )?)
 }
 
-#[inline]
-fn write_batch(batch: &RecordBatch) -> Result<()> {
-    write_batch_with_option(batch, None)
-}
-
-#[inline]
-fn write_batch_enable_bloom_filter(batch: &RecordBatch) -> Result<()> {
-    let option = WriterProperties::builder()
-        .set_bloom_filter_enabled(true)
-        .build();
+fn write_batch_with_option(
+    bench: &mut Bencher,
+    batch: &RecordBatch,
+    props: Option<WriterProperties>,
+) -> Result<()> {
+    let mut file = Empty::default();
+    let props = Arc::new(props.unwrap_or_default());
+    let parquet_schema = ArrowSchemaConverter::new()
+        .with_coerce_types(props.coerce_types())
+        .convert(batch.schema_ref())?;
+    let writer = SerializedFileWriter::new(&mut file, parquet_schema.root_schema_ptr(), props)?;
+    let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&writer, batch.schema());
+
+    bench.iter(|| {
+        let mut row_group = row_group_writer_factory.create_column_writers(0).unwrap();
+
+        let mut writers = row_group.iter_mut();
+        for (field, column) in batch
+            .schema()
+            .fields()
+            .iter()
+            .zip(black_box(batch).columns())
+        {
+            for leaf in compute_leaves(field.as_ref(), column).unwrap() {
+                writers.next().unwrap().write(&leaf).unwrap()
+            }
+        }
+
+        for writer in row_group.into_iter() {
+            black_box(writer.close()).unwrap();
+        }
+    });
 
-    write_batch_with_option(batch, Some(option))
+    Ok(())
 }
 
-#[inline]
-fn write_batch_with_option(batch: &RecordBatch, props: Option<WriterProperties>) -> Result<()> {
-    let path = env::temp_dir().join("arrow_writer.temp");
-    let file = File::create(path).unwrap();
-    let mut writer = ArrowWriter::try_new(file, batch.schema(), props)?;
+fn create_batches() -> Vec<(&'static str, RecordBatch)> {
+    const BATCH_SIZE: usize = 4096;
 
-    writer.write(batch)?;
-    writer.close()?;
-    Ok(())
-}
+    let mut batches = vec![];
 
-fn bench_primitive_writer(c: &mut Criterion) {
-    let batch = create_primitive_bench_batch(4096, 0.25, 0.75).unwrap();
-    let mut group = c.benchmark_group("write_batch primitive");
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values primitive", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_primitive_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("primitive", batch));
 
-    group.bench_function("4096 values primitive with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    let batch = create_primitive_bench_batch_non_null(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("primitive_non_null", batch));
 
-    let batch = create_primitive_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values primitive non-null", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_bool_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("bool", batch));
 
-    group.bench_function("4096 values primitive non-null with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    let batch = create_bool_bench_batch_non_null(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("bool_non_null", batch));
 
-    let batch = create_bool_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values bool", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_string_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("string", batch));
 
-    let batch = create_bool_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values bool non-null", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_string_and_binary_view_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("string_and_binary_view", batch));
 
-    let batch = create_string_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values string", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_string_dictionary_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("string_dictionary", batch));
 
-    group.bench_function("4096 values string with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    let batch = create_string_bench_batch_non_null(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("string_non_null", batch));
 
-    let batch = create_string_and_binary_view_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values string", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_float_bench_batch_with_nans(BATCH_SIZE, 0.5).unwrap();
+    batches.push(("float_with_nans", batch));
 
-    group.bench_function("4096 values string with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    let batch = create_list_primitive_bench_batch(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("list_primitive", batch));
 
-    let batch = create_string_dictionary_bench_batch(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values string dictionary", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let batch = create_list_primitive_bench_batch_non_null(BATCH_SIZE, 0.25, 0.75).unwrap();
+    batches.push(("list_primitive_non_null", batch));
 
-    group.bench_function("4096 values string dictionary with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    batches
+}
 
-    let batch = create_string_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values string non-null", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+fn create_writer_props() -> Vec<(&'static str, WriterProperties)> {
+    let mut props = vec![];
 
-    group.bench_function("4096 values string non-null with bloom filter", |b| {
-        b.iter(|| write_batch_enable_bloom_filter(&batch).unwrap())
-    });
+    props.push(("default", Default::default()));
 
-    let batch = create_float_bench_batch_with_nans(4096, 0.5).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values float with NaNs", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let prop = WriterProperties::builder()
+        .set_bloom_filter_enabled(true)
+        .build();
+    props.push(("bloom_filter", prop));
 
-    group.finish();
-}
+    let prop = WriterProperties::builder()
+        .set_writer_version(WriterVersion::PARQUET_2_0)
+        .build();
+    props.push(("parquet_2", prop));
 
-// This bench triggers a write error, it is ignored for now
-fn bench_nested_writer(c: &mut Criterion) {
-    let batch = create_list_primitive_bench_batch(4096, 0.25, 0.75).unwrap();
-    let mut group = c.benchmark_group("write_batch nested");
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values primitive list", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let prop = WriterProperties::builder()
+        .set_compression(Compression::ZSTD(ZstdLevel::default()))
+        .build();
+    props.push(("zstd", prop));
 
-    let batch = create_list_primitive_bench_batch_non_null(4096, 0.25, 0.75).unwrap();
-    group.throughput(Throughput::Bytes(
-        batch
-            .columns()
-            .iter()
-            .map(|f| f.get_array_memory_size() as u64)
-            .sum(),
-    ));
-    group.bench_function("4096 values primitive list non-null", |b| {
-        b.iter(|| write_batch(&batch).unwrap())
-    });
+    let prop = WriterProperties::builder()
+        .set_compression(Compression::ZSTD(ZstdLevel::default()))
+        .set_writer_version(WriterVersion::PARQUET_2_0)
+        .build();
+    props.push(("zstd_parquet_2", prop));
+
+    props
+}
 
-    group.finish();
+fn bench_all_writers(c: &mut Criterion) {
+    let batches = create_batches();
+    let props = create_writer_props();
+
+    for (batch_name, batch) in &batches {
+        let mut group = c.benchmark_group(*batch_name);
+        group.throughput(Throughput::Bytes(
+            batch
+                .columns()
+                .iter()
+                .map(|f| f.get_array_memory_size() as u64)
+                .sum(),
+        ));
+
+        for (prop_name, prop) in &props {
+            group.bench_function(*prop_name, |b| {
+                write_batch_with_option(b, batch, Some(prop.clone())).unwrap()
+            });
+        }
+        group.finish();
+    }
 }
 
-criterion_group!(benches, bench_primitive_writer, bench_nested_writer);
+criterion_group!(benches, bench_all_writers);
 criterion_main!(benches);
diff --git a/parquet/benches/encoding.rs b/parquet/benches/encoding.rs
index 68f215d4ea78..65c2ec3d37ee 100644
--- a/parquet/benches/encoding.rs
+++ b/parquet/benches/encoding.rs
@@ -21,7 +21,7 @@ use parquet::basic::{Encoding, Type as ParquetType};
 use parquet::data_type::{
     DataType, DoubleType, FixedLenByteArray, FixedLenByteArrayType, FloatType,
 };
-use parquet::decoding::{get_decoder, Decoder};
+use parquet::decoding::{Decoder, get_decoder};
 use parquet::encoding::get_encoder;
 use parquet::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type};
 use rand::prelude::*;
@@ -52,7 +52,7 @@ fn bench_typed<T: DataType>(
         0,
         ColumnPath::new(vec![]),
     ));
-    c.bench_function(&format!("encoding: {}", name), |b| {
+    c.bench_function(&format!("encoding: {name}"), |b| {
         b.iter(|| {
             let mut encoder = get_encoder::<T>(encoding, &column_desc_ptr).unwrap();
             encoder.put(values).unwrap();
@@ -66,7 +66,7 @@ fn bench_typed<T: DataType>(
     println!("{} encoded as {} bytes", name, encoded.len(),);
 
     let mut buffer = vec![T::T::default(); values.len()];
-    c.bench_function(&format!("decoding: {}", name), |b| {
+    c.bench_function(&format!("decoding: {name}"), |b| {
         b.iter(|| {
             let mut decoder: Box<dyn Decoder<T>> =
                 get_decoder(column_desc_ptr.clone(), encoding).unwrap();
diff --git a/parquet/benches/metadata.rs b/parquet/benches/metadata.rs
index c817385f6ba9..c9a6cf3b762c 100644
--- a/parquet/benches/metadata.rs
+++ b/parquet/benches/metadata.rs
@@ -15,11 +15,152 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::hint::black_box;
+use std::sync::Arc;
+
+use parquet::basic::{Encoding, PageType, Type as PhysicalType};
+use parquet::file::metadata::{
+    ColumnChunkMetaData, FileMetaData, LevelHistogram, PageEncodingStats, ParquetMetaData,
+    ParquetMetaDataOptions, ParquetMetaDataReader, ParquetMetaDataWriter, ParquetStatisticsPolicy,
+    RowGroupMetaData,
+};
+use parquet::file::statistics::Statistics;
+use parquet::file::writer::TrackedWrite;
+use parquet::schema::parser::parse_message_type;
+use parquet::schema::types::{
+    ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as SchemaType,
+};
+use rand::Rng;
+
+use arrow::util::test_util::seedable_rng;
 use bytes::Bytes;
-use criterion::*;
+use criterion::{Criterion, criterion_group, criterion_main};
 use parquet::file::reader::SerializedFileReader;
 use parquet::file::serialized_reader::ReadOptionsBuilder;
 
+const NUM_COLUMNS: usize = 10_000;
+const NUM_ROW_GROUPS: usize = 10;
+
+/// Builds a synthetic `ParquetMetaData` (NUM_ROW_GROUPS row groups, each with NUM_COLUMNS float
+/// column chunks carrying randomized sizes and offsets) and returns the bytes written for it.
+/// `is_nullable` adds an unencoded byte size and a definition level histogram to every chunk;
+/// `has_lists` additionally adds a repetition level histogram, but only when `is_nullable` is set.
+fn encoded_meta(is_nullable: bool, has_lists: bool) -> Vec<u8> {
+    let mut rng = seedable_rng();
+
+    let mut column_desc_ptrs: Vec<ColumnDescPtr> = Vec::with_capacity(NUM_COLUMNS);
+    let mut message_type = "message test_schema {".to_string();
+    for i in 0..NUM_COLUMNS {
+        message_type.push_str(&format!("REQUIRED FLOAT {i};"));
+        column_desc_ptrs.push(ColumnDescPtr::new(ColumnDescriptor::new(
+            Arc::new(
+                SchemaType::primitive_type_builder(&i.to_string(), PhysicalType::FLOAT)
+                    .build()
+                    .unwrap(),
+            ),
+            0,
+            0,
+            ColumnPath::new(vec![]),
+        )));
+    }
+    message_type.push('}');
+
+    let schema_descr = parse_message_type(&message_type)
+        .map(|t| Arc::new(SchemaDescriptor::new(Arc::new(t))))
+        .unwrap();
+
+    let stats = Statistics::float(Some(rng.random()), Some(rng.random()), None, Some(0), false);
+
+    let (var_size, rep_hist, def_hist) = match (is_nullable, has_lists) {
+        (true, true) => (
+            Some(rng.random_range(0..1000000000)),
+            Some(LevelHistogram::from(vec![1500i64; 2])),
+            Some(LevelHistogram::from(vec![1000i64; 3])),
+        ),
+        (true, false) => {
+            let def_hist = LevelHistogram::from(vec![1500i64; 2]);
+            (Some(rng.random_range(0..1000000000)), None, Some(def_hist))
+        }
+        (_, _) => (None, None, None),
+    };
+
+    let row_groups = (0..NUM_ROW_GROUPS)
+        .map(|i| {
+            let columns = (0..NUM_COLUMNS)
+                .map(|j| {
+                    ColumnChunkMetaData::builder(column_desc_ptrs[j].clone())
+                        .set_encodings(vec![Encoding::PLAIN, Encoding::RLE_DICTIONARY])
+                        .set_compression(parquet::basic::Compression::UNCOMPRESSED)
+                        .set_num_values(rng.random_range(1..1000000))
+                        .set_total_compressed_size(rng.random_range(50000..5000000))
+                        .set_data_page_offset(rng.random_range(4..2000000000))
+                        .set_dictionary_page_offset(Some(rng.random_range(4..2000000000)))
+                        .set_statistics(stats.clone())
+                        .set_page_encoding_stats(vec![
+                            PageEncodingStats {
+                                page_type: PageType::DICTIONARY_PAGE,
+                                encoding: Encoding::PLAIN,
+                                count: 1,
+                            },
+                            PageEncodingStats {
+                                page_type: PageType::DATA_PAGE,
+                                encoding: Encoding::RLE_DICTIONARY,
+                                count: 10,
+                            },
+                        ])
+                        .set_offset_index_offset(Some(rng.random_range(0..2000000000)))
+                        .set_offset_index_length(Some(rng.random_range(1..100000)))
+                        .set_column_index_offset(Some(rng.random_range(0..2000000000)))
+                        .set_column_index_length(Some(rng.random_range(1..100000)))
+                        .set_unencoded_byte_array_data_bytes(var_size)
+                        .set_repetition_level_histogram(rep_hist.clone())
+                        .set_definition_level_histogram(def_hist.clone())
+                        .build()
+                        .unwrap()
+                })
+                .collect();
+
+            RowGroupMetaData::builder(schema_descr.clone())
+                .set_column_metadata(columns)
+                .set_total_byte_size(rng.random_range(1..2000000000))
+                .set_num_rows(rng.random_range(1..10000000000))
+                .set_ordinal(i as i16)
+                .build()
+                .unwrap()
+        })
+        .collect();
+
+    let file_metadata = FileMetaData::new(
+        1,
+        rng.random_range(1..2000000000),
+        Some("parquet-rs".into()),
+        None,
+        schema_descr,
+        None,
+    );
+
+    let metadata = ParquetMetaData::new(file_metadata, row_groups);
+    let mut buffer = Vec::with_capacity(1024);
+    {
+        let buf = TrackedWrite::new(&mut buffer);
+        let writer = ParquetMetaDataWriter::new_with_tracked(buf, &metadata);
+        writer.finish().unwrap();
+    }
+
+    buffer
+}
+
+/// Extracts the encoded metadata from `data`: the last 8 bytes start with the metadata length
+/// as a little-endian u32, and the metadata sits immediately before those 8 bytes.
+fn get_footer_bytes(data: Bytes) -> Bytes {
+    // The metadata stops where the trailing 8-byte footer begins.
+    let meta_end = data.len() - 8;
+    let len_bytes: [u8; 4] = data[meta_end..meta_end + 4].try_into().unwrap();
+    let meta_len = u32::from_le_bytes(len_bytes) as usize;
+    let meta_start = meta_end - meta_len;
+    data.slice(meta_start..meta_end)
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     // Read file into memory to isolate filesystem performance
     let file = "../parquet-testing/data/alltypes_tiny_pages.parquet";
@@ -27,15 +168,137 @@ fn criterion_benchmark(c: &mut Criterion) {
     let data = Bytes::from(data);
 
     c.bench_function("open(default)", |b| {
-        b.iter(|| SerializedFileReader::new(data.clone()).unwrap())
+        b.iter(|| {
+            let options = ReadOptionsBuilder::new()
+                .with_encoding_stats_as_mask(false)
+                .build();
+            SerializedFileReader::new_with_options(data.clone(), options).unwrap()
+        })
     });
 
     c.bench_function("open(page index)", |b| {
         b.iter(|| {
-            let options = ReadOptionsBuilder::new().with_page_index().build();
+            let options = ReadOptionsBuilder::new()
+                .with_page_index()
+                .with_encoding_stats_as_mask(false)
+                .build();
             SerializedFileReader::new_with_options(data.clone(), options).unwrap()
         })
     });
+
+    let meta_data = get_footer_bytes(data.clone());
+    let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
+    c.bench_function("decode parquet metadata", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
+                .unwrap();
+        })
+    });
+
+    let schema = ParquetMetaDataReader::decode_schema(&meta_data).unwrap();
+    let options = ParquetMetaDataOptions::new()
+        .with_schema(schema)
+        .with_encoding_stats_as_mask(false);
+    c.bench_function("decode metadata with schema", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
+                .unwrap();
+        })
+    });
+
+    let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(true);
+    c.bench_function("decode metadata with stats mask", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
+                .unwrap();
+        })
+    });
+
+    let options =
+        ParquetMetaDataOptions::new().with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll);
+    c.bench_function("decode metadata with skip PES", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
+                .unwrap();
+        })
+    });
+
+    let options = ParquetMetaDataOptions::new()
+        .with_column_stats_policy(ParquetStatisticsPolicy::SkipAll)
+        .with_encoding_stats_as_mask(false);
+    c.bench_function("decode metadata with skip column stats", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&meta_data, Some(&options))
+                .unwrap();
+        })
+    });
+
+    let buf: Bytes = black_box(encoded_meta(false, false)).into();
+    let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
+    c.bench_function("decode parquet metadata (wide)", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let schema = ParquetMetaDataReader::decode_schema(&buf).unwrap();
+    let options = ParquetMetaDataOptions::new()
+        .with_schema(schema)
+        .with_encoding_stats_as_mask(false);
+    c.bench_function("decode metadata (wide) with schema", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(true);
+    c.bench_function("decode metadata (wide) with stats mask", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let options =
+        ParquetMetaDataOptions::new().with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll);
+    c.bench_function("decode metadata (wide) with skip PES", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let options = ParquetMetaDataOptions::new()
+        .with_column_stats_policy(ParquetStatisticsPolicy::SkipAll)
+        .with_encoding_stats_as_mask(false);
+    c.bench_function("decode metadata (wide) with skip column stats", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let buf: Bytes = black_box(encoded_meta(true, true)).into();
+    c.bench_function("decode parquet metadata w/ size stats (wide)", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata(&buf).unwrap();
+        })
+    });
+
+    let options =
+        ParquetMetaDataOptions::new().with_size_stats_policy(ParquetStatisticsPolicy::SkipAll);
+    c.bench_function("decode metadata (wide) with skip size stats", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
+
+    let options = ParquetMetaDataOptions::new()
+        .with_column_stats_policy(ParquetStatisticsPolicy::SkipAll)
+        .with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll)
+        .with_size_stats_policy(ParquetStatisticsPolicy::SkipAll);
+    c.bench_function("decode metadata (wide) with skip all stats", |b| {
+        b.iter(|| {
+            ParquetMetaDataReader::decode_metadata_with_options(&buf, Some(&options)).unwrap();
+        })
+    });
 }
 
 criterion_group!(benches, criterion_benchmark);
diff --git a/parquet/benches/parquet_round_trip.rs b/parquet/benches/parquet_round_trip.rs
new file mode 100644
index 000000000000..b239c3ccc759
--- /dev/null
+++ b/parquet/benches/parquet_round_trip.rs
@@ -0,0 +1,473 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{ArrayRef, RecordBatch};
+use arrow::datatypes::{DataType, Field, Float32Type, Float64Type, Int32Type, Int64Type, Schema};
+use arrow::util::bench_util::{
+    create_binary_array_with_len_range_and_prefix_and_seed, create_primitive_array_with_seed,
+    create_string_array_with_len_range_and_prefix_and_seed,
+};
+use arrow_array::{FixedSizeBinaryArray, StringViewArray};
+use bytes::Bytes;
+use criterion::{Criterion, criterion_group, criterion_main};
+use parquet::arrow::ArrowWriter;
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use parquet::basic::Encoding;
+use parquet::file::properties::WriterProperties;
+use rand::{
+    Rng, SeedableRng,
+    distr::{Alphanumeric, StandardUniform},
+    prelude::StdRng,
+};
+use std::sync::Arc;
+
+#[derive(Copy, Clone)]
+pub enum ColumnType {
+    String(usize), // Utf8 columns; payload is the upper string length bound given to the generator
+    StringView(usize), // Utf8View columns; payload is the upper string length bound
+    Binary(usize), // Binary columns; payload is the upper value length bound given to the generator
+    FixedLen(i32), // FixedSizeBinary columns; payload is the byte width of every value
+    Int32,  // Int32 columns
+    Int64,  // Int64 columns
+    Float,  // Float32 columns
+    Double, // Float64 columns
+}
+
+/// Seeded variant of `arrow::util::bench_util::create_fsb_array`.
+///
+/// Builds a `FixedSizeBinaryArray` of `size` slots from an RNG seeded with `seed`. Each slot is
+/// null when a random `f32` draw falls below `null_density`, else `fixed_len` random bytes.
+fn create_fsb_array_with_seed(
+    size: usize,
+    null_density: f32,
+    fixed_len: i32,
+    seed: u64,
+) -> FixedSizeBinaryArray {
+    let mut rng = StdRng::seed_from_u64(seed);
+    let rng = &mut rng;
+    let value_len = fixed_len as usize;
+
+    let values = (0..size).map(|_| {
+        // The null decision is drawn before the payload bytes for every slot.
+        if rng.random::<f32>() < null_density {
+            return None;
+        }
+        let bytes: Vec<u8> = rng
+            .sample_iter::<u8, _>(StandardUniform)
+            .take(value_len)
+            .collect();
+        Some(bytes)
+    });
+
+    FixedSizeBinaryArray::try_from_sparse_iter_with_size(values, fixed_len).unwrap()
+}
+
+/// Seeded variant of `arrow::util::bench_util::create_string_view_array_with_max_len`.
+/// Builds `size` entries from an RNG seeded with `seed`: each is null when a random `f32` draw
+/// falls below `null_density`, else an alphanumeric string of length `max_str_len / 2..max_str_len`.
+pub fn create_string_view_array_with_seed(
+    size: usize,
+    null_density: f32,
+    max_str_len: usize,
+    seed: u64,
+) -> StringViewArray {
+    let mut rng = StdRng::seed_from_u64(seed);
+    let rng = &mut rng;
+    (0..size)
+        .map(|_| {
+            if rng.random::<f32>() < null_density {
+                None
+            } else {
+                // `random_range` panics on an empty range, so clamp the end for `max_str_len == 0`.
+                let str_len = rng.random_range(max_str_len / 2..max_str_len.max(1));
+                let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
+                let value = String::from_utf8(value).unwrap();
+                Some(value)
+            }
+        })
+        .collect()
+}
+
+/// Builds a schema with `num_columns` nullable fields named `col_0`, `col_1`, ... that all use
+/// the Arrow data type matching `column_type`.
+fn schema(column_type: ColumnType, num_columns: usize) -> Arc<Schema> {
+    let data_type = match column_type {
+        ColumnType::Int32 => DataType::Int32,
+        ColumnType::Int64 => DataType::Int64,
+        ColumnType::Float => DataType::Float32,
+        ColumnType::Double => DataType::Float64,
+        ColumnType::String(_) => DataType::Utf8,
+        ColumnType::StringView(_) => DataType::Utf8View,
+        ColumnType::Binary(_) => DataType::Binary,
+        ColumnType::FixedLen(width) => DataType::FixedSizeBinary(width),
+    };
+
+    let make_field = |i: usize| Field::new(format!("col_{i}"), data_type.clone(), true);
+    Arc::new(Schema::new((0..num_columns).map(make_field).collect::<Vec<_>>()))
+}
+/// Builds a `RecordBatch` for `schema` holding `num_columns` generated arrays of `num_rows` rows.
+fn create_batch(
+    schema: &Arc<Schema>,
+    column_type: ColumnType,
+    seed: usize,
+    num_columns: usize,
+    num_rows: usize,
+) -> RecordBatch {
+    let null_density = 0.0001; // handed to every generator below as its null-density argument
+    let mut arrays: Vec<ArrayRef> = vec![]; // one entry per column, pushed in column order
+    match column_type {
+        ColumnType::Binary(max_len) => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i; // differs for each column index `i`
+                let array = create_binary_array_with_len_range_and_prefix_and_seed::<i32>(
+                    num_rows,
+                    null_density,
+                    max_len / 2, // NOTE(review): presumably the minimum value length — confirm
+                    max_len,
+                    &[],
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::String(max_str_len) => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+                    num_rows,
+                    null_density,
+                    max_str_len / 2,
+                    max_str_len,
+                    "",
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::StringView(max_str_len) => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_string_view_array_with_seed(
+                    num_rows,
+                    null_density,
+                    max_str_len,
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::FixedLen(size) => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array =
+                    create_fsb_array_with_seed(num_rows, null_density, size, array_seed as u64);
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::Int32 => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_primitive_array_with_seed::<Int32Type>(
+                    num_rows,
+                    null_density,
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::Int64 => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_primitive_array_with_seed::<Int64Type>(
+                    num_rows,
+                    null_density,
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::Float => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_primitive_array_with_seed::<Float32Type>(
+                    num_rows,
+                    null_density,
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+        ColumnType::Double => {
+            for i in 0..num_columns {
+                let array_seed = seed * num_columns + i;
+                let array = create_primitive_array_with_seed::<Float64Type>(
+                    num_rows,
+                    null_density,
+                    array_seed as u64,
+                );
+                arrays.push(Arc::new(array));
+            }
+        }
+    }
+    RecordBatch::try_new(schema.clone(), arrays).unwrap()
+}
+/// Describes the Parquet file that `file_from_spec` generates for one benchmark case.
+#[derive(Copy, Clone)]
+pub struct ParquetFileSpec {
+    column_type: ColumnType, // data type shared by every column
+    num_columns: usize, // number of columns in the schema
+    num_row_groups: usize, // row group count asserted after the file is written
+    rows_per_row_group: usize, // passed to `set_max_row_group_size`
+    rows_per_page: usize, // passed to `set_data_page_row_count_limit`
+    encoding: Encoding, // passed to `set_encoding`
+    use_dict: bool, // passed to `set_dictionary_enabled`
+}
+// Default sizes that `ParquetFileSpec::new` starts from.
+const DEFAULT_NUM_COLUMNS: usize = 10;
+const DEFAULT_NUM_ROWGROUPS: usize = 10;
+const DEFAULT_ROWS_PER_PAGE: usize = 2_000;
+const DEFAULT_ROWS_PER_ROWGROUP: usize = 10_000;
+
+impl ParquetFileSpec {
+    pub fn new(column_type: ColumnType) -> Self {
+        Self {
+            column_type,
+            num_columns: DEFAULT_NUM_COLUMNS,
+            num_row_groups: DEFAULT_NUM_ROWGROUPS,
+            rows_per_row_group: DEFAULT_ROWS_PER_ROWGROUP,
+            rows_per_page: DEFAULT_ROWS_PER_PAGE,
+            encoding: Encoding::PLAIN, // PLAIN unless `with_encoding` overrides it
+            use_dict: true, // dictionary enabled unless `with_use_dict(false)` is called
+        }
+    }
+
+    /// Returns this spec with `num_columns` replaced.
+    pub fn with_num_columns(mut self, num_columns: usize) -> Self {
+        self.num_columns = num_columns;
+        self
+    }
+
+    /// Returns this spec with `num_row_groups` replaced.
+    pub fn with_num_row_groups(mut self, num_row_groups: usize) -> Self {
+        self.num_row_groups = num_row_groups;
+        self
+    }
+
+    /// Returns this spec with `rows_per_row_group` replaced.
+    pub fn with_rows_per_row_group(mut self, rows_per_row_group: usize) -> Self {
+        self.rows_per_row_group = rows_per_row_group;
+        self
+    }
+
+    /// Returns this spec with `rows_per_page` replaced.
+    pub fn with_rows_per_page(mut self, rows_per_page: usize) -> Self {
+        self.rows_per_page = rows_per_page;
+        self
+    }
+
+    /// Returns this spec with `encoding` replaced.
+    pub fn with_encoding(mut self, encoding: Encoding) -> Self {
+        self.encoding = encoding;
+        self
+    }
+
+    /// Returns this spec with `use_dict` replaced.
+    pub fn with_use_dict(mut self, use_dict: bool) -> Self {
+        self.use_dict = use_dict;
+        self
+    }
+}
+/// Writes the Parquet file described by `spec` into `buffer`, then asserts its row group and row counts.
+fn file_from_spec(spec: ParquetFileSpec, buffer: &mut Vec<u8>) {
+    const SEED: usize = 31; // fixed seed handed to `create_batch`
+    let num_rows = spec.rows_per_row_group.min(100); // rows in the reused batch, at most 100
+    let rows_to_write = spec.num_row_groups * spec.rows_per_row_group;
+
+    let schema = schema(spec.column_type, spec.num_columns);
+    let props = WriterProperties::builder()
+        .set_max_row_group_size(spec.rows_per_row_group)
+        .set_data_page_row_count_limit(spec.rows_per_page)
+        .set_encoding(spec.encoding)
+        .set_dictionary_enabled(spec.use_dict)
+        .set_compression(parquet::basic::Compression::UNCOMPRESSED)
+        .build();
+
+    let mut writer = ArrowWriter::try_new(buffer, schema.clone(), Some(props)).unwrap();
+
+    // Reuse one batch for every write; otherwise data generation would dominate the timing.
+    let batch = create_batch(&schema, spec.column_type, SEED, spec.num_columns, num_rows);
+
+    let mut rows_written = 0;
+    while rows_written < rows_to_write {
+        writer.write(&batch).unwrap();
+        rows_written += num_rows; // NOTE(review): overshoots unless `num_rows` divides `rows_to_write`
+    }
+
+    let parquet_metadata = writer.close().unwrap();
+    assert_eq!(parquet_metadata.num_row_groups(), spec.num_row_groups);
+    assert_eq!(
+        parquet_metadata.file_metadata().num_rows() as usize,
+        rows_to_write
+    );
+}
+
+/// Benchmarks writing the file described by `spec` and then reading it back; `msg` labels both.
+fn read_write(c: &mut Criterion, spec: ParquetFileSpec, msg: &str) {
+    let mut buffer = Vec::with_capacity(1_000_000);
+
+    // write once to size the buffer
+    file_from_spec(spec, &mut buffer);
+
+    c.bench_function(&format!("write {msg}"), |b| {
+        b.iter(|| {
+            // The writer appends to `buffer`, so reset it every iteration to hold exactly one file.
+            buffer.clear();
+            file_from_spec(spec, &mut buffer)
+        })
+    });
+
+    let file_bytes = Bytes::from(buffer);
+    c.bench_function(&format!("read {msg}"), |b| {
+        b.iter(|| {
+            let record_reader = ParquetRecordBatchReaderBuilder::try_new(file_bytes.clone())
+                .unwrap()
+                .build()
+                .unwrap();
+            let num_rows: usize = record_reader.map(|batch| batch.unwrap().num_rows()).sum();
+            assert_eq!(num_rows, spec.num_row_groups * spec.rows_per_row_group);
+        })
+    });
+}
+/// Benchmarks integer columns with dictionary, PLAIN, DELTA_BINARY_PACKED and BYTE_STREAM_SPLIT.
+fn int_benches(c: &mut Criterion, column_type: ColumnType) {
+    let ctype = match column_type {
+        ColumnType::Int32 => "int32",
+        ColumnType::Int64 => "int64",
+        _ => unreachable!(), // panics for any other column type
+    };
+
+    let spec = ParquetFileSpec::new(column_type).with_use_dict(true);
+    read_write(c, spec, &format!("{ctype} dict"));
+    // Each later `spec` is derived from the previous one, so the dictionary stays disabled.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("{ctype} plain"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_BINARY_PACKED);
+    read_write(c, spec, &format!("{ctype} delta_binary"));
+
+    let spec = spec.with_encoding(Encoding::BYTE_STREAM_SPLIT);
+    read_write(c, spec, &format!("{ctype} byte_stream_split"));
+}
+/// Benchmarks floating-point columns with dictionary, PLAIN and BYTE_STREAM_SPLIT.
+fn float_benches(c: &mut Criterion, column_type: ColumnType) {
+    let ctype = match column_type {
+        ColumnType::Float => "f32",
+        ColumnType::Double => "f64",
+        _ => unreachable!(), // panics for any other column type
+    };
+
+    let spec = ParquetFileSpec::new(column_type).with_use_dict(true);
+    read_write(c, spec, &format!("{ctype} dict"));
+    // Each later `spec` is derived from the previous one, so the dictionary stays disabled.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("{ctype} plain"));
+
+    let spec = spec.with_encoding(Encoding::BYTE_STREAM_SPLIT);
+    read_write(c, spec, &format!("{ctype} byte_stream_split"));
+}
+/// Benchmarks 5-column String and StringView files of `max_str_len` under four encodings each.
+fn string_benches(c: &mut Criterion, max_str_len: usize) {
+    let spec = ParquetFileSpec::new(ColumnType::String(max_str_len))
+        .with_num_columns(5)
+        .with_use_dict(true);
+    read_write(c, spec, &format!("String({max_str_len}) dict"));
+    // Each later `spec` is derived from the previous one, so the dictionary stays disabled.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("String({max_str_len}) plain"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_LENGTH_BYTE_ARRAY);
+    read_write(c, spec, &format!("String({max_str_len}) delta_length"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_BYTE_ARRAY);
+    read_write(c, spec, &format!("String({max_str_len}) delta_byte_array"));
+
+    let spec = ParquetFileSpec::new(ColumnType::StringView(max_str_len))
+        .with_num_columns(5)
+        .with_use_dict(true);
+    read_write(c, spec, &format!("StringView({max_str_len}) dict"));
+    // Same sequence for StringView: dictionary off for the remaining encodings.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("StringView({max_str_len}) plain"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_LENGTH_BYTE_ARRAY);
+    read_write(c, spec, &format!("StringView({max_str_len}) delta_length"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_BYTE_ARRAY);
+    read_write(
+        c,
+        spec,
+        &format!("StringView({max_str_len}) delta_byte_array"),
+    );
+}
+/// Benchmarks 5-column Binary files of `max_len` with dictionary, PLAIN and both delta encodings.
+fn binary_benches(c: &mut Criterion, max_len: usize) {
+    let spec = ParquetFileSpec::new(ColumnType::Binary(max_len))
+        .with_num_columns(5)
+        .with_use_dict(true);
+    read_write(c, spec, &format!("Binary({max_len}) dict"));
+    // Each later `spec` is derived from the previous one, so the dictionary stays disabled.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("Binary({max_len}) plain"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_LENGTH_BYTE_ARRAY);
+    read_write(c, spec, &format!("Binary({max_len}) delta_length"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_BYTE_ARRAY);
+    read_write(c, spec, &format!("Binary({max_len}) delta_byte_array"));
+}
+/// Benchmarks 5-column fixed-length (`len` bytes) files under four encoding configurations.
+fn flba_benches(c: &mut Criterion, len: i32) {
+    let spec = ParquetFileSpec::new(ColumnType::FixedLen(len))
+        .with_num_columns(5)
+        .with_use_dict(true);
+    read_write(c, spec, &format!("Fixed({len}) dict"));
+    // Each later `spec` is derived from the previous one, so the dictionary stays disabled.
+    let spec = spec.with_use_dict(false).with_encoding(Encoding::PLAIN);
+    read_write(c, spec, &format!("Fixed({len}) plain"));
+
+    let spec = spec.with_encoding(Encoding::BYTE_STREAM_SPLIT);
+    read_write(c, spec, &format!("Fixed({len}) byte_stream_split"));
+
+    let spec = spec.with_encoding(Encoding::DELTA_BYTE_ARRAY);
+    read_write(c, spec, &format!("Fixed({len}) delta_byte_array"));
+}
+/// Runs every benchmark family; numeric arguments become the `ColumnType` length payloads.
+fn criterion_benchmark(c: &mut Criterion) {
+    int_benches(c, ColumnType::Int32);
+    int_benches(c, ColumnType::Int64);
+    float_benches(c, ColumnType::Float);
+    float_benches(c, ColumnType::Double);
+    string_benches(c, 20);
+    string_benches(c, 100);
+    binary_benches(c, 20);
+    binary_benches(c, 100);
+    flba_benches(c, 2);
+    flba_benches(c, 16);
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/parquet/benches/row_selection_cursor.rs b/parquet/benches/row_selection_cursor.rs
new file mode 100644
index 000000000000..49c9e6d68acf
--- /dev/null
+++ b/parquet/benches/row_selection_cursor.rs
@@ -0,0 +1,501 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::hint;
+use std::sync::Arc;
+
+use arrow_array::builder::StringViewBuilder;
+use arrow_array::{ArrayRef, Float64Array, Int32Array, RecordBatch, StringViewArray};
+use arrow_schema::{DataType, Field, Schema};
+use bytes::Bytes;
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use parquet::arrow::ArrowWriter;
+use parquet::arrow::arrow_reader::{
+    ParquetRecordBatchReaderBuilder, RowSelection, RowSelectionPolicy, RowSelector,
+};
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+const TOTAL_ROWS: usize = 1 << 20; // rows in every generated file and covered by every selection
+const BATCH_SIZE: usize = 1 << 10; // reader batch size used by `run_read`
+const BASE_SEED: u64 = 0xA55AA55A; // XORed with a shifted index to derive each sweep's seed
+const AVG_SELECTOR_LENGTHS: &[usize] = &[4, 8, 12, 16, 20, 24, 28, 32, 36, 40]; // swept per variant
+const COLUMN_WIDTHS: &[usize] = &[2, 4, 8, 16, 32]; // column counts for the "columns" suite
+const UTF8VIEW_LENS: &[usize] = &[4, 8, 16, 32, 64, 128, 256]; // value lengths for "utf8view-len"
+const BENCH_MODES: &[BenchMode] = &[BenchMode::ReadSelector, BenchMode::ReadMask]; // run per input
+/// A named column data type together with the function that builds a batch of it.
+struct DataProfile {
+    name: &'static str, // passed to `bench_over_lengths` as the variant label
+    build_batch: fn(usize) -> RecordBatch, // called with the number of rows to generate
+}
+// Data types exercised by the "dtype" suite in `criterion_benchmark`.
+const DATA_PROFILES: &[DataProfile] = &[
+    DataProfile {
+        name: "int32",
+        build_batch: build_int32_batch,
+    },
+    DataProfile {
+        name: "float64",
+        build_batch: build_float64_batch,
+    },
+    DataProfile {
+        name: "utf8view",
+        build_batch: build_utf8view_batch,
+    },
+];
+/// Registers four sweeps: selection scenarios, data types, column counts and Utf8View lengths.
+fn criterion_benchmark(c: &mut Criterion) {
+    let scenarios = [
+        /* uniform50 (50% selected, constant run lengths, starts with skip)
+        ```text
+        ┌───────────────┐
+        │               │  skip
+        │               │
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │               │  skip
+        │               │
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │      ...      │
+        └───────────────┘
+        ``` */
+        Scenario {
+            name: "uniform50",
+            select_ratio: 0.5,
+            start_with_select: false,
+            distribution: RunDistribution::Constant,
+        },
+        /* spread50 (50% selected, large jitter in run lengths, starts with skip)
+        ```text
+        ┌───────────────┐
+        │               │  skip (long)
+        │               │
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (short)
+        │               │  skip (short)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (long)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │               │  skip (medium)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (medium)
+        │      ...      │
+        └───────────────┘
+        ``` */
+        Scenario {
+            name: "spread50",
+            select_ratio: 0.5,
+            start_with_select: false,
+            distribution: RunDistribution::Uniform { spread: 0.9 },
+        },
+        /* sparse20 (20% selected, bimodal: occasional long runs, starts with skip)
+        ```text
+        ┌───────────────┐
+        │               │  skip (long)
+        │               │
+        │               │
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (short)
+        │               │  skip (long)
+        │               │
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (occasional long)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │      ...      │
+        └───────────────┘
+        ``` */
+        Scenario {
+            name: "sparse20",
+            select_ratio: 0.2,
+            start_with_select: false,
+            distribution: RunDistribution::Bimodal {
+                long_factor: 6.0,
+                long_prob: 0.1,
+            },
+        },
+        /* dense80 (80% selected, bimodal: occasional long runs, starts with select)
+        ```text
+        ┌───────────────┐
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (long)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │               │  skip (short)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (long)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │               │  skip (very short)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│  select (long)
+        │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+        │      ...      │
+        └───────────────┘
+        ``` */
+        Scenario {
+            name: "dense80",
+            select_ratio: 0.8,
+            start_with_select: true,
+            distribution: RunDistribution::Bimodal {
+                long_factor: 4.0,
+                long_prob: 0.05,
+            },
+        },
+    ];
+
+    let base_parquet = build_parquet_data(TOTAL_ROWS, build_int32_batch);
+    let base_scenario = &scenarios[0];
+    // Sweep 1: every scenario against the single-column Int32 file.
+    for (idx, scenario) in scenarios.iter().enumerate() {
+        // The first scenario is a special case for backwards compatibility with
+        // existing benchmark result formats.
+        let suite = if idx == 0 { "len" } else { "scenario" };
+        bench_over_lengths(
+            c,
+            suite,
+            scenario.name,
+            &base_parquet,
+            scenario,
+            BASE_SEED ^ ((idx as u64) << 16),
+        );
+    }
+    // Sweep 2: each data profile, always with the first ("uniform50") scenario.
+    for (profile_idx, profile) in DATA_PROFILES.iter().enumerate() {
+        let parquet_data = build_parquet_data(TOTAL_ROWS, profile.build_batch);
+        bench_over_lengths(
+            c,
+            "dtype",
+            profile.name,
+            &parquet_data,
+            base_scenario,
+            BASE_SEED ^ ((profile_idx as u64) << 24),
+        );
+    }
+    // Sweep 3: varying numbers of Int32 columns.
+    for (offset, &column_count) in COLUMN_WIDTHS.iter().enumerate() {
+        let parquet_data = write_parquet_batch(build_int32_columns_batch(TOTAL_ROWS, column_count));
+        let variant_label = format!("C{:02}", column_count);
+        bench_over_lengths(
+            c,
+            "columns",
+            &variant_label,
+            &parquet_data,
+            base_scenario,
+            BASE_SEED ^ ((offset as u64) << 32),
+        );
+    }
+    // Sweep 4: Utf8View columns whose values all have the same length.
+    for (offset, &len) in UTF8VIEW_LENS.iter().enumerate() {
+        let batch = build_utf8view_batch_with_len(TOTAL_ROWS, len);
+        let parquet_data = write_parquet_batch(batch);
+        let variant_label = format!("utf8view-L{:03}", len);
+        bench_over_lengths(
+            c,
+            "utf8view-len",
+            &variant_label,
+            &parquet_data,
+            base_scenario,
+            BASE_SEED ^ ((offset as u64) << 40),
+        );
+    }
+}
+/// For each average selector length, builds a selection and benchmarks reading it in every mode.
+fn bench_over_lengths(
+    c: &mut Criterion,
+    suite: &str,
+    variant: &str,
+    parquet_data: &Bytes,
+    scenario: &Scenario,
+    seed_base: u64,
+) {
+    for (offset, &avg_len) in AVG_SELECTOR_LENGTHS.iter().enumerate() {
+        let selectors =
+            generate_selectors(avg_len, TOTAL_ROWS, scenario, seed_base + offset as u64);
+        let stats = SelectorStats::new(&selectors); // measured before `selectors` is consumed
+        let selection = RowSelection::from(selectors);
+        let suffix = format!(
+            "{}-{}-{}-L{:02}-avg{:.1}-sel{:02}",
+            suite,
+            scenario.name,
+            variant,
+            avg_len,
+            stats.average_selector_len,
+            (stats.select_ratio * 100.0).round() as u32
+        );
+        // The benchmark id carries both the requested length and the measured statistics.
+        let bench_input = BenchInput {
+            parquet_data: parquet_data.clone(),
+            selection,
+        };
+        // Run the same input once per mode so the policies can be compared.
+        for &mode in BENCH_MODES {
+            c.bench_with_input(
+                BenchmarkId::new(mode.label(), &suffix),
+                &bench_input,
+                |b, input| {
+                    b.iter(|| {
+                        let total = run_read(&input.parquet_data, &input.selection, mode.policy());
+                        hint::black_box(total);
+                    });
+                },
+            );
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
+/// Input handed to each benchmark closure by `bench_over_lengths`.
+struct BenchInput {
+    parquet_data: Bytes, // encoded Parquet file to read
+    selection: RowSelection, // row selection applied while reading
+}
+
+/// Reads `parquet_data` with `selection` applied under `policy` and returns the total
+/// number of rows across all decoded batches.
+fn run_read(parquet_data: &Bytes, selection: &RowSelection, policy: RowSelectionPolicy) -> usize {
+    // Only references are available here, so clones are handed to the builder.
+    let builder = ParquetRecordBatchReaderBuilder::try_new(parquet_data.clone())
+        .unwrap()
+        .with_batch_size(BATCH_SIZE)
+        .with_row_selection(selection.clone())
+        .with_row_selection_policy(policy);
+    let reader = builder.build().unwrap();
+
+    // Decode every batch, panicking on the first error, and add up the row counts.
+    reader
+        .map(|batch| batch.unwrap().num_rows())
+        .sum()
+}
+
+/// Builds `total_rows` rows with `build_batch` and encodes them via `write_parquet_batch`.
+fn build_parquet_data(total_rows: usize, build_batch: fn(usize) -> RecordBatch) -> Bytes {
+    write_parquet_batch(build_batch(total_rows))
+}
+/// Wraps `array` in a batch whose only field is named "value" and declared non-nullable.
+fn build_single_column_batch(data_type: DataType, array: ArrayRef) -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new("value", data_type, false)]));
+    RecordBatch::try_new(schema, vec![array]).unwrap()
+}
+/// Builds a single Int32 column holding `0..total_rows`, each index cast with `as i32`.
+fn build_int32_batch(total_rows: usize) -> RecordBatch {
+    let values = Int32Array::from_iter_values((0..total_rows).map(|v| v as i32));
+    build_single_column_batch(DataType::Int32, Arc::new(values) as ArrayRef)
+}
+/// Builds a single Float64 column holding `0..total_rows`, each index cast with `as f64`.
+fn build_float64_batch(total_rows: usize) -> RecordBatch {
+    let values = Float64Array::from_iter_values((0..total_rows).map(|v| v as f64));
+    build_single_column_batch(DataType::Float64, Arc::new(values) as ArrayRef)
+}
+/// Builds a single Utf8View column that cycles through five fixed strings.
+fn build_utf8view_batch(total_rows: usize) -> RecordBatch {
+    let mut builder = StringViewBuilder::new();
+    // Four short words followed by one longer sentence, repeating every five rows.
+    for i in 0..total_rows {
+        match i % 5 {
+            0 => builder.append_value("alpha"),
+            1 => builder.append_value("beta"),
+            2 => builder.append_value("gamma"),
+            3 => builder.append_value("delta"),
+            _ => builder.append_value("a longer utf8 string payload to test view storage"),
+        }
+    }
+    let values: StringViewArray = builder.finish();
+    build_single_column_batch(DataType::Utf8View, Arc::new(values) as ArrayRef)
+}
+/// Builds a single Utf8View column in which every value is "a" repeated `len` times.
+fn build_utf8view_batch_with_len(total_rows: usize, len: usize) -> RecordBatch {
+    let mut builder = StringViewBuilder::new();
+    let value: String = "a".repeat(len); // built once and appended for every row
+    for _ in 0..total_rows {
+        builder.append_value(&value);
+    }
+    let values: StringViewArray = builder.finish();
+    build_single_column_batch(DataType::Utf8View, Arc::new(values) as ArrayRef)
+}
+/// Builds `num_columns` Int32 columns named `value{idx}`, all sharing one `0..total_rows` array.
+fn build_int32_columns_batch(total_rows: usize, num_columns: usize) -> RecordBatch {
+    let base_values: ArrayRef = Arc::new(Int32Array::from_iter_values(
+        (0..total_rows).map(|v| v as i32),
+    ));
+    let mut fields = Vec::with_capacity(num_columns);
+    let mut columns = Vec::with_capacity(num_columns);
+    for idx in 0..num_columns {
+        fields.push(Field::new(format!("value{}", idx), DataType::Int32, false));
+        columns.push(base_values.clone()); // clones the `Arc` handle; the data is built only once
+    }
+    let schema = Arc::new(Schema::new(fields));
+    RecordBatch::try_new(schema, columns).unwrap()
+}
+/// Writes `batch` into an in-memory `Vec` with `ArrowWriter` (no explicit properties) as `Bytes`.
+fn write_parquet_batch(batch: RecordBatch) -> Bytes {
+    let schema = batch.schema();
+    let mut writer = ArrowWriter::try_new(Vec::new(), schema.clone(), None).unwrap();
+    writer.write(&batch).unwrap();
+    let buffer = writer.into_inner().unwrap(); // takes the backing `Vec` back out of the writer
+    Bytes::from(buffer)
+}
+/// Parameters that control how `generate_selectors` lays out select and skip runs.
+#[derive(Clone)]
+struct Scenario {
+    name: &'static str, // label embedded in the benchmark id
+    select_ratio: f64, // target fraction of selected rows; must be within [0, 1]
+    start_with_select: bool, // true: first run is a select; false: first run is a skip
+    distribution: RunDistribution, // how run lengths vary around their mean
+}
+/// How `sample_length` turns a mean run length into a concrete length.
+#[derive(Clone)]
+enum RunDistribution {
+    Constant, // every run is the rounded mean
+    Uniform { spread: f64 }, // integer length drawn from about mean * (1 - spread)..=mean * (1 + spread)
+    Bimodal { long_factor: f64, long_prob: f64 }, // long_factor * mean with probability long_prob
+}
+
+/// Generates alternating select/skip `RowSelector`s that together cover `total_rows` rows.
+///
+/// Run lengths come from `sample_length`, using per-kind means derived from
+/// `scenario.select_ratio` and `avg_selector_len`; the RNG seed mixes `seed` with
+/// `avg_selector_len`. Panics if `scenario.select_ratio` is outside `[0, 1]`.
+fn generate_selectors(
+    avg_selector_len: usize,
+    total_rows: usize,
+    scenario: &Scenario,
+    seed: u64,
+) -> Vec<RowSelector> {
+    assert!(
+        (0.0..=1.0).contains(&scenario.select_ratio),
+        "select_ratio must be in [0, 1]"
+    );
+
+    // Split an average select+skip pair by the select ratio; each mean is at least one row.
+    let raw_select = (scenario.select_ratio * 2.0 * avg_selector_len as f64).max(1.0);
+    let raw_skip = ((1.0 - scenario.select_ratio) * 2.0 * avg_selector_len as f64).max(1.0);
+
+    // Rescale so the two means again add up to `2 * avg_selector_len`. Both raw means are
+    // >= 1.0, so the divisor below is never zero.
+    let scale = (2.0 * avg_selector_len as f64) / (raw_select + raw_skip);
+    let select_mean = raw_select * scale;
+    let skip_mean = raw_skip * scale;
+
+    let rng_seed = seed ^ (avg_selector_len as u64).wrapping_mul(0x9E3779B1);
+    let mut rng = StdRng::seed_from_u64(rng_seed);
+    let mut selectors = Vec::with_capacity(total_rows / avg_selector_len.max(1));
+    let mut remaining = total_rows;
+    let mut is_select = scenario.start_with_select;
+
+    // Alternate run kinds until every row is covered.
+    while remaining > 0 {
+        let mean = if is_select { select_mean } else { skip_mean };
+        // Each run has at least one row and never more rows than are left.
+        let len = sample_length(mean, &scenario.distribution, &mut rng)
+            .max(1)
+            .min(remaining);
+        let run = if is_select {
+            RowSelector::select(len)
+        } else {
+            RowSelector::skip(len)
+        };
+        selectors.push(run);
+        remaining -= len;
+        is_select = !is_select;
+    }
+
+    // Round-trip through `RowSelection` before returning (presumably normalizes the runs).
+    RowSelection::from(selectors).into()
+}
+/// Samples one run length (always at least 1) around `mean` according to `distribution`.
+fn sample_length(mean: f64, distribution: &RunDistribution, rng: &mut StdRng) -> usize {
+    match distribution {
+        RunDistribution::Constant => mean.round().max(1.0) as usize,
+        RunDistribution::Uniform { spread } => {
+            let spread = spread.clamp(0.0, 0.99); // keeps the `1.0 - spread` factor positive
+            let lower = (mean * (1.0 - spread)).max(1.0);
+            let upper = (mean * (1.0 + spread)).max(lower + f64::EPSILON);
+            if (upper - lower) < 1.0 {
+                lower.round().max(1.0) as usize // range narrower than one row: no sampling
+            } else {
+                let low = lower.floor() as usize;
+                let high = upper.ceil() as usize;
+                rng.random_range(low..=high).max(1) // inclusive integer range
+            }
+        }
+        RunDistribution::Bimodal {
+            long_factor,
+            long_prob,
+        } => {
+            let long_prob = long_prob.clamp(0.0, 0.5); // so `short_prob` below is at least 0.5
+            let short_prob = 1.0 - long_prob;
+            let short_factor = if short_prob == 0.0 {
+                1.0 / long_factor.max(f64::EPSILON)
+            } else {
+                (1.0 - long_prob * long_factor).max(0.0) / short_prob // balances the long runs
+            };
+            let use_long = rng.random_bool(long_prob);
+            let factor = if use_long {
+                *long_factor
+            } else {
+                short_factor.max(0.1) // short runs never drop below a tenth of the mean
+            };
+            (mean * factor).round().max(1.0) as usize
+        }
+    }
+}
+/// Which `RowSelectionPolicy` a benchmark run asks the reader to use.
+#[derive(Clone, Copy)]
+enum BenchMode {
+    ReadSelector, // maps to `RowSelectionPolicy::Selectors`
+    ReadMask, // maps to `RowSelectionPolicy::Mask`
+}
+
+impl BenchMode {
+    fn label(self) -> &'static str {
+        match self {
+            BenchMode::ReadSelector => "read_selector",
+            BenchMode::ReadMask => "read_mask",
+        }
+    }
+    /// Row selection policy passed to the reader builder for this mode.
+    fn policy(self) -> RowSelectionPolicy {
+        match self {
+            BenchMode::ReadSelector => RowSelectionPolicy::Selectors,
+            BenchMode::ReadMask => RowSelectionPolicy::Mask,
+        }
+    }
+}
+/// Summary statistics of a selector list, embedded in the benchmark id.
+struct SelectorStats {
+    average_selector_len: f64, // total rows divided by the number of selectors
+    select_ratio: f64, // selected (non-skip) rows divided by total rows
+}
+
+impl SelectorStats {
+    fn new(selectors: &[RowSelector]) -> Self {
+        if selectors.is_empty() {
+            return Self {
+                average_selector_len: 0.0,
+                select_ratio: 0.0,
+            };
+        }
+        // Total rows across all selectors, and the rows of the non-skip selectors only.
+        let total_rows: usize = selectors.iter().map(|s| s.row_count).sum();
+        let selected_rows: usize = selectors
+            .iter()
+            .filter(|s| !s.skip)
+            .map(|s| s.row_count)
+            .sum();
+        // `selectors` is non-empty here; the ratio is still guarded against zero total rows.
+        Self {
+            average_selector_len: total_rows as f64 / selectors.len() as f64,
+            select_ratio: if total_rows == 0 {
+                0.0
+            } else {
+                selected_rows as f64 / total_rows as f64
+            },
+        }
+    }
+}
diff --git a/parquet/examples/async_read_parquet.rs b/parquet/examples/async_read_parquet.rs
index 0a2e9ba994dd..78287fa846fc 100644
--- a/parquet/examples/async_read_parquet.rs
+++ b/parquet/examples/async_read_parquet.rs
@@ -45,7 +45,7 @@ async fn main() -> Result<()> {
     builder = builder.with_projection(mask);
 
     // Highlight: set `RowFilter`, it'll push down filter predicates to skip IO and decode.
-    // For more specific usage: please refer to https://github.com/apache/datafusion/blob/main/datafusion/core/src/datasource/physical_plan/parquet/row_filter.rs.
+    // For more specific usage: please refer to https://github.com/apache/datafusion/blob/main/datafusion/datasource-parquet/src/row_filter.rs.
     let scalar = Int32Array::from(vec![1]);
     let filter = ArrowPredicateFn::new(
         ProjectionMask::roots(file_metadata.schema_descr(), [0]),
diff --git a/parquet/examples/external_metadata.rs b/parquet/examples/external_metadata.rs
index 2c3250782c0f..9370016049e1 100644
--- a/parquet/examples/external_metadata.rs
+++ b/parquet/examples/external_metadata.rs
@@ -20,7 +20,9 @@ use arrow_cast::pretty::pretty_format_batches;
 use futures::TryStreamExt;
 use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
 use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder};
-use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, ParquetMetaDataWriter};
+use parquet::file::metadata::{
+    PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, ParquetMetaDataWriter,
+};
 use parquet::file::properties::{EnabledStatistics, WriterProperties};
 use std::fs::File;
 use std::path::{Path, PathBuf};
@@ -111,7 +113,7 @@ async fn get_metadata_from_remote_parquet_file(
 
     // tell the reader to read the page index
     ParquetMetaDataReader::new()
-        .with_page_indexes(true)
+        .with_page_index_policy(PageIndexPolicy::Required)
         .load_and_finish(remote_file, file_size)
         .await
         .unwrap()
@@ -140,7 +142,7 @@ fn prepare_metadata(metadata: ParquetMetaData) -> ParquetMetaData {
     // verifiy that the size has indeed been reduced
     let new_size = metadata.memory_size();
     assert!(new_size < orig_size, "metadata size did not decrease");
-    println!("Reduced metadata size from {} to {}", orig_size, new_size);
+    println!("Reduced metadata size from {orig_size} to {new_size}");
     metadata
 }
 
@@ -160,7 +162,7 @@ fn write_metadata_to_local_file(metadata: ParquetMetaData, file: impl AsRef<Path
 fn read_metadata_from_local_file(file: impl AsRef<Path>) -> ParquetMetaData {
     let file = File::open(file).unwrap();
     ParquetMetaDataReader::new()
-        .with_page_indexes(true)
+        .with_page_index_policy(PageIndexPolicy::Required)
         .parse_and_finish(&file)
         .unwrap()
 }
diff --git a/parquet/examples/read_with_row_filter.rs b/parquet/examples/read_with_row_filter.rs
new file mode 100644
index 000000000000..e0efb536b418
--- /dev/null
+++ b/parquet/examples/read_with_row_filter.rs
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::Int32Array;
+use arrow_cast::pretty::print_batches;
+use parquet::arrow::ProjectionMask;
+use parquet::arrow::arrow_reader::{ArrowPredicateFn, ParquetRecordBatchReaderBuilder, RowFilter};
+use parquet::errors::Result;
+use std::fs::File;
+
+// Demonstrates `RowFilter` / `with_row_filter` usage. For background and more
+// context, see <https://arrow.apache.org/blog/2025/12/11/parquet-late-materialization-deep-dive/>
+fn main() -> Result<()> {
+    let testdata = arrow::util::test_util::parquet_test_data();
+    let path = format!("{testdata}/alltypes_plain.parquet");
+    let file = File::open(&path)?;
+    let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
+    let schema_desc = builder.metadata().file_metadata().schema_descr_ptr();
+
+    // Create the predicate `id > 4`. The `id` column is leaf index 0 in the file schema.
+    // The projection mask ensures only predicate columns are read to evaluate the filter.
+    let projection_mask = ProjectionMask::leaves(&schema_desc, [0]);
+    let predicate = ArrowPredicateFn::new(projection_mask, |batch| {
+        let id_col = batch.column(0);
+        arrow::compute::kernels::cmp::gt(id_col, &Int32Array::new_scalar(4))
+    });
+
+    let row_filter = RowFilter::new(vec![Box::new(predicate)]);
+    let reader = builder.with_row_filter(row_filter).build()?;
+
+    let filtered_batches: Vec<_> = reader.map(|b| b.unwrap()).collect(); // panics on a read error
+    print_batches(&filtered_batches)?;
+
+    Ok(())
+}
diff --git a/parquet/examples/read_with_rowgroup.rs b/parquet/examples/read_with_rowgroup.rs
index 5d1ff0770f9e..e3c714c2807a 100644
--- a/parquet/examples/read_with_rowgroup.rs
+++ b/parquet/examples/read_with_rowgroup.rs
@@ -19,10 +19,10 @@ use arrow::util::pretty::print_batches;
 use bytes::{Buf, Bytes};
 use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, RowGroups, RowSelection};
 use parquet::arrow::async_reader::AsyncFileReader;
-use parquet::arrow::{parquet_to_arrow_field_levels, ProjectionMask};
+use parquet::arrow::{ProjectionMask, parquet_to_arrow_field_levels};
 use parquet::column::page::{PageIterator, PageReader};
 use parquet::errors::{ParquetError, Result};
-use parquet::file::metadata::RowGroupMetaData;
+use parquet::file::metadata::{ParquetMetaData, RowGroupMetaData};
 use parquet::file::reader::{ChunkReader, Length};
 use parquet::file::serialized_reader::SerializedPageReader;
 use std::sync::Arc;
@@ -35,10 +35,11 @@ async fn main() -> Result<()> {
     let mut file = File::open(&path).await.unwrap();
 
     // The metadata could be cached in other places, this example only shows how to read
-    let metadata = file.get_metadata(None).await?;
+    let metadata = Arc::try_unwrap(file.get_metadata(None).await?).unwrap();
 
-    for rg in metadata.row_groups() {
-        let mut rowgroup = InMemoryRowGroup::create(rg.clone(), ProjectionMask::all());
+    for row_group_idx in 0..metadata.row_groups().len() {
+        let mut rowgroup =
+            InMemoryRowGroup::create(metadata.clone(), row_group_idx, ProjectionMask::all());
         rowgroup.async_fetch_data(&mut file, None).await?;
         let reader = rowgroup.build_reader(1024, None)?;
 
@@ -100,14 +101,15 @@ impl ChunkReader for ColumnChunkData {
 
 #[derive(Clone)]
 pub struct InMemoryRowGroup {
-    pub metadata: RowGroupMetaData,
+    metadata: ParquetMetaData,
+    row_group_idx: usize,
     mask: ProjectionMask,
     column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
 }
 
 impl RowGroups for InMemoryRowGroup {
     fn num_rows(&self) -> usize {
-        self.metadata.num_rows() as usize
+        self.row_group_metadata().num_rows() as usize
     }
 
     fn column_chunks(&self, i: usize) -> Result<Box<dyn PageIterator>> {
@@ -118,7 +120,7 @@ impl RowGroups for InMemoryRowGroup {
             Some(data) => {
                 let page_reader: Box<dyn PageReader> = Box::new(SerializedPageReader::new(
                     data.clone(),
-                    self.metadata.column(i),
+                    self.row_group_metadata().column(i),
                     self.num_rows(),
                     None,
                 )?);
@@ -129,26 +131,44 @@ impl RowGroups for InMemoryRowGroup {
             }
         }
     }
+
+    fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
+        Box::new(std::iter::once(self.row_group_metadata()))
+    }
+
+    fn metadata(&self) -> &ParquetMetaData {
+        &self.metadata
+    }
 }
 
 impl InMemoryRowGroup {
-    pub fn create(metadata: RowGroupMetaData, mask: ProjectionMask) -> Self {
-        let column_chunks = metadata.columns().iter().map(|_| None).collect::<Vec<_>>();
+    pub fn create(metadata: ParquetMetaData, row_group_idx: usize, mask: ProjectionMask) -> Self {
+        let column_chunks = metadata
+            .row_group(row_group_idx)
+            .columns()
+            .iter()
+            .map(|_| None)
+            .collect::<Vec<_>>();
 
         Self {
             metadata,
+            row_group_idx,
             mask,
             column_chunks,
         }
     }
 
+    pub fn row_group_metadata(&self) -> &RowGroupMetaData {
+        self.metadata.row_group(self.row_group_idx)
+    }
+
     pub fn build_reader(
         &self,
         batch_size: usize,
         selection: Option<RowSelection>,
     ) -> Result<ParquetRecordBatchReader> {
         let levels = parquet_to_arrow_field_levels(
-            &self.metadata.schema_descr_ptr(),
+            &self.row_group_metadata().schema_descr_ptr(),
             self.mask.clone(),
             None,
         )?;
@@ -163,7 +183,7 @@ impl InMemoryRowGroup {
         _selection: Option<&RowSelection>,
     ) -> Result<()> {
         let mut vs = std::mem::take(&mut self.column_chunks);
-        for (leaf_idx, meta) in self.metadata.columns().iter().enumerate() {
+        for (leaf_idx, meta) in self.row_group_metadata().columns().iter().enumerate() {
             if self.mask.leaf_included(leaf_idx) {
                 let (start, len) = meta.byte_range();
                 let data = reader.get_bytes(start..(start + len)).await?;
diff --git a/parquet/regen.sh b/parquet/regen.sh
index 39999c7872cd..1f2aee91bbc8 100755
--- a/parquet/regen.sh
+++ b/parquet/regen.sh
@@ -17,7 +17,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-REVISION=5b564f3c47679526cf72e54f207013f28f53acc4
+# using commit for parquet-format 2.11.0
+REVISION=848302e179d7bb52a64caea6a058b3c08212787c
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
 
diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs
index 14a475859810..82c8e77f6393 100644
--- a/parquet/src/arrow/array_reader/builder.rs
+++ b/parquet/src/arrow/array_reader/builder.rs
@@ -15,33 +15,108 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use arrow_schema::{DataType, Fields, SchemaBuilder};
 
+use crate::arrow::ProjectionMask;
 use crate::arrow::array_reader::byte_view_array::make_byte_view_array_reader;
+use crate::arrow::array_reader::cached_array_reader::CacheRole;
+use crate::arrow::array_reader::cached_array_reader::CachedArrayReader;
 use crate::arrow::array_reader::empty_array::make_empty_array_reader;
 use crate::arrow::array_reader::fixed_len_byte_array::make_fixed_len_byte_array_reader;
+use crate::arrow::array_reader::row_group_cache::RowGroupCache;
+use crate::arrow::array_reader::row_number::RowNumberReader;
 use crate::arrow::array_reader::{
-    make_byte_array_dictionary_reader, make_byte_array_reader, ArrayReader,
-    FixedSizeListArrayReader, ListArrayReader, MapArrayReader, NullArrayReader,
-    PrimitiveArrayReader, RowGroups, StructArrayReader,
+    ArrayReader, FixedSizeListArrayReader, ListArrayReader, MapArrayReader, NullArrayReader,
+    PrimitiveArrayReader, RowGroups, StructArrayReader, make_byte_array_dictionary_reader,
+    make_byte_array_reader,
 };
-use crate::arrow::schema::{ParquetField, ParquetFieldType};
-use crate::arrow::ProjectionMask;
+use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+use crate::arrow::schema::{ParquetField, ParquetFieldType, VirtualColumnType};
 use crate::basic::Type as PhysicalType;
 use crate::data_type::{BoolType, DoubleType, FloatType, Int32Type, Int64Type, Int96Type};
 use crate::errors::{ParquetError, Result};
+use crate::file::metadata::ParquetMetaData;
 use crate::schema::types::{ColumnDescriptor, ColumnPath, Type};
 
+/// Builder for [`CacheOptions`]
+#[derive(Debug, Clone)]
+pub struct CacheOptionsBuilder<'a> {
+    /// Projection mask selecting the leaf columns the cache applies to
+    pub projection_mask: &'a ProjectionMask,
+    /// Shared cache used to store row group data
+    pub cache: &'a Arc<Mutex<RowGroupCache>>,
+}
+
+impl<'a> CacheOptionsBuilder<'a> {
+    /// Create a new cache options builder from a projection mask and a shared cache
+    pub fn new(projection_mask: &'a ProjectionMask, cache: &'a Arc<Mutex<RowGroupCache>>) -> Self {
+        Self {
+            projection_mask,
+            cache,
+        }
+    }
+
+    /// Return a new [`CacheOptions`] for producing (populating) the cache
+    pub fn producer(self) -> CacheOptions<'a> {
+        self.with_role(CacheRole::Producer)
+    }
+
+    /// Return a new [`CacheOptions`] for consuming (reading) the cache
+    pub fn consumer(self) -> CacheOptions<'a> {
+        self.with_role(CacheRole::Consumer)
+    }
+
+    fn with_role(self, role: CacheRole) -> CacheOptions<'a> {
+        CacheOptions {
+            projection_mask: self.projection_mask,
+            cache: self.cache,
+            role,
+        }
+    }
+}
+
+/// Cache settings used when building array readers: projection mask, shared cache, and role
+#[derive(Clone)]
+pub struct CacheOptions<'a> {
+    pub projection_mask: &'a ProjectionMask,
+    pub cache: &'a Arc<Mutex<RowGroupCache>>,
+    pub role: CacheRole,
+}
+
 /// Builds [`ArrayReader`]s from parquet schema, projection mask, and RowGroups reader
-pub(crate) struct ArrayReaderBuilder<'a> {
+pub struct ArrayReaderBuilder<'a> {
+    /// Source of row group data
     row_groups: &'a dyn RowGroups,
+    /// Optional cache options for the array reader
+    cache_options: Option<&'a CacheOptions<'a>>,
+    /// Parquet metadata for computing virtual column values
+    parquet_metadata: Option<&'a ParquetMetaData>,
+    /// metrics
+    metrics: &'a ArrowReaderMetrics,
 }
 
 impl<'a> ArrayReaderBuilder<'a> {
-    pub(crate) fn new(row_groups: &'a dyn RowGroups) -> Self {
-        Self { row_groups }
+    pub fn new(row_groups: &'a dyn RowGroups, metrics: &'a ArrowReaderMetrics) -> Self {
+        Self {
+            row_groups,
+            cache_options: None,
+            parquet_metadata: None,
+            metrics,
+        }
+    }
+
+    /// Add cache options to the builder
+    pub fn with_cache_options(mut self, cache_options: Option<&'a CacheOptions<'a>>) -> Self {
+        self.cache_options = cache_options;
+        self
+    }
+
+    /// Add parquet metadata to the builder for computing virtual column values
+    pub fn with_parquet_metadata(mut self, parquet_metadata: &'a ParquetMetaData) -> Self {
+        self.parquet_metadata = Some(parquet_metadata);
+        self
     }
 
     /// Create [`ArrayReader`] from parquet schema, projection mask, and parquet file reader.
@@ -69,7 +144,33 @@ impl<'a> ArrayReaderBuilder<'a> {
         mask: &ProjectionMask,
     ) -> Result<Option<Box<dyn ArrayReader>>> {
         match field.field_type {
-            ParquetFieldType::Primitive { .. } => self.build_primitive_reader(field, mask),
+            ParquetFieldType::Primitive { col_idx, .. } => {
+                let Some(reader) = self.build_primitive_reader(field, mask)? else {
+                    return Ok(None);
+                };
+                let Some(cache_options) = self.cache_options.as_ref() else {
+                    return Ok(Some(reader));
+                };
+
+                if cache_options.projection_mask.leaf_included(col_idx) {
+                    Ok(Some(Box::new(CachedArrayReader::new(
+                        reader,
+                        Arc::clone(cache_options.cache),
+                        col_idx,
+                        cache_options.role,
+                        self.metrics.clone(), // cheap clone
+                    ))))
+                } else {
+                    Ok(Some(reader))
+                }
+            }
+            ParquetFieldType::Virtual(virtual_type) => {
+                // Virtual columns don't have data in the parquet file
+                // They need to be built by specialized readers
+                match virtual_type {
+                    VirtualColumnType::RowNumber => Ok(Some(self.build_row_number_reader()?)),
+                }
+            }
             ParquetFieldType::Group { .. } => match &field.arrow_type {
                 DataType::Map(_, _) => self.build_map_reader(field, mask),
                 DataType::Struct(_) => self.build_struct_reader(field, mask),
@@ -81,6 +182,18 @@ impl<'a> ArrayReaderBuilder<'a> {
         }
     }
 
+    /// Build the reader for the virtual row number column.
+    /// Errors if no `ParquetMetaData` was supplied via `with_parquet_metadata`.
+    fn build_row_number_reader(&self) -> Result<Box<dyn ArrayReader>> {
+        let Some(parquet_metadata) = self.parquet_metadata else {
+            let msg = "ParquetMetaData is required to read virtual row number columns.";
+            return Err(ParquetError::General(msg.to_string()));
+        };
+        let row_groups = self.row_groups.row_groups();
+        let reader = RowNumberReader::try_new(parquet_metadata, row_groups)?;
+        Ok(Box::new(reader))
+    }
+
     /// Build array reader for map type.
     fn build_map_reader(
         &self,
@@ -356,6 +469,7 @@ impl<'a> ArrayReaderBuilder<'a> {
 mod tests {
     use super::*;
     use crate::arrow::schema::parquet_to_arrow_schema_and_fields;
+    use crate::arrow::schema::virtual_type::RowNumber;
     use crate::file::reader::{FileReader, SerializedFileReader};
     use crate::util::test_common::file_util::get_test_file;
     use arrow::datatypes::Field;
@@ -372,10 +486,12 @@ mod tests {
             file_metadata.schema_descr(),
             ProjectionMask::all(),
             file_metadata.key_value_metadata(),
+            &[],
         )
         .unwrap();
 
-        let array_reader = ArrayReaderBuilder::new(&file_reader)
+        let metrics = ArrowReaderMetrics::disabled();
+        let array_reader = ArrayReaderBuilder::new(&file_reader, &metrics)
             .build_array_reader(fields.as_ref(), &mask)
             .unwrap();
 
@@ -388,4 +504,41 @@ mod tests {
 
         assert_eq!(array_reader.get_data_type(), &arrow_type);
     }
+
+    #[test]
+    fn test_create_array_reader_with_row_numbers() {
+        let file = get_test_file("nulls.snappy.parquet");
+        let file_reader: Arc<dyn FileReader> = Arc::new(SerializedFileReader::new(file).unwrap());
+        let parquet_metadata = file_reader.metadata();
+        let schema_descr = parquet_metadata.file_metadata().schema_descr();
+
+        // Row number field passed as the extra (virtual) column
+        let virtual_field = Arc::new(
+            Field::new("row_number", DataType::Int64, false).with_extension_type(RowNumber),
+        );
+        let (_, fields) = parquet_to_arrow_schema_and_fields(
+            schema_descr,
+            ProjectionMask::all(),
+            parquet_metadata.file_metadata().key_value_metadata(),
+            std::slice::from_ref(&virtual_field),
+        )
+        .unwrap();
+
+        let leaf_mask = ProjectionMask::leaves(schema_descr, [0]);
+        let metrics = ArrowReaderMetrics::disabled();
+        let array_reader = ArrayReaderBuilder::new(&file_reader, &metrics)
+            .with_parquet_metadata(parquet_metadata)
+            .build_array_reader(fields.as_ref(), &leaf_mask)
+            .unwrap();
+
+        // Expected type: the `b_struct` column followed by the row number field
+        let b_struct = Field::new(
+            "b_struct",
+            DataType::Struct(vec![Field::new("b_c_int", DataType::Int32, true)].into()),
+            true,
+        );
+        let expected = DataType::Struct(Fields::from(vec![b_struct, (*virtual_field).clone()]));
+
+        assert_eq!(array_reader.get_data_type(), &expected);
+    }
 }
diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs
index 92583155605b..0acbe6501924 100644
--- a/parquet/src/arrow/array_reader/byte_array.rs
+++ b/parquet/src/arrow/array_reader/byte_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::buffer::bit_util::sign_extend_be;
 use crate::arrow::buffer::offset_buffer::OffsetBuffer;
 use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
@@ -274,7 +274,7 @@ impl ByteArrayDecoder {
                 validate_utf8,
             )),
             Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => ByteArrayDecoder::Dictionary(
-                ByteArrayDecoderDictionary::new(data, num_levels, num_values),
+                ByteArrayDecoderDictionary::new(data, num_levels, num_values)?,
             ),
             Encoding::DELTA_LENGTH_BYTE_ARRAY => ByteArrayDecoder::DeltaLength(
                 ByteArrayDecoderDeltaLength::new(data, validate_utf8)?,
@@ -286,7 +286,7 @@ impl ByteArrayDecoder {
                 return Err(general_err!(
                     "unsupported encoding for byte array: {}",
                     encoding
-                ))
+                ));
             }
         };
 
@@ -563,10 +563,10 @@ pub struct ByteArrayDecoderDictionary {
 }
 
 impl ByteArrayDecoderDictionary {
-    fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Self {
-        Self {
-            decoder: DictIndexDecoder::new(data, num_levels, num_values),
-        }
+    fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Result<Self> {
+        Ok(Self {
+            decoder: DictIndexDecoder::new(data, num_levels, num_values)?,
+        })
     }
 
     fn read<I: OffsetSizeTrait>(
diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
index 0f8a21478ed2..09de37a80ed9 100644
--- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs
+++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs
@@ -19,13 +19,13 @@ use std::any::Any;
 use std::marker::PhantomData;
 use std::sync::Arc;
 
-use arrow_array::{new_empty_array, Array, ArrayRef, OffsetSizeTrait};
+use arrow_array::{Array, ArrayRef, OffsetSizeTrait, new_empty_array};
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::DataType as ArrowType;
 use bytes::Bytes;
 
 use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain};
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::buffer::{dictionary_buffer::DictionaryBuffer, offset_buffer::OffsetBuffer};
 use crate::arrow::record_reader::GenericRecordReader;
 use crate::arrow::schema::parquet_to_arrow_field;
@@ -165,6 +165,10 @@ where
     }
 
     fn consume_batch(&mut self) -> Result<ArrayRef> {
+        // advance the def & rep level buffers
+        self.def_levels_buffer = self.record_reader.consume_def_levels();
+        self.rep_levels_buffer = self.record_reader.consume_rep_levels();
+
         if self.record_reader.num_values() == 0 {
             // once the record_reader has been consumed, we've replaced its values with the default
             // variant of DictionaryBuffer (Offset). If `consume_batch` then gets called again, we
@@ -175,9 +179,6 @@ where
         let buffer = self.record_reader.consume_record_data();
         let null_buffer = self.record_reader.consume_bitmap_buffer();
         let array = buffer.into_array(null_buffer, &self.data_type)?;
-
-        self.def_levels_buffer = self.record_reader.consume_def_levels();
-        self.rep_levels_buffer = self.record_reader.consume_rep_levels();
         self.record_reader.reset();
 
         Ok(array)
@@ -292,7 +293,7 @@ where
             Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
                 let bit_width = data[0];
                 let mut decoder = RleDecoder::new(bit_width);
-                decoder.set_data(data.slice(1..));
+                decoder.set_data(data.slice(1..))?;
                 MaybeDictionaryDecoder::Dict {
                     decoder,
                     max_remaining_values: num_values.unwrap_or(num_levels),
diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs
index 6d6bbdc7b804..f881690f805f 100644
--- a/parquet/src/arrow/array_reader/byte_view_array.rs
+++ b/parquet/src/arrow/array_reader/byte_view_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::buffer::view_buffer::ViewBuffer;
 use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
 use crate::arrow::record_reader::GenericRecordReader;
@@ -28,7 +28,7 @@ use crate::encodings::decoding::{Decoder, DeltaBitPackDecoder};
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
 use crate::util::utf8::check_valid_utf8;
-use arrow_array::{builder::make_view, ArrayRef};
+use arrow_array::{ArrayRef, builder::make_view};
 use arrow_buffer::Buffer;
 use arrow_data::ByteView;
 use arrow_schema::DataType as ArrowType;
@@ -236,7 +236,7 @@ impl ByteViewArrayDecoder {
             Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => {
                 ByteViewArrayDecoder::Dictionary(ByteViewArrayDecoderDictionary::new(
                     data, num_levels, num_values,
-                ))
+                )?)
             }
             Encoding::DELTA_LENGTH_BYTE_ARRAY => ByteViewArrayDecoder::DeltaLength(
                 ByteViewArrayDecoderDeltaLength::new(data, validate_utf8)?,
@@ -248,7 +248,7 @@ impl ByteViewArrayDecoder {
                 return Err(general_err!(
                     "unsupported encoding for byte array: {}",
                     encoding
-                ))
+                ));
             }
         };
 
@@ -426,10 +426,10 @@ pub struct ByteViewArrayDecoderDictionary {
 }
 
 impl ByteViewArrayDecoderDictionary {
-    fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Self {
-        Self {
-            decoder: DictIndexDecoder::new(data, num_levels, num_values),
-        }
+    fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Result<Self> {
+        Ok(Self {
+            decoder: DictIndexDecoder::new(data, num_levels, num_values)?,
+        })
     }
 
     /// Reads the next indexes from self.decoder
diff --git a/parquet/src/arrow/array_reader/cached_array_reader.rs b/parquet/src/arrow/array_reader/cached_array_reader.rs
new file mode 100644
index 000000000000..b55b1e1d1a65
--- /dev/null
+++ b/parquet/src/arrow/array_reader/cached_array_reader.rs
@@ -0,0 +1,762 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`CachedArrayReader`] wrapper around [`ArrayReader`]
+
+use crate::arrow::array_reader::row_group_cache::BatchID;
+use crate::arrow::array_reader::{ArrayReader, row_group_cache::RowGroupCache};
+use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+use crate::errors::Result;
+use arrow_array::{ArrayRef, BooleanArray, new_empty_array};
+use arrow_buffer::BooleanBufferBuilder;
+use arrow_schema::DataType as ArrowType;
+use std::any::Any;
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+
+/// Role of the cached array reader
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CacheRole {
+    /// Producer role: inserts data into the cache during filter phase
+    Producer,
+    /// Consumer role: removes consumed data from the cache during output building phase
+    Consumer,
+}
+
+/// A cached wrapper around an ArrayReader that avoids duplicate decoding
+/// when the same column appears in both filter predicates and output projection.
+///
+/// This reader acts as a transparent layer over the inner reader, using a cache
+/// to avoid redundant work when the same data is needed multiple times.
+///
+/// The reader can operate in two roles:
+/// - Producer: During filter phase, inserts decoded data into the cache
+/// - Consumer: During output building, consumes and removes data from the cache
+///
+/// This means the memory consumption of the cache goes through three stages:
+/// 1. During the filter phase, the memory increases as the cache is populated
+/// 2. It peaks when filters are built.
+/// 3. It decreases as the cached data is consumed.
+///
+/// ```text
+///    ▲
+///    │     ╭─╮
+///    │    ╱   ╲
+///    │   ╱     ╲
+///    │  ╱       ╲
+///    │ ╱         ╲
+///    │╱           ╲
+///    └─────────────╲──────► Time
+///    │      │      │
+///    Filter  Peak  Consume
+///    Phase (Built) (Decrease)
+/// ```
+pub struct CachedArrayReader {
+    /// The underlying array reader that decodes data on a cache miss
+    inner: Box<dyn ArrayReader>,
+    /// Cache shared with other readers of this row group
+    shared_cache: Arc<Mutex<RowGroupCache>>,
+    /// Column index, used together with the batch ID as the shared cache key
+    column_idx: usize,
+    /// Logical row position of this reader, advanced by both reads and skips (used to derive batch IDs)
+    outer_position: usize,
+    /// Number of rows already skipped or read from `inner`
+    inner_position: usize,
+    /// Number of rows per cached batch, taken from the shared cache at construction
+    batch_size: usize,
+    /// One bit per row read (`true`) or skipped (`false`) since the last consume_batch()
+    selections: BooleanBufferBuilder,
+    /// Role of this reader (Producer or Consumer)
+    role: CacheRole,
+    /// Local cache to store batches between read_records and consume_batch calls
+    /// This ensures data is available even if the shared cache evicts items
+    local_cache: HashMap<BatchID, ArrayRef>,
+    /// Counters for rows served from the cache versus rows read from `inner`
+    metrics: ArrowReaderMetrics,
+}
+
+impl CachedArrayReader {
+    /// Creates a new cached array reader with the specified role
+    ///
+    /// The batch size is read from `cache` once, here, and reused afterwards.
+    pub fn new(
+        inner: Box<dyn ArrayReader>,
+        cache: Arc<Mutex<RowGroupCache>>,
+        column_idx: usize,
+        role: CacheRole,
+        metrics: ArrowReaderMetrics,
+    ) -> Self {
+        // The lock guard is a temporary, so it is released before `cache` is moved below
+        let batch_size = cache.lock().unwrap().batch_size();
+
+        Self {
+            inner,
+            shared_cache: cache,
+            column_idx,
+            outer_position: 0,
+            inner_position: 0,
+            batch_size,
+            selections: BooleanBufferBuilder::new(0),
+            role,
+            local_cache: HashMap::new(),
+            metrics,
+        }
+    }
+
+    /// Maps a row position to the ID of the batch that contains it
+    /// (integer division by `batch_size`, so rows `0..batch_size` are batch `0`)
+    fn get_batch_id_from_position(&self, row_id: usize) -> BatchID {
+        let val = row_id / self.batch_size;
+        BatchID { val }
+    }
+
+    /// Decodes the batch identified by `batch_id` from the inner reader
+    ///
+    /// `batch_id` is a batch index: its first row is `batch_id.val * batch_size`.
+    /// On success the decoded array is stored in `self.local_cache` and offered
+    /// to `self.shared_cache`. Returns the number of rows read (`0` at EOF).
+    ///
+    /// Panics if the inner reader skips fewer rows than requested.
+    fn fetch_batch(&mut self, batch_id: BatchID) -> Result<usize> {
+        // Advance the inner reader to the first row of the requested batch
+        let batch_start = batch_id.val * self.batch_size;
+        let pending = batch_start.saturating_sub(self.inner_position);
+        if pending > 0 {
+            let skipped = self.inner.skip_records(pending)?;
+            assert_eq!(skipped, pending);
+            self.inner_position += skipped;
+        }
+
+        // EOF: return before caching anything, because a zero-length array in
+        // the cache can later cause panics when slicing
+        let read = self.inner.read_records(self.batch_size)?;
+        if read == 0 {
+            return Ok(0);
+        }
+
+        let array = self.inner.consume_batch()?;
+
+        // The shared cache lets other readers reuse this batch. It may decline the
+        // insert when it is full; that is fine, because the local copy stored
+        // below is what this reader's own `consume_batch` reads from
+        let _inserted =
+            self.shared_cache
+                .lock()
+                .unwrap()
+                .insert(self.column_idx, batch_id, Arc::clone(&array));
+        self.local_cache.insert(batch_id, array);
+
+        // Advance the tracked position only after the batch has been stored
+        self.inner_position += read;
+        Ok(read)
+    }
+
+    /// Drops fully consumed batches of this column from the shared cache
+    ///
+    /// This is only called for Consumer role readers
+    fn cleanup_consumed_batches(&mut self) {
+        let current = self.get_batch_id_from_position(self.outer_position).val;
+        // Keep the current batch and the one before it, which might still be
+        // needed: batch `b` is removed only once `current > b + 1`
+        if current <= 1 {
+            return;
+        }
+
+        let column_idx = self.column_idx;
+        // Take the lock once for the whole sweep
+        let mut cache = self.shared_cache.lock().unwrap();
+        for val in 0..(current - 1) {
+            cache.remove(column_idx, BatchID { val });
+        }
+    }
+}
+
+impl ArrayReader for CachedArrayReader {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn get_data_type(&self) -> &ArrowType {
+        self.inner.get_data_type()
+    }
+
+    /// Selects up to `num_records` rows starting at the current logical position.
+    /// Rows come from cached batches when available, otherwise the batch is decoded
+    /// from the inner reader. Rows are only marked in `selections` here; the arrays
+    /// are assembled in `consume_batch`. Returns the number of rows selected.
+    fn read_records(&mut self, num_records: usize) -> Result<usize> {
+        let mut read = 0;
+        while read < num_records {
+            let remaining = num_records - read;
+            let batch_id = self.get_batch_id_from_position(self.outer_position);
+
+            // Look in the local cache first, then fall back to the shared cache
+            let mut cached = self.local_cache.get(&batch_id).cloned();
+            if cached.is_none() {
+                cached = self
+                    .shared_cache
+                    .lock()
+                    .unwrap()
+                    .get(self.column_idx, batch_id);
+                if let Some(array) = &cached {
+                    // Store in local cache for later use in consume_batch
+                    self.local_cache.insert(batch_id, Arc::clone(array));
+                }
+            }
+
+            match cached {
+                Some(array) => {
+                    // Cache hit: select rows up to the end of the cached batch
+                    let batch_end = batch_id.val * self.batch_size + array.len();
+                    if batch_end <= self.outer_position {
+                        // this is last batch and we have used all records from it
+                        break;
+                    }
+                    let select_cnt = remaining.min(batch_end - self.outer_position);
+                    self.metrics.increment_cache_reads(select_cnt);
+                    self.selections.append_n(select_cnt, true);
+                    self.outer_position += select_cnt;
+                    read += select_cnt;
+                }
+                None => {
+                    // Cache miss: decode the whole batch from the inner reader
+                    let fetched = self.fetch_batch(batch_id)?;
+                    if fetched == 0 {
+                        // Reached end-of-file, no more records to read
+                        break;
+                    }
+                    self.metrics.increment_inner_reads(fetched);
+
+                    let select_cnt = remaining.min(self.inner_position - self.outer_position);
+                    self.selections.append_n(select_cnt, true);
+                    self.outer_position += select_cnt;
+                    read += select_cnt;
+
+                    if fetched < self.batch_size {
+                        // this is last batch from inner reader
+                        break;
+                    }
+                }
+            }
+        }
+        Ok(read)
+    }
+
+    /// Marks the next `num_records` rows as not selected and reports them all as skipped
+    ///
+    /// Skipped rows are recorded as `false` bits in `selections` and the logical
+    /// position moves forward; the inner reader is not advanced here.
+    fn skip_records(&mut self, num_records: usize) -> Result<usize> {
+        // One append covers the whole run of skipped rows
+        self.selections.append_n(num_records, false);
+        self.outer_position += num_records;
+        Ok(num_records)
+    }
+    /// Builds the output array from the rows marked as selected since the previous call.
+    fn consume_batch(&mut self) -> Result<ArrayRef> {
+        let row_count = self.selections.len();
+        if row_count == 0 {
+            return Ok(new_empty_array(self.inner.get_data_type()));
+        }
+        // The pending rows are the `row_count` positions ending just before `outer_position`.
+        let start_position = self.outer_position - row_count;
+
+        let selection_buffer = self.selections.finish();
+        // Indices of the first and last batch (both inclusive) that hold pending rows.
+        let start_batch = start_position / self.batch_size;
+        let end_batch = (start_position + row_count - 1) / self.batch_size;
+
+        let mut selected_arrays = Vec::new();
+        for batch_id in start_batch..=end_batch {
+            let batch_start = batch_id * self.batch_size;
+            let batch_end = batch_start + self.batch_size - 1;
+            let batch_id = self.get_batch_id_from_position(batch_start);
+
+            // Intersect the pending row range with this batch's row range (inclusive bounds)
+            let overlap_start = start_position.max(batch_start);
+            let overlap_end = (start_position + row_count - 1).min(batch_end);
+
+            if overlap_start > overlap_end {
+                continue;
+            }
+
+            let selection_start = overlap_start - start_position;
+            let selection_length = overlap_end - overlap_start + 1;
+            let mask = selection_buffer.slice(selection_start, selection_length);
+            // No row of this batch is selected: skip it, so its data is never looked up.
+            if mask.count_set_bits() == 0 {
+                continue;
+            }
+
+            let mask_array = BooleanArray::from(mask);
+            // Use the local cache (not the shared one) so shared-cache eviction cannot affect us
+            let cached = self
+                .local_cache
+                .get(&batch_id)
+                .expect("data must be already cached in the read_records call, this is a bug");
+            let cached = cached.slice(overlap_start - batch_start, selection_length);
+            let filtered = arrow_select::filter::filter(&cached, &mask_array)?;
+            selected_arrays.push(filtered);
+        }
+
+        self.selections = BooleanBufferBuilder::new(0);
+
+        // Drop local-cache batches that lie entirely before `outer_position`; the batch that
+        // contains `outer_position` and any later ones are kept as they might still be needed
+        let current_batch_id = self.get_batch_id_from_position(self.outer_position);
+        self.local_cache
+            .retain(|batch_id, _| batch_id.val >= current_batch_id.val);
+
+        // For consumers, clean up batches that have been completely consumed.
+        // This reduces the memory usage of the shared cache.
+        if self.role == CacheRole::Consumer {
+            self.cleanup_consumed_batches();
+        }
+        // Zero or one filtered piece needs no concatenation.
+        match selected_arrays.len() {
+            0 => Ok(new_empty_array(self.inner.get_data_type())),
+            1 => Ok(selected_arrays.into_iter().next().unwrap()),
+            _ => Ok(arrow_select::concat::concat(
+                &selected_arrays
+                    .iter()
+                    .map(|a| a.as_ref())
+                    .collect::<Vec<_>>(),
+            )?),
+        }
+    }
+
+    fn get_def_levels(&self) -> Option<&[i16]> {
+        None // always `None`: a nullable parent is not allowed for now
+    }
+
+    fn get_rep_levels(&self) -> Option<&[i16]> {
+        None // this reader never reports repetition levels
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::arrow::array_reader::ArrayReader;
+    use crate::arrow::array_reader::row_group_cache::RowGroupCache;
+    use arrow_array::{ArrayRef, Int32Array};
+    use std::sync::{Arc, Mutex};
+
+    // Minimal in-memory `ArrayReader` over `i32` values, used as the inner reader in these tests
+    struct MockArrayReader {
+        data: Vec<i32>, // every value the mock can produce
+        position: usize, // index in `data` of the first value not yet consumed or skipped
+        records_to_consume: usize, // rows queued by `read_records`, cleared by `consume_batch`
+        data_type: ArrowType, // returned by `get_data_type`
+    }
+
+    impl MockArrayReader {
+        fn new(data: Vec<i32>) -> Self {
+            Self {
+                data,
+                position: 0, // start at the first value
+                records_to_consume: 0, // nothing queued yet
+                data_type: ArrowType::Int32, // matches the `i32` values in `data`
+            }
+        }
+    }
+
+    impl ArrayReader for MockArrayReader {
+        fn as_any(&self) -> &dyn Any {
+            self
+        }
+
+        fn get_data_type(&self) -> &ArrowType {
+            &self.data_type
+        }
+        /// Queues up to `batch_size` rows for the next `consume_batch`; returns the number queued.
+        fn read_records(&mut self, batch_size: usize) -> Result<usize> {
+            // Exclude already-queued rows so repeated reads cannot run past the end of `data`.
+            let remaining = self.data.len().saturating_sub(self.position + self.records_to_consume);
+            let to_read = std::cmp::min(batch_size, remaining);
+            self.records_to_consume += to_read;
+            Ok(to_read)
+        }
+        /// Emits the queued rows as an `Int32Array` and moves `position` past them.
+        fn consume_batch(&mut self) -> Result<ArrayRef> {
+            let end = self.position + self.records_to_consume;
+            let slice = &self.data[self.position..end];
+            self.position = end;
+            self.records_to_consume = 0;
+            Ok(Arc::new(Int32Array::from(slice.to_vec())))
+        }
+        /// Moves `position` forward by up to `num_records` rows without emitting them.
+        fn skip_records(&mut self, num_records: usize) -> Result<usize> {
+            let remaining = self.data.len() - self.position;
+            let to_skip = std::cmp::min(num_records, remaining);
+            self.position += to_skip;
+            Ok(to_skip)
+        }
+
+        fn get_def_levels(&self) -> Option<&[i16]> {
+            None
+        }
+
+        fn get_rep_levels(&self) -> Option<&[i16]> {
+            None
+        }
+    }
+
+    #[test]
+    fn test_cached_reader_basic() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared,
+            0,
+            CacheRole::Producer,
+            reader_metrics,
+        );
+
+        // The first request fits entirely within the available data
+        let first_read = reader.read_records(3).unwrap();
+        assert_eq!(first_read, 3);
+
+        let batch = reader.consume_batch().unwrap();
+        assert_eq!(batch.len(), 3);
+
+        let values = batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.values(), &[1, 2, 3]);
+
+        // Only two of the five values are left, so asking for three yields two
+        let second_read = reader.read_records(3).unwrap();
+        assert_eq!(second_read, 2);
+    }
+
+    #[test]
+    fn test_read_skip_pattern() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(5, usize::MAX))); // batch size 5
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared,
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        let first_read = reader.read_records(2).unwrap();
+        assert_eq!(first_read, 2);
+
+        let first_batch = reader.consume_batch().unwrap();
+        assert_eq!(first_batch.len(), 2);
+        let first_values = first_batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(first_values.values(), &[1, 2]);
+
+        let skip_count = reader.skip_records(2).unwrap();
+        assert_eq!(skip_count, 2);
+
+        let second_read = reader.read_records(1).unwrap();
+        assert_eq!(second_read, 1);
+
+        // Values 3 and 4 were skipped, so the single selected row must be 5
+        let second_batch = reader.consume_batch().unwrap();
+        assert_eq!(second_batch.len(), 1);
+        let second_values = second_batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(second_values.values(), &[5]);
+    }
+
+    #[test]
+    fn test_multiple_reads_before_consume() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared,
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Two reads with no consume in between add up
+        let first_read = reader.read_records(2).unwrap();
+        assert_eq!(first_read, 2);
+
+        let second_read = reader.read_records(1).unwrap();
+        assert_eq!(second_read, 1);
+
+        // A single consume hands back everything read so far
+        let batch = reader.consume_batch().unwrap();
+        assert_eq!(batch.len(), 3);
+        let values = batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.values(), &[1, 2, 3]);
+    }
+
+    #[test]
+    fn test_eof_behavior() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(5, usize::MAX))); // batch size 5
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared,
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Ask for more rows than the inner reader holds
+        let first_read = reader.read_records(5).unwrap();
+        assert_eq!(first_read, 3); // only the 3 existing rows are reported
+
+        let first_batch = reader.consume_batch().unwrap();
+        assert_eq!(first_batch.len(), 3);
+
+        // Once the data is exhausted, reads report 0 rows
+        let second_read = reader.read_records(1).unwrap();
+        assert_eq!(second_read, 0);
+
+        let second_batch = reader.consume_batch().unwrap();
+        assert_eq!(second_batch.len(), 0);
+    }
+
+    #[test]
+    fn test_cache_sharing() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(5, usize::MAX))); // batch size 5
+
+        // Producer on column index 0 fills the shared cache
+        let producer_inner = MockArrayReader::new(vec![1, 2, 3, 4, 5]);
+        let mut producer = CachedArrayReader::new(
+            Box::new(producer_inner),
+            shared.clone(),
+            0,
+            CacheRole::Producer,
+            reader_metrics.clone(),
+        );
+
+        producer.read_records(3).unwrap();
+        let producer_batch = producer.consume_batch().unwrap();
+        assert_eq!(producer_batch.len(), 3);
+
+        // A reader on column index 1 shares the cache object but must not be affected by it
+        let consumer_inner = MockArrayReader::new(vec![10, 20, 30, 40, 50]);
+        let mut consumer = CachedArrayReader::new(
+            Box::new(consumer_inner),
+            shared.clone(),
+            1,
+            CacheRole::Consumer,
+            reader_metrics.clone(),
+        );
+
+        consumer.read_records(2).unwrap();
+        let consumer_batch = consumer.consume_batch().unwrap();
+        assert_eq!(consumer_batch.len(), 2);
+
+        // The values come from the second reader's own inner data
+        let values = consumer_batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.values(), &[10, 20]);
+    }
+
+    #[test]
+    fn test_consumer_removes_batches() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+        let mut consumer = CachedArrayReader::new(
+            Box::new(inner),
+            shared.clone(),
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Rows 0-2 make up batch 0
+        let first_read = consumer.read_records(3).unwrap();
+        assert_eq!(first_read, 3);
+        assert_eq!(consumer.outer_position, 3);
+        // Reading alone must already have placed batch 0 in the shared cache
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_some());
+
+        let first_batch = consumer.consume_batch().unwrap();
+        assert_eq!(first_batch.len(), 3);
+
+        // The position (3) now falls in batch 1 (3 / 3 = 1); the batch that was just
+        // consumed is expected to remain in the shared cache at this point
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_some());
+
+        // Rows 3-5 make up batch 1
+        let second_read = consumer.read_records(3).unwrap();
+        assert_eq!(second_read, 3);
+        assert_eq!(consumer.outer_position, 6);
+        let second_batch = consumer.consume_batch().unwrap();
+        assert_eq!(second_batch.len(), 3);
+
+        // The position (6) now falls in batch 2 (6 / 3 = 2): batch 0 is expected to be
+        // gone from the shared cache while batch 1 is still present
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_none());
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 1 }).is_some());
+
+        // Rows 6-8 make up batch 2
+        let third_read = consumer.read_records(3).unwrap();
+        assert_eq!(third_read, 3);
+        assert_eq!(consumer.outer_position, 9);
+        let third_batch = consumer.consume_batch().unwrap();
+        assert_eq!(third_batch.len(), 3);
+
+        // The position (9) now falls in batch 3 (9 / 3 = 3): batches 0 and 1 are expected
+        // to be gone from the shared cache while batch 2 is still present
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_none());
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 1 }).is_none());
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 2 }).is_some());
+    }
+
+    #[test]
+    fn test_producer_keeps_batches() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+        let mut producer = CachedArrayReader::new(
+            Box::new(inner),
+            shared.clone(),
+            0,
+            CacheRole::Producer,
+            reader_metrics,
+        );
+
+        // Rows 0-2: first batch
+        let first_read = producer.read_records(3).unwrap();
+        assert_eq!(first_read, 3);
+        let first_batch = producer.consume_batch().unwrap();
+        assert_eq!(first_batch.len(), 3);
+
+        // Batch 0 must be present in the shared cache
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_some());
+
+        // Rows 3-5: second batch; a producer is expected to leave batch 0 in place
+        let second_read = producer.read_records(3).unwrap();
+        assert_eq!(second_read, 3);
+        let second_batch = producer.consume_batch().unwrap();
+        assert_eq!(second_batch.len(), 3);
+
+        // Neither batch has been removed from the shared cache
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_some());
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 1 }).is_some());
+    }
+
+    #[test]
+    fn test_local_cache_protects_against_eviction() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared.clone(),
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Reading is expected to store batch 0 in the shared and the local cache
+        let rows_read = reader.read_records(3).unwrap();
+        assert_eq!(rows_read, 3);
+
+        // Batch 0 is visible through both caches
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_some());
+        assert!(reader.local_cache.contains_key(&BatchID { val: 0 }));
+
+        // Mimic an eviction by removing batch 0 from the shared cache by hand
+        shared.lock().unwrap().remove(0, BatchID { val: 0 });
+        assert!(shared.lock().unwrap().get(0, BatchID { val: 0 }).is_none());
+
+        // The local cache still holds the data, so consume_batch must succeed
+        // even though the shared entry is gone
+        let batch = reader.consume_batch().unwrap();
+        assert_eq!(batch.len(), 3);
+
+        let values = batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.values(), &[1, 2, 3]);
+
+        // After consume_batch the local cache is expected to be empty
+        assert!(reader.local_cache.is_empty());
+    }
+
+    #[test]
+    fn test_local_cache_is_cleared_properly() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let inner = MockArrayReader::new(vec![1, 2, 3, 4]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, 0))); // batch size 3, cache 0
+        let mut reader = CachedArrayReader::new(
+            Box::new(inner),
+            shared.clone(),
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Take one row, then three more; the second read reaches past the first batch of 3
+        let first_read = reader.read_records(1).unwrap();
+        assert_eq!(first_read, 1);
+        let first_batch = reader.consume_batch().unwrap();
+        assert_eq!(first_batch.len(), 1);
+
+        let second_read = reader.read_records(3).unwrap();
+        assert_eq!(second_read, 3);
+        let second_batch = reader.consume_batch().unwrap();
+        assert_eq!(second_batch.len(), 3);
+    }
+
+    #[test]
+    fn test_batch_id_calculation_with_incremental_reads() {
+        let reader_metrics = ArrowReaderMetrics::disabled();
+        let consumer_inner = MockArrayReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
+        let shared = Arc::new(Mutex::new(RowGroupCache::new(3, usize::MAX))); // batch size 3
+
+        // A producer over identical data fills the shared cache first
+        let mut producer = CachedArrayReader::new(
+            Box::new(MockArrayReader::new(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
+            shared.clone(),
+            0,
+            CacheRole::Producer,
+            reader_metrics.clone(),
+        );
+
+        // Put the first batch (1, 2, 3) into the cache
+        producer.read_records(3).unwrap();
+        producer.consume_batch().unwrap();
+
+        // The consumer uses the same cache and column index as the producer
+        let mut consumer = CachedArrayReader::new(
+            Box::new(consumer_inner),
+            shared.clone(),
+            0,
+            CacheRole::Consumer,
+            reader_metrics,
+        );
+
+        // Request 4 rows starting at position 0:
+        // - rows 0-2 are expected to be served from the cached batch 0
+        // - row 3 belongs to the following batch
+        let rows_read = consumer.read_records(4).unwrap();
+        assert_eq!(rows_read, 4);
+
+        let batch = consumer.consume_batch().unwrap();
+        assert_eq!(batch.len(), 4);
+
+        let values = batch.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert_eq!(values.values(), &[1, 2, 3, 4]);
+    }
+}
diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
index 6b437be943d4..2297926add5f 100644
--- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
@@ -15,11 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::buffer::bit_util::{iter_set_bits_rev, sign_extend_be};
 use crate::arrow::decoder::{DeltaByteArrayDecoder, DictIndexDecoder};
-use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::arrow::record_reader::GenericRecordReader;
+use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::arrow::schema::parquet_to_arrow_field;
 use crate::basic::{Encoding, Type};
 use crate::column::page::PageIterator;
@@ -27,10 +27,10 @@ use crate::column::reader::decoder::ColumnValueDecoder;
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
 use arrow_array::{
-    ArrayRef, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array,
-    IntervalDayTimeArray, IntervalYearMonthArray,
+    ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
+    FixedSizeBinaryArray, Float16Array, IntervalDayTimeArray, IntervalYearMonthArray,
 };
-use arrow_buffer::{i256, Buffer, IntervalDayTime};
+use arrow_buffer::{Buffer, IntervalDayTime, i256};
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::{DataType as ArrowType, IntervalUnit};
 use bytes::Bytes;
@@ -59,11 +59,27 @@ pub fn make_fixed_len_byte_array_reader(
             return Err(general_err!(
                 "invalid physical type for fixed length byte array reader - {}",
                 t
-            ))
+            ));
         }
     };
     match &data_type {
         ArrowType::FixedSizeBinary(_) => {}
+        ArrowType::Decimal32(_, _) => {
+            if byte_length > 4 {
+                return Err(general_err!(
+                    "decimal 32 type too large, must be less then 4 bytes, got {}",
+                    byte_length
+                ));
+            }
+        }
+        ArrowType::Decimal64(_, _) => {
+            if byte_length > 8 {
+                return Err(general_err!(
+                    "decimal 64 type too large, must be less then 8 bytes, got {}",
+                    byte_length
+                ));
+            }
+        }
         ArrowType::Decimal128(_, _) => {
             if byte_length > 16 {
                 return Err(general_err!(
@@ -101,7 +117,7 @@ pub fn make_fixed_len_byte_array_reader(
             return Err(general_err!(
                 "invalid data type for fixed length byte array reader - {}",
                 data_type
-            ))
+            ));
         }
     }
 
@@ -168,6 +184,16 @@ impl ArrayReader for FixedLenByteArrayReader {
         // conversion lambdas are all infallible. This improves performance by avoiding a branch in
         // the inner loop (see docs for `PrimitiveArray::from_unary`).
         let array: ArrayRef = match &self.data_type {
+            ArrowType::Decimal32(p, s) => {
+                let f = |b: &[u8]| i32::from_be_bytes(sign_extend_be(b));
+                Arc::new(Decimal32Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?)
+                    as ArrayRef
+            }
+            ArrowType::Decimal64(p, s) => {
+                let f = |b: &[u8]| i64::from_be_bytes(sign_extend_be(b));
+                Arc::new(Decimal64Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?)
+                    as ArrayRef
+            }
             ArrowType::Decimal128(p, s) => {
                 let f = |b: &[u8]| i128::from_be_bytes(sign_extend_be(b));
                 Arc::new(Decimal128Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?)
@@ -355,7 +381,7 @@ impl ColumnValueDecoder for ValueDecoder {
                 offset: 0,
             },
             Encoding::RLE_DICTIONARY | Encoding::PLAIN_DICTIONARY => Decoder::Dict {
-                decoder: DictIndexDecoder::new(data, num_levels, num_values),
+                decoder: DictIndexDecoder::new(data, num_levels, num_values)?,
             },
             Encoding::DELTA_BYTE_ARRAY => Decoder::Delta {
                 decoder: DeltaByteArrayDecoder::new(data)?,
@@ -368,7 +394,7 @@ impl ColumnValueDecoder for ValueDecoder {
                 return Err(general_err!(
                     "unsupported encoding for fixed length byte array: {}",
                     encoding
-                ))
+                ));
             }
         });
         Ok(())
@@ -492,8 +518,8 @@ enum Decoder {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use crate::arrow::ArrowWriter;
+    use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use arrow::datatypes::Field;
     use arrow::error::Result as ArrowResult;
     use arrow_array::{Array, ListArray};
diff --git a/parquet/src/arrow/array_reader/fixed_size_list_array.rs b/parquet/src/arrow/array_reader/fixed_size_list_array.rs
index 5750ef94e3e8..8ef3bd6c2a4b 100644
--- a/parquet/src/arrow/array_reader/fixed_size_list_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_size_list_array.rs
@@ -22,8 +22,8 @@ use crate::arrow::array_reader::ArrayReader;
 use crate::errors::ParquetError;
 use crate::errors::Result;
 use arrow_array::FixedSizeListArray;
-use arrow_array::{builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef};
-use arrow_data::{transform::MutableArrayData, ArrayData};
+use arrow_array::{Array, ArrayRef, builder::BooleanBufferBuilder, new_empty_array};
+use arrow_data::{ArrayData, transform::MutableArrayData};
 use arrow_schema::DataType as ArrowType;
 
 /// Implementation of fixed-size list array reader.
@@ -225,15 +225,15 @@ impl ArrayReader for FixedSizeListArrayReader {
 mod tests {
     use super::*;
     use crate::arrow::{
-        array_reader::{test_util::InMemoryArrayReader, ListArrayReader},
-        arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
         ArrowWriter,
+        array_reader::{ListArrayReader, test_util::InMemoryArrayReader},
+        arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
     };
     use arrow::datatypes::{Field, Int32Type};
     use arrow_array::{
+        FixedSizeListArray, ListArray, PrimitiveArray, RecordBatch,
         builder::{FixedSizeListBuilder, Int32Builder, ListBuilder},
         cast::AsArray,
-        FixedSizeListArray, ListArray, PrimitiveArray, RecordBatch,
     };
     use arrow_buffer::Buffer;
     use arrow_data::ArrayDataBuilder;
diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs
index 66c4f30b3c29..ff1b414c27bb 100644
--- a/parquet/src/arrow/array_reader/list_array.rs
+++ b/parquet/src/arrow/array_reader/list_array.rs
@@ -19,12 +19,12 @@ use crate::arrow::array_reader::ArrayReader;
 use crate::errors::ParquetError;
 use crate::errors::Result;
 use arrow_array::{
-    builder::BooleanBufferBuilder, new_empty_array, Array, ArrayRef, GenericListArray,
-    OffsetSizeTrait,
+    Array, ArrayRef, GenericListArray, OffsetSizeTrait, builder::BooleanBufferBuilder,
+    new_empty_array,
 };
 use arrow_buffer::Buffer;
 use arrow_buffer::ToByteSlice;
-use arrow_data::{transform::MutableArrayData, ArrayData};
+use arrow_data::{ArrayData, transform::MutableArrayData};
 use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::cmp::Ordering;
@@ -246,11 +246,12 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::arrow::array_reader::ArrayReaderBuilder;
     use crate::arrow::array_reader::list_array::ListArrayReader;
     use crate::arrow::array_reader::test_util::InMemoryArrayReader;
-    use crate::arrow::array_reader::ArrayReaderBuilder;
+    use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
     use crate::arrow::schema::parquet_to_arrow_schema_and_fields;
-    use crate::arrow::{parquet_to_arrow_schema, ArrowWriter, ProjectionMask};
+    use crate::arrow::{ArrowWriter, ProjectionMask, parquet_to_arrow_schema};
     use crate::file::properties::WriterProperties;
     use crate::file::reader::{FileReader, SerializedFileReader};
     use crate::schema::parser::parse_message_type;
@@ -560,10 +561,12 @@ mod tests {
             schema,
             ProjectionMask::all(),
             file_metadata.key_value_metadata(),
+            &[],
         )
         .unwrap();
 
-        let mut array_reader = ArrayReaderBuilder::new(&file_reader)
+        let metrics = ArrowReaderMetrics::disabled();
+        let mut array_reader = ArrayReaderBuilder::new(&file_reader, &metrics)
             .build_array_reader(fields.as_ref(), &mask)
             .unwrap();
 
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index 4bdec602ba4f..1639aca6293f 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -29,6 +29,7 @@ pub struct MapArrayReader {
 }
 
 impl MapArrayReader {
+    #[allow(rustdoc::private_intra_doc_links)]
     /// Creates a new [`MapArrayReader`] with a `def_level`, `rep_level` and `nullable`
     /// as defined on [`ParquetField`][crate::arrow::schema::ParquetField]
     pub fn new(
@@ -123,12 +124,12 @@ impl ArrayReader for MapArrayReader {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use crate::arrow::ArrowWriter;
+    use crate::arrow::arrow_reader::ParquetRecordBatchReader;
     use arrow::datatypes::{Field, Int32Type, Schema};
+    use arrow_array::RecordBatch;
     use arrow_array::builder::{MapBuilder, PrimitiveBuilder, StringBuilder};
     use arrow_array::cast::*;
-    use arrow_array::RecordBatch;
     use arrow_schema::Fields;
     use bytes::Bytes;
 
diff --git a/parquet/src/arrow/array_reader/mod.rs b/parquet/src/arrow/array_reader/mod.rs
index 94d61c9eacf5..54be89f23084 100644
--- a/parquet/src/arrow/array_reader/mod.rs
+++ b/parquet/src/arrow/array_reader/mod.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Logic for reading into arrow arrays
+//! Logic for reading into arrow arrays: [`ArrayReader`] and [`RowGroups`]
 
 use crate::errors::Result;
 use arrow_array::ArrayRef;
@@ -23,16 +23,18 @@ use arrow_schema::DataType as ArrowType;
 use std::any::Any;
 use std::sync::Arc;
 
-use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::arrow::record_reader::GenericRecordReader;
+use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::column::page::PageIterator;
 use crate::column::reader::decoder::ColumnValueDecoder;
+use crate::file::metadata::ParquetMetaData;
 use crate::file::reader::{FilePageIterator, FileReader};
 
 mod builder;
 mod byte_array;
 mod byte_array_dictionary;
 mod byte_view_array;
+mod cached_array_reader;
 mod empty_array;
 mod fixed_len_byte_array;
 mod fixed_size_list_array;
@@ -40,12 +42,16 @@ mod list_array;
 mod map_array;
 mod null_array;
 mod primitive_array;
+mod row_group_cache;
+mod row_number;
 mod struct_array;
 
 #[cfg(test)]
 mod test_util;
 
-pub(crate) use builder::ArrayReaderBuilder;
+// Note that this crate is public under the `experimental` feature flag.
+use crate::file::metadata::RowGroupMetaData;
+pub use builder::{ArrayReaderBuilder, CacheOptions, CacheOptionsBuilder};
 pub use byte_array::make_byte_array_reader;
 pub use byte_array_dictionary::make_byte_array_dictionary_reader;
 #[allow(unused_imports)] // Only used for benchmarks
@@ -57,9 +63,25 @@ pub use list_array::ListArrayReader;
 pub use map_array::MapArrayReader;
 pub use null_array::NullArrayReader;
 pub use primitive_array::PrimitiveArrayReader;
+pub use row_group_cache::RowGroupCache;
 pub use struct_array::StructArrayReader;
 
-/// Array reader reads parquet data into arrow array.
+/// Reads Parquet data into Arrow Arrays.
+///
+/// This is an internal implementation detail of the Parquet reader, and is not
+/// intended for public use.
+///
+/// This is the core trait for reading encoded Parquet data directly into Arrow
+/// Arrays efficiently. There are various specializations of this trait for
+/// different combinations of encodings and arrays, such as
+/// [`PrimitiveArrayReader`], [`ListArrayReader`], etc.
+///
+/// Each `ArrayReader` logically contains the following state
+/// 1. A handle to the encoded Parquet data
+/// 2. An in progress buffered Array
+///
+/// Data can either be read in batches using [`ArrayReader::next_batch`] or
+/// incrementally using [`ArrayReader::read_records`] and [`ArrayReader::skip_records`].
 pub trait ArrayReader: Send {
     // TODO: this function is never used, and the trait is not public. Perhaps this should be
     // removed.
@@ -87,6 +109,12 @@ pub trait ArrayReader: Send {
     fn consume_batch(&mut self) -> Result<ArrayRef>;
 
     /// Skips over `num_records` records, returning the number of rows skipped
+    ///
+    /// Note that calling `skip_records` with large values of `num_records` is
+    /// efficient as it avoids decoding data into the in-progress array.
+    /// However, there is overhead to calling this function, so for small values of
+    /// `num_records`, it can be more efficient to call `read_records` and apply
+    /// a filter to the resulting array.
     fn skip_records(&mut self, num_records: usize) -> Result<usize>;
 
     /// If this array has a non-zero definition level, i.e. has a nullable parent
@@ -106,7 +134,7 @@ pub trait ArrayReader: Send {
     fn get_rep_levels(&self) -> Option<&[i16]>;
 }
 
-/// A collection of row groups
+/// Interface for reading data pages from the columns of one or more RowGroups.
 pub trait RowGroups {
     /// Get the number of rows in this collection
     fn num_rows(&self) -> usize;
@@ -114,17 +142,35 @@ pub trait RowGroups {
     /// Returns a [`PageIterator`] for all pages in the specified column chunk
     /// across all row groups in this collection.
     fn column_chunks(&self, i: usize) -> Result<Box<dyn PageIterator>>;
+
+    /// Returns an iterator over the row groups in this collection
+    ///
+    /// Note this may not include all row groups in [`Self::metadata`].
+    fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_>;
+
+    /// Returns the parquet metadata
+    fn metadata(&self) -> &ParquetMetaData;
 }
 
 impl RowGroups for Arc<dyn FileReader> {
     fn num_rows(&self) -> usize {
-        self.metadata().file_metadata().num_rows() as usize
+        FileReader::metadata(self.as_ref())
+            .file_metadata()
+            .num_rows() as usize
     }
 
     fn column_chunks(&self, column_index: usize) -> Result<Box<dyn PageIterator>> {
         let iterator = FilePageIterator::new(column_index, Arc::clone(self))?;
         Ok(Box::new(iterator))
     }
+
+    fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
+        Box::new(FileReader::metadata(self.as_ref()).row_groups().iter())
+    }
+
+    fn metadata(&self) -> &ParquetMetaData {
+        FileReader::metadata(self.as_ref())
+    }
 }
 
 /// Uses `record_reader` to read up to `batch_size` records from `pages`
diff --git a/parquet/src/arrow/array_reader/null_array.rs b/parquet/src/arrow/array_reader/null_array.rs
index 838db854e05f..4ddd1df86442 100644
--- a/parquet/src/arrow/array_reader/null_array.rs
+++ b/parquet/src/arrow/array_reader/null_array.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::record_reader::RecordReader;
 use crate::column::page::PageIterator;
 use crate::data_type::DataType;
diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 76b1e1cad52d..dae42c4c7124 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -15,26 +15,25 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::arrow::array_reader::{read_records, skip_records, ArrayReader};
+use crate::arrow::array_reader::{ArrayReader, read_records, skip_records};
 use crate::arrow::record_reader::RecordReader;
 use crate::arrow::schema::parquet_to_arrow_field;
 use crate::basic::Type as PhysicalType;
 use crate::column::page::PageIterator;
 use crate::data_type::{DataType, Int96};
-use crate::errors::{ParquetError, Result};
+use crate::errors::Result;
 use crate::schema::types::ColumnDescPtr;
 use arrow_array::{
-    builder::{
-        TimestampMicrosecondBufferBuilder, TimestampMillisecondBufferBuilder,
-        TimestampNanosecondBufferBuilder, TimestampSecondBufferBuilder,
-    },
-    ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Float32Array, Float64Array,
-    Int16Array, Int32Array, Int64Array, Int8Array, TimestampMicrosecondArray,
-    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array,
-    UInt32Array, UInt64Array, UInt8Array,
+    Array, ArrayRef, BooleanArray, Date64Array, Decimal64Array, Decimal128Array, Decimal256Array,
+    Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, PrimitiveArray,
+    UInt8Array, UInt16Array, builder::PrimitiveDictionaryBuilder, cast::AsArray, downcast_integer,
+    types::*,
 };
-use arrow_buffer::{i256, BooleanBuffer, Buffer};
-use arrow_data::ArrayDataBuilder;
+use arrow_array::{
+    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
+    TimestampSecondArray, UInt32Array, UInt64Array,
+};
+use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, ScalarBuffer, i256};
 use arrow_schema::{DataType as ArrowType, TimeUnit};
 use std::any::Any;
 use std::sync::Arc;
@@ -63,37 +62,23 @@ impl IntoBuffer for Vec<bool> {
 
 impl IntoBuffer for Vec<Int96> {
     fn into_buffer(self, target_type: &ArrowType) -> Buffer {
+        let mut builder = Vec::with_capacity(self.len());
         match target_type {
             ArrowType::Timestamp(TimeUnit::Second, _) => {
-                let mut builder = TimestampSecondBufferBuilder::new(self.len());
-                for v in self {
-                    builder.append(v.to_seconds())
-                }
-                builder.finish()
+                builder.extend(self.iter().map(|x| x.to_seconds()));
             }
             ArrowType::Timestamp(TimeUnit::Millisecond, _) => {
-                let mut builder = TimestampMillisecondBufferBuilder::new(self.len());
-                for v in self {
-                    builder.append(v.to_millis())
-                }
-                builder.finish()
+                builder.extend(self.iter().map(|x| x.to_millis()));
             }
             ArrowType::Timestamp(TimeUnit::Microsecond, _) => {
-                let mut builder = TimestampMicrosecondBufferBuilder::new(self.len());
-                for v in self {
-                    builder.append(v.to_micros())
-                }
-                builder.finish()
+                builder.extend(self.iter().map(|x| x.to_micros()));
             }
             ArrowType::Timestamp(TimeUnit::Nanosecond, _) => {
-                let mut builder = TimestampNanosecondBufferBuilder::new(self.len());
-                for v in self {
-                    builder.append(v.to_nanos())
-                }
-                builder.finish()
+                builder.extend(self.iter().map(|x| x.to_nanos()));
             }
             _ => unreachable!("Invalid target_type for Int96."),
         }
+        Buffer::from_vec(builder)
     }
 }
 
@@ -166,255 +151,52 @@ where
 
     fn consume_batch(&mut self) -> Result<ArrayRef> {
         let target_type = &self.data_type;
-        let arrow_data_type = match T::get_physical_type() {
-            PhysicalType::BOOLEAN => ArrowType::Boolean,
-            PhysicalType::INT32 => {
-                match target_type {
-                    ArrowType::UInt32 => {
-                        // follow C++ implementation and use overflow/reinterpret cast from  i32 to u32 which will map
-                        // `i32::MIN..0` to `(i32::MAX as u32)..u32::MAX`
-                        ArrowType::UInt32
-                    }
-                    _ => ArrowType::Int32,
-                }
-            }
-            PhysicalType::INT64 => {
-                match target_type {
-                    ArrowType::UInt64 => {
-                        // follow C++ implementation and use overflow/reinterpret cast from  i64 to u64 which will map
-                        // `i64::MIN..0` to `(i64::MAX as u64)..u64::MAX`
-                        ArrowType::UInt64
-                    }
-                    _ => ArrowType::Int64,
-                }
-            }
-            PhysicalType::FLOAT => ArrowType::Float32,
-            PhysicalType::DOUBLE => ArrowType::Float64,
-            PhysicalType::INT96 => match target_type {
-                ArrowType::Timestamp(TimeUnit::Second, _) => target_type.clone(),
-                ArrowType::Timestamp(TimeUnit::Millisecond, _) => target_type.clone(),
-                ArrowType::Timestamp(TimeUnit::Microsecond, _) => target_type.clone(),
-                ArrowType::Timestamp(TimeUnit::Nanosecond, _) => target_type.clone(),
-                _ => unreachable!("INT96 must be a timestamp."),
-            },
-            PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
-                unreachable!("PrimitiveArrayReaders don't support complex physical types");
-            }
-        };
-
-        // Convert to arrays by using the Parquet physical type.
-        // The physical types are then cast to Arrow types if necessary
 
+        // Convert physical data to equivalent arrow type, and then perform
+        // coercion as needed
         let record_data = self
             .record_reader
             .consume_record_data()
             .into_buffer(target_type);
 
-        let array_data = ArrayDataBuilder::new(arrow_data_type)
-            .len(self.record_reader.num_values())
-            .add_buffer(record_data)
-            .null_bit_buffer(self.record_reader.consume_bitmap_buffer());
+        let len = self.record_reader.num_values();
+        let nulls = self
+            .record_reader
+            .consume_bitmap_buffer()
+            .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len)));
 
-        let array_data = unsafe { array_data.build_unchecked() };
         let array: ArrayRef = match T::get_physical_type() {
-            PhysicalType::BOOLEAN => Arc::new(BooleanArray::from(array_data)),
-            PhysicalType::INT32 => match array_data.data_type() {
-                ArrowType::UInt32 => Arc::new(UInt32Array::from(array_data)),
-                ArrowType::Int32 => Arc::new(Int32Array::from(array_data)),
-                _ => unreachable!(),
-            },
-            PhysicalType::INT64 => match array_data.data_type() {
-                ArrowType::UInt64 => Arc::new(UInt64Array::from(array_data)),
-                ArrowType::Int64 => Arc::new(Int64Array::from(array_data)),
-                _ => unreachable!(),
-            },
-            PhysicalType::FLOAT => Arc::new(Float32Array::from(array_data)),
-            PhysicalType::DOUBLE => Arc::new(Float64Array::from(array_data)),
-            PhysicalType::INT96 => match target_type {
-                ArrowType::Timestamp(TimeUnit::Second, _) => {
-                    Arc::new(TimestampSecondArray::from(array_data))
-                }
-                ArrowType::Timestamp(TimeUnit::Millisecond, _) => {
-                    Arc::new(TimestampMillisecondArray::from(array_data))
-                }
-                ArrowType::Timestamp(TimeUnit::Microsecond, _) => {
-                    Arc::new(TimestampMicrosecondArray::from(array_data))
-                }
-                ArrowType::Timestamp(TimeUnit::Nanosecond, _) => {
-                    Arc::new(TimestampNanosecondArray::from(array_data))
-                }
-                _ => unreachable!("INT96 must be a timestamp."),
-            },
-
+            PhysicalType::BOOLEAN => Arc::new(BooleanArray::new(
+                BooleanBuffer::new(record_data, 0, len),
+                nulls,
+            )),
+            PhysicalType::INT32 => Arc::new(Int32Array::new(
+                ScalarBuffer::new(record_data, 0, len),
+                nulls,
+            )),
+            PhysicalType::INT64 => Arc::new(Int64Array::new(
+                ScalarBuffer::new(record_data, 0, len),
+                nulls,
+            )),
+            PhysicalType::FLOAT => Arc::new(Float32Array::new(
+                ScalarBuffer::new(record_data, 0, len),
+                nulls,
+            )),
+            PhysicalType::DOUBLE => Arc::new(Float64Array::new(
+                ScalarBuffer::new(record_data, 0, len),
+                nulls,
+            )),
+            PhysicalType::INT96 => Arc::new(Int64Array::new(
+                ScalarBuffer::new(record_data, 0, len),
+                nulls,
+            )),
             PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
                 unreachable!("PrimitiveArrayReaders don't support complex physical types");
             }
         };
 
-        // cast to Arrow type
-        // We make a strong assumption here that the casts should be infallible.
-        // If the cast fails because of incompatible datatypes, then there might
-        // be a bigger problem with how Arrow schemas are converted to Parquet.
-        //
-        // As there is not always a 1:1 mapping between Arrow and Parquet, there
-        // are datatypes which we must convert explicitly.
-        // These are:
-        // - date64: cast int32 to date32, then date32 to date64.
-        // - decimal: cast int32 to decimal, int64 to decimal
-        let array = match target_type {
-            // Using `arrow_cast::cast` has been found to be very slow for converting
-            // INT32 physical type to lower bitwidth logical types. Since rust casts
-            // are infallible, instead use `unary` which is much faster (by up to 40%).
-            // One consequence of this approach is that some malformed integer columns
-            // will return (an arguably correct) result rather than null.
-            // See https://github.com/apache/arrow-rs/issues/7040 for a discussion of this
-            // issue.
-            ArrowType::UInt8 if *(array.data_type()) == ArrowType::Int32 => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<Int32Array>()
-                    .unwrap()
-                    .unary(|i| i as u8) as UInt8Array;
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::Int8 if *(array.data_type()) == ArrowType::Int32 => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<Int32Array>()
-                    .unwrap()
-                    .unary(|i| i as i8) as Int8Array;
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::UInt16 if *(array.data_type()) == ArrowType::Int32 => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<Int32Array>()
-                    .unwrap()
-                    .unary(|i| i as u16) as UInt16Array;
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::Int16 if *(array.data_type()) == ArrowType::Int32 => {
-                let array = array
-                    .as_any()
-                    .downcast_ref::<Int32Array>()
-                    .unwrap()
-                    .unary(|i| i as i16) as Int16Array;
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::Date64 if *(array.data_type()) == ArrowType::Int32 => {
-                // this is cheap as it internally reinterprets the data
-                let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
-                arrow_cast::cast(&a, target_type)?
-            }
-            ArrowType::Decimal128(p, s) => {
-                // Apply conversion to all elements regardless of null slots as the conversion
-                // to `i128` is infallible. This improves performance by avoiding a branch in
-                // the inner loop (see docs for `PrimitiveArray::unary`).
-                let array = match array.data_type() {
-                    ArrowType::Int32 => array
-                        .as_any()
-                        .downcast_ref::<Int32Array>()
-                        .unwrap()
-                        .unary(|i| i as i128)
-                        as Decimal128Array,
-                    ArrowType::Int64 => array
-                        .as_any()
-                        .downcast_ref::<Int64Array>()
-                        .unwrap()
-                        .unary(|i| i as i128)
-                        as Decimal128Array,
-                    _ => {
-                        return Err(arrow_err!(
-                            "Cannot convert {:?} to decimal",
-                            array.data_type()
-                        ));
-                    }
-                }
-                .with_precision_and_scale(*p, *s)?;
-
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::Decimal256(p, s) => {
-                // See above comment. Conversion to `i256` is likewise infallible.
-                let array = match array.data_type() {
-                    ArrowType::Int32 => array
-                        .as_any()
-                        .downcast_ref::<Int32Array>()
-                        .unwrap()
-                        .unary(|i| i256::from_i128(i as i128))
-                        as Decimal256Array,
-                    ArrowType::Int64 => array
-                        .as_any()
-                        .downcast_ref::<Int64Array>()
-                        .unwrap()
-                        .unary(|i| i256::from_i128(i as i128))
-                        as Decimal256Array,
-                    _ => {
-                        return Err(arrow_err!(
-                            "Cannot convert {:?} to decimal",
-                            array.data_type()
-                        ));
-                    }
-                }
-                .with_precision_and_scale(*p, *s)?;
-
-                Arc::new(array) as ArrayRef
-            }
-            ArrowType::Dictionary(_, value_type) => match value_type.as_ref() {
-                ArrowType::Decimal128(p, s) => {
-                    let array = match array.data_type() {
-                        ArrowType::Int32 => array
-                            .as_any()
-                            .downcast_ref::<Int32Array>()
-                            .unwrap()
-                            .unary(|i| i as i128)
-                            as Decimal128Array,
-                        ArrowType::Int64 => array
-                            .as_any()
-                            .downcast_ref::<Int64Array>()
-                            .unwrap()
-                            .unary(|i| i as i128)
-                            as Decimal128Array,
-                        _ => {
-                            return Err(arrow_err!(
-                                "Cannot convert {:?} to decimal dictionary",
-                                array.data_type()
-                            ));
-                        }
-                    }
-                    .with_precision_and_scale(*p, *s)?;
-
-                    arrow_cast::cast(&array, target_type)?
-                }
-                ArrowType::Decimal256(p, s) => {
-                    let array = match array.data_type() {
-                        ArrowType::Int32 => array
-                            .as_any()
-                            .downcast_ref::<Int32Array>()
-                            .unwrap()
-                            .unary(i256::from)
-                            as Decimal256Array,
-                        ArrowType::Int64 => array
-                            .as_any()
-                            .downcast_ref::<Int64Array>()
-                            .unwrap()
-                            .unary(i256::from)
-                            as Decimal256Array,
-                        _ => {
-                            return Err(arrow_err!(
-                                "Cannot convert {:?} to decimal dictionary",
-                                array.data_type()
-                            ));
-                        }
-                    }
-                    .with_precision_and_scale(*p, *s)?;
-
-                    arrow_cast::cast(&array, target_type)?
-                }
-                _ => arrow_cast::cast(&array, target_type)?,
-            },
-            _ => arrow_cast::cast(&array, target_type)?,
-        };
+        // Coerce the arrow type to the desired array type
+        let array = coerce_array(array, target_type)?;
 
         // save definition and repetition buffers
         self.def_levels_buffer = self.record_reader.consume_def_levels();
@@ -436,6 +218,220 @@ where
     }
 }
 
+/// Coerce the parquet physical type array to the target type
+///
+/// This should match the logic in schema::primitive::apply_hint
+fn coerce_array(array: ArrayRef, target_type: &ArrowType) -> Result<ArrayRef> {
+    if let ArrowType::Dictionary(key_type, value_type) = target_type {
+        let dictionary = pack_dictionary(key_type, array.as_ref())?;
+        let any_dictionary = dictionary.as_any_dictionary();
+
+        let coerced_values =
+            coerce_array(Arc::clone(any_dictionary.values()), value_type.as_ref())?;
+
+        return Ok(any_dictionary.with_values(coerced_values));
+    }
+
+    match array.data_type() {
+        ArrowType::Int32 => coerce_i32(array.as_primitive(), target_type),
+        ArrowType::Int64 => coerce_i64(array.as_primitive(), target_type),
+        ArrowType::Boolean | ArrowType::Float32 | ArrowType::Float64 => Ok(array),
+        _ => unreachable!("Cannot coerce array of type {}", array.data_type()),
+    }
+}
+
+fn coerce_i32(array: &Int32Array, target_type: &ArrowType) -> Result<ArrayRef> {
+    Ok(match target_type {
+        ArrowType::UInt8 => {
+            let array = array.unary(|i| i as u8) as UInt8Array;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Int8 => {
+            let array = array.unary(|i| i as i8) as Int8Array;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::UInt16 => {
+            let array = array.unary(|i| i as u16) as UInt16Array;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Int16 => {
+            let array = array.unary(|i| i as i16) as Int16Array;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Int32 => Arc::new(array.clone()),
+        // follow C++ implementation and use overflow/reinterpret cast from i32 to u32 which will map
+        // `i32::MIN..0` to `(i32::MAX as u32)..u32::MAX`
+        ArrowType::UInt32 => Arc::new(UInt32Array::new(
+            array.values().inner().clone().into(),
+            array.nulls().cloned(),
+        )) as ArrayRef,
+        ArrowType::Date32 => Arc::new(array.reinterpret_cast::<Date32Type>()) as _,
+        ArrowType::Date64 => {
+            let array: Date64Array = array.unary(|x| x as i64 * 86_400_000);
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Time32(TimeUnit::Second) => {
+            Arc::new(array.reinterpret_cast::<Time32SecondType>()) as ArrayRef
+        }
+        ArrowType::Time32(TimeUnit::Millisecond) => {
+            Arc::new(array.reinterpret_cast::<Time32MillisecondType>()) as ArrayRef
+        }
+        ArrowType::Timestamp(time_unit, timezone) => match time_unit {
+            TimeUnit::Second => {
+                let array: TimestampSecondArray = array
+                    .unary(|x| x as i64)
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Millisecond => {
+                let array: TimestampMillisecondArray = array
+                    .unary(|x| x as i64)
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Microsecond => {
+                let array: TimestampMicrosecondArray = array
+                    .unary(|x| x as i64)
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Nanosecond => {
+                let array: TimestampNanosecondArray = array
+                    .unary(|x| x as i64)
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+        },
+        ArrowType::Decimal32(p, s) => {
+            let array = array
+                .reinterpret_cast::<Decimal32Type>()
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Decimal64(p, s) => {
+            let array: Decimal64Array =
+                array.unary(|i| i as i64).with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Decimal128(p, s) => {
+            let array: Decimal128Array = array
+                .unary(|i| i as i128)
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as ArrayRef
+        }
+        ArrowType::Decimal256(p, s) => {
+            let array: Decimal256Array = array
+                .unary(|i| i256::from_i128(i as i128))
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as ArrayRef
+        }
+        _ => unreachable!("Cannot coerce i32 to {target_type}"),
+    })
+}
+
+fn coerce_i64(array: &Int64Array, target_type: &ArrowType) -> Result<ArrayRef> {
+    Ok(match target_type {
+        ArrowType::Int64 => Arc::new(array.clone()) as _,
+        // follow C++ implementation and use overflow/reinterpret cast from i64 to u64 which will map
+        // `i64::MIN..0` to `(i64::MAX as u64)..u64::MAX`
+        ArrowType::UInt64 => Arc::new(UInt64Array::new(
+            array.values().inner().clone().into(),
+            array.nulls().cloned(),
+        )) as ArrayRef,
+        ArrowType::Date64 => Arc::new(array.reinterpret_cast::<Date64Type>()) as _,
+        ArrowType::Time64(TimeUnit::Microsecond) => {
+            Arc::new(array.reinterpret_cast::<Time64MicrosecondType>()) as _
+        }
+        ArrowType::Time64(TimeUnit::Nanosecond) => {
+            Arc::new(array.reinterpret_cast::<Time64NanosecondType>()) as _
+        }
+        ArrowType::Duration(unit) => match unit {
+            TimeUnit::Second => Arc::new(array.reinterpret_cast::<DurationSecondType>()) as _,
+            TimeUnit::Millisecond => {
+                Arc::new(array.reinterpret_cast::<DurationMillisecondType>()) as _
+            }
+            TimeUnit::Microsecond => {
+                Arc::new(array.reinterpret_cast::<DurationMicrosecondType>()) as _
+            }
+            TimeUnit::Nanosecond => {
+                Arc::new(array.reinterpret_cast::<DurationNanosecondType>()) as _
+            }
+        },
+        ArrowType::Timestamp(time_unit, timezone) => match time_unit {
+            TimeUnit::Second => {
+                let array = array
+                    .reinterpret_cast::<TimestampSecondType>()
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Millisecond => {
+                let array = array
+                    .reinterpret_cast::<TimestampMillisecondType>()
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Microsecond => {
+                let array = array
+                    .reinterpret_cast::<TimestampMicrosecondType>()
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+            TimeUnit::Nanosecond => {
+                let array = array
+                    .reinterpret_cast::<TimestampNanosecondType>()
+                    .with_timezone_opt(timezone.clone());
+                Arc::new(array) as _
+            }
+        },
+        ArrowType::Decimal64(p, s) => {
+            let array = array
+                .reinterpret_cast::<Decimal64Type>()
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as _
+        }
+        ArrowType::Decimal128(p, s) => {
+            let array: Decimal128Array = array
+                .unary(|i| i as i128)
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as _
+        }
+        ArrowType::Decimal256(p, s) => {
+            let array: Decimal256Array = array
+                .unary(|i| i256::from_i128(i as i128))
+                .with_precision_and_scale(*p, *s)?;
+            Arc::new(array) as _
+        }
+        _ => unreachable!("Cannot coerce i64 to {target_type}"),
+    })
+}
+
+macro_rules! pack_dictionary_helper {
+    ($t:ty, $values:ident) => {
+        match $values.data_type() {
+            ArrowType::Int32 => pack_dictionary_impl::<$t, Int32Type>($values.as_primitive()),
+            ArrowType::Int64 => pack_dictionary_impl::<$t, Int64Type>($values.as_primitive()),
+            ArrowType::Float32 => pack_dictionary_impl::<$t, Float32Type>($values.as_primitive()),
+            ArrowType::Float64 => pack_dictionary_impl::<$t, Float64Type>($values.as_primitive()),
+            _ => unreachable!("Invalid physical type"),
+        }
+    };
+}
+
+fn pack_dictionary(key: &ArrowType, values: &dyn Array) -> Result<ArrayRef> {
+    downcast_integer! {
+        key => (pack_dictionary_helper, values),
+        _ => unreachable!("Invalid key type"),
+    }
+}
+
+fn pack_dictionary_impl<K: ArrowDictionaryKeyType, V: ArrowPrimitiveType>(
+    values: &PrimitiveArray<V>,
+) -> Result<ArrayRef> {
+    let mut builder = PrimitiveDictionaryBuilder::<K, V>::with_capacity(1024, values.len());
+    builder.extend(values);
+    Ok(Arc::new(builder.finish()))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -445,8 +441,8 @@ mod tests {
     use crate::data_type::{Int32Type, Int64Type};
     use crate::schema::parser::parse_message_type;
     use crate::schema::types::SchemaDescriptor;
-    use crate::util::test_common::rand_gen::make_pages;
     use crate::util::InMemoryPageIterator;
+    use crate::util::test_common::rand_gen::make_pages;
     use arrow::datatypes::ArrowPrimitiveType;
     use arrow_array::{Array, Date32Array, PrimitiveArray};
 
diff --git a/parquet/src/arrow/array_reader/row_group_cache.rs b/parquet/src/arrow/array_reader/row_group_cache.rs
new file mode 100644
index 000000000000..ef726e16495f
--- /dev/null
+++ b/parquet/src/arrow/array_reader/row_group_cache.rs
@@ -0,0 +1,206 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{Array, ArrayRef};
+use arrow_schema::DataType;
+use std::collections::HashMap;
+
+/// Identifies a batch of rows within a row group.
+///
+/// Combined with a column index in [`CacheKey`] to address cached arrays.
+///
+/// The row index used to derive the id is relative to the rows being read from
+/// the underlying column reader (which might already have a RowSelection applied).
+///
+/// The `BatchID` for any particular row is `row_index / batch_size`. The
+/// integer division ensures that rows in the same batch share the same
+/// `BatchID`, which can be calculated quickly from the row index.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct BatchID {
+    pub val: usize,
+}
+
+/// Cache key that uniquely identifies one column's batch within a row group
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct CacheKey {
+    /// Column index in the row group
+    pub column_idx: usize,
+    /// Identifier of the batch for that column (see [`BatchID`])
+    pub batch_id: BatchID,
+}
+
+fn get_array_memory_size_for_cache(array: &ArrayRef) -> usize {
+    match array.data_type() {
+        // TODO: temporary workaround. A StringViewArray's buffers are shared with other
+        // StringViewArrays, so estimate: 16 bytes per element + used buffer bytes + struct size.
+        DataType::Utf8View => {
+            use arrow_array::cast::AsArray;
+            let array = array.as_string_view();
+            array.len() * 16 + array.total_buffer_bytes_used() + std::mem::size_of_val(array)
+        }
+        _ => array.get_array_memory_size(),
+    }
+}
+
+/// Row group cache that stores decoded arrow arrays at batch granularity
+///
+/// This cache is designed to avoid duplicate decoding when the same column
+/// appears in both filter predicates and output projection.
+#[derive(Debug)]
+pub struct RowGroupCache {
+    /// Cache storage mapping (column_idx, batch_id) -> ArrayRef
+    cache: HashMap<CacheKey, ArrayRef>,
+    /// Cache granularity
+    batch_size: usize,
+    /// Maximum cache size in bytes
+    max_cache_bytes: usize,
+    /// Current cache size in bytes (sum of the size estimates of all cached arrays)
+    current_cache_size: usize,
+}
+
+impl RowGroupCache {
+    /// Creates a new empty row group cache
+    pub fn new(batch_size: usize, max_cache_bytes: usize) -> Self {
+        Self {
+            cache: HashMap::new(),
+            batch_size,
+            max_cache_bytes,
+            current_cache_size: 0,
+        }
+    }
+
+    /// Inserts an array for the given column and batch. Panics if that key is already cached.
+    /// Returns true if the array was inserted, false if it would exceed the cache size limit
+    pub fn insert(&mut self, column_idx: usize, batch_id: BatchID, array: ArrayRef) -> bool {
+        let array_size = get_array_memory_size_for_cache(&array);
+        // Check the size limit; `checked_add` treats an overflowing total as over the limit
+        let new_size = self.current_cache_size.checked_add(array_size);
+        let Some(new_size) = new_size.filter(|size| *size <= self.max_cache_bytes) else {
+            return false; // Cache is full, don't insert
+        };
+
+        let key = CacheKey {
+            column_idx,
+            batch_id,
+        };
+
+        let existing = self.cache.insert(key, array);
+        assert!(existing.is_none());
+        self.current_cache_size = new_size;
+        true
+    }
+
+    /// Retrieves a cached array for the given column and batch ID
+    /// Returns None if not found in cache
+    pub fn get(&self, column_idx: usize, batch_id: BatchID) -> Option<ArrayRef> {
+        let key = CacheKey {
+            column_idx,
+            batch_id,
+        };
+        self.cache.get(&key).cloned()
+    }
+
+    /// Gets the batch size for this cache
+    pub fn batch_size(&self) -> usize {
+        self.batch_size
+    }
+
+    /// Removes a cached array for the given column and batch ID
+    /// Returns true if the entry was found and removed, false otherwise
+    pub fn remove(&mut self, column_idx: usize, batch_id: BatchID) -> bool {
+        let key = CacheKey {
+            column_idx,
+            batch_id,
+        };
+        if let Some(array) = self.cache.remove(&key) {
+            self.current_cache_size -= get_array_memory_size_for_cache(&array);
+            true
+        } else {
+            false
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{ArrayRef, Int32Array};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_cache_basic_operations() {
+        let mut cache = RowGroupCache::new(1000, usize::MAX);
+
+        // Create test array
+        let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
+
+        // Test insert and get
+        let batch_id = BatchID { val: 0 };
+        assert!(cache.insert(0, batch_id, array.clone()));
+        let retrieved = cache.get(0, batch_id);
+        assert!(retrieved.is_some());
+        assert_eq!(retrieved.unwrap().len(), 5);
+
+        // Test miss: same batch id, different column
+        let miss = cache.get(1, batch_id);
+        assert!(miss.is_none());
+
+        // Test miss: same column, different batch id
+        let miss = cache.get(0, BatchID { val: 1000 });
+        assert!(miss.is_none());
+    }
+
+    #[test]
+    fn test_cache_remove() {
+        let mut cache = RowGroupCache::new(1000, usize::MAX);
+
+        // Create test arrays
+        let array1: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
+        let array2: ArrayRef = Arc::new(Int32Array::from(vec![4, 5, 6]));
+
+        // Insert arrays
+        assert!(cache.insert(0, BatchID { val: 0 }, array1.clone()));
+        assert!(cache.insert(0, BatchID { val: 1000 }, array2.clone()));
+        assert!(cache.insert(1, BatchID { val: 0 }, array1.clone()));
+
+        // Verify they're there
+        assert!(cache.get(0, BatchID { val: 0 }).is_some());
+        assert!(cache.get(0, BatchID { val: 1000 }).is_some());
+        assert!(cache.get(1, BatchID { val: 0 }).is_some());
+
+        // Remove one entry
+        let removed = cache.remove(0, BatchID { val: 0 });
+        assert!(removed);
+        assert!(cache.get(0, BatchID { val: 0 }).is_none());
+
+        // Other entries should still be there
+        assert!(cache.get(0, BatchID { val: 1000 }).is_some());
+        assert!(cache.get(1, BatchID { val: 0 }).is_some());
+
+        // Try to remove non-existent entry
+        let not_removed = cache.remove(0, BatchID { val: 0 });
+        assert!(!not_removed);
+
+        // Remove remaining entries
+        assert!(cache.remove(0, BatchID { val: 1000 }));
+        assert!(cache.remove(1, BatchID { val: 0 }));
+
+        // Cache should be empty
+        assert!(cache.get(0, BatchID { val: 1000 }).is_none());
+        assert!(cache.get(1, BatchID { val: 0 }).is_none());
+    }
+}
diff --git a/parquet/src/arrow/array_reader/row_number.rs b/parquet/src/arrow/array_reader/row_number.rs
new file mode 100644
index 000000000000..f9e60a2c0d34
--- /dev/null
+++ b/parquet/src/arrow/array_reader/row_number.rs
@@ -0,0 +1,206 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::arrow::array_reader::ArrayReader;
+use crate::errors::{ParquetError, Result};
+use crate::file::metadata::{ParquetMetaData, RowGroupMetaData};
+use arrow_array::{ArrayRef, Int64Array};
+use arrow_schema::DataType;
+use std::any::Any;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+/// [`ArrayReader`] that produces the file-level row number (`Int64`) of each selected row.
+pub(crate) struct RowNumberReader {
+    buffered_row_numbers: Vec<i64>,
+    remaining_row_numbers: std::iter::Flatten<std::vec::IntoIter<std::ops::Range<i64>>>,
+}
+
+impl RowNumberReader {
+    /// Errors if a selected row group has no ordinal, or one unknown to `parquet_metadata`.
+    pub(crate) fn try_new<'a>(
+        parquet_metadata: &'a ParquetMetaData,
+        row_groups: impl Iterator<Item = &'a RowGroupMetaData>,
+    ) -> Result<Self> {
+        // Pass 1, O(M) for the M row groups in the file: map ordinal -> first_row_index
+        let mut ordinal_to_offset: HashMap<i16, i64> = HashMap::new();
+        let mut first_row_index: i64 = 0;
+
+        for rg in parquet_metadata.row_groups() {
+            if let Some(ordinal) = rg.ordinal() {
+                ordinal_to_offset.insert(ordinal, first_row_index);
+            }
+            first_row_index += rg.num_rows();
+        }
+
+        // Pass 2, O(N) for the N selected row groups: build ranges in the order given by the
+        // `row_groups` iterator, preserving the user's requested order (no sorting by ordinal)
+        let ranges: Vec<_> = row_groups
+            .map(|rg| {
+                let ordinal = rg.ordinal().ok_or_else(|| {
+                    ParquetError::General(
+                        "Row group missing ordinal field, required to compute row numbers"
+                            .to_string(),
+                    )
+                })?;
+
+                let offset = ordinal_to_offset.get(&ordinal).ok_or_else(|| {
+                    ParquetError::General(format!(
+                        "Row group with ordinal {} not found in metadata",
+                        ordinal
+                    ))
+                })?;
+
+                Ok(*offset..*offset + rg.num_rows())
+            })
+            .collect::<Result<_>>()?;
+
+        Ok(Self {
+            buffered_row_numbers: Vec::new(),
+            remaining_row_numbers: ranges.into_iter().flatten(),
+        })
+    }
+}
+
+impl ArrayReader for RowNumberReader {
+    fn read_records(&mut self, batch_size: usize) -> Result<usize> {
+        let starting_len = self.buffered_row_numbers.len();
+        self.buffered_row_numbers
+            .extend((&mut self.remaining_row_numbers).take(batch_size));
+        Ok(self.buffered_row_numbers.len() - starting_len)
+    }
+
+    fn skip_records(&mut self, num_records: usize) -> Result<usize> {
+        // TODO: Use advance_by when it stabilizes instead of stepping through each skipped row
+        Ok((&mut self.remaining_row_numbers).take(num_records).count())
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn get_data_type(&self) -> &DataType {
+        &DataType::Int64
+    }
+
+    fn consume_batch(&mut self) -> Result<ArrayRef> {
+        // Move the buffered Vec into the array: no copy and no all-valid null bitmap to build
+        let row_numbers = std::mem::take(&mut self.buffered_row_numbers);
+        Ok(Arc::new(Int64Array::from(row_numbers)))
+    }
+
+    fn get_def_levels(&self) -> Option<&[i16]> {
+        None
+    }
+
+    fn get_rep_levels(&self) -> Option<&[i16]> {
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::basic::Type as PhysicalType;
+    use crate::file::metadata::{
+        ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData,
+    };
+    use crate::schema::types::{SchemaDescriptor, Type as SchemaType};
+    use std::sync::Arc;
+
+    fn create_test_schema() -> Arc<SchemaDescriptor> {
+        let schema = SchemaType::group_type_builder("schema")
+            .with_fields(vec![Arc::new(
+                SchemaType::primitive_type_builder("test_col", PhysicalType::INT32)
+                    .build()
+                    .unwrap(),
+            )])
+            .build()
+            .unwrap();
+        Arc::new(SchemaDescriptor::new(Arc::new(schema)))
+    }
+
+    fn create_test_parquet_metadata(row_groups: Vec<(i16, i64)>) -> ParquetMetaData {
+        let schema_descr = create_test_schema();
+
+        let mut row_group_metas = vec![];
+        for (ordinal, num_rows) in row_groups {
+            let columns: Vec<_> = schema_descr
+                .columns()
+                .iter()
+                .map(|col| ColumnChunkMetaData::builder(col.clone()).build().unwrap())
+                .collect();
+
+            let row_group = RowGroupMetaData::builder(schema_descr.clone())
+                .set_num_rows(num_rows)
+                .set_ordinal(ordinal)
+                .set_total_byte_size(100)
+                .set_column_metadata(columns)
+                .build()
+                .unwrap();
+            row_group_metas.push(row_group);
+        }
+
+        let total_rows: i64 = row_group_metas.iter().map(|rg| rg.num_rows()).sum();
+        let file_metadata = FileMetaData::new(
+            1,            // version
+            total_rows,   // num_rows
+            None,         // created_by
+            None,         // key_value_metadata
+            schema_descr, // schema_descr
+            None,         // column_orders
+        );
+
+        ParquetMetaData::new(file_metadata, row_group_metas)
+    }
+
+    #[test]
+    fn test_row_number_reader_reverse_order() {
+        // Create metadata with 3 row groups, each with 2 rows
+        let metadata = create_test_parquet_metadata(vec![
+            (0, 2), // Row group 0: ordinal=0, rows 0-1
+            (1, 2), // Row group 1: ordinal=1, rows 2-3
+            (2, 2), // Row group 2: ordinal=2, rows 4-5
+        ]);
+
+        // Select only row groups with ordinals 2 and 0 (in that order)
+        // This means we want row group 2 first, then row group 0, skipping row group 1
+        let selected_row_groups: Vec<_> = vec![
+            &metadata.row_groups()[2], // ordinal 2
+            &metadata.row_groups()[0], // ordinal 0
+        ];
+
+        let mut reader =
+            RowNumberReader::try_new(&metadata, selected_row_groups.into_iter()).unwrap();
+
+        // Read all row numbers
+        let num_read = reader.read_records(6).unwrap();
+        assert_eq!(num_read, 4); // Should read 4 rows total (2 from each selected group)
+
+        let array = reader.consume_batch().unwrap();
+        let row_numbers = array.as_any().downcast_ref::<Int64Array>().unwrap();
+
+        // Expected: row group 2 first (rows 4-5), then row group 0 (rows 0-1)
+        let expected = vec![4, 5, 0, 1];
+        let actual: Vec<i64> = row_numbers.iter().map(|v| v.unwrap()).collect();
+
+        assert_eq!(
+            actual, expected,
+            "Row numbers should match the order of selected row groups, not file order"
+        );
+    }
+}
diff --git a/parquet/src/arrow/array_reader/struct_array.rs b/parquet/src/arrow/array_reader/struct_array.rs
index fb2f2f8928b9..3c5a5f836bbe 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -17,9 +17,9 @@
 
 use crate::arrow::array_reader::ArrayReader;
 use crate::errors::{ParquetError, Result};
-use arrow_array::{builder::BooleanBufferBuilder, Array, ArrayRef, StructArray};
-use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::DataType as ArrowType;
+use arrow_array::{Array, ArrayRef, StructArray, builder::BooleanBufferBuilder};
+use arrow_buffer::NullBuffer;
+use arrow_schema::{DataType as ArrowType, DataType};
 use std::any::Any;
 use std::sync::Arc;
 
@@ -124,16 +124,15 @@ impl ArrayReader for StructArrayReader {
             return Err(general_err!("Not all children array length are the same!"));
         }
 
-        // Now we can build array data
-        let mut array_data_builder = ArrayDataBuilder::new(self.data_type.clone())
-            .len(children_array_len)
-            .child_data(
-                children_array
-                    .iter()
-                    .map(|x| x.to_data())
-                    .collect::<Vec<ArrayData>>(),
-            );
+        let DataType::Struct(fields) = &self.data_type else {
+            return Err(general_err!(
+                "Internal: StructArrayReader must have struct data type, got {:?}",
+                self.data_type
+            ));
+        };
+        let fields = fields.clone(); // cloning Fields is cheap (Arc internally)
 
+        let mut nulls = None;
         if self.nullable {
             // calculate struct def level data
 
@@ -168,12 +167,19 @@ impl ArrayReader for StructArrayReader {
             if bitmap_builder.len() != children_array_len {
                 return Err(general_err!("Failed to decode level data for struct array"));
             }
-
-            array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
+            nulls = Some(NullBuffer::from(bitmap_builder));
         }
 
-        let array_data = unsafe { array_data_builder.build_unchecked() };
-        Ok(Arc::new(StructArray::from(array_data)))
+        // Safety: checked above that all children array data have same
+        // length and correct type
+        unsafe {
+            Ok(Arc::new(StructArray::new_unchecked_with_length(
+                fields,
+                children_array,
+                nulls,
+                children_array_len,
+            )))
+        }
     }
 
     fn skip_records(&mut self, num_records: usize) -> Result<usize> {
@@ -212,8 +218,8 @@ impl ArrayReader for StructArrayReader {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::arrow::array_reader::test_util::InMemoryArrayReader;
     use crate::arrow::array_reader::ListArrayReader;
+    use crate::arrow::array_reader::test_util::InMemoryArrayReader;
     use arrow::buffer::Buffer;
     use arrow::datatypes::Field;
     use arrow_array::cast::AsArray;
diff --git a/parquet/src/arrow/array_reader/test_util.rs b/parquet/src/arrow/array_reader/test_util.rs
index a7ff8d6e41ba..45af5a2777b3 100644
--- a/parquet/src/arrow/array_reader/test_util.rs
+++ b/parquet/src/arrow/array_reader/test_util.rs
@@ -25,7 +25,7 @@ use crate::arrow::array_reader::ArrayReader;
 use crate::basic::{ConvertedType, Encoding, Type as PhysicalType};
 use crate::column::page::{PageIterator, PageReader};
 use crate::data_type::{ByteArray, ByteArrayType};
-use crate::encodings::encoding::{get_encoder, DictEncoder, Encoder};
+use crate::encodings::encoding::{DictEncoder, Encoder, get_encoder};
 use crate::errors::Result;
 use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type};
 
@@ -109,15 +109,19 @@ impl InMemoryArrayReader {
         def_levels: Option<Vec<i16>>,
         rep_levels: Option<Vec<i16>>,
     ) -> Self {
-        assert!(def_levels
-            .as_ref()
-            .map(|d| d.len() == array.len())
-            .unwrap_or(true));
-
-        assert!(rep_levels
-            .as_ref()
-            .map(|r| r.len() == array.len())
-            .unwrap_or(true));
+        assert!(
+            def_levels
+                .as_ref()
+                .map(|d| d.len() == array.len())
+                .unwrap_or(true)
+        );
+
+        assert!(
+            rep_levels
+                .as_ref()
+                .map(|r| r.len() == array.len())
+                .unwrap_or(true)
+        );
 
         Self {
             data_type,
diff --git a/parquet/src/arrow/arrow_reader/filter.rs b/parquet/src/arrow/arrow_reader/filter.rs
index 3a897c05444b..4fbe45748b88 100644
--- a/parquet/src/arrow/arrow_reader/filter.rs
+++ b/parquet/src/arrow/arrow_reader/filter.rs
@@ -186,4 +186,12 @@ impl RowFilter {
     pub fn new(predicates: Vec<Box<dyn ArrowPredicate>>) -> Self {
         Self { predicates }
     }
+    /// Returns the inner predicates
+    pub fn predicates(&self) -> &Vec<Box<dyn ArrowPredicate>> {
+        &self.predicates
+    }
+    /// Returns the inner predicates, consuming self
+    pub fn into_predicates(self) -> Vec<Box<dyn ArrowPredicate>> {
+        self.predicates
+    }
 }
diff --git a/parquet/src/arrow/arrow_reader/metrics.rs b/parquet/src/arrow/arrow_reader/metrics.rs
new file mode 100644
index 000000000000..b36d79586bb3
--- /dev/null
+++ b/parquet/src/arrow/arrow_reader/metrics.rs
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [ArrowReaderMetrics] for collecting metrics about the Arrow reader
+
+use std::sync::Arc;
+use std::sync::atomic::AtomicUsize;
+
+/// This enum represents the state of Arrow reader metrics collection.
+///
+/// The inner metrics are stored in an `Arc<ArrowReaderMetricsInner>`
+/// so cloning the `ArrowReaderMetrics` enum will not clone the inner metrics.
+///
+/// To access metrics, create an `ArrowReaderMetrics` via [`ArrowReaderMetrics::enabled()`]
+/// and configure the `ArrowReaderBuilder` with a clone.
+#[derive(Debug, Clone)]
+pub enum ArrowReaderMetrics {
+    /// Metrics are not collected (default)
+    Disabled,
+    /// Metrics are collected and stored in an `Arc`.
+    ///
+    /// Create this via [`ArrowReaderMetrics::enabled()`].
+    Enabled(Arc<ArrowReaderMetricsInner>),
+}
+
+impl ArrowReaderMetrics {
+    /// Creates a new instance of [`ArrowReaderMetrics::Disabled`]
+    pub fn disabled() -> Self {
+        Self::Disabled
+    }
+
+    /// Creates a new instance of [`ArrowReaderMetrics::Enabled`]
+    pub fn enabled() -> Self {
+        Self::Enabled(Arc::new(ArrowReaderMetricsInner::new()))
+    }
+
+    /// Predicate Cache: number of records read directly from the inner reader
+    ///
+    /// This is the total number of records read from the inner reader (the one
+    /// that is actually decoding). It measures the amount of work that could
+    /// not be avoided with caching.
+    ///
+    /// It returns the number of records read across all columns, so if you read
+    /// 2 columns each with 100 records, this will return 200.
+    ///
+    /// Returns None if metrics are disabled.
+    pub fn records_read_from_inner(&self) -> Option<usize> {
+        match self {
+            Self::Disabled => None,
+            Self::Enabled(inner) => Some(
+                inner
+                    .records_read_from_inner
+                    .load(std::sync::atomic::Ordering::Relaxed),
+            ),
+        }
+    }
+
+    /// Predicate Cache: number of records read from the cache
+    ///
+    /// This is the total number of records read from the cache (without actually
+    /// decoding). It measures the amount of work that was avoided with caching.
+    ///
+    /// It returns the number of records read across all columns, so if you read
+    /// 2 columns each with 100 records from the cache, this will return 200.
+    ///
+    /// Returns None if metrics are disabled.
+    pub fn records_read_from_cache(&self) -> Option<usize> {
+        match self {
+            Self::Disabled => None,
+            Self::Enabled(inner) => Some(
+                inner
+                    .records_read_from_cache
+                    .load(std::sync::atomic::Ordering::Relaxed),
+            ),
+        }
+    }
+
+    /// Increments the count of records read from the inner reader
+    /// (no-op when metrics are disabled)
+    pub(crate) fn increment_inner_reads(&self, count: usize) {
+        let Self::Enabled(inner) = self else {
+            return;
+        };
+        inner
+            .records_read_from_inner
+            .fetch_add(count, std::sync::atomic::Ordering::Relaxed);
+    }
+
+    /// Increments the count of records read from the cache (no-op when metrics are disabled)
+    pub(crate) fn increment_cache_reads(&self, count: usize) {
+        let Self::Enabled(inner) = self else {
+            return;
+        };
+
+        inner
+            .records_read_from_cache
+            .fetch_add(count, std::sync::atomic::Ordering::Relaxed);
+    }
+}
+
+/// Holds the actual metrics for the Arrow reader.
+///
+/// Please see [`ArrowReaderMetrics`] for the public interface.
+#[derive(Debug)]
+pub struct ArrowReaderMetricsInner {
+    // Metrics for Predicate Cache
+    /// Total number of records read from the inner reader (uncached)
+    records_read_from_inner: AtomicUsize,
+    /// Total number of records read from previously cached pages
+    records_read_from_cache: AtomicUsize,
+}
+
+impl ArrowReaderMetricsInner {
+    /// Creates a new instance of `ArrowReaderMetricsInner`
+    pub(crate) fn new() -> Self {
+        Self {
+            records_read_from_inner: AtomicUsize::new(0),
+            records_read_from_cache: AtomicUsize::new(0),
+        }
+    }
+}
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 9127423efe4b..b0563d0d693a 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -18,31 +18,43 @@
 //! Contains reader which reads parquet data into arrow [`RecordBatch`]
 
 use arrow_array::cast::AsArray;
-use arrow_array::Array;
-use arrow_array::{RecordBatch, RecordBatchReader};
-use arrow_schema::{ArrowError, DataType as ArrowType, Schema, SchemaRef};
+use arrow_array::{Array, RecordBatch, RecordBatchReader};
+use arrow_schema::{ArrowError, DataType as ArrowType, FieldRef, Schema, SchemaRef};
+use arrow_select::filter::filter_record_batch;
 pub use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter};
-pub use selection::{RowSelection, RowSelector};
+pub use selection::{RowSelection, RowSelectionCursor, RowSelectionPolicy, RowSelector};
 use std::fmt::{Debug, Formatter};
 use std::sync::Arc;
 
 pub use crate::arrow::array_reader::RowGroups;
 use crate::arrow::array_reader::{ArrayReader, ArrayReaderBuilder};
-use crate::arrow::schema::{parquet_to_arrow_schema_and_fields, ParquetField};
-use crate::arrow::{parquet_to_arrow_field_levels, FieldLevels, ProjectionMask};
+use crate::arrow::schema::{
+    ParquetField, parquet_to_arrow_schema_and_fields, virtual_type::is_virtual_column,
+};
+use crate::arrow::{FieldLevels, ProjectionMask, parquet_to_arrow_field_levels_with_virtual};
+use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash};
+use crate::bloom_filter::{
+    SBBF_HEADER_SIZE_ESTIMATE, Sbbf, chunk_read_bloom_filter_header_and_offset,
+};
 use crate::column::page::{PageIterator, PageReader};
 #[cfg(feature = "encryption")]
 use crate::encryption::decrypt::FileDecryptionProperties;
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+use crate::file::metadata::{
+    PageIndexPolicy, ParquetMetaData, ParquetMetaDataOptions, ParquetMetaDataReader,
+    ParquetStatisticsPolicy, RowGroupMetaData,
+};
 use crate::file::reader::{ChunkReader, SerializedPageReader};
 use crate::schema::types::SchemaDescriptor;
 
-pub(crate) use read_plan::{ReadPlan, ReadPlanBuilder};
+use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+// Exposed so integration tests and benchmarks can temporarily override the threshold.
+pub use read_plan::{ReadPlan, ReadPlanBuilder};
 
 mod filter;
+pub mod metrics;
 mod read_plan;
-mod selection;
+pub(crate) mod selection;
 pub mod statistics;
 
 /// Builder for constructing Parquet readers that decode into [Apache Arrow]
@@ -50,8 +62,9 @@ pub mod statistics;
 ///
 /// Most users should use one of the following specializations:
 ///
-/// * synchronous API: [`ParquetRecordBatchReaderBuilder::try_new`]
-/// * `async` API: [`ParquetRecordBatchStreamBuilder::new`]
+/// * synchronous API: [`ParquetRecordBatchReaderBuilder`]
+/// * `async` API: [`ParquetRecordBatchStreamBuilder`]
+/// * decoder API: [`ParquetPushDecoderBuilder`]
 ///
 /// # Features
 /// * Projection pushdown: [`Self::with_projection`]
@@ -86,11 +99,19 @@ pub mod statistics;
 /// You can read more about this design in the [Querying Parquet with
 /// Millisecond Latency] Arrow blog post.
 ///
-/// [`ParquetRecordBatchStreamBuilder::new`]: crate::arrow::async_reader::ParquetRecordBatchStreamBuilder::new
+/// [`ParquetRecordBatchStreamBuilder`]: crate::arrow::async_reader::ParquetRecordBatchStreamBuilder
+/// [`ParquetPushDecoderBuilder`]: crate::arrow::push_decoder::ParquetPushDecoderBuilder
 /// [Apache Arrow]: https://arrow.apache.org/
 /// [`StatisticsConverter`]: statistics::StatisticsConverter
 /// [Querying Parquet with Millisecond Latency]: https://arrow.apache.org/blog/2022/12/26/querying-parquet-with-millisecond-latency/
 pub struct ArrowReaderBuilder<T> {
+    /// The "input" to read parquet data from.
+    ///
+    /// Note in the case of the [`ParquetPushDecoderBuilder`], there
+    /// is no underlying input, which is indicated by a type parameter of [`NoInput`]
+    ///
+    /// [`ParquetPushDecoderBuilder`]: crate::arrow::push_decoder::ParquetPushDecoderBuilder
+    /// [`NoInput`]: crate::arrow::push_decoder::NoInput
     pub(crate) input: T,
 
     pub(crate) metadata: Arc<ParquetMetaData>,
@@ -109,9 +130,15 @@ pub struct ArrowReaderBuilder<T> {
 
     pub(crate) selection: Option<RowSelection>,
 
+    pub(crate) row_selection_policy: RowSelectionPolicy,
+
     pub(crate) limit: Option<usize>,
 
     pub(crate) offset: Option<usize>,
+
+    pub(crate) metrics: ArrowReaderMetrics,
+
+    pub(crate) max_predicate_cache_size: usize,
 }
 
 impl<T: Debug> Debug for ArrowReaderBuilder<T> {
@@ -126,8 +153,10 @@ impl<T: Debug> Debug for ArrowReaderBuilder<T> {
             .field("projection", &self.projection)
             .field("filter", &self.filter)
             .field("selection", &self.selection)
+            .field("row_selection_policy", &self.row_selection_policy)
             .field("limit", &self.limit)
             .field("offset", &self.offset)
+            .field("metrics", &self.metrics)
             .finish()
     }
 }
@@ -144,8 +173,11 @@ impl<T> ArrowReaderBuilder<T> {
             projection: ProjectionMask::all(),
             filter: None,
             selection: None,
+            row_selection_policy: RowSelectionPolicy::default(),
             limit: None,
             offset: None,
+            metrics: ArrowReaderMetrics::Disabled,
+            max_predicate_cache_size: 100 * 1024 * 1024, // 100MB default cache size
         }
     }
 
@@ -190,6 +222,16 @@ impl<T> ArrowReaderBuilder<T> {
         }
     }
 
+    /// Configure how row selections should be materialised during execution
+    ///
+    /// See [`RowSelectionPolicy`] for more details
+    pub fn with_row_selection_policy(self, policy: RowSelectionPolicy) -> Self {
+        Self {
+            row_selection_policy: policy,
+            ..self
+        }
+    }
+
     /// Provide a [`RowSelection`] to filter out rows, and avoid fetching their
     /// data into memory.
     ///
@@ -248,7 +290,7 @@ impl<T> ArrowReaderBuilder<T> {
     /// Skip 1100      (skip the remaining 900 rows in row group 2 and the first 200 rows in row group 3)
     /// ```
     ///
-    /// [`Index`]: crate::file::page_index::index::Index
+    /// [`Index`]: crate::file::page_index::column_index::ColumnIndexMetaData
     pub fn with_row_selection(self, selection: RowSelection) -> Self {
         Self {
             selection: Some(selection),
@@ -262,6 +304,36 @@ impl<T> ArrowReaderBuilder<T> {
     ///
     /// It is recommended to enable reading the page index if using this functionality, to allow
     /// more efficient skipping over data pages. See [`ArrowReaderOptions::with_page_index`].
+    ///
+    /// For a running example see `parquet/examples/read_with_row_filter.rs`.
+    /// See <https://arrow.apache.org/blog/2025/12/11/parquet-late-materialization-deep-dive/>
+    /// for a technical explanation of late materialization.
+    ///
+    /// # Example
+    /// ```rust
+    /// # use std::fs::File;
+    /// # use arrow_array::Int32Array;
+    /// # use parquet::arrow::ProjectionMask;
+    /// # use parquet::arrow::arrow_reader::{ArrowPredicateFn, ParquetRecordBatchReaderBuilder, RowFilter};
+    /// # fn main() -> Result<(), parquet::errors::ParquetError> {
+    /// # let testdata = arrow::util::test_util::parquet_test_data();
+    /// # let path = format!("{testdata}/alltypes_plain.parquet");
+    /// # let file = File::open(&path)?;
+    /// let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
+    /// let schema_desc = builder.metadata().file_metadata().schema_descr_ptr();
+    ///
+    /// // Create predicate: column id > 4. This col has index 0.
+    /// let projection = ProjectionMask::leaves(&schema_desc, [0]);
+    /// let predicate = ArrowPredicateFn::new(projection, |batch| {
+    ///     let id_col = batch.column(0);
+    ///     arrow::compute::kernels::cmp::gt(id_col, &Int32Array::new_scalar(4))
+    /// });
+    ///
+    /// let row_filter = RowFilter::new(vec![Box::new(predicate)]);
+    /// let _reader = builder.with_row_filter(row_filter).build()?;
+    /// # Ok(())
+    /// # }
+    /// ```
     pub fn with_row_filter(self, filter: RowFilter) -> Self {
         Self {
             filter: Some(filter),
@@ -296,26 +368,99 @@ impl<T> ArrowReaderBuilder<T> {
             ..self
         }
     }
+
+    /// Specify metrics collection during reading
+    ///
+    /// To access the metrics, create an [`ArrowReaderMetrics`] and pass a
+    /// clone of the provided metrics to the builder.
+    ///
+    /// For example:
+    ///
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use bytes::Bytes;
+    /// # use arrow_array::{Int32Array, RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// # use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
+    /// use parquet::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+    /// # use parquet::arrow::ArrowWriter;
+    /// # let mut file: Vec<u8> = Vec::with_capacity(1024);
+    /// # let schema = Arc::new(Schema::new(vec![Field::new("i32", DataType::Int32, false)]));
+    /// # let mut writer = ArrowWriter::try_new(&mut file, schema.clone(), None).unwrap();
+    /// # let batch = RecordBatch::try_new(schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))]).unwrap();
+    /// # writer.write(&batch).unwrap();
+    /// # writer.close().unwrap();
+    /// # let file = Bytes::from(file);
+    /// // Create metrics object to pass into the reader
+    /// let metrics = ArrowReaderMetrics::enabled();
+    /// let reader = ParquetRecordBatchReaderBuilder::try_new(file).unwrap()
+    ///   // Configure the builder to use the metrics by passing a clone
+    ///   .with_metrics(metrics.clone())
+    ///   // Build the reader
+    ///   .build().unwrap();
+    /// // .. read data from the reader ..
+    ///
+    /// // check the metrics
+    /// assert!(metrics.records_read_from_inner().is_some());
+    /// ```
+    pub fn with_metrics(self, metrics: ArrowReaderMetrics) -> Self {
+        Self { metrics, ..self }
+    }
+
+    /// Set the maximum size (per row group) of the predicate cache in bytes for
+    /// the async decoder.
+    ///
+    /// Defaults to 100MB (across all columns). Set to `usize::MAX` to use
+    /// unlimited cache size.
+    ///
+    /// This cache is used to store decoded arrays that are used in
+    /// predicate evaluation ([`Self::with_row_filter`]).
+    ///
+    /// This cache is only used for the "async" decoder, [`ParquetRecordBatchStream`]. See
+    /// [this ticket] for more details and alternatives.
+    ///
+    /// [`ParquetRecordBatchStream`]: https://docs.rs/parquet/latest/parquet/arrow/async_reader/struct.ParquetRecordBatchStream.html
+    /// [this ticket]: https://github.com/apache/arrow-rs/issues/8000
+    pub fn with_max_predicate_cache_size(self, max_predicate_cache_size: usize) -> Self {
+        Self {
+            max_predicate_cache_size,
+            ..self
+        }
+    }
 }
 
-/// Options that control how metadata is read for a parquet file
+/// Options that control how [`ParquetMetaData`] is read when constructing
+/// an Arrow reader.
+///
+/// To use these options, pass them to one of the following methods:
+/// * [`ParquetRecordBatchReaderBuilder::try_new_with_options`]
+/// * [`ParquetRecordBatchStreamBuilder::new_with_options`]
+///
+/// For fine-grained control over metadata loading, use
+/// [`ArrowReaderMetadata::load`] to load metadata with these options.
 ///
 /// See [`ArrowReaderBuilder`] for how to configure how the column data
 /// is then read from the file, including projection and filter pushdown
+///
+/// [`ParquetRecordBatchStreamBuilder::new_with_options`]: crate::arrow::async_reader::ParquetRecordBatchStreamBuilder::new_with_options
 #[derive(Debug, Clone, Default)]
 pub struct ArrowReaderOptions {
     /// Should the reader strip any user defined metadata from the Arrow schema
     skip_arrow_metadata: bool,
-    /// If provided used as the schema hint when determining the Arrow schema,
+    /// If provided, used as the schema hint when determining the Arrow schema,
     /// otherwise the schema hint is read from the [ARROW_SCHEMA_META_KEY]
     ///
     /// [ARROW_SCHEMA_META_KEY]: crate::arrow::ARROW_SCHEMA_META_KEY
     supplied_schema: Option<SchemaRef>,
-    /// If true, attempt to read `OffsetIndex` and `ColumnIndex`
-    pub(crate) page_index: bool,
+    /// Policy for reading offset and column indexes.
+    pub(crate) page_index_policy: PageIndexPolicy,
+    /// Options to control reading of Parquet metadata
+    metadata_options: ParquetMetaDataOptions,
     /// If encryption is enabled, the file decryption properties can be provided
     #[cfg(feature = "encryption")]
-    pub(crate) file_decryption_properties: Option<FileDecryptionProperties>,
+    pub(crate) file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
+
+    virtual_columns: Vec<FieldRef>,
 }
 
 impl ArrowReaderOptions {
@@ -329,7 +474,7 @@ impl ArrowReaderOptions {
     /// Parquet files generated by some writers may contain embedded arrow
     /// schema and metadata.
     /// This may not be correct or compatible with your system,
-    /// for example: [ARROW-16184](https://issues.apache.org/jira/browse/ARROW-16184)
+    /// for example, see [ARROW-16184](https://issues.apache.org/jira/browse/ARROW-16184)
     pub fn with_skip_arrow_metadata(self, skip_arrow_metadata: bool) -> Self {
         Self {
             skip_arrow_metadata,
@@ -393,6 +538,59 @@ impl ArrowReaderOptions {
     /// let mut reader = builder.build().unwrap();
     /// let _batch = reader.next().unwrap().unwrap();
     /// ```
+    ///
+    /// # Example: Preserving Dictionary Encoding
+    ///
+    /// By default, Parquet string columns are read as `StringArray` (or `LargeStringArray`),
+    /// even if the underlying Parquet data uses dictionary encoding. You can preserve
+    /// the dictionary encoding by specifying a `Dictionary` type in the schema hint:
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use tempfile::tempfile;
+    /// use arrow_array::{ArrayRef, RecordBatch, StringArray};
+    /// use arrow_schema::{DataType, Field, Schema};
+    /// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+    /// use parquet::arrow::ArrowWriter;
+    ///
+    /// // Write a Parquet file with string data
+    /// let file = tempfile().unwrap();
+    /// let schema = Arc::new(Schema::new(vec![
+    ///     Field::new("city", DataType::Utf8, false)
+    /// ]));
+    /// let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
+    /// let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
+    ///
+    /// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
+    /// writer.write(&batch).unwrap();
+    /// writer.close().unwrap();
+    ///
+    /// // Read the file back, requesting dictionary encoding preservation
+    /// let dict_schema = Arc::new(Schema::new(vec![
+    ///     Field::new("city", DataType::Dictionary(
+    ///         Box::new(DataType::Int32),
+    ///         Box::new(DataType::Utf8)
+    ///     ), false)
+    /// ]));
+    /// let options = ArrowReaderOptions::new().with_schema(dict_schema);
+    /// let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
+    ///     file.try_clone().unwrap(),
+    ///     options
+    /// ).unwrap();
+    ///
+    /// let mut reader = builder.build().unwrap();
+    /// let batch = reader.next().unwrap().unwrap();
+    ///
+    /// // The column is now a DictionaryArray
+    /// assert!(matches!(
+    ///     batch.column(0).data_type(),
+    ///     DataType::Dictionary(_, _)
+    /// ));
+    /// ```
+    ///
+    /// **Note**: Dictionary encoding preservation works best when:
+    /// 1. The original column was dictionary encoded (the default for string columns)
+    /// 2. There are a small number of distinct values
     pub fn with_schema(self, schema: SchemaRef) -> Self {
         Self {
             supplied_schema: Some(schema),
@@ -401,7 +599,7 @@ impl ArrowReaderOptions {
         }
     }
 
-    /// Enable reading [`PageIndex`], if present (defaults to `false`)
+    /// Enable reading the [`PageIndex`] from the metadata, if present (defaults to `false`)
     ///
     /// The `PageIndex` can be used to push down predicates to the parquet scan,
     /// potentially eliminating unnecessary IO, by some query engines.
@@ -414,7 +612,74 @@ impl ArrowReaderOptions {
     /// [`ParquetMetaData::column_index`]: crate::file::metadata::ParquetMetaData::column_index
     /// [`ParquetMetaData::offset_index`]: crate::file::metadata::ParquetMetaData::offset_index
     pub fn with_page_index(self, page_index: bool) -> Self {
-        Self { page_index, ..self }
+        let page_index_policy = PageIndexPolicy::from(page_index);
+
+        Self {
+            page_index_policy,
+            ..self
+        }
+    }
+
+    /// Set the [`PageIndexPolicy`] to determine how page indexes should be read.
+    ///
+    /// See [`Self::with_page_index`] for more details.
+    pub fn with_page_index_policy(self, policy: PageIndexPolicy) -> Self {
+        Self {
+            page_index_policy: policy,
+            ..self
+        }
+    }
+
+    /// Provide a Parquet schema to use when decoding the metadata. The schema in the Parquet
+    /// footer will be skipped.
+    ///
+    /// This can be used to avoid reparsing the schema from the file when it is
+    /// already known.
+    pub fn with_parquet_schema(mut self, schema: Arc<SchemaDescriptor>) -> Self {
+        self.metadata_options.set_schema(schema);
+        self
+    }
+
+    /// Set whether to convert the [`encoding_stats`] in the Parquet `ColumnMetaData` to a bitmask
+    /// (defaults to `false`).
+    ///
+    /// See [`ColumnChunkMetaData::page_encoding_stats_mask`] for an explanation of why this
+    /// might be desirable.
+    ///
+    /// [`ColumnChunkMetaData::page_encoding_stats_mask`]:
+    /// crate::file::metadata::ColumnChunkMetaData::page_encoding_stats_mask
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn with_encoding_stats_as_mask(mut self, val: bool) -> Self {
+        self.metadata_options.set_encoding_stats_as_mask(val);
+        self
+    }
+
+    /// Sets the decoding policy for [`encoding_stats`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn with_encoding_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_encoding_stats_policy(policy);
+        self
+    }
+
+    /// Sets the decoding policy for [`statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L912
+    pub fn with_column_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_column_stats_policy(policy);
+        self
+    }
+
+    /// Sets the decoding policy for [`size_statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`size_statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L936
+    pub fn with_size_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_size_stats_policy(policy);
+        self
     }
 
     /// Provide the file decryption properties to use when reading encrypted parquet files.
@@ -423,7 +688,7 @@ impl ArrowReaderOptions {
     #[cfg(feature = "encryption")]
     pub fn with_file_decryption_properties(
         self,
-        file_decryption_properties: FileDecryptionProperties,
+        file_decryption_properties: Arc<FileDecryptionProperties>,
     ) -> Self {
         Self {
             file_decryption_properties: Some(file_decryption_properties),
@@ -431,11 +696,83 @@ impl ArrowReaderOptions {
         }
     }
 
+    /// Include virtual columns in the output.
+    ///
+    /// Virtual columns are columns that are not part of the Parquet schema but are added to the output by the reader, such as row numbers.
+    ///
+    /// # Example
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// # use parquet::arrow::{ArrowWriter, RowNumber};
+    /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+    /// # use tempfile::tempfile;
+    /// #
+    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// // Create a simple record batch with some data
+    /// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
+    /// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
+    ///
+    /// // Write the batch to a temporary parquet file
+    /// let file = tempfile()?;
+    /// let mut writer = ArrowWriter::try_new(
+    ///     file.try_clone()?,
+    ///     batch.schema(),
+    ///     None
+    /// )?;
+    /// writer.write(&batch)?;
+    /// writer.close()?;
+    ///
+    /// // Create a virtual column for row numbers
+    /// let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)
+    ///     .with_extension_type(RowNumber));
+    ///
+    /// // Configure options with virtual columns
+    /// let options = ArrowReaderOptions::new()
+    ///     .with_virtual_columns(vec![row_number_field])?;
+    ///
+    /// // Create a reader with the options
+    /// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
+    ///     file,
+    ///     options
+    /// )?
+    /// .build()?;
+    ///
+    /// // Read the batch - it will include both the original column and the virtual row_number column
+    /// let result_batch = reader.next().unwrap()?;
+    /// assert_eq!(result_batch.num_columns(), 2); // "value" + "row_number"
+    /// assert_eq!(result_batch.num_rows(), 3);
+    /// #
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn with_virtual_columns(self, virtual_columns: Vec<FieldRef>) -> Result<Self> {
+        // Validate that all fields are virtual columns
+        for field in &virtual_columns {
+            if !is_virtual_column(field) {
+                return Err(ParquetError::General(format!(
+                    "Field '{}' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'",
+                    field.name()
+                )));
+            }
+        }
+        Ok(Self {
+            virtual_columns,
+            ..self
+        })
+    }
+
     /// Retrieve the currently set page index behavior.
     ///
     /// This can be set via [`with_page_index`][Self::with_page_index].
     pub fn page_index(&self) -> bool {
-        self.page_index
+        self.page_index_policy != PageIndexPolicy::Skip
+    }
+
+    /// Retrieve the currently set metadata decoding options.
+    pub fn metadata_options(&self) -> &ParquetMetaDataOptions {
+        &self.metadata_options
     }
 
     /// Retrieve the currently set file decryption properties.
@@ -443,7 +780,7 @@ impl ArrowReaderOptions {
     /// This can be set via
     /// [`file_decryption_properties`][Self::with_file_decryption_properties].
     #[cfg(feature = "encryption")]
-    pub fn file_decryption_properties(&self) -> Option<&FileDecryptionProperties> {
+    pub fn file_decryption_properties(&self) -> Option<&Arc<FileDecryptionProperties>> {
         self.file_decryption_properties.as_ref()
     }
 }
@@ -468,12 +805,13 @@ pub struct ArrowReaderMetadata {
     pub(crate) metadata: Arc<ParquetMetaData>,
     /// The Arrow Schema
     pub(crate) schema: SchemaRef,
-
+    /// The Parquet schema (root field)
     pub(crate) fields: Option<Arc<ParquetField>>,
 }
 
 impl ArrowReaderMetadata {
-    /// Loads [`ArrowReaderMetadata`] from the provided [`ChunkReader`], if necessary
+    /// Create [`ArrowReaderMetadata`] from the provided [`ArrowReaderOptions`]
+    /// and [`ChunkReader`]
     ///
     /// See [`ParquetRecordBatchReaderBuilder::new_with_metadata`] for an
     /// example of how this can be used
@@ -484,23 +822,31 @@ impl ArrowReaderMetadata {
     /// `Self::metadata` is missing the page index, this function will attempt
     /// to load the page index by making an object store request.
     pub fn load<T: ChunkReader>(reader: &T, options: ArrowReaderOptions) -> Result<Self> {
-        let metadata = ParquetMetaDataReader::new().with_page_indexes(options.page_index);
+        let metadata = ParquetMetaDataReader::new()
+            .with_page_index_policy(options.page_index_policy)
+            .with_metadata_options(Some(options.metadata_options.clone()));
         #[cfg(feature = "encryption")]
-        let metadata =
-            metadata.with_decryption_properties(options.file_decryption_properties.as_ref());
+        let metadata = metadata.with_decryption_properties(
+            options.file_decryption_properties.as_ref().map(Arc::clone),
+        );
         let metadata = metadata.parse_and_finish(reader)?;
         Self::try_new(Arc::new(metadata), options)
     }
 
-    /// Create a new [`ArrowReaderMetadata`]
+    /// Create a new [`ArrowReaderMetadata`] from a pre-existing
+    /// [`ParquetMetaData`] and [`ArrowReaderOptions`].
     ///
     /// # Notes
     ///
-    /// This function does not attempt to load the PageIndex if not present in the metadata.
-    /// See [`Self::load`] for more details.
+    /// This function will not attempt to load the PageIndex if not present in the metadata, regardless
+    /// of the settings in `options`. See [`Self::load`] to load metadata including the page index if needed.
     pub fn try_new(metadata: Arc<ParquetMetaData>, options: ArrowReaderOptions) -> Result<Self> {
         match options.supplied_schema {
-            Some(supplied_schema) => Self::with_supplied_schema(metadata, supplied_schema.clone()),
+            Some(supplied_schema) => Self::with_supplied_schema(
+                metadata,
+                supplied_schema.clone(),
+                &options.virtual_columns,
+            ),
             None => {
                 let kv_metadata = match options.skip_arrow_metadata {
                     true => None,
@@ -511,6 +857,7 @@ impl ArrowReaderMetadata {
                     metadata.file_metadata().schema_descr(),
                     ProjectionMask::all(),
                     kv_metadata,
+                    &options.virtual_columns,
                 )?;
 
                 Ok(Self {
@@ -525,16 +872,18 @@ impl ArrowReaderMetadata {
     fn with_supplied_schema(
         metadata: Arc<ParquetMetaData>,
         supplied_schema: SchemaRef,
+        virtual_columns: &[FieldRef],
     ) -> Result<Self> {
         let parquet_schema = metadata.file_metadata().schema_descr();
-        let field_levels = parquet_to_arrow_field_levels(
+        let field_levels = parquet_to_arrow_field_levels_with_virtual(
             parquet_schema,
             ProjectionMask::all(),
             Some(supplied_schema.fields()),
+            virtual_columns,
         )?;
         let fields = field_levels.fields;
         let inferred_len = fields.len();
-        let supplied_len = supplied_schema.fields().len();
+        let supplied_len = supplied_schema.fields().len() + virtual_columns.len();
         // Ensure the supplied schema has the same number of columns as the parquet schema.
         // parquet_to_arrow_field_levels is expected to throw an error if the schemas have
         // different lengths, but we check here to be safe.
@@ -552,7 +901,7 @@ impl ArrowReaderMetadata {
         for (field1, field2) in field_iter {
             if field1.data_type() != field2.data_type() {
                 errors.push(format!(
-                    "data type mismatch for field {}: requested {:?} but found {:?}",
+                    "data type mismatch for field {}: requested {} but found {}",
                     field1.name(),
                     field1.data_type(),
                     field2.data_type()
@@ -607,7 +956,7 @@ impl ArrowReaderMetadata {
 }
 
 #[doc(hidden)]
-/// A newtype used within [`ReaderOptionsBuilder`] to distinguish sync readers from async
+// A newtype used within `ReaderOptionsBuilder` to distinguish sync readers from async
 pub struct SyncReader<T: ChunkReader>(T);
 
 impl<T: Debug + ChunkReader> Debug for SyncReader<T> {
@@ -616,11 +965,12 @@ impl<T: Debug + ChunkReader> Debug for SyncReader<T> {
     }
 }
 
-/// A synchronous builder used to construct [`ParquetRecordBatchReader`] for a file
-///
-/// For an async API see [`crate::arrow::async_reader::ParquetRecordBatchStreamBuilder`]
+/// Creates [`ParquetRecordBatchReader`] for reading Parquet files into Arrow [`RecordBatch`]es
 ///
-/// See [`ArrowReaderBuilder`] for additional member functions
+/// # See Also
+/// * [`crate::arrow::async_reader::ParquetRecordBatchStreamBuilder`] for an async API
+/// * [`crate::arrow::push_decoder::ParquetPushDecoderBuilder`] for a SansIO decoder API
+/// * [`ArrowReaderBuilder`] for additional member functions
 pub type ParquetRecordBatchReaderBuilder<T> = ArrowReaderBuilder<SyncReader<T>>;
 
 impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
@@ -657,6 +1007,9 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
     }
 
     /// Create a new [`ParquetRecordBatchReaderBuilder`] with [`ArrowReaderOptions`]
+    ///
+    /// Use this method if you want to control the options for reading the
+    /// [`ParquetMetaData`]
     pub fn try_new_with_options(reader: T, options: ArrowReaderOptions) -> Result<Self> {
         let metadata = ArrowReaderMetadata::load(&reader, options)?;
         Ok(Self::new_with_metadata(reader, metadata))
@@ -664,6 +1017,7 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
 
     /// Create a [`ParquetRecordBatchReaderBuilder`] from the provided [`ArrowReaderMetadata`]
     ///
+    /// Use this method if you already have [`ParquetMetaData`] for a file.
     /// This interface allows:
     ///
     /// 1. Loading metadata once and using it to create multiple builders with
@@ -703,27 +1057,102 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
         Self::new_builder(SyncReader(input), metadata)
     }
 
+    /// Read bloom filter for a column in a row group
+    ///
+    /// Returns `None` if the column does not have a bloom filter
+    ///
+    /// This function should be called after other forms of pruning, such as projection and predicate pushdown.
+    pub fn get_row_group_column_bloom_filter(
+        &self,
+        row_group_idx: usize,
+        column_idx: usize,
+    ) -> Result<Option<Sbbf>> {
+        let metadata = self.metadata.row_group(row_group_idx);
+        let column_metadata = metadata.column(column_idx);
+
+        let offset: u64 = if let Some(offset) = column_metadata.bloom_filter_offset() {
+            offset
+                .try_into()
+                .map_err(|_| ParquetError::General("Bloom filter offset is invalid".to_string()))?
+        } else {
+            return Ok(None);
+        };
+
+        let buffer = match column_metadata.bloom_filter_length() {
+            Some(length) => self.input.0.get_bytes(offset, length as usize),
+            None => self.input.0.get_bytes(offset, SBBF_HEADER_SIZE_ESTIMATE),
+        }?;
+
+        let (header, bitset_offset) =
+            chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?;
+
+        match header.algorithm {
+            BloomFilterAlgorithm::BLOCK => {
+                // this match exists to future proof the singleton algorithm enum
+            }
+        }
+        match header.compression {
+            BloomFilterCompression::UNCOMPRESSED => {
+                // this match exists to future proof the singleton compression enum
+            }
+        }
+        match header.hash {
+            BloomFilterHash::XXHASH => {
+                // this match exists to future proof the singleton hash enum
+            }
+        }
+
+        let bitset = match column_metadata.bloom_filter_length() {
+            Some(_) => buffer.slice(
+                (TryInto::<usize>::try_into(bitset_offset).unwrap()
+                    - TryInto::<usize>::try_into(offset).unwrap())..,
+            ),
+            None => {
+                let bitset_length: usize = header.num_bytes.try_into().map_err(|_| {
+                    ParquetError::General("Bloom filter length is invalid".to_string())
+                })?;
+                self.input.0.get_bytes(bitset_offset, bitset_length)?
+            }
+        };
+        Ok(Some(Sbbf::new(&bitset)))
+    }
+
     /// Build a [`ParquetRecordBatchReader`]
     ///
     /// Note: this will eagerly evaluate any `RowFilter` before returning
     pub fn build(self) -> Result<ParquetRecordBatchReader> {
+        let Self {
+            input,
+            metadata,
+            schema: _,
+            fields,
+            batch_size,
+            row_groups,
+            projection,
+            mut filter,
+            selection,
+            row_selection_policy,
+            limit,
+            offset,
+            metrics,
+            // Not used for the sync reader, see https://github.com/apache/arrow-rs/issues/8000
+            max_predicate_cache_size: _,
+        } = self;
+
         // Try to avoid allocate large buffer
-        let batch_size = self
-            .batch_size
-            .min(self.metadata.file_metadata().num_rows() as usize);
+        let batch_size = batch_size.min(metadata.file_metadata().num_rows() as usize);
 
-        let row_groups = self
-            .row_groups
-            .unwrap_or_else(|| (0..self.metadata.num_row_groups()).collect());
+        let row_groups = row_groups.unwrap_or_else(|| (0..metadata.num_row_groups()).collect());
 
         let reader = ReaderRowGroups {
-            reader: Arc::new(self.input.0),
-            metadata: self.metadata,
+            reader: Arc::new(input.0),
+            metadata,
             row_groups,
         };
 
-        let mut filter = self.filter;
-        let mut plan_builder = ReadPlanBuilder::new(batch_size).with_selection(self.selection);
+        let mut plan_builder = ReadPlanBuilder::new(batch_size)
+            .with_selection(selection)
+            .with_row_selection_policy(row_selection_policy);
 
         // Update selection based on any filters
         if let Some(filter) = filter.as_mut() {
@@ -733,20 +1162,25 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
                     break;
                 }
 
-                let array_reader = ArrayReaderBuilder::new(&reader)
-                    .build_array_reader(self.fields.as_deref(), predicate.projection())?;
+                let mut cache_projection = predicate.projection().clone();
+                cache_projection.intersect(&projection);
+
+                let array_reader = ArrayReaderBuilder::new(&reader, &metrics)
+                    .with_parquet_metadata(&reader.metadata)
+                    .build_array_reader(fields.as_deref(), predicate.projection())?;
 
                 plan_builder = plan_builder.with_predicate(array_reader, predicate.as_mut())?;
             }
         }
 
-        let array_reader = ArrayReaderBuilder::new(&reader)
-            .build_array_reader(self.fields.as_deref(), &self.projection)?;
+        let array_reader = ArrayReaderBuilder::new(&reader, &metrics)
+            .with_parquet_metadata(&reader.metadata)
+            .build_array_reader(fields.as_deref(), &projection)?;
 
         let read_plan = plan_builder
             .limited(reader.num_rows())
-            .with_offset(self.offset)
-            .with_limit(self.limit)
+            .with_offset(offset)
+            .with_limit(limit)
             .build_limited()
             .build();
 
@@ -779,6 +1213,18 @@ impl<T: ChunkReader + 'static> RowGroups for ReaderRowGroups<T> {
             row_groups: self.row_groups.clone().into_iter(),
         }))
     }
+
+    fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
+        Box::new(
+            self.row_groups
+                .iter()
+                .map(move |i| self.metadata.row_group(*i)),
+        )
+    }
+
+    fn metadata(&self) -> &ParquetMetaData {
+        self.metadata.as_ref()
+    }
 }
 
 struct ReaderPageIterator<T: ChunkReader> {
@@ -826,14 +1272,32 @@ impl<T: ChunkReader + 'static> Iterator for ReaderPageIterator<T> {
 
 impl<T: ChunkReader + 'static> PageIterator for ReaderPageIterator<T> {}
 
-/// An `Iterator<Item = ArrowResult<RecordBatch>>` that yields [`RecordBatch`]
-/// read from a parquet data source
+/// Reads Parquet data as Arrow [`RecordBatch`]es
+///
+/// This struct implements the [`RecordBatchReader`] trait and is an
+/// `Iterator<Item = ArrowResult<RecordBatch>>` that yields [`RecordBatch`]es.
+///
+/// Typically reads from either a file or an in-memory buffer ([`Bytes`]).
+///
+/// Created by [`ParquetRecordBatchReaderBuilder`]
+///
+/// [`Bytes`]: bytes::Bytes
 pub struct ParquetRecordBatchReader {
     array_reader: Box<dyn ArrayReader>,
     schema: SchemaRef,
     read_plan: ReadPlan,
 }
 
+impl Debug for ParquetRecordBatchReader {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ParquetRecordBatchReader")
+            .field("array_reader", &"...")
+            .field("schema", &self.schema)
+            .field("read_plan", &self.read_plan)
+            .finish()
+    }
+}
+
 impl Iterator for ParquetRecordBatchReader {
     type Item = Result<RecordBatch, ArrowError>;
 
@@ -853,10 +1317,83 @@ impl ParquetRecordBatchReader {
     fn next_inner(&mut self) -> Result<Option<RecordBatch>> {
         let mut read_records = 0;
         let batch_size = self.batch_size();
-        match self.read_plan.selection_mut() {
-            Some(selection) => {
-                while read_records < batch_size && !selection.is_empty() {
-                    let front = selection.pop_front().unwrap();
+        if batch_size == 0 {
+            return Ok(None);
+        }
+        match self.read_plan.row_selection_cursor_mut() {
+            RowSelectionCursor::Mask(mask_cursor) => {
+                // Stream the record batch reader using contiguous segments of the selection
+                // mask, avoiding the need to materialize intermediate `RowSelector` ranges.
+                while !mask_cursor.is_empty() {
+                    let Some(mask_chunk) = mask_cursor.next_mask_chunk(batch_size) else {
+                        return Ok(None);
+                    };
+
+                    if mask_chunk.initial_skip > 0 {
+                        let skipped = self.array_reader.skip_records(mask_chunk.initial_skip)?;
+                        if skipped != mask_chunk.initial_skip {
+                            return Err(general_err!(
+                                "failed to skip rows, expected {}, got {}",
+                                mask_chunk.initial_skip,
+                                skipped
+                            ));
+                        }
+                    }
+
+                    if mask_chunk.chunk_rows == 0 {
+                        if mask_cursor.is_empty() && mask_chunk.selected_rows == 0 {
+                            return Ok(None);
+                        }
+                        continue;
+                    }
+
+                    let mask = mask_cursor.mask_values_for(&mask_chunk)?;
+
+                    let read = self.array_reader.read_records(mask_chunk.chunk_rows)?;
+                    if read == 0 {
+                        return Err(general_err!(
+                            "reached end of column while expecting {} rows",
+                            mask_chunk.chunk_rows
+                        ));
+                    }
+                    if read != mask_chunk.chunk_rows {
+                        return Err(general_err!(
+                            "insufficient rows read from array reader - expected {}, got {}",
+                            mask_chunk.chunk_rows,
+                            read
+                        ));
+                    }
+
+                    let array = self.array_reader.consume_batch()?;
+                    // The column reader exposes the projection as a struct array; convert this
+                    // into a record batch before applying the boolean filter mask.
+                    let struct_array = array.as_struct_opt().ok_or_else(|| {
+                        ArrowError::ParquetError(
+                            "Struct array reader should return struct array".to_string(),
+                        )
+                    })?;
+
+                    let filtered_batch =
+                        filter_record_batch(&RecordBatch::from(struct_array), &mask)?;
+
+                    if filtered_batch.num_rows() != mask_chunk.selected_rows {
+                        return Err(general_err!(
+                            "filtered rows mismatch selection - expected {}, got {}",
+                            mask_chunk.selected_rows,
+                            filtered_batch.num_rows()
+                        ));
+                    }
+
+                    if filtered_batch.num_rows() == 0 {
+                        continue;
+                    }
+
+                    return Ok(Some(filtered_batch));
+                }
+            }
+            RowSelectionCursor::Selectors(selectors_cursor) => {
+                while read_records < batch_size && !selectors_cursor.is_empty() {
+                    let front = selectors_cursor.next_selector();
                     if front.skip {
                         let skipped = self.array_reader.skip_records(front.row_count)?;
 
@@ -882,7 +1419,7 @@ impl ParquetRecordBatchReader {
                         Some(remaining) if remaining != 0 => {
                             // if page row count less than batch_size we must set batch size to page row count.
                             // add check avoid dead loop
-                            selection.push_front(RowSelector::select(remaining));
+                            selectors_cursor.return_selector(RowSelector::select(remaining));
                             need_read
                         }
                         _ => front.row_count,
@@ -893,7 +1430,7 @@ impl ParquetRecordBatchReader {
                     };
                 }
             }
-            None => {
+            RowSelectionCursor::All => {
                 self.array_reader.read_records(batch_size)?;
             }
         };
@@ -941,7 +1478,10 @@ impl ParquetRecordBatchReader {
         batch_size: usize,
         selection: Option<RowSelection>,
     ) -> Result<Self> {
-        let array_reader = ArrayReaderBuilder::new(row_groups)
+        // note metrics are not supported in this API
+        let metrics = ArrowReaderMetrics::disabled();
+        let array_reader = ArrayReaderBuilder::new(row_groups, &metrics)
+            .with_parquet_metadata(row_groups.metadata())
             .build_array_reader(levels.levels.as_ref(), &ProjectionMask::all())?;
 
         let read_plan = ReadPlanBuilder::new(batch_size)
@@ -960,7 +1500,7 @@ impl ParquetRecordBatchReader {
     /// all rows will be returned
     pub(crate) fn new(array_reader: Box<dyn ArrayReader>, read_plan: ReadPlan) -> Self {
         let schema = match array_reader.get_data_type() {
-            ArrowType::Struct(ref fields) => Schema::new(fields.clone()),
+            ArrowType::Struct(fields) => Schema::new(fields.clone()),
             _ => unreachable!("Struct array reader's data type is not struct!"),
         };
 
@@ -978,7 +1518,7 @@ impl ParquetRecordBatchReader {
 }
 
 #[cfg(test)]
-mod tests {
+pub(crate) mod tests {
     use std::cmp::min;
     use std::collections::{HashMap, VecDeque};
     use std::fmt::Formatter;
@@ -987,43 +1527,49 @@ mod tests {
     use std::path::PathBuf;
     use std::sync::Arc;
 
-    use arrow_array::builder::*;
-    use arrow_array::cast::AsArray;
-    use arrow_array::types::{
-        Date32Type, Date64Type, Decimal128Type, Decimal256Type, DecimalType, Float16Type,
-        Float32Type, Float64Type, Time32MillisecondType, Time64MicrosecondType,
-    };
-    use arrow_array::*;
-    use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime};
-    use arrow_data::{ArrayData, ArrayDataBuilder};
-    use arrow_schema::{
-        ArrowError, DataType as ArrowDataType, Field, Fields, Schema, SchemaRef, TimeUnit,
-    };
-    use arrow_select::concat::concat_batches;
-    use bytes::Bytes;
-    use half::f16;
-    use num::PrimInt;
-    use rand::{rng, Rng, RngCore};
+    use rand::rngs::StdRng;
+    use rand::{Rng, RngCore, SeedableRng, random, rng};
     use tempfile::tempfile;
 
     use crate::arrow::arrow_reader::{
-        ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
-        ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
+        ArrowPredicateFn, ArrowReaderBuilder, ArrowReaderMetadata, ArrowReaderOptions,
+        ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder, RowFilter, RowSelection,
+        RowSelectionPolicy, RowSelector,
     };
-    use crate::arrow::schema::add_encoded_arrow_schema_to_metadata;
+    use crate::arrow::schema::{add_encoded_arrow_schema_to_metadata, virtual_type::RowNumber};
     use crate::arrow::{ArrowWriter, ProjectionMask};
-    use crate::basic::{ConvertedType, Encoding, Repetition, Type as PhysicalType};
+    use crate::basic::{ConvertedType, Encoding, LogicalType, Repetition, Type as PhysicalType};
     use crate::column::reader::decoder::REPETITION_LEVELS_BATCH_SIZE;
     use crate::data_type::{
         BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType,
         FloatType, Int32Type, Int64Type, Int96, Int96Type,
     };
     use crate::errors::Result;
+    use crate::file::metadata::{ParquetMetaData, ParquetStatisticsPolicy};
     use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
     use crate::file::writer::SerializedFileWriter;
     use crate::schema::parser::parse_message_type;
     use crate::schema::types::{Type, TypePtr};
     use crate::util::test_common::rand_gen::RandGen;
+    use arrow::compute::kernels::cmp::eq;
+    use arrow::compute::or;
+    use arrow_array::builder::*;
+    use arrow_array::cast::AsArray;
+    use arrow_array::types::{
+        Date32Type, Date64Type, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type,
+        DecimalType, Float16Type, Float32Type, Float64Type, Time32MillisecondType,
+        Time64MicrosecondType,
+    };
+    use arrow_array::*;
+    use arrow_buffer::{ArrowNativeType, Buffer, IntervalDayTime, NullBuffer, i256};
+    use arrow_data::{ArrayData, ArrayDataBuilder};
+    use arrow_schema::{
+        ArrowError, DataType as ArrowDataType, Field, Fields, Schema, SchemaRef, TimeUnit,
+    };
+    use arrow_select::concat::concat_batches;
+    use bytes::Bytes;
+    use half::f16;
+    use num_traits::PrimInt;
 
     #[test]
     fn test_arrow_reader_all_columns() {
@@ -1038,41 +1584,164 @@ mod tests {
     }
 
     #[test]
-    fn test_arrow_reader_single_column() {
-        let file = get_test_file("parquet/generated_simple_numerics/blogs.parquet");
+    fn test_reuse_schema() {
+        let file = get_test_file("parquet/alltypes-java.parquet");
 
-        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
-        let original_schema = Arc::clone(builder.schema());
+        let builder = ParquetRecordBatchReaderBuilder::try_new(file.try_clone().unwrap()).unwrap();
+        let expected = builder.metadata;
+        let schema = expected.file_metadata().schema_descr_ptr();
 
-        let mask = ProjectionMask::leaves(builder.parquet_schema(), [2]);
-        let reader = builder.with_projection(mask).build().unwrap();
+        let arrow_options = ArrowReaderOptions::new().with_parquet_schema(schema.clone());
+        let builder =
+            ParquetRecordBatchReaderBuilder::try_new_with_options(file, arrow_options).unwrap();
 
-        // Verify that the schema was correctly parsed
-        assert_eq!(1, reader.schema().fields().len());
-        assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]);
+        // Verify that the metadata matches
+        assert_eq!(expected.as_ref(), builder.metadata.as_ref());
     }
 
     #[test]
-    fn test_arrow_reader_single_column_by_name() {
-        let file = get_test_file("parquet/generated_simple_numerics/blogs.parquet");
+    fn test_page_encoding_stats_mask() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_tiny_pages.parquet");
+        let file = File::open(path).unwrap();
 
-        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
-        let original_schema = Arc::clone(builder.schema());
+        let arrow_options = ArrowReaderOptions::new().with_encoding_stats_as_mask(true);
+        let builder =
+            ParquetRecordBatchReaderBuilder::try_new_with_options(file, arrow_options).unwrap();
 
-        let mask = ProjectionMask::columns(builder.parquet_schema(), ["blog_id"]);
-        let reader = builder.with_projection(mask).build().unwrap();
+        let row_group_metadata = builder.metadata.row_group(0);
 
-        // Verify that the schema was correctly parsed
-        assert_eq!(1, reader.schema().fields().len());
-        assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]);
+        // test page encoding stats
+        let page_encoding_stats = row_group_metadata
+            .column(0)
+            .page_encoding_stats_mask()
+            .unwrap();
+        assert!(page_encoding_stats.is_only(Encoding::PLAIN));
+        let page_encoding_stats = row_group_metadata
+            .column(2)
+            .page_encoding_stats_mask()
+            .unwrap();
+        assert!(page_encoding_stats.is_only(Encoding::PLAIN_DICTIONARY));
     }
 
     #[test]
-    fn test_null_column_reader_test() {
-        let mut file = tempfile::tempfile().unwrap();
+    fn test_stats_stats_skipped() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_tiny_pages.parquet");
+        let file = File::open(path).unwrap();
 
-        let schema = "
-            message message {
+        // test skipping all
+        let arrow_options = ArrowReaderOptions::new()
+            .with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll)
+            .with_column_stats_policy(ParquetStatisticsPolicy::SkipAll);
+        let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
+            file.try_clone().unwrap(),
+            arrow_options,
+        )
+        .unwrap();
+
+        let row_group_metadata = builder.metadata.row_group(0);
+        for column in row_group_metadata.columns() {
+            assert!(column.page_encoding_stats().is_none());
+            assert!(column.page_encoding_stats_mask().is_none());
+            assert!(column.statistics().is_none());
+        }
+
+        // test skipping all but one column and converting to mask
+        let arrow_options = ArrowReaderOptions::new()
+            .with_encoding_stats_as_mask(true)
+            .with_encoding_stats_policy(ParquetStatisticsPolicy::skip_except(&[0]))
+            .with_column_stats_policy(ParquetStatisticsPolicy::skip_except(&[0]));
+        let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
+            file.try_clone().unwrap(),
+            arrow_options,
+        )
+        .unwrap();
+
+        let row_group_metadata = builder.metadata.row_group(0);
+        for (idx, column) in row_group_metadata.columns().iter().enumerate() {
+            assert!(column.page_encoding_stats().is_none());
+            assert_eq!(column.page_encoding_stats_mask().is_some(), idx == 0);
+            assert_eq!(column.statistics().is_some(), idx == 0);
+        }
+    }
+
+    #[test]
+    fn test_size_stats_stats_skipped() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/repeated_primitive_no_list.parquet");
+        let file = File::open(path).unwrap();
+
+        // test skipping all
+        let arrow_options =
+            ArrowReaderOptions::new().with_size_stats_policy(ParquetStatisticsPolicy::SkipAll);
+        let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
+            file.try_clone().unwrap(),
+            arrow_options,
+        )
+        .unwrap();
+
+        let row_group_metadata = builder.metadata.row_group(0);
+        for column in row_group_metadata.columns() {
+            assert!(column.repetition_level_histogram().is_none());
+            assert!(column.definition_level_histogram().is_none());
+            assert!(column.unencoded_byte_array_data_bytes().is_none());
+        }
+
+        // test skipping all but one column and converting to mask
+        let arrow_options = ArrowReaderOptions::new()
+            .with_encoding_stats_as_mask(true)
+            .with_size_stats_policy(ParquetStatisticsPolicy::skip_except(&[1]));
+        let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
+            file.try_clone().unwrap(),
+            arrow_options,
+        )
+        .unwrap();
+
+        let row_group_metadata = builder.metadata.row_group(0);
+        for (idx, column) in row_group_metadata.columns().iter().enumerate() {
+            assert_eq!(column.repetition_level_histogram().is_some(), idx == 1);
+            assert_eq!(column.definition_level_histogram().is_some(), idx == 1);
+            assert_eq!(column.unencoded_byte_array_data_bytes().is_some(), idx == 1);
+        }
+    }
+
+    #[test]
+    fn test_arrow_reader_single_column() {
+        let file = get_test_file("parquet/generated_simple_numerics/blogs.parquet");
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+        let original_schema = Arc::clone(builder.schema());
+
+        let mask = ProjectionMask::leaves(builder.parquet_schema(), [2]);
+        let reader = builder.with_projection(mask).build().unwrap();
+
+        // Verify that the schema was correctly parsed
+        assert_eq!(1, reader.schema().fields().len());
+        assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]);
+    }
+
+    #[test]
+    fn test_arrow_reader_single_column_by_name() {
+        let file = get_test_file("parquet/generated_simple_numerics/blogs.parquet");
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+        let original_schema = Arc::clone(builder.schema());
+
+        let mask = ProjectionMask::columns(builder.parquet_schema(), ["blog_id"]);
+        let reader = builder.with_projection(mask).build().unwrap();
+
+        // Verify that the schema was correctly parsed
+        assert_eq!(1, reader.schema().fields().len());
+        assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]);
+    }
+
+    #[test]
+    fn test_null_column_reader_test() {
+        let mut file = tempfile::tempfile().unwrap();
+
+        let schema = "
+            message message {
                 OPTIONAL INT32 int32;
             }
         ";
@@ -1464,7 +2133,7 @@ mod tests {
     struct RandFixedLenGen {}
 
     impl RandGen<FixedLenByteArrayType> for RandFixedLenGen {
-        fn gen(len: i32) -> FixedLenByteArray {
+        fn r#gen(len: i32) -> FixedLenByteArray {
             let mut v = vec![0u8; len as usize];
             rng().fill_bytes(&mut v);
             ByteArray::from(v).into()
@@ -1693,8 +2362,8 @@ mod tests {
     struct RandUtf8Gen {}
 
     impl RandGen<ByteArrayType> for RandUtf8Gen {
-        fn gen(len: i32) -> ByteArray {
-            Int32Type::gen(len).to_string().as_str().into()
+        fn r#gen(len: i32) -> ByteArray {
+            Int32Type::r#gen(len).to_string().as_str().into()
         }
     }
 
@@ -1796,22 +2465,21 @@ mod tests {
                 encodings,
             );
 
-            // https://github.com/apache/arrow-rs/issues/1179
-            // let data_type = ArrowDataType::Dictionary(
-            //     Box::new(key.clone()),
-            //     Box::new(ArrowDataType::LargeUtf8),
-            // );
-            //
-            // run_single_column_reader_tests::<ByteArrayType, _, RandUtf8Gen>(
-            //     2,
-            //     ConvertedType::UTF8,
-            //     Some(data_type.clone()),
-            //     move |vals| {
-            //         let vals = string_converter::<i64>(vals);
-            //         arrow::compute::cast(&vals, &data_type).unwrap()
-            //     },
-            //     encodings,
-            // );
+            let data_type = ArrowDataType::Dictionary(
+                Box::new(key.clone()),
+                Box::new(ArrowDataType::LargeUtf8),
+            );
+
+            run_single_column_reader_tests::<ByteArrayType, _, RandUtf8Gen>(
+                2,
+                ConvertedType::UTF8,
+                Some(data_type.clone()),
+                move |vals| {
+                    let vals = string_converter::<i64>(vals);
+                    arrow::compute::cast(&vals, &data_type).unwrap()
+                },
+                encodings,
+            );
         }
     }
 
@@ -2129,7 +2797,7 @@ mod tests {
         let batch = record_reader.next().unwrap().unwrap();
         assert_eq!(batch.num_rows(), 1);
 
-        let expected_schema = Schema::new(Fields::from(vec![Field::new(
+        let expected_schema = Schema::new(vec![Field::new(
             "my_map",
             ArrowDataType::Map(
                 Arc::new(Field::new(
@@ -2143,7 +2811,7 @@ mod tests {
                 false,
             ),
             true,
-        )]));
+        )]);
         assert_eq!(batch.schema().as_ref(), &expected_schema);
 
         assert_eq!(batch.num_rows(), 1);
@@ -2193,6 +2861,63 @@ mod tests {
         assert_eq!(&batch, &read[0])
     }
 
+    #[test]
+    fn test_read_nullable_structs_with_binary_dict_as_first_child_column() {
+        // The `StructArrayReader` checks the definition and repetition levels of the first
+        // child column in the struct to determine nullability for the struct. If the first
+        // column is being read by `ByteArrayDictionaryReader`, we need to ensure that the
+        // nullability is interpreted correctly from the rep/def level buffers managed by
+        // this array reader.
+
+        let struct_fields = Fields::from(vec![
+            Field::new(
+                "city",
+                ArrowDataType::Dictionary(
+                    Box::new(ArrowDataType::UInt8),
+                    Box::new(ArrowDataType::Utf8),
+                ),
+                true,
+            ),
+            Field::new("name", ArrowDataType::Utf8, true),
+        ]);
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "items",
+            ArrowDataType::Struct(struct_fields.clone()),
+            true,
+        )]));
+
+        let items_arr = StructArray::new(
+            struct_fields,
+            vec![
+                Arc::new(DictionaryArray::new(
+                    UInt8Array::from_iter_values(vec![0, 1, 1, 0, 2]),
+                    Arc::new(StringArray::from_iter_values(vec![
+                        "quebec",
+                        "fredericton",
+                        "halifax",
+                    ])),
+                )),
+                Arc::new(StringArray::from_iter_values(vec![
+                    "albert", "terry", "lance", "", "tim",
+                ])),
+            ],
+            Some(NullBuffer::from_iter(vec![true, true, true, false, true])),
+        );
+
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(items_arr)]).unwrap();
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+        let read = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 8)
+            .unwrap()
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap();
+
+        assert_eq!(read.len(), 1);
+        assert_eq!(&batch, &read[0])
+    }
+
     /// Parameters for single_column_reader_test
     #[derive(Clone)]
     struct TestOptions {
@@ -2543,7 +3268,10 @@ mod tests {
         // Print out options to facilitate debugging failures on CI
         println!(
             "Running type {:?} single_column_reader_test ConvertedType::{}/ArrowType::{:?} with Options: {:?}",
-            T::get_physical_type(), converted_type, arrow_type, opts
+            T::get_physical_type(),
+            converted_type,
+            arrow_type,
+            opts
         );
 
         //according to null_percent generate def_levels
@@ -2700,7 +3428,7 @@ mod tests {
                 assert_eq!(end - total_read, batch.num_rows());
 
                 let a = converter(&expected_data[total_read..end]);
-                let b = Arc::clone(batch.column(0));
+                let b = batch.column(0);
 
                 assert_eq!(a.data_type(), b.data_type());
                 assert_eq!(a.to_data(), b.to_data());
@@ -2747,7 +3475,7 @@ mod tests {
         schema: TypePtr,
         field: Option<Field>,
         opts: &TestOptions,
-    ) -> Result<crate::format::FileMetaData> {
+    ) -> Result<ParquetMetaData> {
         let mut writer_props = opts.writer_props();
         if let Some(field) = field {
             let arrow_schema = Schema::new(vec![field]);
@@ -2940,11 +3668,11 @@ mod tests {
 
         let reader = builder.with_projection(mask).build().unwrap();
 
-        let expected_schema = Schema::new(Fields::from(vec![Field::new(
+        let expected_schema = Schema::new(vec![Field::new(
             "group",
             ArrowDataType::Struct(vec![Field::new("leaf", ArrowDataType::Int32, false)].into()),
             true,
-        )]));
+        )]);
 
         let batch = reader.into_iter().next().unwrap().unwrap();
         assert_eq!(batch.schema().as_ref(), &expected_schema);
@@ -3019,7 +3747,7 @@ mod tests {
                     "Parquet argument error: Parquet error: encountered non UTF-8 data";
                 assert!(
                     err.to_string().contains(expected_err),
-                    "data type: {data_type:?}, expected: {expected_err}, got: {err}"
+                    "data type: {data_type}, expected: {expected_err}, got: {err}"
                 );
             }
         }
@@ -3058,7 +3786,7 @@ mod tests {
                     "Parquet argument error: Parquet error: encountered non UTF-8 data";
                 assert!(
                     err.to_string().contains(expected_err),
-                    "data type: {data_type:?}, expected: {expected_err}, got: {err}"
+                    "data type: {data_type}, expected: {expected_err}, got: {err}"
                 );
             }
         }
@@ -3511,8 +4239,8 @@ mod tests {
                 ),
             ])),
             "Arrow: Incompatible supplied Arrow schema: data type mismatch for field nested: \
-            requested Struct([Field { name: \"nested1_valid\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"nested1_invalid\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]) \
-            but found Struct([Field { name: \"nested1_valid\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"nested1_invalid\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }])",
+            requested Struct(\"nested1_valid\": non-null Utf8, \"nested1_invalid\": non-null Int32) \
+            but found Struct(\"nested1_valid\": non-null Utf8, \"nested1_invalid\": non-null Int64)",
         );
     }
 
@@ -3546,7 +4274,10 @@ mod tests {
         let err =
             ParquetRecordBatchReaderBuilder::try_new_with_options(parquet_data, reader_options)
                 .unwrap_err();
-        assert_eq!(err.to_string(), "Arrow: Incompatible supplied Arrow schema: data type mismatch for field column1: requested Int32 but found Utf8")
+        assert_eq!(
+            err.to_string(),
+            "Arrow: Incompatible supplied Arrow schema: data type mismatch for field column1: requested Int32 but found Utf8"
+        )
     }
 
     #[test]
@@ -3566,7 +4297,10 @@ mod tests {
         let err =
             ParquetRecordBatchReaderBuilder::try_new_with_options(parquet_data, reader_options)
                 .unwrap_err();
-        assert_eq!(err.to_string(), "Arrow: Incompatible supplied Arrow schema: nullability mismatch for field column1: expected true but found false")
+        assert_eq!(
+            err.to_string(),
+            "Arrow: Incompatible supplied Arrow schema: nullability mismatch for field column1: expected true but found false"
+        )
     }
 
     #[test]
@@ -4338,6 +5072,162 @@ mod tests {
         assert_eq!(out, batch.slice(2, 1));
     }
 
+    #[test]
+    fn test_row_selection_interleaved_skip() -> Result<()> {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "v",
+            ArrowDataType::Int32,
+            false,
+        )]));
+
+        let values = Int32Array::from(vec![0, 1, 2, 3, 4]);
+        let batch = RecordBatch::try_from_iter([("v", Arc::new(values) as ArrayRef)]).unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), None).unwrap();
+        writer.write(&batch)?;
+        writer.close()?;
+
+        let selection = RowSelection::from(vec![
+            RowSelector::select(1),
+            RowSelector::skip(2),
+            RowSelector::select(2),
+        ]);
+
+        let mut reader = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer))?
+            .with_batch_size(4)
+            .with_row_selection(selection)
+            .build()?;
+
+        let out = reader.next().unwrap()?;
+        assert_eq!(out.num_rows(), 3);
+        let values = out
+            .column(0)
+            .as_primitive::<arrow_array::types::Int32Type>()
+            .values();
+        assert_eq!(values, &[0, 3, 4]);
+        assert!(reader.next().is_none());
+        Ok(())
+    }
+
+    #[test]
+    fn test_row_selection_mask_sparse_rows() -> Result<()> {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "v",
+            ArrowDataType::Int32,
+            false,
+        )]));
+
+        let values = Int32Array::from((0..30).collect::<Vec<i32>>());
+        let batch = RecordBatch::try_from_iter([("v", Arc::new(values) as ArrayRef)])?;
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), None)?;
+        writer.write(&batch)?;
+        writer.close()?;
+
+        let total_rows = batch.num_rows();
+        let ranges = (1..total_rows)
+            .step_by(2)
+            .map(|i| i..i + 1)
+            .collect::<Vec<_>>();
+        let selection = RowSelection::from_consecutive_ranges(ranges.into_iter(), total_rows);
+
+        let selectors: Vec<RowSelector> = selection.clone().into();
+        assert!(total_rows < selectors.len() * 8);
+
+        let bytes = Bytes::from(buffer);
+
+        let reader = ParquetRecordBatchReaderBuilder::try_new(bytes.clone())?
+            .with_batch_size(7)
+            .with_row_selection(selection)
+            .build()?;
+
+        let mut collected = Vec::new();
+        for batch in reader {
+            let batch = batch?;
+            collected.extend_from_slice(
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values(),
+            );
+        }
+
+        let expected: Vec<i32> = (1..total_rows).step_by(2).map(|i| i as i32).collect();
+        assert_eq!(collected, expected);
+        Ok(())
+    }
+
+    fn test_decimal32_roundtrip() {
+        let d = |values: Vec<i32>, p: u8| {
+            let iter = values.into_iter();
+            PrimitiveArray::<Decimal32Type>::from_iter_values(iter)
+                .with_precision_and_scale(p, 2)
+                .unwrap()
+        };
+
+        let d1 = d(vec![1, 2, 3, 4, 5], 9);
+        let batch = RecordBatch::try_from_iter([("d1", Arc::new(d1) as ArrayRef)]).unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer)).unwrap();
+        let t1 = builder.parquet_schema().columns()[0].physical_type();
+        assert_eq!(t1, PhysicalType::INT32);
+
+        let mut reader = builder.build().unwrap();
+        assert_eq!(batch.schema(), reader.schema());
+
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(batch, out);
+    }
+
+    fn test_decimal64_roundtrip() {
+        // Precision <= 9 -> INT32
+        // Precision <= 18 -> INT64
+
+        let d = |values: Vec<i64>, p: u8| {
+            let iter = values.into_iter();
+            PrimitiveArray::<Decimal64Type>::from_iter_values(iter)
+                .with_precision_and_scale(p, 2)
+                .unwrap()
+        };
+
+        let d1 = d(vec![1, 2, 3, 4, 5], 9);
+        let d2 = d(vec![1, 2, 3, 4, 10.pow(10) - 1], 10);
+        let d3 = d(vec![1, 2, 3, 4, 10.pow(18) - 1], 18);
+
+        let batch = RecordBatch::try_from_iter([
+            ("d1", Arc::new(d1) as ArrayRef),
+            ("d2", Arc::new(d2) as ArrayRef),
+            ("d3", Arc::new(d3) as ArrayRef),
+        ])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer)).unwrap();
+        let t1 = builder.parquet_schema().columns()[0].physical_type();
+        assert_eq!(t1, PhysicalType::INT32);
+        let t2 = builder.parquet_schema().columns()[1].physical_type();
+        assert_eq!(t2, PhysicalType::INT64);
+        let t3 = builder.parquet_schema().columns()[2].physical_type();
+        assert_eq!(t3, PhysicalType::INT64);
+
+        let mut reader = builder.build().unwrap();
+        assert_eq!(batch.schema(), reader.schema());
+
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(batch, out);
+    }
+
     fn test_decimal_roundtrip<T: DecimalType>() {
         // Precision <= 9 -> INT32
         // Precision <= 18 -> INT64
@@ -4387,6 +5277,8 @@ mod tests {
 
     #[test]
     fn test_decimal() {
+        test_decimal32_roundtrip();
+        test_decimal64_roundtrip();
         test_decimal_roundtrip::<Decimal128Type>();
         test_decimal_roundtrip::<Decimal256Type>();
     }
@@ -4648,4 +5540,507 @@ mod tests {
         assert_eq!(c0.len(), c1.len());
         c0.iter().zip(c1.iter()).for_each(|(l, r)| assert_eq!(l, r));
     }
+
+    #[test]
+    fn test_row_filter_full_page_skip_is_handled() {
+        let first_value: i64 = 1111;
+        let last_value: i64 = 9999;
+        let num_rows: usize = 12;
+
+        // Build 12 rows so the filtered row selection has an average selector length of 4
+        // The pages would be (1111 XXXX) ... (4 pages in the middle) ... (XXXX 9999)
+        // The row selection would be [select 1111, (skip 10), select 9999]
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("key", arrow_schema::DataType::Int64, false),
+            Field::new("value", arrow_schema::DataType::Int64, false),
+        ]));
+
+        let mut int_values: Vec<i64> = (0..num_rows as i64).collect();
+        int_values[0] = first_value;
+        int_values[num_rows - 1] = last_value;
+        let keys = Int64Array::from(int_values.clone());
+        let values = Int64Array::from(int_values.clone());
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![Arc::new(keys) as ArrayRef, Arc::new(values) as ArrayRef],
+        )
+        .unwrap();
+
+        let props = WriterProperties::builder()
+            .set_write_batch_size(2)
+            .set_data_page_row_count_limit(2)
+            .build();
+
+        let mut buffer = Vec::new();
+        let mut writer = ArrowWriter::try_new(&mut buffer, schema, Some(props)).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+        let data = Bytes::from(buffer);
+
+        let options = ArrowReaderOptions::new().with_page_index(true);
+        let builder =
+            ParquetRecordBatchReaderBuilder::try_new_with_options(data.clone(), options).unwrap();
+        let schema = builder.parquet_schema().clone();
+        let filter_mask = ProjectionMask::leaves(&schema, [0]);
+
+        let make_predicate = |mask: ProjectionMask| {
+            ArrowPredicateFn::new(mask, move |batch: RecordBatch| {
+                let column = batch.column(0);
+                let match_first = eq(column, &Int64Array::new_scalar(first_value))?;
+                let match_second = eq(column, &Int64Array::new_scalar(last_value))?;
+                or(&match_first, &match_second)
+            })
+        };
+
+        let options = ArrowReaderOptions::new().with_page_index(true);
+        let predicate = make_predicate(filter_mask.clone());
+
+        // The batch size is set to 12 so all rows left after filtering are read in one go.
+        // If the reader used a mask to apply the filter, it could panic because the middle 4 pages may not be decoded.
+        let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(data.clone(), options)
+            .unwrap()
+            .with_row_filter(RowFilter::new(vec![Box::new(predicate)]))
+            .with_row_selection_policy(RowSelectionPolicy::Auto { threshold: 32 })
+            .with_batch_size(12)
+            .build()
+            .unwrap();
+
+        // Predicate pruning used to panic once mask-backed plans removed whole pages.
+        // Collecting into batches validates that the plan now downgrades to selectors instead.
+        let schema = reader.schema().clone();
+        let batches = reader.collect::<Result<Vec<_>, _>>().unwrap();
+        let result = concat_batches(&schema, &batches).unwrap();
+        assert_eq!(result.num_rows(), 2);
+    }
+
+    #[test]
+    fn test_get_row_group_column_bloom_filter_with_length() {
+        // convert to new parquet file with bloom_filter_length
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/data_index_bloom_encoding_stats.parquet");
+        let file = File::open(path).unwrap();
+        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+        let schema = builder.schema().clone();
+        let reader = builder.build().unwrap();
+
+        let mut parquet_data = Vec::new();
+        let props = WriterProperties::builder()
+            .set_bloom_filter_enabled(true)
+            .build();
+        let mut writer = ArrowWriter::try_new(&mut parquet_data, schema, Some(props)).unwrap();
+        for batch in reader {
+            let batch = batch.unwrap();
+            writer.write(&batch).unwrap();
+        }
+        writer.close().unwrap();
+
+        // test the new parquet file
+        test_get_row_group_column_bloom_filter(parquet_data.into(), true);
+    }
+
+    #[test]
+    fn test_get_row_group_column_bloom_filter_without_length() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/data_index_bloom_encoding_stats.parquet");
+        let data = Bytes::from(std::fs::read(path).unwrap());
+        test_get_row_group_column_bloom_filter(data, false);
+    }
+
+    fn test_get_row_group_column_bloom_filter(data: Bytes, with_length: bool) {
+        let builder = ParquetRecordBatchReaderBuilder::try_new(data.clone()).unwrap();
+
+        let metadata = builder.metadata();
+        assert_eq!(metadata.num_row_groups(), 1);
+        let row_group = metadata.row_group(0);
+        let column = row_group.column(0);
+        assert_eq!(column.bloom_filter_length().is_some(), with_length);
+
+        let sbbf = builder
+            .get_row_group_column_bloom_filter(0, 0)
+            .unwrap()
+            .unwrap();
+        assert!(sbbf.check(&"Hello"));
+        assert!(!sbbf.check(&"Hello_Not_Exists"));
+    }
+
+    #[test]
+    fn test_read_unknown_logical_type() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/unknown-logical-type.parquet");
+        let test_file = File::open(path).unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(test_file)
+            .expect("Error creating reader builder");
+
+        let schema = builder.metadata().file_metadata().schema_descr();
+        assert_eq!(
+            schema.column(0).logical_type_ref(),
+            Some(&LogicalType::String)
+        );
+        assert_eq!(
+            schema.column(1).logical_type_ref(),
+            Some(&LogicalType::_Unknown { field_id: 2555 })
+        );
+        assert_eq!(schema.column(1).physical_type(), PhysicalType::BYTE_ARRAY);
+
+        let mut reader = builder.build().unwrap();
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(out.num_rows(), 3);
+        assert_eq!(out.num_columns(), 2);
+    }
+
+    #[test]
+    fn test_read_row_numbers() {
+        let file = write_parquet_from_iter(vec![(
+            "value",
+            Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef,
+        )]);
+        let supplied_fields = Fields::from(vec![Field::new("value", ArrowDataType::Int64, false)]);
+
+        let row_number_field = Arc::new(
+            Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber),
+        );
+
+        let options = ArrowReaderOptions::new()
+            .with_schema(Arc::new(Schema::new(supplied_fields)))
+            .with_virtual_columns(vec![row_number_field.clone()])
+            .unwrap();
+        let mut arrow_reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
+            file.try_clone().unwrap(),
+            options,
+        )
+        .expect("reader builder with schema")
+        .build()
+        .expect("reader with schema");
+
+        let batch = arrow_reader.next().unwrap().unwrap();
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("value", ArrowDataType::Int64, false),
+            (*row_number_field).clone(),
+        ]));
+
+        assert_eq!(batch.schema(), schema);
+        assert_eq!(batch.num_columns(), 2);
+        assert_eq!(batch.num_rows(), 3);
+        assert_eq!(
+            batch
+                .column(0)
+                .as_primitive::<types::Int64Type>()
+                .iter()
+                .collect::<Vec<_>>(),
+            vec![Some(1), Some(2), Some(3)]
+        );
+        assert_eq!(
+            batch
+                .column(1)
+                .as_primitive::<types::Int64Type>()
+                .iter()
+                .collect::<Vec<_>>(),
+            vec![Some(0), Some(1), Some(2)]
+        );
+    }
+
+    #[test]
+    fn test_read_only_row_numbers() {
+        let file = write_parquet_from_iter(vec![(
+            "value",
+            Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef,
+        )]);
+        let row_number_field = Arc::new(
+            Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber),
+        );
+        let options = ArrowReaderOptions::new()
+            .with_virtual_columns(vec![row_number_field.clone()])
+            .unwrap();
+        let metadata = ArrowReaderMetadata::load(&file, options).unwrap();
+        let num_columns = metadata
+            .metadata
+            .file_metadata()
+            .schema_descr()
+            .num_columns();
+
+        let mut arrow_reader = ParquetRecordBatchReaderBuilder::new_with_metadata(file, metadata)
+            .with_projection(ProjectionMask::none(num_columns))
+            .build()
+            .expect("reader with schema");
+
+        let batch = arrow_reader.next().unwrap().unwrap();
+        let schema = Arc::new(Schema::new(vec![row_number_field]));
+
+        assert_eq!(batch.schema(), schema);
+        assert_eq!(batch.num_columns(), 1);
+        assert_eq!(batch.num_rows(), 3);
+        assert_eq!(
+            batch
+                .column(0)
+                .as_primitive::<types::Int64Type>()
+                .iter()
+                .collect::<Vec<_>>(),
+            vec![Some(0), Some(1), Some(2)]
+        );
+    }
+
+    #[test]
+    fn test_read_row_numbers_row_group_order() -> Result<()> {
+        // Make a parquet file with 100 rows split across 2 row groups
+        let array = Int64Array::from_iter_values(5000..5100);
+        let batch = RecordBatch::try_from_iter([("col", Arc::new(array) as ArrayRef)])?;
+        let mut buffer = Vec::new();
+        let options = WriterProperties::builder()
+            .set_max_row_group_size(50)
+            .build();
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema().clone(), Some(options))?;
+        // write in 10 row batches as the size limits are enforced after each batch
+        for batch_chunk in (0..10).map(|i| batch.slice(i * 10, 10)) {
+            writer.write(&batch_chunk)?;
+        }
+        writer.close()?;
+
+        let row_number_field = Arc::new(
+            Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber),
+        );
+
+        let buffer = Bytes::from(buffer);
+
+        let options =
+            ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field.clone()])?;
+
+        // read out with normal options
+        let arrow_reader =
+            ParquetRecordBatchReaderBuilder::try_new_with_options(buffer.clone(), options.clone())?
+                .build()?;
+
+        assert_eq!(
+            ValuesAndRowNumbers {
+                values: (5000..5100).collect(),
+                row_numbers: (0..100).collect()
+            },
+            ValuesAndRowNumbers::new_from_reader(arrow_reader)
+        );
+
+        // Now read, out of order row groups
+        let arrow_reader = ParquetRecordBatchReaderBuilder::try_new_with_options(buffer, options)?
+            .with_row_groups(vec![1, 0])
+            .build()?;
+
+        assert_eq!(
+            ValuesAndRowNumbers {
+                values: (5050..5100).chain(5000..5050).collect(),
+                row_numbers: (50..100).chain(0..50).collect(),
+            },
+            ValuesAndRowNumbers::new_from_reader(arrow_reader)
+        );
+
+        Ok(())
+    }
+
+    #[derive(Debug, PartialEq)]
+    struct ValuesAndRowNumbers {
+        values: Vec<i64>,
+        row_numbers: Vec<i64>,
+    }
+    impl ValuesAndRowNumbers {
+        /// Reads every batch from `reader`, gathering the "col" and "row_number" column values
+        fn new_from_reader(reader: ParquetRecordBatchReader) -> Self {
+            let mut values = vec![];
+            let mut row_numbers = vec![];
+            for batch in reader {
+                let batch = batch.expect("Could not read batch");
+                values.extend(
+                    batch
+                        .column_by_name("col")
+                        .expect("Could not get col column")
+                        .as_primitive::<arrow::datatypes::Int64Type>()
+                        .iter()
+                        .map(|v| v.expect("Could not get value")),
+                );
+
+                row_numbers.extend(
+                    batch
+                        .column_by_name("row_number")
+                        .expect("Could not get row_number column")
+                        .as_primitive::<arrow::datatypes::Int64Type>()
+                        .iter()
+                        .map(|v| v.expect("Could not get row number")),
+                );
+            }
+            Self {
+                values,
+                row_numbers,
+            }
+        }
+    }
+
+    #[test]
+    fn test_with_virtual_columns_rejects_non_virtual_fields() {
+        // Try to pass a regular field (not a virtual column) to with_virtual_columns
+        let regular_field = Arc::new(Field::new("regular_column", ArrowDataType::Int64, false));
+        assert_eq!(
+            ArrowReaderOptions::new()
+                .with_virtual_columns(vec![regular_field])
+                .unwrap_err()
+                .to_string(),
+            "Parquet error: Field 'regular_column' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'"
+        );
+    }
+
+    #[test]
+    fn test_row_numbers_with_multiple_row_groups() {
+        test_row_numbers_with_multiple_row_groups_helper(
+            false,
+            |path, selection, _row_filter, batch_size| {
+                let file = File::open(path).unwrap();
+                let row_number_field = Arc::new(
+                    Field::new("row_number", ArrowDataType::Int64, false)
+                        .with_extension_type(RowNumber),
+                );
+                let options = ArrowReaderOptions::new()
+                    .with_virtual_columns(vec![row_number_field])
+                    .unwrap();
+                let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
+                    .unwrap()
+                    .with_row_selection(selection)
+                    .with_batch_size(batch_size)
+                    .build()
+                    .expect("Could not create reader");
+                reader
+                    .collect::<Result<Vec<_>, _>>()
+                    .expect("Could not read")
+            },
+        );
+    }
+
+    #[test]
+    fn test_row_numbers_with_multiple_row_groups_and_filter() {
+        test_row_numbers_with_multiple_row_groups_helper(
+            true,
+            |path, selection, row_filter, batch_size| {
+                let file = File::open(path).unwrap();
+                let row_number_field = Arc::new(
+                    Field::new("row_number", ArrowDataType::Int64, false)
+                        .with_extension_type(RowNumber),
+                );
+                let options = ArrowReaderOptions::new()
+                    .with_virtual_columns(vec![row_number_field])
+                    .unwrap();
+                let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
+                    .unwrap()
+                    .with_row_selection(selection)
+                    .with_batch_size(batch_size)
+                    .with_row_filter(row_filter.expect("No filter"))
+                    .build()
+                    .expect("Could not create reader");
+                reader
+                    .collect::<Result<Vec<_>, _>>()
+                    .expect("Could not read")
+            },
+        );
+    }
+
+    pub(crate) fn test_row_numbers_with_multiple_row_groups_helper<F>(
+        use_filter: bool,
+        test_case: F,
+    ) where
+        F: FnOnce(PathBuf, RowSelection, Option<RowFilter>, usize) -> Vec<RecordBatch>,
+    {
+        let seed: u64 = random();
+        println!("test_row_numbers_with_multiple_row_groups seed: {seed}");
+        let mut rng = StdRng::seed_from_u64(seed);
+
+        use tempfile::TempDir;
+        let tempdir = TempDir::new().expect("Could not create temp dir");
+
+        let (bytes, metadata) = generate_file_with_row_numbers(&mut rng);
+
+        let path = tempdir.path().join("test.parquet");
+        std::fs::write(&path, bytes).expect("Could not write file");
+
+        let mut case = vec![];
+        let mut remaining = metadata.file_metadata().num_rows();
+        while remaining > 0 {
+            let row_count = rng.random_range(1..=remaining);
+            remaining -= row_count;
+            case.push(RowSelector {
+                row_count: row_count as usize,
+                skip: rng.random_bool(0.5),
+            });
+        }
+
+        let filter = use_filter.then(|| {
+            let filter = (0..metadata.file_metadata().num_rows())
+                .map(|_| rng.random_bool(0.99))
+                .collect::<Vec<_>>();
+            let mut filter_offset = 0;
+            RowFilter::new(vec![Box::new(ArrowPredicateFn::new(
+                ProjectionMask::all(),
+                move |b| {
+                    let array = BooleanArray::from_iter(
+                        filter
+                            .iter()
+                            .skip(filter_offset)
+                            .take(b.num_rows())
+                            .map(|x| Some(*x)),
+                    );
+                    filter_offset += b.num_rows();
+                    Ok(array)
+                },
+            ))])
+        });
+
+        let selection = RowSelection::from(case);
+        let batches = test_case(path, selection.clone(), filter, rng.random_range(1..4096));
+
+        if selection.skipped_row_count() == metadata.file_metadata().num_rows() as usize {
+            assert!(batches.into_iter().all(|batch| batch.num_rows() == 0));
+            return;
+        }
+        let actual = concat_batches(batches.first().expect("No batches").schema_ref(), &batches)
+            .expect("Failed to concatenate");
+        // Row counts are not compared with `selection.row_count()`: a row filter may drop selected rows
+        let values = actual
+            .column(0)
+            .as_primitive::<types::Int64Type>()
+            .iter()
+            .collect::<Vec<_>>();
+        let row_numbers = actual
+            .column(1)
+            .as_primitive::<types::Int64Type>()
+            .iter()
+            .collect::<Vec<_>>();
+        assert_eq!(
+            row_numbers
+                .into_iter()
+                .map(|number| number.map(|number| number + 1))
+                .collect::<Vec<_>>(),
+            values
+        );
+    }
+
+    fn generate_file_with_row_numbers(rng: &mut impl Rng) -> (Bytes, ParquetMetaData) {
+        let schema = Arc::new(Schema::new(Fields::from(vec![Field::new(
+            "value",
+            ArrowDataType::Int64,
+            false,
+        )])));
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer =
+            ArrowWriter::try_new(&mut buf, schema.clone(), None).expect("Could not create writer");
+
+        let mut values = 1..=rng.random_range(1..4096);
+        while !values.is_empty() {
+            let batch_values = values
+                .by_ref()
+                .take(rng.random_range(1..4096))
+                .collect::<Vec<_>>();
+            let array = Arc::new(Int64Array::from(batch_values)) as ArrayRef;
+            let batch =
+                RecordBatch::try_from_iter([("value", array)]).expect("Could not create batch");
+            writer.write(&batch).expect("Could not write batch");
+            writer.flush().expect("Could not flush");
+        }
+        let metadata = writer.close().expect("Could not close writer");
+
+        (Bytes::from(buf), metadata)
+    }
 }
diff --git a/parquet/src/arrow/arrow_reader/read_plan.rs b/parquet/src/arrow/arrow_reader/read_plan.rs
index e083fb822be4..7c9eb36befe3 100644
--- a/parquet/src/arrow/arrow_reader/read_plan.rs
+++ b/parquet/src/arrow/arrow_reader/read_plan.rs
@@ -19,8 +19,10 @@
 //! from a Parquet file
 
 use crate::arrow::array_reader::ArrayReader;
+use crate::arrow::arrow_reader::selection::RowSelectionPolicy;
+use crate::arrow::arrow_reader::selection::RowSelectionStrategy;
 use crate::arrow::arrow_reader::{
-    ArrowPredicate, ParquetRecordBatchReader, RowSelection, RowSelector,
+    ArrowPredicate, ParquetRecordBatchReader, RowSelection, RowSelectionCursor, RowSelector,
 };
 use crate::errors::{ParquetError, Result};
 use arrow_array::Array;
@@ -28,31 +30,46 @@ use arrow_select::filter::prep_null_mask_filter;
 use std::collections::VecDeque;
 
 /// A builder for [`ReadPlan`]
-#[derive(Clone)]
-pub(crate) struct ReadPlanBuilder {
+#[derive(Clone, Debug)]
+pub struct ReadPlanBuilder {
     batch_size: usize,
-    /// Current to apply, includes all filters
+    /// Which rows to select. Includes the result of all filters applied so far
     selection: Option<RowSelection>,
+    /// Policy to use when materializing the row selection
+    row_selection_policy: RowSelectionPolicy,
 }
 
 impl ReadPlanBuilder {
     /// Create a `ReadPlanBuilder` with the given batch size
-    pub(crate) fn new(batch_size: usize) -> Self {
+    pub fn new(batch_size: usize) -> Self {
         Self {
             batch_size,
             selection: None,
+            row_selection_policy: RowSelectionPolicy::default(),
         }
     }
 
     /// Set the current selection to the given value
-    pub(crate) fn with_selection(mut self, selection: Option<RowSelection>) -> Self {
+    pub fn with_selection(mut self, selection: Option<RowSelection>) -> Self {
         self.selection = selection;
         self
     }
 
+    /// Configure the policy to use when materialising the [`RowSelection`]
+    ///
+    /// Defaults to [`RowSelectionPolicy::Auto`]
+    pub fn with_row_selection_policy(mut self, policy: RowSelectionPolicy) -> Self {
+        self.row_selection_policy = policy;
+        self
+    }
+
+    /// Returns the current row selection policy
+    pub fn row_selection_policy(&self) -> &RowSelectionPolicy {
+        &self.row_selection_policy
+    }
+
     /// Returns the current selection, if any
-    #[cfg(feature = "async")]
-    pub(crate) fn selection(&self) -> Option<&RowSelection> {
+    pub fn selection(&self) -> Option<&RowSelection> {
         self.selection.as_ref()
     }
 
@@ -68,7 +85,7 @@ impl ReadPlanBuilder {
     }
 
     /// Returns true if the current plan selects any rows
-    pub(crate) fn selects_any(&self) -> bool {
+    pub fn selects_any(&self) -> bool {
         self.selection
             .as_ref()
             .map(|s| s.selects_any())
@@ -76,11 +93,47 @@ impl ReadPlanBuilder {
     }
 
     /// Returns the number of rows selected, or `None` if all rows are selected.
-    #[cfg(feature = "async")]
-    pub(crate) fn num_rows_selected(&self) -> Option<usize> {
+    pub fn num_rows_selected(&self) -> Option<usize> {
         self.selection.as_ref().map(|s| s.row_count())
     }
 
+    /// Returns the [`RowSelectionStrategy`] for this plan.
+    ///
+    /// Guarantees to return either `Selectors` or `Mask`, never `Auto`.
+    pub(crate) fn resolve_selection_strategy(&self) -> RowSelectionStrategy {
+        match self.row_selection_policy {
+            RowSelectionPolicy::Selectors => RowSelectionStrategy::Selectors,
+            RowSelectionPolicy::Mask => RowSelectionStrategy::Mask,
+            RowSelectionPolicy::Auto { threshold, .. } => {
+                let selection = match self.selection.as_ref() {
+                    Some(selection) => selection,
+                    None => return RowSelectionStrategy::Selectors,
+                };
+
+                // total_rows: total number of rows selected / skipped
+                // effective_count: number of non-empty selectors
+                let (total_rows, effective_count) =
+                    selection.iter().fold((0usize, 0usize), |(rows, count), s| {
+                        if s.row_count > 0 {
+                            (rows + s.row_count, count + 1)
+                        } else {
+                            (rows, count)
+                        }
+                    });
+
+                if effective_count == 0 {
+                    return RowSelectionStrategy::Mask;
+                }
+
+                if total_rows < effective_count.saturating_mul(threshold) {
+                    RowSelectionStrategy::Mask
+                } else {
+                    RowSelectionStrategy::Selectors
+                }
+            }
+        }
+    }
+
     /// Evaluates an [`ArrowPredicate`], updating this plan's `selection`
     ///
     /// If the current `selection` is `Some`, the resulting [`RowSelection`]
@@ -90,7 +143,7 @@ impl ReadPlanBuilder {
     /// Note: pre-existing selections may come from evaluating a previous predicate
     /// or if the [`ParquetRecordBatchReader`] specified an explicit
     /// [`RowSelection`] in addition to one or more predicates.
-    pub(crate) fn with_predicate(
+    pub fn with_predicate(
         mut self,
         array_reader: Box<dyn ArrayReader>,
         predicate: &mut dyn ArrowPredicate,
@@ -123,21 +176,39 @@ impl ReadPlanBuilder {
     }
 
     /// Create a final `ReadPlan` the read plan for the scan
-    pub(crate) fn build(mut self) -> ReadPlan {
+    pub fn build(mut self) -> ReadPlan {
         // If selection is empty, truncate
         if !self.selects_any() {
             self.selection = Some(RowSelection::from(vec![]));
         }
+
+        // Resolve the policy into a concrete strategy (`Selectors` or `Mask`, never `Auto`)
+        let selection_strategy = self.resolve_selection_strategy();
+
         let Self {
             batch_size,
             selection,
+            row_selection_policy: _,
         } = self;
 
-        let selection = selection.map(|s| s.trim().into());
+        let selection = selection.map(|s| s.trim());
+
+        let row_selection_cursor = selection
+            .map(|s| {
+                // `s` was already trimmed above, so it can be converted directly
+                let selectors: Vec<RowSelector> = s.into();
+                match selection_strategy {
+                    RowSelectionStrategy::Mask => {
+                        RowSelectionCursor::new_mask_from_selectors(selectors)
+                    }
+                    RowSelectionStrategy::Selectors => RowSelectionCursor::new_selectors(selectors),
+                }
+            })
+            .unwrap_or(RowSelectionCursor::new_all());
 
         ReadPlan {
             batch_size,
-            selection,
+            row_selection_cursor,
         }
     }
 }
@@ -230,17 +301,28 @@ impl LimitedReadPlanBuilder {
 /// A plan reading specific rows from a Parquet Row Group.
 ///
 /// See [`ReadPlanBuilder`] to create `ReadPlan`s
-pub(crate) struct ReadPlan {
+#[derive(Debug)]
+pub struct ReadPlan {
     /// The number of rows to read in each batch
     batch_size: usize,
     /// Row ranges to be selected from the data source
-    selection: Option<VecDeque<RowSelector>>,
+    row_selection_cursor: RowSelectionCursor,
 }
 
 impl ReadPlan {
-    /// Returns a mutable reference to the selection, if any
-    pub(crate) fn selection_mut(&mut self) -> Option<&mut VecDeque<RowSelector>> {
-        self.selection.as_mut()
+    /// Returns a mutable reference to the selection selectors, if any
+    #[deprecated(since = "57.1.0", note = "Use `row_selection_cursor_mut` instead")]
+    pub fn selection_mut(&mut self) -> Option<&mut VecDeque<RowSelector>> {
+        if let RowSelectionCursor::Selectors(selectors_cursor) = &mut self.row_selection_cursor {
+            Some(selectors_cursor.selectors_mut())
+        } else {
+            None
+        }
+    }
+
+    /// Returns a mutable reference to the row selection cursor
+    pub fn row_selection_cursor_mut(&mut self) -> &mut RowSelectionCursor {
+        &mut self.row_selection_cursor
     }
 
     /// Return the number of rows to read in each output batch
@@ -249,3 +331,33 @@ impl ReadPlan {
         self.batch_size
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn builder_with_selection(selection: RowSelection) -> ReadPlanBuilder {
+        ReadPlanBuilder::new(1024).with_selection(Some(selection))
+    }
+
+    #[test]
+    fn preferred_selection_strategy_prefers_mask_by_default() {
+        let selection = RowSelection::from(vec![RowSelector::select(8)]);
+        let builder = builder_with_selection(selection);
+        assert_eq!(
+            builder.resolve_selection_strategy(),
+            RowSelectionStrategy::Mask
+        );
+    }
+
+    #[test]
+    fn preferred_selection_strategy_prefers_selectors_when_threshold_small() {
+        let selection = RowSelection::from(vec![RowSelector::select(8)]);
+        let builder = builder_with_selection(selection)
+            .with_row_selection_policy(RowSelectionPolicy::Auto { threshold: 1 });
+        assert_eq!(
+            builder.resolve_selection_strategy(),
+            RowSelectionStrategy::Selectors
+        );
+    }
+}
diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs
index c53d47be2e56..2ddf812f9c39 100644
--- a/parquet/src/arrow/arrow_reader/selection.rs
+++ b/parquet/src/arrow/arrow_reader/selection.rs
@@ -15,12 +15,51 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::arrow::ProjectionMask;
+use crate::errors::ParquetError;
+use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation};
 use arrow_array::{Array, BooleanArray};
+use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
 use arrow_select::filter::SlicesIterator;
 use std::cmp::Ordering;
 use std::collections::VecDeque;
 use std::ops::Range;
 
+/// Policy for picking a strategy to materialise [`RowSelection`] during execution.
+///
+/// Note that this is a user-provided preference, and the actual strategy used
+/// may differ based on safety considerations (e.g. page skipping).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum RowSelectionPolicy {
+    /// Use a queue of [`RowSelector`] values
+    Selectors,
+    /// Use a boolean mask to materialise the selection
+    Mask,
+    /// Choose between [`Self::Mask`] and [`Self::Selectors`] based on selector density
+    Auto {
+        /// Average selector length below which masks are preferred
+        threshold: usize,
+    },
+}
+
+impl Default for RowSelectionPolicy {
+    fn default() -> Self {
+        Self::Auto { threshold: 32 }
+    }
+}
+
+/// Fully resolved strategy for materializing [`RowSelection`] during execution.
+///
+/// This is determined from a combination of user preference (via [`RowSelectionPolicy`])
+/// and safety considerations (e.g. page skipping).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum RowSelectionStrategy {
+    /// Use a queue of [`RowSelector`] values
+    Selectors,
+    /// Use a boolean mask to materialise the selection
+    Mask,
+}
+
 /// [`RowSelection`] is a collection of [`RowSelector`] used to skip rows when
 /// scanning a parquet file
 #[derive(Debug, Clone, Copy, Eq, PartialEq)]
@@ -95,7 +134,7 @@ impl RowSelector {
 /// * It contains no [`RowSelector`] of 0 rows
 /// * Consecutive [`RowSelector`]s alternate skipping or selecting rows
 ///
-/// [`PageIndex`]: crate::file::page_index::index::PageIndex
+/// [`PageIndex`]: crate::file::page_index::column_index::ColumnIndexMetaData
 #[derive(Debug, Clone, Default, Eq, PartialEq)]
 pub struct RowSelection {
     selectors: Vec<RowSelector>,
@@ -162,7 +201,7 @@ impl RowSelection {
     /// Note: this method does not make any effort to combine consecutive ranges, nor coalesce
     /// ranges that are close together. This is instead delegated to the IO subsystem to optimise,
     /// e.g. [`ObjectStore::get_ranges`](object_store::ObjectStore::get_ranges)
-    pub fn scan_ranges(&self, page_locations: &[crate::format::PageLocation]) -> Vec<Range<u64>> {
+    pub fn scan_ranges(&self, page_locations: &[PageLocation]) -> Vec<Range<u64>> {
         let mut ranges: Vec<Range<u64>> = vec![];
         let mut row_offset = 0;
 
@@ -211,6 +250,39 @@ impl RowSelection {
         ranges
     }
 
+    /// Returns true if this selection would skip any data pages within the provided columns
+    fn selection_skips_any_page(
+        &self,
+        projection: &ProjectionMask,
+        columns: &[OffsetIndexMetaData],
+    ) -> bool {
+        columns.iter().enumerate().any(|(leaf_idx, column)| {
+            if !projection.leaf_included(leaf_idx) {
+                return false;
+            }
+
+            let locations = column.page_locations();
+            if locations.is_empty() {
+                return false;
+            }
+
+            let ranges = self.scan_ranges(locations);
+            !ranges.is_empty() && ranges.len() < locations.len()
+        })
+    }
+
+    /// Returns true if selectors should be forced, preventing mask materialisation
+    pub(crate) fn should_force_selectors(
+        &self,
+        projection: &ProjectionMask,
+        offset_index: Option<&[OffsetIndexMetaData]>,
+    ) -> bool {
+        match offset_index {
+            Some(columns) => self.selection_skips_any_page(projection, columns),
+            None => false,
+        }
+    }
+
     /// Splits off the first `row_count` from this [`RowSelection`]
     pub fn split_off(&mut self, row_count: usize) -> Self {
         let mut total_count = 0;
@@ -441,6 +513,58 @@ impl RowSelection {
     pub fn skipped_row_count(&self) -> usize {
         self.iter().filter(|s| s.skip).map(|s| s.row_count).sum()
     }
+
+    /// Expands the selection to align with batch boundaries.
+    /// This is needed when using cached array readers to ensure that
+    /// the cached data covers full batches.
+    pub(crate) fn expand_to_batch_boundaries(&self, batch_size: usize, total_rows: usize) -> Self {
+        if batch_size == 0 {
+            return self.clone();
+        }
+
+        let mut expanded_ranges = Vec::new();
+        let mut row_offset = 0;
+
+        for selector in &self.selectors {
+            if selector.skip {
+                row_offset += selector.row_count;
+            } else {
+                let start = row_offset;
+                let end = row_offset + selector.row_count;
+
+                // Expand start to batch boundary
+                let expanded_start = (start / batch_size) * batch_size;
+                // Expand end to batch boundary
+                let expanded_end = end.div_ceil(batch_size) * batch_size;
+                let expanded_end = expanded_end.min(total_rows);
+
+                expanded_ranges.push(expanded_start..expanded_end);
+                row_offset += selector.row_count;
+            }
+        }
+
+        // Sort ranges by start position
+        expanded_ranges.sort_by_key(|range| range.start);
+
+        // Merge overlapping or consecutive ranges
+        let mut merged_ranges: Vec<Range<usize>> = Vec::new();
+        for range in expanded_ranges {
+            if let Some(last) = merged_ranges.last_mut() {
+                if range.start <= last.end {
+                    // Overlapping or consecutive - merge them
+                    last.end = last.end.max(range.end);
+                } else {
+                    // No overlap - add new range
+                    merged_ranges.push(range);
+                }
+            } else {
+                // First range
+                merged_ranges.push(range);
+            }
+        }
+
+        Self::from_consecutive_ranges(merged_ranges.into_iter(), total_rows)
+    }
 }
 
 impl From<Vec<RowSelector>> for RowSelection {
@@ -637,11 +761,181 @@ fn union_row_selections(left: &[RowSelector], right: &[RowSelector]) -> RowSelec
     iter.collect()
 }
 
+/// Cursor for iterating a mask-backed [`RowSelection`]
+///
+/// This is best for dense selections where there are many small skips
+/// or selections. For example, selecting every other row.
+#[derive(Debug)]
+pub struct MaskCursor {
+    mask: BooleanBuffer,
+    /// Current absolute offset into the selection
+    position: usize,
+}
+
+impl MaskCursor {
+    /// Returns `true` when no further rows remain
+    pub fn is_empty(&self) -> bool {
+        self.position >= self.mask.len()
+    }
+
+    /// Advance through the mask representation, producing the next chunk summary
+    pub fn next_mask_chunk(&mut self, batch_size: usize) -> Option<MaskChunk> {
+        let (initial_skip, chunk_rows, selected_rows, mask_start, end_position) = {
+            let mask = &self.mask;
+
+            if self.position >= mask.len() {
+                return None;
+            }
+
+            let start_position = self.position;
+            let mut cursor = start_position;
+            let mut initial_skip = 0;
+
+            while cursor < mask.len() && !mask.value(cursor) {
+                initial_skip += 1;
+                cursor += 1;
+            }
+
+            let mask_start = cursor;
+            let mut chunk_rows = 0;
+            let mut selected_rows = 0;
+
+            // Advance until enough rows have been selected to satisfy the batch size,
+            // or until the mask is exhausted. This mirrors the behaviour of the legacy
+            // `RowSelector` queue-based iteration.
+            while cursor < mask.len() && selected_rows < batch_size {
+                chunk_rows += 1;
+                if mask.value(cursor) {
+                    selected_rows += 1;
+                }
+                cursor += 1;
+            }
+
+            (initial_skip, chunk_rows, selected_rows, mask_start, cursor)
+        };
+
+        self.position = end_position;
+
+        Some(MaskChunk {
+            initial_skip,
+            chunk_rows,
+            selected_rows,
+            mask_start,
+        })
+    }
+
+    /// Materialise the boolean values for a mask-backed chunk
+    pub fn mask_values_for(&self, chunk: &MaskChunk) -> Result<BooleanArray, ParquetError> {
+        if chunk.mask_start.saturating_add(chunk.chunk_rows) > self.mask.len() {
+            return Err(ParquetError::General(
+                "Internal Error: MaskChunk exceeds mask length".to_string(),
+            ));
+        }
+        Ok(BooleanArray::from(
+            self.mask.slice(chunk.mask_start, chunk.chunk_rows),
+        ))
+    }
+}
+
+/// Cursor for iterating a selector-backed [`RowSelection`]
+///
+/// This is best for sparse selections where large contiguous
+/// blocks of rows are selected or skipped.
+#[derive(Debug)]
+pub struct SelectorsCursor {
+    selectors: VecDeque<RowSelector>,
+    /// Current absolute offset into the selection
+    position: usize,
+}
+
+impl SelectorsCursor {
+    /// Returns `true` when no selectors remain in the queue
+    pub fn is_empty(&self) -> bool {
+        self.selectors.is_empty()
+    }
+
+    pub(crate) fn selectors_mut(&mut self) -> &mut VecDeque<RowSelector> {
+        &mut self.selectors
+    }
+
+    /// Pop the next [`RowSelector`], advancing the position; panics if the cursor is empty
+    pub(crate) fn next_selector(&mut self) -> RowSelector {
+        let selector = self.selectors.pop_front().unwrap();
+        self.position += selector.row_count;
+        selector
+    }
+
+    /// Return a selector to the front, rewinding the position (saturating at zero)
+    pub(crate) fn return_selector(&mut self, selector: RowSelector) {
+        self.position = self.position.saturating_sub(selector.row_count);
+        self.selectors.push_front(selector);
+    }
+}
+
+/// Result of computing the next chunk to read when using a [`MaskCursor`]
+#[derive(Debug)]
+pub struct MaskChunk {
+    /// Number of leading rows to skip before reaching selected rows
+    pub initial_skip: usize,
+    /// Total rows covered by this chunk (selected + skipped)
+    pub chunk_rows: usize,
+    /// Rows actually selected within the chunk
+    pub selected_rows: usize,
+    /// Starting offset within the mask where the chunk begins
+    pub mask_start: usize,
+}
+
+/// Cursor for iterating a [`RowSelection`] during execution within a
+/// [`ReadPlan`](crate::arrow::arrow_reader::ReadPlan).
+///
+/// The `Mask` and `Selectors` variants each wrap a dedicated cursor
+/// ([`MaskCursor`] or [`SelectorsCursor`]) that carries its own position.
+#[derive(Debug)]
+pub enum RowSelectionCursor {
+    /// Reading all rows
+    All,
+    /// Use a bitmask to back the selection (dense selections)
+    Mask(MaskCursor),
+    /// Use a queue of selectors to back the selection (sparse selections)
+    Selectors(SelectorsCursor),
+}
+
+impl RowSelectionCursor {
+    /// Create a [`RowSelectionCursor::Mask`] backed by a bitmask built from the provided selectors
+    pub(crate) fn new_mask_from_selectors(selectors: Vec<RowSelector>) -> Self {
+        Self::Mask(MaskCursor {
+            mask: boolean_mask_from_selectors(&selectors),
+            position: 0,
+        })
+    }
+
+    /// Create a [`RowSelectionCursor::Selectors`] from the provided selectors
+    pub(crate) fn new_selectors(selectors: Vec<RowSelector>) -> Self {
+        Self::Selectors(SelectorsCursor {
+            selectors: selectors.into(),
+            position: 0,
+        })
+    }
+
+    /// Create a cursor that selects all rows
+    pub(crate) fn new_all() -> Self {
+        Self::All
+    }
+}
+
+fn boolean_mask_from_selectors(selectors: &[RowSelector]) -> BooleanBuffer {
+    let total_rows: usize = selectors.iter().map(|s| s.row_count).sum();
+    let mut builder = BooleanBufferBuilder::new(total_rows);
+    for selector in selectors {
+        builder.append_n(selector.row_count, !selector.skip);
+    }
+    builder.finish()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::format::PageLocation;
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
 
     #[test]
     fn test_from_filters() {
@@ -1378,4 +1672,33 @@ mod tests {
         assert_eq!(selection.row_count(), 0);
         assert_eq!(selection.skipped_row_count(), 0);
     }
+
+    #[test]
+    fn test_trim() {
+        let selection = RowSelection::from(vec![
+            RowSelector::skip(34),
+            RowSelector::select(12),
+            RowSelector::skip(3),
+            RowSelector::select(35),
+        ]);
+
+        let expected = vec![
+            RowSelector::skip(34),
+            RowSelector::select(12),
+            RowSelector::skip(3),
+            RowSelector::select(35),
+        ];
+
+        assert_eq!(selection.trim().selectors, expected);
+
+        let selection = RowSelection::from(vec![
+            RowSelector::skip(34),
+            RowSelector::select(12),
+            RowSelector::skip(3),
+        ]);
+
+        let expected = vec![RowSelector::skip(34), RowSelector::select(12)];
+
+        assert_eq!(selection.trim().selectors, expected);
+    }
 }
diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs
index cffa60e62e96..2f46c96be6b7 100644
--- a/parquet/src/arrow/arrow_reader/statistics.rs
+++ b/parquet/src/arrow/arrow_reader/statistics.rs
@@ -25,7 +25,7 @@ use crate::basic::Type as PhysicalType;
 use crate::data_type::{ByteArray, FixedLenByteArray};
 use crate::errors::{ParquetError, Result};
 use crate::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData};
-use crate::file::page_index::index::{Index, PageIndex};
+use crate::file::page_index::column_index::{ColumnIndexIterators, ColumnIndexMetaData};
 use crate::file::statistics::Statistics as ParquetStatistics;
 use crate::schema::types::SchemaDescriptor;
 use arrow_array::builder::{
@@ -33,12 +33,12 @@ use arrow_array::builder::{
     StringViewBuilder,
 };
 use arrow_array::{
-    new_empty_array, new_null_array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
-    Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, Int16Array,
-    Int32Array, Int64Array, Int8Array, LargeBinaryArray, Time32MillisecondArray, Time32SecondArray,
-    Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
-    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array,
-    UInt32Array, UInt64Array, UInt8Array,
+    ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal32Array, Decimal64Array,
+    Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, Int8Array,
+    Int16Array, Int32Array, Int64Array, LargeBinaryArray, Time32MillisecondArray,
+    Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
+    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt8Array,
+    UInt16Array, UInt32Array, UInt64Array, new_empty_array, new_null_array,
 };
 use arrow_buffer::i256;
 use arrow_schema::{DataType, Field, Schema, TimeUnit};
@@ -46,12 +46,24 @@ use half::f16;
 use paste::paste;
 use std::sync::Arc;
 
-// Convert the bytes array to i128.
+// Convert the bytes array to i32.
 // The endian of the input bytes array must be big-endian.
-pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 {
+pub(crate) fn from_bytes_to_i32(b: &[u8]) -> i32 {
     // The bytes array are from parquet file and must be the big-endian.
     // The endian is defined by parquet format, and the reference document
     // https://github.com/apache/parquet-format/blob/54e53e5d7794d383529dd30746378f19a12afd58/src/main/thrift/parquet.thrift#L66
+    i32::from_be_bytes(sign_extend_be::<4>(b))
+}
+
+// Convert the bytes array to i64.
+// The endian of the input bytes array must be big-endian.
+pub(crate) fn from_bytes_to_i64(b: &[u8]) -> i64 {
+    i64::from_be_bytes(sign_extend_be::<8>(b))
+}
+
+// Convert the bytes array to i128.
+// The endian of the input bytes array must be big-endian.
+pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 {
     i128::from_be_bytes(sign_extend_be::<16>(b))
 }
 
@@ -263,9 +275,10 @@ macro_rules! make_decimal_stats_iterator {
                         ParquetStatistics::Int32(s) => {
                             s.$func().map(|x| $stat_value_type::from(*x))
                         }
-                        ParquetStatistics::Int64(s) => {
-                            s.$func().map(|x| $stat_value_type::from(*x))
-                        }
+                        ParquetStatistics::Int64(s) => s
+                            .$func()
+                            .map(|x| $stat_value_type::try_from(*x).ok())
+                            .flatten(),
                         ParquetStatistics::ByteArray(s) => s.$bytes_func().map($convert_func),
                         ParquetStatistics::FixedLenByteArray(s) => {
                             s.$bytes_func().map($convert_func)
@@ -282,6 +295,34 @@ macro_rules! make_decimal_stats_iterator {
     };
 }
 
+make_decimal_stats_iterator!(
+    MinDecimal32StatsIterator,
+    min_opt,
+    min_bytes_opt,
+    i32,
+    from_bytes_to_i32
+);
+make_decimal_stats_iterator!(
+    MaxDecimal32StatsIterator,
+    max_opt,
+    max_bytes_opt,
+    i32,
+    from_bytes_to_i32
+);
+make_decimal_stats_iterator!(
+    MinDecimal64StatsIterator,
+    min_opt,
+    min_bytes_opt,
+    i64,
+    from_bytes_to_i64
+);
+make_decimal_stats_iterator!(
+    MaxDecimal64StatsIterator,
+    max_opt,
+    max_bytes_opt,
+    i64,
+    from_bytes_to_i64
+);
 make_decimal_stats_iterator!(
     MinDecimal128StatsIterator,
     min_opt,
@@ -476,6 +517,18 @@ macro_rules! get_statistics {
                 }
                 Ok(Arc::new(builder.finish()))
             },
+            DataType::Decimal32(precision, scale) => {
+                let arr = Decimal32Array::from_iter(
+                    [<$stat_type_prefix Decimal32StatsIterator>]::new($iterator)
+                ).with_precision_and_scale(*precision, *scale)?;
+                Ok(Arc::new(arr))
+            },
+            DataType::Decimal64(precision, scale) => {
+                let arr = Decimal64Array::from_iter(
+                    [<$stat_type_prefix Decimal64StatsIterator>]::new($iterator)
+                ).with_precision_and_scale(*precision, *scale)?;
+                Ok(Arc::new(arr))
+            },
             DataType::Decimal128(precision, scale) => {
                 let arr = Decimal128Array::from_iter(
                     [<$stat_type_prefix Decimal128StatsIterator>]::new($iterator)
@@ -544,17 +597,17 @@ macro_rules! get_statistics {
 }
 
 macro_rules! make_data_page_stats_iterator {
-    ($iterator_type: ident, $func: expr, $index_type: path, $stat_value_type: ty) => {
+    ($iterator_type: ident, $func: ident, $stat_value_type: ty) => {
         struct $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             iter: I,
         }
 
         impl<'a, I> $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             fn new(iter: I) -> Self {
                 Self { iter }
@@ -563,7 +616,7 @@ macro_rules! make_data_page_stats_iterator {
 
         impl<'a, I> Iterator for $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             type Item = Vec<Option<$stat_value_type>>;
 
@@ -571,16 +624,14 @@ macro_rules! make_data_page_stats_iterator {
                 let next = self.iter.next();
                 match next {
                     Some((len, index)) => match index {
-                        $index_type(native_index) => {
-                            Some(native_index.indexes.iter().map($func).collect::<Vec<_>>())
-                        }
                         // No matching `Index` found;
                         // thus no statistics that can be extracted.
                         // We return vec![None; len] to effectively
                         // create an arrow null-array with the length
                         // corresponding to the number of entries in
                         // `ParquetOffsetIndex` per row group per column.
-                        _ => Some(vec![None; len]),
+                        ColumnIndexMetaData::NONE => Some(vec![None; len]),
+                        _ => Some(<$stat_value_type>::$func(&index).collect::<Vec<_>>()),
                     },
                     _ => None,
                 }
@@ -593,101 +644,45 @@ macro_rules! make_data_page_stats_iterator {
     };
 }
 
-make_data_page_stats_iterator!(
-    MinBooleanDataPageStatsIterator,
-    |x: &PageIndex<bool>| { x.min },
-    Index::BOOLEAN,
-    bool
-);
-make_data_page_stats_iterator!(
-    MaxBooleanDataPageStatsIterator,
-    |x: &PageIndex<bool>| { x.max },
-    Index::BOOLEAN,
-    bool
-);
-make_data_page_stats_iterator!(
-    MinInt32DataPageStatsIterator,
-    |x: &PageIndex<i32>| { x.min },
-    Index::INT32,
-    i32
-);
-make_data_page_stats_iterator!(
-    MaxInt32DataPageStatsIterator,
-    |x: &PageIndex<i32>| { x.max },
-    Index::INT32,
-    i32
-);
-make_data_page_stats_iterator!(
-    MinInt64DataPageStatsIterator,
-    |x: &PageIndex<i64>| { x.min },
-    Index::INT64,
-    i64
-);
-make_data_page_stats_iterator!(
-    MaxInt64DataPageStatsIterator,
-    |x: &PageIndex<i64>| { x.max },
-    Index::INT64,
-    i64
-);
+make_data_page_stats_iterator!(MinBooleanDataPageStatsIterator, min_values_iter, bool);
+make_data_page_stats_iterator!(MaxBooleanDataPageStatsIterator, max_values_iter, bool);
+make_data_page_stats_iterator!(MinInt32DataPageStatsIterator, min_values_iter, i32);
+make_data_page_stats_iterator!(MaxInt32DataPageStatsIterator, max_values_iter, i32);
+make_data_page_stats_iterator!(MinInt64DataPageStatsIterator, min_values_iter, i64);
+make_data_page_stats_iterator!(MaxInt64DataPageStatsIterator, max_values_iter, i64);
 make_data_page_stats_iterator!(
     MinFloat16DataPageStatsIterator,
-    |x: &PageIndex<FixedLenByteArray>| { x.min.clone() },
-    Index::FIXED_LEN_BYTE_ARRAY,
+    min_values_iter,
     FixedLenByteArray
 );
 make_data_page_stats_iterator!(
     MaxFloat16DataPageStatsIterator,
-    |x: &PageIndex<FixedLenByteArray>| { x.max.clone() },
-    Index::FIXED_LEN_BYTE_ARRAY,
+    max_values_iter,
     FixedLenByteArray
 );
-make_data_page_stats_iterator!(
-    MinFloat32DataPageStatsIterator,
-    |x: &PageIndex<f32>| { x.min },
-    Index::FLOAT,
-    f32
-);
-make_data_page_stats_iterator!(
-    MaxFloat32DataPageStatsIterator,
-    |x: &PageIndex<f32>| { x.max },
-    Index::FLOAT,
-    f32
-);
-make_data_page_stats_iterator!(
-    MinFloat64DataPageStatsIterator,
-    |x: &PageIndex<f64>| { x.min },
-    Index::DOUBLE,
-    f64
-);
-make_data_page_stats_iterator!(
-    MaxFloat64DataPageStatsIterator,
-    |x: &PageIndex<f64>| { x.max },
-    Index::DOUBLE,
-    f64
-);
+make_data_page_stats_iterator!(MinFloat32DataPageStatsIterator, min_values_iter, f32);
+make_data_page_stats_iterator!(MaxFloat32DataPageStatsIterator, max_values_iter, f32);
+make_data_page_stats_iterator!(MinFloat64DataPageStatsIterator, min_values_iter, f64);
+make_data_page_stats_iterator!(MaxFloat64DataPageStatsIterator, max_values_iter, f64);
 make_data_page_stats_iterator!(
     MinByteArrayDataPageStatsIterator,
-    |x: &PageIndex<ByteArray>| { x.min.clone() },
-    Index::BYTE_ARRAY,
+    min_values_iter,
     ByteArray
 );
 make_data_page_stats_iterator!(
     MaxByteArrayDataPageStatsIterator,
-    |x: &PageIndex<ByteArray>| { x.max.clone() },
-    Index::BYTE_ARRAY,
+    max_values_iter,
     ByteArray
 );
 make_data_page_stats_iterator!(
     MaxFixedLenByteArrayDataPageStatsIterator,
-    |x: &PageIndex<FixedLenByteArray>| { x.max.clone() },
-    Index::FIXED_LEN_BYTE_ARRAY,
+    max_values_iter,
     FixedLenByteArray
 );
 
 make_data_page_stats_iterator!(
     MinFixedLenByteArrayDataPageStatsIterator,
-    |x: &PageIndex<FixedLenByteArray>| { x.min.clone() },
-    Index::FIXED_LEN_BYTE_ARRAY,
+    min_values_iter,
     FixedLenByteArray
 );
 
@@ -695,14 +690,14 @@ macro_rules! get_decimal_page_stats_iterator {
     ($iterator_type: ident, $func: ident, $stat_value_type: ident, $convert_func: ident) => {
         struct $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             iter: I,
         }
 
         impl<'a, I> $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             fn new(iter: I) -> Self {
                 Self { iter }
@@ -711,44 +706,37 @@ macro_rules! get_decimal_page_stats_iterator {
 
         impl<'a, I> Iterator for $iterator_type<'a, I>
         where
-            I: Iterator<Item = (usize, &'a Index)>,
+            I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
         {
             type Item = Vec<Option<$stat_value_type>>;
 
+            // Converts each page's value to `$stat_value_type`; other index variants yield `None` for every page.
             fn next(&mut self) -> Option<Self::Item> {
                 let next = self.iter.next();
                 match next {
                     Some((len, index)) => match index {
-                        Index::INT32(native_index) => Some(
+                        ColumnIndexMetaData::INT32(native_index) => Some(
                             native_index
-                                .indexes
-                                .iter()
-                                .map(|x| x.$func.and_then(|x| Some($stat_value_type::from(x))))
+                                .$func()
+                                .map(|x| x.map(|x| $stat_value_type::from(*x)))
                                 .collect::<Vec<_>>(),
                         ),
-                        Index::INT64(native_index) => Some(
+                        ColumnIndexMetaData::INT64(native_index) => Some(
                             native_index
-                                .indexes
-                                .iter()
-                                .map(|x| x.$func.and_then(|x| Some($stat_value_type::from(x))))
+                                .$func() // a value that overflows the target type maps to `None` below
+                                .map(|x| x.and_then(|x| $stat_value_type::try_from(*x).ok()))
                                 .collect::<Vec<_>>(),
                         ),
-                        Index::BYTE_ARRAY(native_index) => Some(
+                        ColumnIndexMetaData::BYTE_ARRAY(native_index) => Some(
                             native_index
-                                .indexes
-                                .iter()
-                                .map(|x| {
-                                    x.clone().$func.and_then(|x| Some($convert_func(x.data())))
-                                })
+                                .$func()
+                                .map(|x| x.map(|x| $convert_func(x)))
                                 .collect::<Vec<_>>(),
                         ),
-                        Index::FIXED_LEN_BYTE_ARRAY(native_index) => Some(
+                        ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(native_index) => Some(
                             native_index
-                                .indexes
-                                .iter()
-                                .map(|x| {
-                                    x.clone().$func.and_then(|x| Some($convert_func(x.data())))
-                                })
+                                .$func()
+                                .map(|x| x.map(|x| $convert_func(x)))
                                 .collect::<Vec<_>>(),
                         ),
                         _ => Some(vec![None; len]),
@@ -764,30 +752,58 @@ macro_rules! get_decimal_page_stats_iterator {
     };
 }
 
+get_decimal_page_stats_iterator!(
+    MinDecimal32DataPageStatsIterator,
+    min_values_iter,
+    i32,
+    from_bytes_to_i32
+);
+
+get_decimal_page_stats_iterator!(
+    MaxDecimal32DataPageStatsIterator,
+    max_values_iter,
+    i32,
+    from_bytes_to_i32
+);
+
+get_decimal_page_stats_iterator!(
+    MinDecimal64DataPageStatsIterator,
+    min_values_iter,
+    i64,
+    from_bytes_to_i64
+);
+
+get_decimal_page_stats_iterator!(
+    MaxDecimal64DataPageStatsIterator,
+    max_values_iter,
+    i64,
+    from_bytes_to_i64
+);
+
 get_decimal_page_stats_iterator!(
     MinDecimal128DataPageStatsIterator,
-    min,
+    min_values_iter,
     i128,
     from_bytes_to_i128
 );
 
 get_decimal_page_stats_iterator!(
     MaxDecimal128DataPageStatsIterator,
-    max,
+    max_values_iter,
     i128,
     from_bytes_to_i128
 );
 
 get_decimal_page_stats_iterator!(
     MinDecimal256DataPageStatsIterator,
-    min,
+    min_values_iter,
     i256,
     from_bytes_to_i256
 );
 
 get_decimal_page_stats_iterator!(
     MaxDecimal256DataPageStatsIterator,
-    max,
+    max_values_iter,
     i256,
     from_bytes_to_i256
 );
@@ -958,6 +974,10 @@ macro_rules! get_data_page_statistics {
                     )
                 ),
                 DataType::Date64 if $physical_type == Some(PhysicalType::INT64) => Ok(Arc::new(Date64Array::from_iter([<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten()))),
+                DataType::Decimal32(precision, scale) => Ok(Arc::new(
+                    Decimal32Array::from_iter([<$stat_type_prefix Decimal32DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)),
+                DataType::Decimal64(precision, scale) => Ok(Arc::new(
+                    Decimal64Array::from_iter([<$stat_type_prefix Decimal64DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)),
                 DataType::Decimal128(precision, scale) => Ok(Arc::new(
                     Decimal128Array::from_iter([<$stat_type_prefix Decimal128DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)),
                 DataType::Decimal256(precision, scale) => Ok(Arc::new(
@@ -1089,77 +1109,44 @@ fn max_statistics<'a, I: Iterator<Item = Option<&'a ParquetStatistics>>>(
 }
 
 /// Extracts the min statistics from an iterator
-/// of parquet page [`Index`]'es to an [`ArrayRef`]
+/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`]
 pub(crate) fn min_page_statistics<'a, I>(
     data_type: &DataType,
     iterator: I,
     physical_type: Option<PhysicalType>,
 ) -> Result<ArrayRef>
 where
-    I: Iterator<Item = (usize, &'a Index)>,
+    I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
 {
     get_data_page_statistics!(Min, data_type, iterator, physical_type)
 }
 
 /// Extracts the max statistics from an iterator
-/// of parquet page [`Index`]'es to an [`ArrayRef`]
+/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`]
 pub(crate) fn max_page_statistics<'a, I>(
     data_type: &DataType,
     iterator: I,
     physical_type: Option<PhysicalType>,
 ) -> Result<ArrayRef>
 where
-    I: Iterator<Item = (usize, &'a Index)>,
+    I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
 {
     get_data_page_statistics!(Max, data_type, iterator, physical_type)
 }
 
 /// Extracts the null count statistics from an iterator
-/// of parquet page [`Index`]'es to an [`ArrayRef`]
+/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`]
 ///
 /// The returned Array is an [`UInt64Array`]
 pub(crate) fn null_counts_page_statistics<'a, I>(iterator: I) -> Result<UInt64Array>
 where
-    I: Iterator<Item = (usize, &'a Index)>,
+    I: Iterator<Item = (usize, &'a ColumnIndexMetaData)>,
 {
     let iter = iterator.flat_map(|(len, index)| match index {
-        Index::NONE => vec![None; len],
-        Index::BOOLEAN(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::INT32(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::INT64(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::FLOAT(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::DOUBLE(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        Index::BYTE_ARRAY(native_index) => native_index
-            .indexes
-            .iter()
-            .map(|x| x.null_count.map(|x| x as u64))
-            .collect::<Vec<_>>(),
-        _ => unimplemented!(),
+        ColumnIndexMetaData::NONE => vec![None; len],
+        column_index => column_index.null_counts().map_or(vec![None; len], |v| {
+            v.iter().map(|i| Some(*i as u64)).collect::<Vec<_>>()
+        }),
     });
 
     Ok(UInt64Array::from_iter(iter))
@@ -1412,9 +1399,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(BooleanArray::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(BooleanArray::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let is_max_value_exact = metadatas
@@ -1433,9 +1421,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(BooleanArray::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(BooleanArray::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let is_min_value_exact = metadatas
@@ -1454,9 +1443,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = metadatas.into_iter().count();
-            return Ok(UInt64Array::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(UInt64Array::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let null_counts = metadatas
@@ -1485,7 +1475,7 @@ impl<'a> StatisticsConverter<'a> {
     /// page level statistics can prune at a finer granularity.
     ///
     /// However since they are stored in a separate metadata
-    /// structure ([`Index`]) there is different code to extract them as
+    /// structure ([`ColumnIndexMetaData`]) there is different code to extract them as
     /// compared to arrow statistics.
     ///
     /// # Parameters:
@@ -1598,9 +1588,10 @@ impl<'a> StatisticsConverter<'a> {
     {
         let Some(parquet_index) = self.parquet_column_index else {
             let num_row_groups = row_group_indices.into_iter().count();
-            return Ok(UInt64Array::from_iter(
-                std::iter::repeat(None).take(num_row_groups),
-            ));
+            return Ok(UInt64Array::from_iter(std::iter::repeat_n(
+                None,
+                num_row_groups,
+            )));
         };
 
         let iter = row_group_indices.into_iter().map(|rg_index| {
diff --git a/parquet/src/arrow/arrow_writer/byte_array.rs b/parquet/src/arrow/arrow_writer/byte_array.rs
index 9767ec98e636..228d229b3088 100644
--- a/parquet/src/arrow/arrow_writer/byte_array.rs
+++ b/parquet/src/arrow/arrow_writer/byte_array.rs
@@ -23,6 +23,8 @@ use crate::encodings::encoding::{DeltaBitPackEncoder, Encoder};
 use crate::encodings::rle::RleEncoder;
 use crate::errors::{ParquetError, Result};
 use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion};
+use crate::geospatial::accumulator::{GeoStatsAccumulator, try_new_geo_stats_accumulator};
+use crate::geospatial::statistics::GeospatialStatistics;
 use crate::schema::types::ColumnDescPtr;
 use crate::util::bit_util::num_required_bits;
 use crate::util::interner::{Interner, Storage};
@@ -149,7 +151,7 @@ impl FallbackEncoder {
                 return Err(general_err!(
                     "unsupported encoding {} for byte array",
                     encoding
-                ))
+                ));
             }
         };
 
@@ -421,6 +423,7 @@ pub struct ByteArrayEncoder {
     min_value: Option<ByteArray>,
     max_value: Option<ByteArray>,
     bloom_filter: Option<Sbbf>,
+    geo_stats_accumulator: Option<Box<dyn GeoStatsAccumulator>>,
 }
 
 impl ColumnValueEncoder for ByteArrayEncoder {
@@ -447,6 +450,8 @@ impl ColumnValueEncoder for ByteArrayEncoder {
 
         let statistics_enabled = props.statistics_enabled(descr.path());
 
+        let geo_stats_accumulator = try_new_geo_stats_accumulator(descr);
+
         Ok(Self {
             fallback,
             statistics_enabled,
@@ -454,6 +459,7 @@ impl ColumnValueEncoder for ByteArrayEncoder {
             dict_encoder: dictionary,
             min_value: None,
             max_value: None,
+            geo_stats_accumulator,
         })
     }
 
@@ -536,6 +542,10 @@ impl ColumnValueEncoder for ByteArrayEncoder {
             _ => self.fallback.flush_data_page(min_value, max_value),
         }
     }
+
+    fn flush_geospatial_statistics(&mut self) -> Option<Box<GeospatialStatistics>> {
+        self.geo_stats_accumulator.as_mut().and_then(|accumulator| accumulator.finish())
+    }
 }
 
 /// Encodes the provided `values` and `indices` to `encoder`
@@ -547,12 +557,14 @@ where
     T::Item: Copy + Ord + AsRef<[u8]>,
 {
     if encoder.statistics_enabled != EnabledStatistics::None {
-        if let Some((min, max)) = compute_min_max(values, indices.iter().cloned()) {
-            if encoder.min_value.as_ref().map_or(true, |m| m > &min) {
+        if let Some(accumulator) = encoder.geo_stats_accumulator.as_mut() {
+            update_geo_stats_accumulator(accumulator.as_mut(), values, indices.iter().cloned());
+        } else if let Some((min, max)) = compute_min_max(values, indices.iter().cloned()) {
+            if encoder.min_value.as_ref().is_none_or(|m| m > &min) {
                 encoder.min_value = Some(min);
             }
 
-            if encoder.max_value.as_ref().map_or(true, |m| m < &max) {
+            if encoder.max_value.as_ref().is_none_or(|m| m < &max) {
                 encoder.max_value = Some(max);
             }
         }
@@ -595,3 +607,20 @@ where
     }
     Some((min.as_ref().to_vec().into(), max.as_ref().to_vec().into()))
 }
+
+/// Passes the bytes of each `array` value at the indices yielded by `valid` to `bounder`
+fn update_geo_stats_accumulator<T>(
+    bounder: &mut dyn GeoStatsAccumulator,
+    array: T,
+    valid: impl Iterator<Item = usize>,
+) where
+    T: ArrayAccessor,
+    T::Item: Copy + Ord + AsRef<[u8]>,
+{
+    if !bounder.is_valid() {
+        return; // an accumulator that does not report itself valid receives no values
+    }
+    for bytes in valid.map(|idx| array.value(idx)) {
+        bounder.update_wkb(bytes.as_ref());
+    }
+}
diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs
index e4662b8f316c..59bf6c602438 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -43,6 +43,7 @@
 use crate::errors::{ParquetError, Result};
 use arrow_array::cast::AsArray;
 use arrow_array::{Array, ArrayRef, OffsetSizeTrait};
+use arrow_buffer::bit_iterator::BitIndexIterator;
 use arrow_buffer::{NullBuffer, OffsetBuffer};
 use arrow_schema::{DataType, Field};
 use std::ops::Range;
@@ -87,6 +88,8 @@ fn is_leaf(data_type: &DataType) -> bool {
             | DataType::Binary
             | DataType::LargeBinary
             | DataType::BinaryView
+            | DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
             | DataType::Decimal128(_, _)
             | DataType::Decimal256(_, _)
             | DataType::FixedSizeBinary(_)
@@ -135,7 +138,7 @@ enum LevelInfoBuilder {
 impl LevelInfoBuilder {
     /// Create a new [`LevelInfoBuilder`] for the given [`Field`] and parent [`LevelContext`]
     fn try_new(field: &Field, parent_ctx: LevelContext, array: &ArrayRef) -> Result<Self> {
-        if field.data_type() != array.data_type() {
+        if !Self::types_compatible(field.data_type(), array.data_type()) {
             return Err(arrow_err!(format!(
                 "Incompatible type. Field '{}' has type {}, array has type {}",
                 field.name(),
@@ -352,10 +355,10 @@ impl LevelInfoBuilder {
                     let len = range.end - range.start;
 
                     let def_levels = info.def_levels.as_mut().unwrap();
-                    def_levels.extend(std::iter::repeat(ctx.def_level - 1).take(len));
+                    def_levels.extend(std::iter::repeat_n(ctx.def_level - 1, len));
 
                     if let Some(rep_levels) = info.rep_levels.as_mut() {
-                        rep_levels.extend(std::iter::repeat(ctx.rep_level).take(len));
+                        rep_levels.extend(std::iter::repeat_n(ctx.rep_level, len));
                     }
                 })
             }
@@ -443,9 +446,9 @@ impl LevelInfoBuilder {
             let len = end_idx - start_idx;
             child.visit_leaves(|leaf| {
                 let rep_levels = leaf.rep_levels.as_mut().unwrap();
-                rep_levels.extend(std::iter::repeat(ctx.rep_level - 1).take(len));
+                rep_levels.extend(std::iter::repeat_n(ctx.rep_level - 1, len));
                 let def_levels = leaf.def_levels.as_mut().unwrap();
-                def_levels.extend(std::iter::repeat(ctx.def_level - 1).take(len));
+                def_levels.extend(std::iter::repeat_n(ctx.def_level - 1, len));
             })
         };
 
@@ -497,21 +500,22 @@ impl LevelInfoBuilder {
                 def_levels.reserve(len);
                 info.non_null_indices.reserve(len);
 
-                match info.array.logical_nulls() {
+                match &info.logical_nulls {
                     Some(nulls) => {
-                        // TODO: Faster bitmask iteration (#1757)
-                        for i in range {
-                            match nulls.is_valid(i) {
-                                true => {
-                                    def_levels.push(info.max_def_level);
-                                    info.non_null_indices.push(i)
-                                }
-                                false => def_levels.push(info.max_def_level - 1),
-                            }
-                        }
+                        assert!(range.end <= nulls.len());
+                        let nulls = nulls.inner();
+                        def_levels.extend(range.clone().map(|i| {
+                            // Safety: range.end was asserted to be in bounds earlier
+                            let valid = unsafe { nulls.value_unchecked(i) };
+                            info.max_def_level - (!valid as i16)
+                        }));
+                        info.non_null_indices.extend(
+                            BitIndexIterator::new(nulls.inner(), nulls.offset() + range.start, len)
+                                .map(|i| i + range.start),
+                        );
                     }
                     None => {
-                        let iter = std::iter::repeat(info.max_def_level).take(len);
+                        let iter = std::iter::repeat_n(info.max_def_level, len);
                         def_levels.extend(iter);
                         info.non_null_indices.extend(range);
                     }
@@ -521,7 +525,7 @@ impl LevelInfoBuilder {
         }
 
         if let Some(rep_levels) = &mut info.rep_levels {
-            rep_levels.extend(std::iter::repeat(info.max_rep_level).take(len))
+            rep_levels.extend(std::iter::repeat_n(info.max_rep_level, len))
         }
     }
 
@@ -539,7 +543,53 @@ impl LevelInfoBuilder {
             }
         }
     }
+
+    /// Determine if two data types are compatible for purposes of constructing a
+    /// [`LevelInfoBuilder`].
+    ///
+    /// Returns `true` when any of the following holds:
+    /// * [`DataType::equals_datatype`] reports `a` and `b` as equal
+    /// * `a` and `b` compare equal (`==`) once every dictionary type has been replaced by its
+    ///   value type
+    /// * after that replacement both are string types (`Utf8`, `LargeUtf8`, `Utf8View`)
+    /// * after that replacement both are binary types (`Binary`, `LargeBinary`, `BinaryView`)
+    fn types_compatible(a: &DataType, b: &DataType) -> bool {
+        /// Returns the value type of a dictionary, or `data_type` itself otherwise
+        fn strip_dictionary(data_type: &DataType) -> &DataType {
+            match data_type {
+                DataType::Dictionary(_, values) => values.as_ref(),
+                other => other,
+            }
+        }
+
+        /// String, StringView and LargeString all hold string data
+        fn is_string(data_type: &DataType) -> bool {
+            matches!(data_type, DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View)
+        }
+
+        /// Binary, BinaryView and LargeBinary all hold binary data
+        fn is_binary(data_type: &DataType) -> bool {
+            matches!(data_type, DataType::Binary | DataType::LargeBinary | DataType::BinaryView)
+        }
+
+        // equal Arrow data types are always compatible
+        if a.equals_datatype(b) {
+            return true;
+        }
+
+        // look through dictionaries so that dictionary encoded and native arrays can be mixed
+        let lhs = strip_dictionary(a);
+        let rhs = strip_dictionary(b);
+        if lhs == rhs {
+            return true;
+        }
+
+        // the types differ, but may still contain the same kind of data: strings are
+        // interchangeable with strings, binaries with binaries; anything else is incompatible
+        (is_string(lhs) && is_string(rhs)) || (is_binary(lhs) && is_binary(rhs))
+    }
 }
+
 /// The data necessary to write a primitive Arrow array to parquet, taking into account
 /// any non-primitive parents it may have in the arrow representation
 #[derive(Debug, Clone)]
@@ -566,6 +616,9 @@ pub(crate) struct ArrayLevels {
 
     /// The arrow array
     array: ArrayRef,
+
+    /// cached logical nulls of the array.
+    logical_nulls: Option<NullBuffer>,
 }
 
 impl PartialEq for ArrayLevels {
@@ -576,6 +629,7 @@ impl PartialEq for ArrayLevels {
             && self.max_def_level == other.max_def_level
             && self.max_rep_level == other.max_rep_level
             && self.array.as_ref() == other.array.as_ref()
+            && self.logical_nulls.as_ref() == other.logical_nulls.as_ref()
     }
 }
 impl Eq for ArrayLevels {}
@@ -588,6 +642,8 @@ impl ArrayLevels {
             false => ctx.def_level,
         };
 
+        let logical_nulls = array.logical_nulls();
+
         Self {
             def_levels: (max_def_level != 0).then(Vec::new),
             rep_levels: (max_rep_level != 0).then(Vec::new),
@@ -595,6 +651,7 @@ impl ArrayLevels {
             max_def_level,
             max_rep_level,
             array,
+            logical_nulls,
         }
     }
 
@@ -668,6 +725,7 @@ mod tests {
             max_def_level: 2,
             max_rep_level: 2,
             array: Arc::new(primitives),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected);
     }
@@ -688,6 +746,7 @@ mod tests {
             max_def_level: 0,
             max_rep_level: 0,
             array,
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
     }
@@ -707,6 +766,7 @@ mod tests {
         let levels = calculate_array_levels(&array, &field).unwrap();
         assert_eq!(levels.len(), 1);
 
+        let logical_nulls = array.logical_nulls();
         let expected_levels = ArrayLevels {
             def_levels: Some(vec![1, 0, 1, 1, 0]),
             rep_levels: None,
@@ -714,6 +774,7 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 0,
             array,
+            logical_nulls,
         };
         assert_eq!(&levels[0], &expected_levels);
     }
@@ -748,6 +809,7 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 1,
             array: Arc::new(leaf_array),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
 
@@ -781,6 +843,7 @@ mod tests {
             max_def_level: 2,
             max_rep_level: 1,
             array: Arc::new(leaf_array),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
     }
@@ -830,6 +893,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: Arc::new(leaf),
+            logical_nulls: None,
         };
 
         assert_eq!(&levels[0], &expected_levels);
@@ -880,6 +944,7 @@ mod tests {
             max_def_level: 5,
             max_rep_level: 2,
             array: Arc::new(leaf),
+            logical_nulls: None,
         };
 
         assert_eq!(&levels[0], &expected_levels);
@@ -917,6 +982,7 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 1,
             array: Arc::new(leaf),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
 
@@ -949,6 +1015,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: Arc::new(leaf),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
 
@@ -997,6 +1064,7 @@ mod tests {
             max_def_level: 5,
             max_rep_level: 2,
             array: Arc::new(leaf),
+            logical_nulls: None,
         };
         assert_eq!(&levels[0], &expected_levels);
     }
@@ -1029,6 +1097,7 @@ mod tests {
         let levels = calculate_array_levels(&a_array, &a_field).unwrap();
         assert_eq!(levels.len(), 1);
 
+        let logical_nulls = leaf.logical_nulls();
         let expected_levels = ArrayLevels {
             def_levels: Some(vec![3, 2, 3, 1, 0, 3]),
             rep_levels: None,
@@ -1036,6 +1105,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 0,
             array: leaf,
+            logical_nulls,
         };
         assert_eq!(&levels[0], &expected_levels);
     }
@@ -1075,6 +1145,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: Arc::new(a_values),
+            logical_nulls: None,
         };
         assert_eq!(list_level, &expected_level);
     }
@@ -1167,12 +1238,14 @@ mod tests {
             max_def_level: 0,
             max_rep_level: 0,
             array: Arc::new(a),
+            logical_nulls: None,
         };
         assert_eq!(list_level, &expected_level);
 
         // test "b" levels
         let list_level = levels.get(1).unwrap();
 
+        let b_logical_nulls = b.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![1, 0, 0, 1, 1]),
             rep_levels: None,
@@ -1180,12 +1253,14 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 0,
             array: Arc::new(b),
+            logical_nulls: b_logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
 
         // test "d" levels
         let list_level = levels.get(2).unwrap();
 
+        let d_logical_nulls = d.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![1, 1, 1, 2, 1]),
             rep_levels: None,
@@ -1193,12 +1268,14 @@ mod tests {
             max_def_level: 2,
             max_rep_level: 0,
             array: Arc::new(d),
+            logical_nulls: d_logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
 
         // test "f" levels
         let list_level = levels.get(3).unwrap();
 
+        let f_logical_nulls = f.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![3, 2, 3, 2, 3]),
             rep_levels: None,
@@ -1206,6 +1283,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 0,
             array: Arc::new(f),
+            logical_nulls: f_logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
     }
@@ -1301,6 +1379,7 @@ mod tests {
         assert_eq!(levels.len(), 2);
 
         let map = batch.column(0).as_map();
+        let map_keys_logical_nulls = map.keys().logical_nulls();
 
         // test key levels
         let list_level = &levels[0];
@@ -1312,11 +1391,13 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 1,
             array: map.keys().clone(),
+            logical_nulls: map_keys_logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
 
         // test values levels
         let list_level = levels.get(1).unwrap();
+        let map_values_logical_nulls = map.values().logical_nulls();
 
         let expected_level = ArrayLevels {
             def_levels: Some(vec![2, 2, 2, 1, 2, 1, 2]),
@@ -1325,6 +1406,7 @@ mod tests {
             max_def_level: 2,
             max_rep_level: 1,
             array: map.values().clone(),
+            logical_nulls: map_values_logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
     }
@@ -1403,6 +1485,7 @@ mod tests {
         let levels = calculate_array_levels(rb.column(0), rb.schema().field(0)).unwrap();
         let list_level = &levels[0];
 
+        let logical_nulls = values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![4, 1, 0, 2, 2, 3, 4]),
             rep_levels: Some(vec![0, 0, 0, 0, 1, 0, 0]),
@@ -1410,6 +1493,7 @@ mod tests {
             max_def_level: 4,
             max_rep_level: 1,
             array: values,
+            logical_nulls,
         };
 
         assert_eq!(list_level, &expected_level);
@@ -1443,6 +1527,7 @@ mod tests {
 
         assert_eq!(levels.len(), 1);
 
+        let logical_nulls = values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![4, 4, 3, 2, 0, 4, 4, 0, 1]),
             rep_levels: Some(vec![0, 1, 0, 0, 0, 0, 1, 0, 0]),
@@ -1450,6 +1535,7 @@ mod tests {
             max_def_level: 4,
             max_rep_level: 1,
             array: values,
+            logical_nulls,
         };
 
         assert_eq!(&levels[0], &expected_level);
@@ -1528,6 +1614,7 @@ mod tests {
 
         assert_eq!(levels.len(), 2);
 
+        let a1_logical_nulls = a1_values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![0, 0, 1, 6, 5, 2, 3, 1]),
             rep_levels: Some(vec![0, 0, 0, 0, 2, 0, 1, 0]),
@@ -1535,10 +1622,12 @@ mod tests {
             max_def_level: 6,
             max_rep_level: 2,
             array: a1_values,
+            logical_nulls: a1_logical_nulls,
         };
 
         assert_eq!(&levels[0], &expected_level);
 
+        let a2_logical_nulls = a2_values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![0, 0, 1, 3, 2, 4, 1]),
             rep_levels: Some(vec![0, 0, 0, 0, 0, 1, 0]),
@@ -1546,6 +1635,7 @@ mod tests {
             max_def_level: 4,
             max_rep_level: 1,
             array: a2_values,
+            logical_nulls: a2_logical_nulls,
         };
 
         assert_eq!(&levels[1], &expected_level);
@@ -1577,6 +1667,7 @@ mod tests {
 
         let list_level = &levels[0];
 
+        let logical_nulls = values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![0, 0, 3, 3]),
             rep_levels: Some(vec![0, 0, 0, 1]),
@@ -1584,6 +1675,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: values,
+            logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
     }
@@ -1727,6 +1819,7 @@ mod tests {
         let b_levels = &levels[1];
 
         // [[{a: 1}, null], null, [null, null], [{a: null}, {a: 2}]]
+        let values_a_logical_nulls = values_a.logical_nulls();
         let expected_a = ArrayLevels {
             def_levels: Some(vec![4, 2, 0, 2, 2, 3, 4]),
             rep_levels: Some(vec![0, 1, 0, 0, 1, 0, 1]),
@@ -1734,8 +1827,10 @@ mod tests {
             max_def_level: 4,
             max_rep_level: 1,
             array: values_a,
+            logical_nulls: values_a_logical_nulls,
         };
         // [[{b: 2}, null], null, [null, null], [{b: 3}, {b: 4}]]
+        let values_b_logical_nulls = values_b.logical_nulls();
         let expected_b = ArrayLevels {
             def_levels: Some(vec![3, 2, 0, 2, 2, 3, 3]),
             rep_levels: Some(vec![0, 1, 0, 0, 1, 0, 1]),
@@ -1743,6 +1838,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: values_b,
+            logical_nulls: values_b_logical_nulls,
         };
 
         assert_eq!(a_levels, &expected_a);
@@ -1767,6 +1863,7 @@ mod tests {
 
         let list_level = &levels[0];
 
+        let logical_nulls = values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![1, 0, 1]),
             rep_levels: Some(vec![0, 0, 0]),
@@ -1774,6 +1871,7 @@ mod tests {
             max_def_level: 3,
             max_rep_level: 1,
             array: values,
+            logical_nulls,
         };
         assert_eq!(list_level, &expected_level);
     }
@@ -1802,6 +1900,7 @@ mod tests {
         builder.write(0..4);
         let levels = builder.finish();
 
+        let logical_nulls = values.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![5, 4, 5, 2, 5, 3, 5, 5, 4, 4, 0]),
             rep_levels: Some(vec![0, 2, 2, 1, 0, 1, 0, 2, 1, 2, 0]),
@@ -1809,6 +1908,7 @@ mod tests {
             max_def_level: 5,
             max_rep_level: 2,
             array: values,
+            logical_nulls,
         };
 
         assert_eq!(levels[0], expected_level);
@@ -1832,6 +1932,8 @@ mod tests {
         let mut builder = levels(&item_field, dict.clone());
         builder.write(0..4);
         let levels = builder.finish();
+
+        let logical_nulls = dict.logical_nulls();
         let expected_level = ArrayLevels {
             def_levels: Some(vec![0, 0, 1, 1]),
             rep_levels: None,
@@ -1839,6 +1941,7 @@ mod tests {
             max_def_level: 1,
             max_rep_level: 0,
             array: Arc::new(dict),
+            logical_nulls,
         };
         assert_eq!(levels[0], expected_level);
     }
diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index c67c05ac2ef1..e5c5500d638d 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -23,34 +23,35 @@ use std::iter::Peekable;
 use std::slice::Iter;
 use std::sync::{Arc, Mutex};
 use std::vec::IntoIter;
-use thrift::protocol::TCompactOutputProtocol;
 
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
-use arrow_array::{ArrayRef, RecordBatch, RecordBatchWriter};
-use arrow_schema::{ArrowError, DataType as ArrowDataType, Field, IntervalUnit, SchemaRef};
+use arrow_array::{ArrayRef, Int32Array, RecordBatch, RecordBatchWriter};
+use arrow_schema::{
+    ArrowError, DataType as ArrowDataType, Field, IntervalUnit, SchemaRef, TimeUnit,
+};
 
 use super::schema::{add_encoded_arrow_schema_to_metadata, decimal_length_from_precision};
 
-use crate::arrow::arrow_writer::byte_array::ByteArrayEncoder;
 use crate::arrow::ArrowSchemaConverter;
+use crate::arrow::arrow_writer::byte_array::ByteArrayEncoder;
 use crate::column::page::{CompressedPage, PageWriteSpec, PageWriter};
 use crate::column::page_encryption::PageEncryptor;
 use crate::column::writer::encoder::ColumnValueEncoder;
 use crate::column::writer::{
-    get_column_writer, ColumnCloseResult, ColumnWriter, GenericColumnWriter,
+    ColumnCloseResult, ColumnWriter, GenericColumnWriter, get_column_writer,
 };
 use crate::data_type::{ByteArray, FixedLenByteArray};
 #[cfg(feature = "encryption")]
 use crate::encryption::encrypt::FileEncryptor;
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{KeyValue, RowGroupMetaData};
+use crate::file::metadata::{KeyValue, ParquetMetaData, RowGroupMetaData};
 use crate::file::properties::{WriterProperties, WriterPropertiesPtr};
 use crate::file::reader::{ChunkReader, Length};
 use crate::file::writer::{SerializedFileWriter, SerializedRowGroupWriter};
-use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
-use crate::thrift::TSerializable;
-use levels::{calculate_array_levels, ArrayLevels};
+use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift};
+use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, SchemaDescriptor};
+use levels::{ArrayLevels, calculate_array_levels};
 
 mod byte_array;
 mod levels;
@@ -128,6 +129,49 @@ mod levels;
 /// [`ListArray`]: https://docs.rs/arrow/latest/arrow/array/type.ListArray.html
 /// [`IntervalMonthDayNanoArray`]: https://docs.rs/arrow/latest/arrow/array/type.IntervalMonthDayNanoArray.html
 /// [support nanosecond intervals]: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval
+///
+/// ## Type Compatibility
+/// The writer can write Arrow [`RecordBatch`]s that are logically equivalent. This means that for
+/// a given column, the writer can accept multiple Arrow [`DataType`]s that contain the same
+/// value type.
+///
+/// For example, the following [`DataType`]s are all logically equivalent and can be written
+/// to the same column:
+/// * String, LargeString, StringView
+/// * Binary, LargeBinary, BinaryView
+///
+/// The writer will also accept both native and dictionary encoded arrays if the dictionaries
+/// contain compatible values.
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_array::{DictionaryArray, LargeStringArray, RecordBatch, StringArray, UInt8Array};
+/// # use arrow_schema::{DataType, Field, Schema};
+/// # use parquet::arrow::arrow_writer::ArrowWriter;
+/// let record_batch1 = RecordBatch::try_new(
+///    Arc::new(Schema::new(vec![Field::new("col", DataType::LargeUtf8, false)])),
+///    vec![Arc::new(LargeStringArray::from_iter_values(vec!["a", "b"]))]
+///  )
+/// .unwrap();
+///
+/// let mut buffer = Vec::new();
+/// let mut writer = ArrowWriter::try_new(&mut buffer, record_batch1.schema(), None).unwrap();
+/// writer.write(&record_batch1).unwrap();
+///
+/// let record_batch2 = RecordBatch::try_new(
+///     Arc::new(Schema::new(vec![Field::new(
+///         "col",
+///         DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::Utf8)),
+///          false,
+///     )])),
+///     vec![Arc::new(DictionaryArray::new(
+///          UInt8Array::from_iter_values(vec![0, 1]),
+///          Arc::new(StringArray::from_iter_values(vec!["b", "c"])),
+///      ))],
+///  )
+///  .unwrap();
+///  writer.write(&record_batch2).unwrap();
+///  writer.close().unwrap();
+/// ```
 pub struct ArrowWriter<W: Write> {
     /// Underlying Parquet writer
     writer: SerializedFileWriter<W>,
@@ -186,11 +230,18 @@ impl<W: Write + Send> ArrowWriter<W> {
         options: ArrowWriterOptions,
     ) -> Result<Self> {
         let mut props = options.properties;
-        let mut converter = ArrowSchemaConverter::new().with_coerce_types(props.coerce_types());
-        if let Some(schema_root) = &options.schema_root {
-            converter = converter.schema_root(schema_root);
-        }
-        let schema = converter.convert(&arrow_schema)?;
+
+        let schema = if let Some(parquet_schema) = options.schema_descr {
+            parquet_schema.clone()
+        } else {
+            let mut converter = ArrowSchemaConverter::new().with_coerce_types(props.coerce_types());
+            if let Some(schema_root) = &options.schema_root {
+                converter = converter.schema_root(schema_root);
+            }
+
+            converter.convert(&arrow_schema)?
+        };
+
         if !options.skip_arrow_metadata {
             // add serialized arrow schema
             add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props);
@@ -198,10 +249,12 @@ impl<W: Write + Send> ArrowWriter<W> {
 
         let max_row_group_size = props.max_row_group_size();
 
+        let props_ptr = Arc::new(props);
         let file_writer =
-            SerializedFileWriter::new(writer, schema.root_schema_ptr(), Arc::new(props))?;
+            SerializedFileWriter::new(writer, schema.root_schema_ptr(), Arc::clone(&props_ptr))?;
 
-        let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&file_writer);
+        let row_group_writer_factory =
+            ArrowRowGroupWriterFactory::new(&file_writer, arrow_schema.clone());
 
         Ok(Self {
             writer: file_writer,
@@ -272,12 +325,10 @@ impl<W: Write + Send> ArrowWriter<W> {
 
         let in_progress = match &mut self.in_progress {
             Some(in_progress) => in_progress,
-            x => x.insert(self.row_group_writer_factory.create_row_group_writer(
-                self.writer.schema_descr(),
-                self.writer.properties(),
-                &self.arrow_schema,
-                self.writer.flushed_row_groups().len(),
-            )?),
+            x => x.insert(
+                self.row_group_writer_factory
+                    .create_row_group_writer(self.writer.flushed_row_groups().len())?,
+            ),
         };
 
         // If would exceed max_row_group_size, split batch
@@ -305,7 +356,15 @@ impl<W: Write + Send> ArrowWriter<W> {
         self.writer.write_all(buf)
     }
 
+    /// Flushes underlying writer
+    pub fn sync(&mut self) -> std::io::Result<()> {
+        self.writer.flush()
+    }
+
     /// Flushes all buffered rows into a new row group
+    ///
+    /// Note the underlying writer is not flushed with this call.
+    /// If this is a desired behavior, please call [`ArrowWriter::sync`].
     pub fn flush(&mut self) -> Result<()> {
         let in_progress = match self.in_progress.take() {
             Some(in_progress) => in_progress,
@@ -355,15 +414,55 @@ impl<W: Write + Send> ArrowWriter<W> {
     /// Unlike [`Self::close`] this does not consume self
     ///
     /// Attempting to write after calling finish will result in an error
-    pub fn finish(&mut self) -> Result<crate::format::FileMetaData> {
+    pub fn finish(&mut self) -> Result<ParquetMetaData> {
         self.flush()?;
         self.writer.finish()
     }
 
     /// Close and finalize the underlying Parquet writer
-    pub fn close(mut self) -> Result<crate::format::FileMetaData> {
+    pub fn close(mut self) -> Result<ParquetMetaData> {
         self.finish()
     }
+
+    /// Create a new row group writer and return its column writers.
+    #[deprecated(
+        since = "56.2.0",
+        note = "Use `ArrowRowGroupWriterFactory` instead, see `ArrowColumnWriter` for an example"
+    )]
+    pub fn get_column_writers(&mut self) -> Result<Vec<ArrowColumnWriter>> {
+        self.flush()?;
+        let in_progress = self
+            .row_group_writer_factory
+            .create_row_group_writer(self.writer.flushed_row_groups().len())?;
+        Ok(in_progress.writers)
+    }
+
+    /// Append the given column chunks to the file as a new row group.
+    #[deprecated(
+        since = "56.2.0",
+        note = "Use `SerializedFileWriter` directly instead, see `ArrowColumnWriter` for an example"
+    )]
+    pub fn append_row_group(&mut self, chunks: Vec<ArrowColumnChunk>) -> Result<()> {
+        let mut row_group_writer = self.writer.next_row_group()?;
+        for chunk in chunks {
+            chunk.append_to_row_group(&mut row_group_writer)?;
+        }
+        row_group_writer.close()?;
+        Ok(())
+    }
+
+    /// Converts this writer into a lower-level [`SerializedFileWriter`] and [`ArrowRowGroupWriterFactory`].
+    ///
+    /// Flushes any outstanding data before returning.
+    ///
+    /// This can be useful to provide more control over how files are written, for example
+    /// to write columns in parallel. See the example on [`ArrowColumnWriter`].
+    pub fn into_serialized_writer(
+        mut self,
+    ) -> Result<(SerializedFileWriter<W>, ArrowRowGroupWriterFactory)> {
+        self.flush()?;
+        Ok((self.writer, self.row_group_writer_factory))
+    }
 }
 
 impl<W: Write + Send> RecordBatchWriter for ArrowWriter<W> {
@@ -385,6 +484,7 @@ pub struct ArrowWriterOptions {
     properties: WriterProperties,
     skip_arrow_metadata: bool,
     schema_root: Option<String>,
+    schema_descr: Option<SchemaDescriptor>,
 }
 
 impl ArrowWriterOptions {
@@ -418,6 +518,18 @@ impl ArrowWriterOptions {
             ..self
         }
     }
+
+    /// Explicitly specify the Parquet schema to be used
+    ///
+    /// If omitted (the default), the [`ArrowSchemaConverter`] is used to compute the
+    /// Parquet [`SchemaDescriptor`]. This may be used when the [`SchemaDescriptor`] is
+    /// already known or must be calculated using custom logic.
+    pub fn with_parquet_schema(self, schema_descr: SchemaDescriptor) -> Self {
+        Self {
+            schema_descr: Some(schema_descr),
+            ..self
+        }
+    }
 }
 
 /// A single column chunk produced by [`ArrowColumnWriter`]
@@ -509,7 +621,7 @@ impl PageWriter for ArrowPageWriter {
             None => page,
         };
 
-        let page_header = page.to_thrift_header();
+        let page_header = page.to_thrift_header()?;
         let header = {
             let mut header = Vec::with_capacity(1024);
 
@@ -521,8 +633,8 @@ impl PageWriter for ArrowPageWriter {
                     }
                 }
                 None => {
-                    let mut protocol = TCompactOutputProtocol::new(&mut header);
-                    page_header.write_to_out_protocol(&mut protocol)?;
+                    let mut protocol = ThriftCompactOutputProtocol::new(&mut header);
+                    page_header.write_thrift(&mut protocol)?;
                 }
             };
 
@@ -559,6 +671,9 @@ impl PageWriter for ArrowPageWriter {
 pub struct ArrowLeafColumn(ArrayLevels);
 
 /// Computes the [`ArrowLeafColumn`] for a potentially nested [`ArrayRef`]
+///
+/// This function can be used along with [`ArrowRowGroupWriterFactory`] to encode
+/// individual columns in parallel. See the example on [`ArrowColumnWriter`].
 pub fn compute_leaves(field: &Field, array: &ArrayRef) -> Result<Vec<ArrowLeafColumn>> {
     let levels = calculate_array_levels(array, field)?;
     Ok(levels.into_iter().map(ArrowLeafColumn).collect())
@@ -590,6 +705,8 @@ impl ArrowColumnChunk {
 
 /// Encodes [`ArrowLeafColumn`] to [`ArrowColumnChunk`]
 ///
+/// `ArrowColumnWriter` instances can be created using an [`ArrowRowGroupWriterFactory`].
+///
 /// Note: This is a low-level interface for applications that require
 /// fine-grained control of encoding (e.g. encoding using multiple threads),
 /// see [`ArrowWriter`] for a higher-level interface
@@ -601,7 +718,7 @@ impl ArrowColumnChunk {
 /// # use arrow_array::*;
 /// # use arrow_schema::*;
 /// # use parquet::arrow::ArrowSchemaConverter;
-/// # use parquet::arrow::arrow_writer::{ArrowLeafColumn, compute_leaves, get_column_writers, ArrowColumnChunk};
+/// # use parquet::arrow::arrow_writer::{compute_leaves, ArrowColumnChunk, ArrowLeafColumn, ArrowRowGroupWriterFactory};
 /// # use parquet::file::properties::WriterProperties;
 /// # use parquet::file::writer::{SerializedFileWriter, SerializedRowGroupWriter};
 /// #
@@ -617,8 +734,17 @@ impl ArrowColumnChunk {
 ///   .convert(&schema)
 ///   .unwrap();
 ///
-/// // Create writers for each of the leaf columns
-/// let col_writers = get_column_writers(&parquet_schema, &props, &schema).unwrap();
+/// // Create parquet writer
+/// let root_schema = parquet_schema.root_schema_ptr();
+/// // write to memory in the example, but this could be a File
+/// let mut out = Vec::with_capacity(1024);
+/// let mut writer = SerializedFileWriter::new(&mut out, root_schema, props.clone())
+///   .unwrap();
+///
+/// // Create a factory for building Arrow column writers
+/// let row_group_factory = ArrowRowGroupWriterFactory::new(&writer, Arc::clone(&schema));
+/// // Create column writers for the 0th row group
+/// let col_writers = row_group_factory.create_column_writers(0).unwrap();
 ///
 /// // Spawn a worker thread for each column
 /// //
@@ -641,13 +767,6 @@ impl ArrowColumnChunk {
 ///     })
 ///     .collect();
 ///
-/// // Create parquet writer
-/// let root_schema = parquet_schema.root_schema_ptr();
-/// // write to memory in the example, but this could be a File
-/// let mut out = Vec::with_capacity(1024);
-/// let mut writer = SerializedFileWriter::new(&mut out, root_schema, props.clone())
-///   .unwrap();
-///
 /// // Start row group
 /// let mut row_group_writer: SerializedRowGroupWriter<'_, _> = writer
 ///   .next_row_group()
@@ -679,7 +798,7 @@ impl ArrowColumnChunk {
 /// row_group_writer.close().unwrap();
 ///
 /// let metadata = writer.close().unwrap();
-/// assert_eq!(metadata.num_rows, 3);
+/// assert_eq!(metadata.file_metadata().num_rows(), 3);
 /// ```
 pub struct ArrowColumnWriter {
     writer: ArrowColumnWriterImpl,
@@ -702,7 +821,15 @@ impl ArrowColumnWriter {
     pub fn write(&mut self, col: &ArrowLeafColumn) -> Result<()> {
         match &mut self.writer {
             ArrowColumnWriterImpl::Column(c) => {
-                write_leaf(c, &col.0)?;
+                let leaf = col.0.array();
+                match leaf.as_any_dictionary_opt() {
+                    Some(dictionary) => {
+                        let materialized =
+                            arrow_select::take::take(dictionary.values(), dictionary.keys(), None)?;
+                        write_leaf(c, &materialized, &col.0)?
+                    }
+                    None => write_leaf(c, leaf, &col.0)?,
+                };
             }
             ArrowColumnWriterImpl::ByteArray(c) => {
                 write_primitive(c, col.0.array().as_ref(), &col.0)?;
@@ -755,6 +882,12 @@ impl ArrowColumnWriter {
 }
 
 /// Encodes [`RecordBatch`] to a parquet row group
+///
+/// Note: this structure is created internally by [`ArrowRowGroupWriterFactory`]
+/// (see `create_row_group_writer`); it is not exposed publicly.
+///
+/// See the example on [`ArrowColumnWriter`] for how to encode columns in parallel
+#[derive(Debug)]
 struct ArrowRowGroupWriter {
     writers: Vec<ArrowColumnWriter>,
     schema: SchemaRef,
@@ -789,56 +922,71 @@ impl ArrowRowGroupWriter {
     }
 }
 
-struct ArrowRowGroupWriterFactory {
+/// Factory that creates new column writers for each row group in the Parquet file.
+///
+/// You can create this structure via an [`ArrowWriter::into_serialized_writer`].
+/// See the example on [`ArrowColumnWriter`] for how to encode columns in parallel
+#[derive(Debug)]
+pub struct ArrowRowGroupWriterFactory {
+    schema: SchemaDescPtr,
+    arrow_schema: SchemaRef,
+    props: WriterPropertiesPtr,
     #[cfg(feature = "encryption")]
     file_encryptor: Option<Arc<FileEncryptor>>,
 }
 
 impl ArrowRowGroupWriterFactory {
-    #[cfg(feature = "encryption")]
-    fn new<W: Write + Send>(file_writer: &SerializedFileWriter<W>) -> Self {
+    /// Create a new [`ArrowRowGroupWriterFactory`] for the provided file writer and Arrow schema
+    pub fn new<W: Write + Send>(
+        file_writer: &SerializedFileWriter<W>,
+        arrow_schema: SchemaRef,
+    ) -> Self {
+        let schema = Arc::clone(file_writer.schema_descr_ptr());
+        let props = Arc::clone(file_writer.properties());
         Self {
+            schema,
+            arrow_schema,
+            props,
+            #[cfg(feature = "encryption")]
             file_encryptor: file_writer.file_encryptor(),
         }
     }
 
-    #[cfg(not(feature = "encryption"))]
-    fn new<W: Write + Send>(_file_writer: &SerializedFileWriter<W>) -> Self {
-        Self {}
+    fn create_row_group_writer(&self, row_group_index: usize) -> Result<ArrowRowGroupWriter> {
+        let writers = self.create_column_writers(row_group_index)?;
+        Ok(ArrowRowGroupWriter::new(writers, &self.arrow_schema))
+    }
+
+    /// Create column writers for a new row group, with the given row group index
+    pub fn create_column_writers(&self, row_group_index: usize) -> Result<Vec<ArrowColumnWriter>> {
+        let mut writers = Vec::with_capacity(self.arrow_schema.fields.len());
+        let mut leaves = self.schema.columns().iter();
+        let column_factory = self.column_writer_factory(row_group_index);
+        for field in &self.arrow_schema.fields {
+            column_factory.get_arrow_column_writer(
+                field.data_type(),
+                &self.props,
+                &mut leaves,
+                &mut writers,
+            )?;
+        }
+        Ok(writers)
     }
 
     #[cfg(feature = "encryption")]
-    fn create_row_group_writer(
-        &self,
-        parquet: &SchemaDescriptor,
-        props: &WriterPropertiesPtr,
-        arrow: &SchemaRef,
-        row_group_index: usize,
-    ) -> Result<ArrowRowGroupWriter> {
-        let writers = get_column_writers_with_encryptor(
-            parquet,
-            props,
-            arrow,
-            self.file_encryptor.clone(),
-            row_group_index,
-        )?;
-        Ok(ArrowRowGroupWriter::new(writers, arrow))
+    fn column_writer_factory(&self, row_group_idx: usize) -> ArrowColumnWriterFactory {
+        ArrowColumnWriterFactory::new()
+            .with_file_encryptor(row_group_idx, self.file_encryptor.clone())
     }
 
     #[cfg(not(feature = "encryption"))]
-    fn create_row_group_writer(
-        &self,
-        parquet: &SchemaDescriptor,
-        props: &WriterPropertiesPtr,
-        arrow: &SchemaRef,
-        _row_group_index: usize,
-    ) -> Result<ArrowRowGroupWriter> {
-        let writers = get_column_writers(parquet, props, arrow)?;
-        Ok(ArrowRowGroupWriter::new(writers, arrow))
+    fn column_writer_factory(&self, _row_group_idx: usize) -> ArrowColumnWriterFactory {
+        ArrowColumnWriterFactory::new()
     }
 }
 
-/// Returns the [`ArrowColumnWriter`] for a given schema
+/// Returns [`ArrowColumnWriter`]s for each column in a given schema
+#[deprecated(since = "57.0.0", note = "Use `ArrowRowGroupWriterFactory` instead")]
 pub fn get_column_writers(
     parquet: &SchemaDescriptor,
     props: &WriterPropertiesPtr,
@@ -858,31 +1006,7 @@ pub fn get_column_writers(
     Ok(writers)
 }
 
-/// Returns the [`ArrowColumnWriter`] for a given schema and supports columnar encryption
-#[cfg(feature = "encryption")]
-fn get_column_writers_with_encryptor(
-    parquet: &SchemaDescriptor,
-    props: &WriterPropertiesPtr,
-    arrow: &SchemaRef,
-    file_encryptor: Option<Arc<FileEncryptor>>,
-    row_group_index: usize,
-) -> Result<Vec<ArrowColumnWriter>> {
-    let mut writers = Vec::with_capacity(arrow.fields.len());
-    let mut leaves = parquet.columns().iter();
-    let column_factory =
-        ArrowColumnWriterFactory::new().with_file_encryptor(row_group_index, file_encryptor);
-    for field in &arrow.fields {
-        column_factory.get_arrow_column_writer(
-            field.data_type(),
-            props,
-            &mut leaves,
-            &mut writers,
-        )?;
-    }
-    Ok(writers)
-}
-
-/// Gets [`ArrowColumnWriter`] instances for different data types
+/// Creates [`ArrowColumnWriter`] instances
 struct ArrowColumnWriterFactory {
     #[cfg(feature = "encryption")]
     row_group_index: usize,
@@ -938,7 +1062,8 @@ impl ArrowColumnWriterFactory {
         Ok(Box::<ArrowPageWriter>::default())
     }
 
-    /// Gets the [`ArrowColumnWriter`] for the given `data_type`
+    /// Creates the [`ArrowColumnWriter`]s for the given `data_type`, consuming the
+    /// matching leaf column descriptors from `leaves` and appending the writers to `out`
     fn get_arrow_column_writer(
         &self,
         data_type: &ArrowDataType,
@@ -946,6 +1071,7 @@ impl ArrowColumnWriterFactory {
         leaves: &mut Iter<'_, ColumnDescPtr>,
         out: &mut Vec<ArrowColumnWriter>,
     ) -> Result<()> {
+        // Instantiate writers for normal columns
         let col = |desc: &ColumnDescPtr| -> Result<ArrowColumnWriter> {
             let page_writer = self.create_page_writer(desc, out.len())?;
             let chunk = page_writer.buffer.clone();
@@ -956,6 +1082,7 @@ impl ArrowColumnWriterFactory {
             })
         };
 
+        // Instantiate writers for byte arrays (e.g. Utf8, Binary, etc.)
         let bytes = |desc: &ColumnDescPtr| -> Result<ArrowColumnWriter> {
             let page_writer = self.create_page_writer(desc, out.len())?;
             let chunk = page_writer.buffer.clone();
@@ -968,15 +1095,15 @@ impl ArrowColumnWriterFactory {
 
         match data_type {
             _ if data_type.is_primitive() => out.push(col(leaves.next().unwrap())?),
-            ArrowDataType::FixedSizeBinary(_) | ArrowDataType::Boolean | ArrowDataType::Null => out.push(col(leaves.next().unwrap())?),
+            ArrowDataType::FixedSizeBinary(_) | ArrowDataType::Boolean | ArrowDataType::Null => {
+                out.push(col(leaves.next().unwrap())?)
+            }
             ArrowDataType::LargeBinary
             | ArrowDataType::Binary
             | ArrowDataType::Utf8
             | ArrowDataType::LargeUtf8
             | ArrowDataType::BinaryView
-            | ArrowDataType::Utf8View => {
-                out.push(bytes(leaves.next().unwrap())?)
-            }
+            | ArrowDataType::Utf8View => out.push(bytes(leaves.next().unwrap())?),
             ArrowDataType::List(f)
             | ArrowDataType::LargeList(f)
             | ArrowDataType::FixedSizeList(f, _) => {
@@ -993,51 +1120,100 @@ impl ArrowColumnWriterFactory {
                     self.get_arrow_column_writer(f[1].data_type(), props, leaves, out)?
                 }
                 _ => unreachable!("invalid map type"),
-            }
+            },
             ArrowDataType::Dictionary(_, value_type) => match value_type.as_ref() {
-                ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Binary | ArrowDataType::LargeBinary => {
-                    out.push(bytes(leaves.next().unwrap())?)
-                }
+                ArrowDataType::Utf8
+                | ArrowDataType::LargeUtf8
+                | ArrowDataType::Binary
+                | ArrowDataType::LargeBinary => out.push(bytes(leaves.next().unwrap())?),
                 ArrowDataType::Utf8View | ArrowDataType::BinaryView => {
                     out.push(bytes(leaves.next().unwrap())?)
                 }
-                ArrowDataType::FixedSizeBinary(_) => {
-                    out.push(bytes(leaves.next().unwrap())?)
-                }
-                _ => {
-                    out.push(col(leaves.next().unwrap())?)
-                }
+                ArrowDataType::FixedSizeBinary(_) => out.push(bytes(leaves.next().unwrap())?),
+                _ => out.push(col(leaves.next().unwrap())?),
+            },
+            _ => {
+                return Err(ParquetError::NYI(format!(
+                    "Attempting to write an Arrow type {data_type} to parquet that is not yet implemented"
+                )));
             }
-            _ => return Err(ParquetError::NYI(
-                format!(
-                    "Attempting to write an Arrow type {data_type:?} to parquet that is not yet implemented"
-                )
-            ))
         }
         Ok(())
     }
 }
 
-fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usize> {
-    let column = levels.array().as_ref();
+fn write_leaf(
+    writer: &mut ColumnWriter<'_>,
+    column: &dyn arrow_array::Array,
+    levels: &ArrayLevels,
+) -> Result<usize> {
     let indices = levels.non_null_indices();
+
     match writer {
-        ColumnWriter::Int32ColumnWriter(ref mut typed) => {
+        // Note: this should match the contents of arrow_to_parquet_type
+        ColumnWriter::Int32ColumnWriter(typed) => {
             match column.data_type() {
-                ArrowDataType::Date64 => {
-                    // If the column is a Date64, we cast it to a Date32, and then interpret that as Int32
-                    let array = arrow_cast::cast(column, &ArrowDataType::Date32)?;
-                    let array = arrow_cast::cast(&array, &ArrowDataType::Int32)?;
-
-                    let array = array.as_primitive::<Int32Type>();
+                ArrowDataType::Null => {
+                    let array = Int32Array::new_null(column.len());
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Int8 => {
+                    let array: Int32Array = column.as_primitive::<Int8Type>().unary(|x| x as i32);
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Int16 => {
+                    let array: Int32Array = column.as_primitive::<Int16Type>().unary(|x| x as i32);
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Int32 => {
+                    write_primitive(typed, column.as_primitive::<Int32Type>().values(), levels)
+                }
+                ArrowDataType::UInt8 => {
+                    let array: Int32Array = column.as_primitive::<UInt8Type>().unary(|x| x as i32);
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::UInt16 => {
+                    let array: Int32Array = column.as_primitive::<UInt16Type>().unary(|x| x as i32);
                     write_primitive(typed, array.values(), levels)
                 }
                 ArrowDataType::UInt32 => {
-                    let values = column.as_primitive::<UInt32Type>().values();
                     // follow C++ implementation and use overflow/reinterpret cast from  u32 to i32 which will map
                     // `(i32::MAX as u32)..u32::MAX` to `i32::MIN..0`
-                    let array = values.inner().typed_data::<i32>();
-                    write_primitive(typed, array, levels)
+                    let array = column.as_primitive::<UInt32Type>();
+                    write_primitive(typed, array.values().inner().typed_data(), levels)
+                }
+                ArrowDataType::Date32 => {
+                    let array = column.as_primitive::<Date32Type>();
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Time32(TimeUnit::Second) => {
+                    let array = column.as_primitive::<Time32SecondType>();
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Time32(TimeUnit::Millisecond) => {
+                    let array = column.as_primitive::<Time32MillisecondType>();
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Date64 => {
+                    // Date64 written as INT32: divide by 86_400_000 (ms per day), truncating to whole days
+                    let array: Int32Array = column
+                        .as_primitive::<Date64Type>()
+                        .unary(|x| (x / 86_400_000) as _);
+
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Decimal32(_, _) => {
+                    let array = column
+                        .as_primitive::<Decimal32Type>()
+                        .unary::<_, Int32Type>(|v| v);
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Decimal64(_, _) => {
+                    // use the int32 to represent the decimal with low precision
+                    let array = column
+                        .as_primitive::<Decimal64Type>()
+                        .unary::<_, Int32Type>(|v| v as i32);
+                    write_primitive(typed, array.values(), levels)
                 }
                 ArrowDataType::Decimal128(_, _) => {
                     // use the int32 to represent the decimal with low precision
@@ -1053,35 +1229,10 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
                         .unary::<_, Int32Type>(|v| v.as_i128() as i32);
                     write_primitive(typed, array.values(), levels)
                 }
-                ArrowDataType::Dictionary(_, value_type) => match value_type.as_ref() {
-                    ArrowDataType::Decimal128(_, _) => {
-                        let array = arrow_cast::cast(column, value_type)?;
-                        let array = array
-                            .as_primitive::<Decimal128Type>()
-                            .unary::<_, Int32Type>(|v| v as i32);
-                        write_primitive(typed, array.values(), levels)
-                    }
-                    ArrowDataType::Decimal256(_, _) => {
-                        let array = arrow_cast::cast(column, value_type)?;
-                        let array = array
-                            .as_primitive::<Decimal256Type>()
-                            .unary::<_, Int32Type>(|v| v.as_i128() as i32);
-                        write_primitive(typed, array.values(), levels)
-                    }
-                    _ => {
-                        let array = arrow_cast::cast(column, &ArrowDataType::Int32)?;
-                        let array = array.as_primitive::<Int32Type>();
-                        write_primitive(typed, array.values(), levels)
-                    }
-                },
-                _ => {
-                    let array = arrow_cast::cast(column, &ArrowDataType::Int32)?;
-                    let array = array.as_primitive::<Int32Type>();
-                    write_primitive(typed, array.values(), levels)
-                }
+                d => Err(ParquetError::General(format!("Cannot coerce {d} to I32"))),
             }
         }
-        ColumnWriter::BoolColumnWriter(ref mut typed) => {
+        ColumnWriter::BoolColumnWriter(typed) => {
             let array = column.as_boolean();
             typed.write_batch(
                 get_bool_array_slice(array, indices).as_slice(),
@@ -1089,12 +1240,13 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
                 levels.rep_levels(),
             )
         }
-        ColumnWriter::Int64ColumnWriter(ref mut typed) => {
+        ColumnWriter::Int64ColumnWriter(typed) => {
             match column.data_type() {
                 ArrowDataType::Date64 => {
-                    let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
+                    let array = column
+                        .as_primitive::<Date64Type>()
+                        .reinterpret_cast::<Int64Type>();
 
-                    let array = array.as_primitive::<Int64Type>();
                     write_primitive(typed, array.values(), levels)
                 }
                 ArrowDataType::Int64 => {
@@ -1108,6 +1260,56 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
                     let array = values.inner().typed_data::<i64>();
                     write_primitive(typed, array, levels)
                 }
+                ArrowDataType::Time64(TimeUnit::Microsecond) => {
+                    let array = column.as_primitive::<Time64MicrosecondType>();
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Time64(TimeUnit::Nanosecond) => {
+                    let array = column.as_primitive::<Time64NanosecondType>();
+                    write_primitive(typed, array.values(), levels)
+                }
+                ArrowDataType::Timestamp(unit, _) => match unit {
+                    TimeUnit::Second => {
+                        let array = column.as_primitive::<TimestampSecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Millisecond => {
+                        let array = column.as_primitive::<TimestampMillisecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Microsecond => {
+                        let array = column.as_primitive::<TimestampMicrosecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Nanosecond => {
+                        let array = column.as_primitive::<TimestampNanosecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                },
+                ArrowDataType::Duration(unit) => match unit {
+                    TimeUnit::Second => {
+                        let array = column.as_primitive::<DurationSecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Millisecond => {
+                        let array = column.as_primitive::<DurationMillisecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Microsecond => {
+                        let array = column.as_primitive::<DurationMicrosecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                    TimeUnit::Nanosecond => {
+                        let array = column.as_primitive::<DurationNanosecondType>();
+                        write_primitive(typed, array.values(), levels)
+                    }
+                },
+                ArrowDataType::Decimal64(_, _) => {
+                    let array = column
+                        .as_primitive::<Decimal64Type>()
+                        .reinterpret_cast::<Int64Type>();
+                    write_primitive(typed, array.values(), levels)
+                }
                 ArrowDataType::Decimal128(_, _) => {
                     // use the int64 to represent the decimal with low precision
                     let array = column
@@ -1122,89 +1324,58 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
                         .unary::<_, Int64Type>(|v| v.as_i128() as i64);
                     write_primitive(typed, array.values(), levels)
                 }
-                ArrowDataType::Dictionary(_, value_type) => match value_type.as_ref() {
-                    ArrowDataType::Decimal128(_, _) => {
-                        let array = arrow_cast::cast(column, value_type)?;
-                        let array = array
-                            .as_primitive::<Decimal128Type>()
-                            .unary::<_, Int64Type>(|v| v as i64);
-                        write_primitive(typed, array.values(), levels)
-                    }
-                    ArrowDataType::Decimal256(_, _) => {
-                        let array = arrow_cast::cast(column, value_type)?;
-                        let array = array
-                            .as_primitive::<Decimal256Type>()
-                            .unary::<_, Int64Type>(|v| v.as_i128() as i64);
-                        write_primitive(typed, array.values(), levels)
-                    }
-                    _ => {
-                        let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
-                        let array = array.as_primitive::<Int64Type>();
-                        write_primitive(typed, array.values(), levels)
-                    }
-                },
-                _ => {
-                    let array = arrow_cast::cast(column, &ArrowDataType::Int64)?;
-                    let array = array.as_primitive::<Int64Type>();
-                    write_primitive(typed, array.values(), levels)
-                }
+                d => Err(ParquetError::General(format!("Cannot coerce {d} to I64"))),
             }
         }
-        ColumnWriter::Int96ColumnWriter(ref mut _typed) => {
+        ColumnWriter::Int96ColumnWriter(_typed) => {
             unreachable!("Currently unreachable because data type not supported")
         }
-        ColumnWriter::FloatColumnWriter(ref mut typed) => {
+        ColumnWriter::FloatColumnWriter(typed) => {
             let array = column.as_primitive::<Float32Type>();
             write_primitive(typed, array.values(), levels)
         }
-        ColumnWriter::DoubleColumnWriter(ref mut typed) => {
+        ColumnWriter::DoubleColumnWriter(typed) => {
             let array = column.as_primitive::<Float64Type>();
             write_primitive(typed, array.values(), levels)
         }
         ColumnWriter::ByteArrayColumnWriter(_) => {
             unreachable!("should use ByteArrayWriter")
         }
-        ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) => {
+        ColumnWriter::FixedLenByteArrayColumnWriter(typed) => {
             let bytes = match column.data_type() {
                 ArrowDataType::Interval(interval_unit) => match interval_unit {
                     IntervalUnit::YearMonth => {
-                        let array = column
-                            .as_any()
-                            .downcast_ref::<arrow_array::IntervalYearMonthArray>()
-                            .unwrap();
+                        let array = column.as_primitive::<IntervalYearMonthType>();
                         get_interval_ym_array_slice(array, indices)
                     }
                     IntervalUnit::DayTime => {
-                        let array = column
-                            .as_any()
-                            .downcast_ref::<arrow_array::IntervalDayTimeArray>()
-                            .unwrap();
+                        let array = column.as_primitive::<IntervalDayTimeType>();
                         get_interval_dt_array_slice(array, indices)
                     }
                     _ => {
-                        return Err(ParquetError::NYI(
-                            format!(
-                                "Attempting to write an Arrow interval type {interval_unit:?} to parquet that is not yet implemented"
-                            )
-                        ));
+                        return Err(ParquetError::NYI(format!(
+                            "Attempting to write an Arrow interval type {interval_unit:?} to parquet that is not yet implemented"
+                        )));
                     }
                 },
                 ArrowDataType::FixedSizeBinary(_) => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<arrow_array::FixedSizeBinaryArray>()
-                        .unwrap();
+                    let array = column.as_fixed_size_binary();
                     get_fsb_array_slice(array, indices)
                 }
+                ArrowDataType::Decimal32(_, _) => {
+                    let array = column.as_primitive::<Decimal32Type>();
+                    get_decimal_32_array_slice(array, indices)
+                }
+                ArrowDataType::Decimal64(_, _) => {
+                    let array = column.as_primitive::<Decimal64Type>();
+                    get_decimal_64_array_slice(array, indices)
+                }
                 ArrowDataType::Decimal128(_, _) => {
                     let array = column.as_primitive::<Decimal128Type>();
                     get_decimal_128_array_slice(array, indices)
                 }
                 ArrowDataType::Decimal256(_, _) => {
-                    let array = column
-                        .as_any()
-                        .downcast_ref::<arrow_array::Decimal256Array>()
-                        .unwrap();
+                    let array = column.as_primitive::<Decimal256Type>();
                     get_decimal_256_array_slice(array, indices)
                 }
                 ArrowDataType::Float16 => {
@@ -1279,6 +1450,34 @@ fn get_interval_dt_array_slice(
     values
 }
 
+fn get_decimal_32_array_slice(
+    array: &arrow_array::Decimal32Array,
+    indices: &[usize],
+) -> Vec<FixedLenByteArray> {
+    let width = decimal_length_from_precision(array.precision()); // trailing bytes kept per value
+    indices
+        .iter()
+        .map(|&idx| {
+            let be_bytes = array.value(idx).to_be_bytes(); // 4-byte big-endian form
+            FixedLenByteArray::from(ByteArray::from(be_bytes[(4 - width)..].to_vec()))
+        })
+        .collect()
+}
+
+fn get_decimal_64_array_slice(
+    array: &arrow_array::Decimal64Array,
+    indices: &[usize],
+) -> Vec<FixedLenByteArray> {
+    let width = decimal_length_from_precision(array.precision()); // trailing bytes kept per value
+    indices
+        .iter()
+        .map(|&idx| {
+            let be_bytes = array.value(idx).to_be_bytes(); // 8-byte big-endian form
+            FixedLenByteArray::from(ByteArray::from(be_bytes[(8 - width)..].to_vec()))
+        })
+        .collect()
+}
+
 fn get_decimal_128_array_slice(
     array: &arrow_array::Decimal128Array,
     indices: &[usize],
@@ -1334,33 +1533,33 @@ fn get_fsb_array_slice(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     use std::fs::File;
-    use std::io::Seek;
 
     use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
-    use crate::arrow::ARROW_SCHEMA_META_KEY;
+    use crate::arrow::{ARROW_SCHEMA_META_KEY, PARQUET_FIELD_ID_META_KEY};
     use crate::column::page::{Page, PageReader};
-    use crate::file::page_encoding_stats::PageEncodingStats;
+    use crate::file::metadata::thrift::PageHeader;
+    use crate::file::page_index::column_index::ColumnIndexMetaData;
     use crate::file::reader::SerializedPageReader;
-    use crate::format::PageHeader;
+    use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol};
     use crate::schema::types::ColumnPath;
-    use crate::thrift::TCompactSliceInputProtocol;
     use arrow::datatypes::ToByteSlice;
     use arrow::datatypes::{DataType, Schema};
     use arrow::error::Result as ArrowResult;
     use arrow::util::data_gen::create_random_array;
     use arrow::util::pretty::pretty_format_batches;
     use arrow::{array::*, buffer::Buffer};
-    use arrow_buffer::{i256, IntervalDayTime, IntervalMonthDayNano, NullBuffer};
+    use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, NullBuffer, OffsetBuffer, i256};
     use arrow_schema::Fields;
     use half::f16;
-    use num::{FromPrimitive, ToPrimitive};
+    use num_traits::{FromPrimitive, ToPrimitive};
+    use tempfile::tempfile;
 
     use crate::basic::Encoding;
     use crate::data_type::AsBytes;
     use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData, ParquetMetaDataReader};
-    use crate::file::page_index::index::Index;
     use crate::file::properties::{
         BloomFilterPosition, EnabledStatistics, ReaderProperties, WriterVersion,
     };
@@ -2111,7 +2310,7 @@ mod tests {
     const SMALL_SIZE: usize = 7;
     const MEDIUM_SIZE: usize = 63;
 
-    fn roundtrip(expected_batch: RecordBatch, max_row_group_size: Option<usize>) -> Vec<File> {
+    fn roundtrip(expected_batch: RecordBatch, max_row_group_size: Option<usize>) -> Vec<Bytes> {
         let mut files = vec![];
         for version in [WriterVersion::PARQUET_1_0, WriterVersion::PARQUET_2_0] {
             let mut props = WriterProperties::builder().set_writer_version(version);
@@ -2126,27 +2325,27 @@ mod tests {
         files
     }
 
+    // Round trip the specified record batch with the specified writer properties,
+    // to an in-memory file, and validate the arrays using the specified function.
+    // Returns the in-memory file.
     fn roundtrip_opts_with_array_validation<F>(
         expected_batch: &RecordBatch,
         props: WriterProperties,
         validate: F,
-    ) -> File
+    ) -> Bytes
     where
         F: Fn(&ArrayData, &ArrayData),
     {
-        let file = tempfile::tempfile().unwrap();
+        let mut file = vec![];
 
-        let mut writer = ArrowWriter::try_new(
-            file.try_clone().unwrap(),
-            expected_batch.schema(),
-            Some(props),
-        )
-        .expect("Unable to write file");
+        let mut writer = ArrowWriter::try_new(&mut file, expected_batch.schema(), Some(props))
+            .expect("Unable to write file");
         writer.write(expected_batch).unwrap();
         writer.close().unwrap();
 
+        let file = Bytes::from(file);
         let mut record_batch_reader =
-            ParquetRecordBatchReader::try_new(file.try_clone().unwrap(), 1024).unwrap();
+            ParquetRecordBatchReader::try_new(file.clone(), 1024).unwrap();
 
         let actual_batch = record_batch_reader
             .next()
@@ -2165,7 +2364,7 @@ mod tests {
         file
     }
 
-    fn roundtrip_opts(expected_batch: &RecordBatch, props: WriterProperties) -> File {
+    fn roundtrip_opts(expected_batch: &RecordBatch, props: WriterProperties) -> Bytes {
         roundtrip_opts_with_array_validation(expected_batch, props, |a, b| {
             a.validate_full().expect("valid expected data");
             b.validate_full().expect("valid actual data");
@@ -2193,17 +2392,17 @@ mod tests {
         }
     }
 
-    fn one_column_roundtrip(values: ArrayRef, nullable: bool) -> Vec<File> {
+    fn one_column_roundtrip(values: ArrayRef, nullable: bool) -> Vec<Bytes> {
         one_column_roundtrip_with_options(RoundTripOptions::new(values, nullable))
     }
 
-    fn one_column_roundtrip_with_schema(values: ArrayRef, schema: SchemaRef) -> Vec<File> {
+    fn one_column_roundtrip_with_schema(values: ArrayRef, schema: SchemaRef) -> Vec<Bytes> {
         let mut options = RoundTripOptions::new(values, false);
         options.schema = schema;
         one_column_roundtrip_with_options(options)
     }
 
-    fn one_column_roundtrip_with_options(options: RoundTripOptions) -> Vec<File> {
+    fn one_column_roundtrip_with_options(options: RoundTripOptions) -> Vec<Bytes> {
         let RoundTripOptions {
             values,
             schema,
@@ -2264,7 +2463,7 @@ mod tests {
         files
     }
 
-    fn values_required<A, I>(iter: I) -> Vec<File>
+    fn values_required<A, I>(iter: I) -> Vec<Bytes>
     where
         A: From<Vec<I::Item>> + Array + 'static,
         I: IntoIterator,
@@ -2274,7 +2473,7 @@ mod tests {
         one_column_roundtrip(values, false)
     }
 
-    fn values_optional<A, I>(iter: I) -> Vec<File>
+    fn values_optional<A, I>(iter: I) -> Vec<Bytes>
     where
         A: From<Vec<Option<I::Item>>> + Array + 'static,
         I: IntoIterator,
@@ -2298,7 +2497,7 @@ mod tests {
     }
 
     fn check_bloom_filter<T: AsBytes>(
-        files: Vec<File>,
+        files: Vec<Bytes>,
         file_column: String,
         positive_values: Vec<T>,
         negative_values: Vec<T>,
@@ -2410,12 +2609,12 @@ mod tests {
             ArrowWriter::try_new(&mut out, batch.schema(), None).expect("Unable to write file");
         writer.write(&batch).unwrap();
         let file_meta_data = writer.close().unwrap();
-        for row_group in file_meta_data.row_groups {
-            for column in row_group.columns {
-                assert!(column.offset_index_offset.is_some());
-                assert!(column.offset_index_length.is_some());
-                assert!(column.column_index_offset.is_none());
-                assert!(column.column_index_length.is_none());
+        for row_group in file_meta_data.row_groups() {
+            for column in row_group.columns() {
+                assert!(column.offset_index_offset().is_some());
+                assert!(column.offset_index_length().is_some());
+                assert!(column.column_index_offset().is_none());
+                assert!(column.column_index_length().is_none());
             }
         }
     }
@@ -2590,7 +2789,7 @@ mod tests {
     #[test]
     fn binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // BinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
@@ -2600,7 +2799,7 @@ mod tests {
     #[test]
     fn binary_view_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // BinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
@@ -2641,7 +2840,7 @@ mod tests {
     #[test]
     fn binary_column_bloom_filter() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         let array = Arc::new(BinaryArray::from_iter_values(many_vecs_iter));
@@ -2680,7 +2879,7 @@ mod tests {
     #[test]
     fn large_binary_single_column() {
         let one_vec: Vec<u8> = (0..SMALL_SIZE as u8).collect();
-        let many_vecs: Vec<_> = std::iter::repeat(one_vec).take(SMALL_SIZE).collect();
+        let many_vecs: Vec<_> = std::iter::repeat_n(one_vec, SMALL_SIZE).collect();
         let many_vecs_iter = many_vecs.iter().map(|v| v.as_slice());
 
         // LargeBinaryArrays can't be built from Vec<Option<&str>>, so only call `values_required`
@@ -2864,14 +3063,18 @@ mod tests {
         writer.write(&batch).unwrap();
         let file_metadata = writer.close().unwrap();
 
+        let schema = file_metadata.file_metadata().schema();
         // Coerced name of "item" should be "element"
-        assert_eq!(file_metadata.schema[3].name, "element");
+        let list_field = &schema.get_fields()[0].get_fields()[0];
+        assert_eq!(list_field.get_fields()[0].name(), "element");
+
+        let map_field = &schema.get_fields()[1].get_fields()[0];
         // Coerced name of "entries" should be "key_value"
-        assert_eq!(file_metadata.schema[5].name, "key_value");
+        assert_eq!(map_field.name(), "key_value");
         // Coerced name of "keys" should be "key"
-        assert_eq!(file_metadata.schema[6].name, "key");
+        assert_eq!(map_field.get_fields()[0].name(), "key");
         // Coerced name of "values" should be "value"
-        assert_eq!(file_metadata.schema[7].name, "value");
+        assert_eq!(map_field.get_fields()[1].name(), "value");
 
         // Double check schema after reading from the file
         let reader = SerializedFileReader::new(file).unwrap();
@@ -2949,6 +3152,237 @@ mod tests {
         one_column_roundtrip_with_schema(Arc::new(d), schema);
     }
 
+    #[test]
+    fn arrow_writer_test_type_compatibility() {
+        fn ensure_compatible_write<T1, T2>(array1: T1, array2: T2, expected_result: T1)
+        where
+            T1: Array + 'static,
+            T2: Array + 'static,
+        {
+            let schema1 = Arc::new(Schema::new(vec![Field::new(
+                "a",
+                array1.data_type().clone(),
+                false,
+            )]));
+            // The writer is built from array1's schema; array2 is written through it as well.
+            let file = tempfile().unwrap();
+            let mut writer =
+                ArrowWriter::try_new(file.try_clone().unwrap(), schema1.clone(), None).unwrap();
+
+            let rb1 = RecordBatch::try_new(schema1.clone(), vec![Arc::new(array1)]).unwrap();
+            writer.write(&rb1).unwrap();
+            // Second batch: same column name, but typed after array2 rather than array1.
+            let schema2 = Arc::new(Schema::new(vec![Field::new(
+                "a",
+                array2.data_type().clone(),
+                false,
+            )]));
+            let rb2 = RecordBatch::try_new(schema2, vec![Arc::new(array2)]).unwrap();
+            writer.write(&rb2).unwrap();
+
+            writer.close().unwrap();
+            // Only the first batch produced by the reader is compared against the expectation.
+            let mut record_batch_reader =
+                ParquetRecordBatchReader::try_new(file.try_clone().unwrap(), 1024).unwrap();
+            let actual_batch = record_batch_reader.next().unwrap().unwrap();
+            // The expectation always uses schema1, i.e. the type of the first array written.
+            let expected_batch =
+                RecordBatch::try_new(schema1, vec![Arc::new(expected_result)]).unwrap();
+            assert_eq!(actual_batch, expected_batch);
+        }
+
+        // Check compatibility between native arrays and dictionaries, in both write orders.
+
+        ensure_compatible_write(
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet"])),
+            ),
+            StringArray::from_iter_values(vec!["barquet"]),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0, 1]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet", "barquet"])),
+            ),
+        );
+
+        ensure_compatible_write(
+            StringArray::from_iter_values(vec!["parquet"]),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["barquet"])),
+            ),
+            StringArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        // Check compatibility between dictionaries with different key types (UInt8, then UInt16).
+
+        ensure_compatible_write(
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet"])),
+            ),
+            DictionaryArray::new(
+                UInt16Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["barquet"])),
+            ),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0, 1]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet", "barquet"])),
+            ),
+        );
+
+        // Check compatibility between dictionaries whose values are String vs. LargeString arrays.
+        ensure_compatible_write(
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet"])),
+            ),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(LargeStringArray::from_iter_values(vec!["barquet"])),
+            ),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0, 1]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet", "barquet"])),
+            ),
+        );
+
+        // Check compatibility between a dictionary and a native array with a different type.
+        ensure_compatible_write(
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet"])),
+            ),
+            LargeStringArray::from_iter_values(vec!["barquet"]),
+            DictionaryArray::new(
+                UInt8Array::from_iter_values(vec![0, 1]),
+                Arc::new(StringArray::from_iter_values(vec!["parquet", "barquet"])),
+            ),
+        );
+
+        // Check compatibility among String, LargeString and StringView arrays (every ordered pair).
+
+        ensure_compatible_write(
+            StringArray::from_iter_values(vec!["parquet"]),
+            LargeStringArray::from_iter_values(vec!["barquet"]),
+            StringArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        ensure_compatible_write(
+            LargeStringArray::from_iter_values(vec!["parquet"]),
+            StringArray::from_iter_values(vec!["barquet"]),
+            LargeStringArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        ensure_compatible_write(
+            StringArray::from_iter_values(vec!["parquet"]),
+            StringViewArray::from_iter_values(vec!["barquet"]),
+            StringArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        ensure_compatible_write(
+            StringViewArray::from_iter_values(vec!["parquet"]),
+            StringArray::from_iter_values(vec!["barquet"]),
+            StringViewArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        ensure_compatible_write(
+            LargeStringArray::from_iter_values(vec!["parquet"]),
+            StringViewArray::from_iter_values(vec!["barquet"]),
+            LargeStringArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        ensure_compatible_write(
+            StringViewArray::from_iter_values(vec!["parquet"]),
+            LargeStringArray::from_iter_values(vec!["barquet"]),
+            StringViewArray::from_iter_values(vec!["parquet", "barquet"]),
+        );
+
+        // Check compatibility among Binary, LargeBinary and BinaryView arrays (every ordered pair).
+
+        ensure_compatible_write(
+            BinaryArray::from_iter_values(vec![b"parquet"]),
+            LargeBinaryArray::from_iter_values(vec![b"barquet"]),
+            BinaryArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        ensure_compatible_write(
+            LargeBinaryArray::from_iter_values(vec![b"parquet"]),
+            BinaryArray::from_iter_values(vec![b"barquet"]),
+            LargeBinaryArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        ensure_compatible_write(
+            BinaryArray::from_iter_values(vec![b"parquet"]),
+            BinaryViewArray::from_iter_values(vec![b"barquet"]),
+            BinaryArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        ensure_compatible_write(
+            BinaryViewArray::from_iter_values(vec![b"parquet"]),
+            BinaryArray::from_iter_values(vec![b"barquet"]),
+            BinaryViewArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        ensure_compatible_write(
+            BinaryViewArray::from_iter_values(vec![b"parquet"]),
+            LargeBinaryArray::from_iter_values(vec![b"barquet"]),
+            BinaryViewArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        ensure_compatible_write(
+            LargeBinaryArray::from_iter_values(vec![b"parquet"]),
+            BinaryViewArray::from_iter_values(vec![b"barquet"]),
+            LargeBinaryArray::from_iter_values(vec![b"parquet", b"barquet"]),
+        );
+
+        // Check compatibility for list types whose item fields differ only in their metadata.
+
+        let list_field_metadata = HashMap::from_iter(vec![(
+            PARQUET_FIELD_ID_META_KEY.to_string(),
+            "1".to_string(),
+        )]);
+        let list_field = Field::new_list_field(DataType::Int32, false);
+
+        let values1 = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4]));
+        let offsets1 = OffsetBuffer::new(vec![0, 2, 5].into());
+
+        let values2 = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
+        let offsets2 = OffsetBuffer::new(vec![0, 3, 5].into());
+
+        let values_expected = Arc::new(Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]));
+        let offsets_expected = OffsetBuffer::new(vec![0, 2, 5, 8, 10].into());
+
+        ensure_compatible_write(
+            // when the initial schema has the metadata ...
+            ListArray::try_new(
+                Arc::new(
+                    list_field
+                        .clone()
+                        .with_metadata(list_field_metadata.clone()),
+                ),
+                offsets1,
+                values1,
+                None,
+            )
+            .unwrap(),
+            // ... and some intermediate schema doesn't have the metadata
+            ListArray::try_new(Arc::new(list_field.clone()), offsets2, values2, None).unwrap(),
+            // ... the write will still go through, and the resulting schema will inherit the initial metadata
+            ListArray::try_new(
+                Arc::new(
+                    list_field
+                        .clone()
+                        .with_metadata(list_field_metadata.clone()),
+                ),
+                offsets_expected,
+                values_expected,
+                None,
+            )
+            .unwrap(),
+        );
+    }
+
     #[test]
     fn arrow_writer_primitive_dictionary() {
         // define schema
@@ -2972,6 +3406,48 @@ mod tests {
         one_column_roundtrip_with_schema(Arc::new(d), schema);
     }
 
+    #[test]
+    fn arrow_writer_decimal32_dictionary() {
+        // Round-trips a dictionary-encoded Decimal32 column (one null key) at two precisions.
+        let raw = vec![12345, 56789, 34567];
+        let keys = UInt8Array::from(vec![Some(0), None, Some(1), Some(2), Some(1)]);
+
+        // First pass: dictionary values declared with precision 5, scale 2.
+        let narrow = Decimal32Array::from(raw.clone())
+            .with_precision_and_scale(5, 2)
+            .unwrap();
+        let dict = DictionaryArray::new(keys, Arc::new(narrow));
+        one_column_roundtrip(Arc::new(dict.clone()), true);
+
+        // Second pass: same keys, values re-declared with precision 9, scale 2.
+        let wide = Decimal32Array::from(raw)
+            .with_precision_and_scale(9, 2)
+            .unwrap();
+        let dict = dict.with_values(Arc::new(wide));
+        one_column_roundtrip(Arc::new(dict), true);
+    }
+
+    #[test]
+    fn arrow_writer_decimal64_dictionary() {
+        // Round-trips a dictionary-encoded Decimal64 column (one null key) at two precisions.
+        let raw = vec![12345, 56789, 34567];
+        let keys = UInt8Array::from(vec![Some(0), None, Some(1), Some(2), Some(1)]);
+
+        // First pass: dictionary values declared with precision 5, scale 2.
+        let narrow = Decimal64Array::from(raw.clone())
+            .with_precision_and_scale(5, 2)
+            .unwrap();
+        let dict = DictionaryArray::new(keys, Arc::new(narrow));
+        one_column_roundtrip(Arc::new(dict.clone()), true);
+
+        // Second pass: same keys, values re-declared with precision 12, scale 2.
+        let wide = Decimal64Array::from(raw)
+            .with_precision_and_scale(12, 2)
+            .unwrap();
+        let dict = dict.with_values(Arc::new(wide));
+        one_column_roundtrip(Arc::new(dict), true);
+    }
+
     #[test]
     fn arrow_writer_decimal128_dictionary() {
         let integers = vec![12345, 56789, 34567];
@@ -3588,15 +4064,15 @@ mod tests {
         writer.write(&batch).unwrap();
 
         let metadata = writer.close().unwrap();
-        assert_eq!(metadata.row_groups.len(), 1);
-        let row_group = &metadata.row_groups[0];
-        assert_eq!(row_group.columns.len(), 2);
+        assert_eq!(metadata.num_row_groups(), 1);
+        let row_group = metadata.row_group(0);
+        assert_eq!(row_group.num_columns(), 2);
         // Column "a" has both offset and column index, as requested
-        assert!(row_group.columns[0].offset_index_offset.is_some());
-        assert!(row_group.columns[0].column_index_offset.is_some());
+        assert!(row_group.column(0).offset_index_offset().is_some());
+        assert!(row_group.column(0).column_index_offset().is_some());
         // Column "b" should only have offset index
-        assert!(row_group.columns[1].offset_index_offset.is_some());
-        assert!(row_group.columns[1].column_index_offset.is_none());
+        assert!(row_group.column(1).offset_index_offset().is_some());
+        assert!(row_group.column(1).column_index_offset().is_none());
 
         let options = ReadOptionsBuilder::new().with_page_index().build();
         let reader = SerializedFileReader::new_with_options(Bytes::from(buf), options).unwrap();
@@ -3628,9 +4104,12 @@ mod tests {
         assert_eq!(column_index[0].len(), 2); // 2 columns
 
         let a_idx = &column_index[0][0];
-        assert!(matches!(a_idx, Index::BYTE_ARRAY(_)), "{a_idx:?}");
+        assert!(
+            matches!(a_idx, ColumnIndexMetaData::BYTE_ARRAY(_)),
+            "{a_idx:?}"
+        );
         let b_idx = &column_index[0][1];
-        assert!(matches!(b_idx, Index::NONE), "{b_idx:?}");
+        assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}");
     }
 
     #[test]
@@ -3660,15 +4139,15 @@ mod tests {
         writer.write(&batch).unwrap();
 
         let metadata = writer.close().unwrap();
-        assert_eq!(metadata.row_groups.len(), 1);
-        let row_group = &metadata.row_groups[0];
-        assert_eq!(row_group.columns.len(), 2);
+        assert_eq!(metadata.num_row_groups(), 1);
+        let row_group = metadata.row_group(0);
+        assert_eq!(row_group.num_columns(), 2);
         // Column "a" should only have offset index
-        assert!(row_group.columns[0].offset_index_offset.is_some());
-        assert!(row_group.columns[0].column_index_offset.is_none());
+        assert!(row_group.column(0).offset_index_offset().is_some());
+        assert!(row_group.column(0).column_index_offset().is_none());
         // Column "b" should only have offset index
-        assert!(row_group.columns[1].offset_index_offset.is_some());
-        assert!(row_group.columns[1].column_index_offset.is_none());
+        assert!(row_group.column(1).offset_index_offset().is_some());
+        assert!(row_group.column(1).column_index_offset().is_none());
 
         let options = ReadOptionsBuilder::new().with_page_index().build();
         let reader = SerializedFileReader::new_with_options(Bytes::from(buf), options).unwrap();
@@ -3696,9 +4175,9 @@ mod tests {
         assert_eq!(column_index[0].len(), 2); // 2 columns
 
         let a_idx = &column_index[0][0];
-        assert!(matches!(a_idx, Index::NONE), "{a_idx:?}");
+        assert!(matches!(a_idx, ColumnIndexMetaData::NONE), "{a_idx:?}");
         let b_idx = &column_index[0][1];
-        assert!(matches!(b_idx, Index::NONE), "{b_idx:?}");
+        assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}");
     }
 
     #[test]
@@ -3727,9 +4206,11 @@ mod tests {
             .file_metadata()
             .key_value_metadata()
         {
-            assert!(!key_value_metadata
-                .iter()
-                .any(|kv| kv.key.as_str() == ARROW_SCHEMA_META_KEY));
+            assert!(
+                !key_value_metadata
+                    .iter()
+                    .any(|kv| kv.key.as_str() == ARROW_SCHEMA_META_KEY)
+            );
         }
     }
 
@@ -3788,6 +4269,70 @@ mod tests {
         assert_eq!(batches.len(), 0);
     }
 
+    #[test]
+    fn test_page_stats_not_written_by_default() {
+        let string_field = Field::new("a", DataType::Utf8, false);
+        let schema = Schema::new(vec![string_field]);
+        let raw_string_values = vec!["Blart Versenwald III"];
+        let string_values = StringArray::from(raw_string_values.clone());
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(string_values)]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_statistics_enabled(EnabledStatistics::Page)
+            .set_dictionary_enabled(false)
+            .set_encoding(Encoding::PLAIN)
+            .set_compression(crate::basic::Compression::UNCOMPRESSED)
+            .build();
+
+        let file = roundtrip_opts(&batch, props);
+
+        // Page-level statistics are enabled above, but `set_write_page_header_statistics` is never called.
+        // Note: decode the thrift page header directly, as there is no Rust API to access the statistics in the page headers
+
+        // Decode the first page header; the slice starts 4 bytes into the written output.
+        let first_page = &file[4..];
+        let mut prot = ThriftSliceInputProtocol::new(first_page);
+        let hdr = PageHeader::read_thrift(&mut prot).unwrap();
+        let stats = hdr.data_page_header.unwrap().statistics;
+
+        assert!(stats.is_none());
+    }
+
+    #[test]
+    fn test_page_stats_when_enabled() {
+        let string_field = Field::new("a", DataType::Utf8, false);
+        let schema = Schema::new(vec![string_field]);
+        let raw_string_values = vec!["Blart Versenwald III", "Andrew Lamb"];
+        let string_values = StringArray::from(raw_string_values.clone());
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(string_values)]).unwrap();
+
+        let props = WriterProperties::builder()
+            .set_statistics_enabled(EnabledStatistics::Page)
+            .set_dictionary_enabled(false)
+            .set_encoding(Encoding::PLAIN)
+            .set_write_page_header_statistics(true)
+            .set_compression(crate::basic::Compression::UNCOMPRESSED)
+            .build();
+
+        let file = roundtrip_opts(&batch, props);
+
+        // `set_write_page_header_statistics(true)` is set above, so the page header should carry statistics.
+        // Note: decode the thrift page header directly, as there is no Rust API to access the statistics in the page headers
+
+        // Decode the first page header; the slice starts 4 bytes into the written output.
+        let first_page = &file[4..];
+        let mut prot = ThriftSliceInputProtocol::new(first_page);
+        let hdr = PageHeader::read_thrift(&mut prot).unwrap();
+        let stats = hdr.data_page_header.unwrap().statistics;
+
+        let stats = stats.unwrap();
+        // Check that min/max were actually written to the page header, flagged as exact, with the full values.
+        assert!(stats.is_max_value_exact.unwrap());
+        assert!(stats.is_min_value_exact.unwrap());
+        assert_eq!(stats.max_value.unwrap(), "Blart Versenwald III".as_bytes());
+        assert_eq!(stats.min_value.unwrap(), "Andrew Lamb".as_bytes());
+    }
+
     #[test]
     fn test_page_stats_truncation() {
         let string_field = Field::new("a", DataType::Utf8, false);
@@ -3813,22 +4358,19 @@ mod tests {
             .set_statistics_truncate_length(Some(2))
             .set_dictionary_enabled(false)
             .set_encoding(Encoding::PLAIN)
+            .set_write_page_header_statistics(true)
             .set_compression(crate::basic::Compression::UNCOMPRESSED)
             .build();
 
-        let mut file = roundtrip_opts(&batch, props);
+        let file = roundtrip_opts(&batch, props);
 
         // read file and decode page headers
         // Note: use the thrift API as there is no Rust API to access the statistics in the page headers
-        let mut buf = vec![];
-        file.seek(std::io::SeekFrom::Start(0)).unwrap();
-        let read = file.read_to_end(&mut buf).unwrap();
-        assert!(read > 0);
 
         // decode first page header
-        let first_page = &buf[4..];
-        let mut prot = TCompactSliceInputProtocol::new(first_page);
-        let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap();
+        let first_page = &file[4..];
+        let mut prot = ThriftSliceInputProtocol::new(first_page);
+        let hdr = PageHeader::read_thrift(&mut prot).unwrap();
         let stats = hdr.data_page_header.unwrap().statistics;
         assert!(stats.is_some());
         let stats = stats.unwrap();
@@ -3840,8 +4382,8 @@ mod tests {
 
         // check second page now
         let second_page = &prot.as_slice()[hdr.compressed_page_size as usize..];
-        let mut prot = TCompactSliceInputProtocol::new(second_page);
-        let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap();
+        let mut prot = ThriftSliceInputProtocol::new(second_page);
+        let hdr = PageHeader::read_thrift(&mut prot).unwrap();
         let stats = hdr.data_page_header.unwrap().statistics;
         assert!(stats.is_some());
         let stats = stats.unwrap();
@@ -3871,17 +4413,26 @@ mod tests {
         writer.write(&batch).unwrap();
         let file_metadata = writer.close().unwrap();
 
-        assert_eq!(file_metadata.row_groups.len(), 1);
-        assert_eq!(file_metadata.row_groups[0].columns.len(), 1);
-        let chunk_meta = file_metadata.row_groups[0].columns[0]
-            .meta_data
-            .as_ref()
-            .expect("column metadata missing");
-        assert!(chunk_meta.encoding_stats.is_some());
-        let chunk_page_stats = chunk_meta.encoding_stats.as_ref().unwrap();
+        assert_eq!(file_metadata.num_row_groups(), 1);
+        assert_eq!(file_metadata.row_group(0).num_columns(), 1);
+        assert!(
+            file_metadata
+                .row_group(0)
+                .column(0)
+                .page_encoding_stats()
+                .is_some()
+        );
+        let chunk_page_stats = file_metadata
+            .row_group(0)
+            .column(0)
+            .page_encoding_stats()
+            .unwrap();
 
         // check that the read metadata is also correct
-        let options = ReadOptionsBuilder::new().with_page_index().build();
+        let options = ReadOptionsBuilder::new()
+            .with_page_index()
+            .with_encoding_stats_as_mask(false)
+            .build();
         let reader = SerializedFileReader::new_with_options(file, options).unwrap();
 
         let rowgroup = reader.get_row_group(0).expect("row group missing");
@@ -3889,11 +4440,7 @@ mod tests {
         let column = rowgroup.metadata().column(0);
         assert!(column.page_encoding_stats().is_some());
         let file_page_stats = column.page_encoding_stats().unwrap();
-        let chunk_stats: Vec<PageEncodingStats> = chunk_page_stats
-            .iter()
-            .map(|x| crate::file::page_encoding_stats::try_from_thrift(x).unwrap())
-            .collect();
-        assert_eq!(&chunk_stats, file_page_stats);
+        assert_eq!(chunk_page_stats, file_page_stats);
     }
 
     #[test]
diff --git a/parquet/src/arrow/async_reader/metadata.rs b/parquet/src/arrow/async_reader/metadata.rs
index e0f7bdbbe902..0ab6a621fca0 100644
--- a/parquet/src/arrow/async_reader/metadata.rs
+++ b/parquet/src/arrow/async_reader/metadata.rs
@@ -16,18 +16,12 @@
 // under the License.
 
 use crate::arrow::async_reader::AsyncFileReader;
-use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
-use crate::file::page_index::index::Index;
-use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
-use crate::file::FOOTER_SIZE;
+use crate::errors::Result;
 use bytes::Bytes;
 use futures::future::BoxFuture;
-use futures::FutureExt;
-use std::future::Future;
 use std::ops::Range;
 
-/// A data source that can be used with [`MetadataLoader`] to load [`ParquetMetaData`]
+/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`]
 ///
 /// Note that implementation is provided for [`AsyncFileReader`].
 ///
@@ -62,11 +56,16 @@ use std::ops::Range;
 ///     }
 /// }
 ///```
+///
+/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
+/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
 pub trait MetadataFetch {
     /// Return a future that fetches the specified range of bytes asynchronously
     ///
     /// Note the returned type is a boxed future, often created by
-    /// [FutureExt::boxed]. See the trait documentation for an example
+    /// [`FutureExt::boxed`]. See the trait documentation for an example
+    ///
+    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
     fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>>;
 }
 
@@ -76,363 +75,17 @@ impl<T: AsyncFileReader> MetadataFetch for &mut T {
     }
 }
 
-/// A data source that can be used with [`MetadataLoader`] to load [`ParquetMetaData`] via suffix
+/// A data source that can be used with [`ParquetMetaDataReader`] to load [`ParquetMetaData`] via suffix
 /// requests, without knowing the file size
+///
+/// [`ParquetMetaDataReader`]: crate::file::metadata::reader::ParquetMetaDataReader
+/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
 pub trait MetadataSuffixFetch: MetadataFetch {
     /// Return a future that fetches the last `n` bytes asynchronously
     ///
     /// Note the returned type is a boxed future, often created by
-    /// [FutureExt::boxed]. See the trait documentation for an example
-    fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, Result<Bytes>>;
-}
-
-/// An asynchronous interface to load [`ParquetMetaData`] from an async source
-pub struct MetadataLoader<F> {
-    /// Function that fetches byte ranges asynchronously
-    fetch: F,
-    /// The in-progress metadata
-    metadata: ParquetMetaData,
-    /// The offset and bytes of remaining unparsed data
-    remainder: Option<(usize, Bytes)>,
-}
-
-impl<F: MetadataFetch> MetadataLoader<F> {
-    /// Create a new [`MetadataLoader`] by reading the footer information
-    ///
-    /// See [`fetch_parquet_metadata`] for the meaning of the individual parameters
-    #[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader")]
-    pub async fn load(mut fetch: F, file_size: usize, prefetch: Option<usize>) -> Result<Self> {
-        if file_size < FOOTER_SIZE {
-            return Err(ParquetError::EOF(format!(
-                "file size of {file_size} is less than footer"
-            )));
-        }
-
-        // If a size hint is provided, read more than the minimum size
-        // to try and avoid a second fetch.
-        let footer_start = if let Some(size_hint) = prefetch {
-            // check for hint smaller than footer
-            let size_hint = std::cmp::max(size_hint, FOOTER_SIZE);
-            file_size.saturating_sub(size_hint)
-        } else {
-            file_size - FOOTER_SIZE
-        };
-
-        let suffix = fetch.fetch(footer_start as u64..file_size as u64).await?;
-        let suffix_len = suffix.len();
-
-        let mut footer = [0; FOOTER_SIZE];
-        footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
-
-        let footer = ParquetMetaDataReader::decode_footer_tail(&footer)?;
-        let length = footer.metadata_length();
-
-        if file_size < length + FOOTER_SIZE {
-            return Err(ParquetError::EOF(format!(
-                "file size of {} is less than footer + metadata {}",
-                file_size,
-                length + FOOTER_SIZE
-            )));
-        }
-
-        // Did not fetch the entire file metadata in the initial read, need to make a second request
-        let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
-            let metadata_start = file_size - length - FOOTER_SIZE;
-            let meta = fetch
-                .fetch(metadata_start as u64..(file_size - FOOTER_SIZE) as u64)
-                .await?;
-            (ParquetMetaDataReader::decode_metadata(&meta)?, None)
-        } else {
-            let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
-
-            let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
-            (
-                ParquetMetaDataReader::decode_metadata(slice)?,
-                Some((footer_start, suffix.slice(..metadata_start))),
-            )
-        };
-
-        Ok(Self {
-            fetch,
-            metadata,
-            remainder,
-        })
-    }
-
-    /// Create a new [`MetadataLoader`] from an existing [`ParquetMetaData`]
-    #[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader")]
-    pub fn new(fetch: F, metadata: ParquetMetaData) -> Self {
-        Self {
-            fetch,
-            metadata,
-            remainder: None,
-        }
-    }
-
-    /// Loads the page index, if any
+    /// [`FutureExt::boxed`]. See the trait documentation for an example
     ///
-    /// * `column_index`: if true will load column index
-    /// * `offset_index`: if true will load offset index
-    #[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader")]
-    pub async fn load_page_index(&mut self, column_index: bool, offset_index: bool) -> Result<()> {
-        if !column_index && !offset_index {
-            return Ok(());
-        }
-
-        let mut range = None;
-        for c in self.metadata.row_groups().iter().flat_map(|r| r.columns()) {
-            range = acc_range(range, c.column_index_range());
-            range = acc_range(range, c.offset_index_range());
-        }
-        let range = match range {
-            None => return Ok(()),
-            Some(range) => range,
-        };
-
-        let data = match &self.remainder {
-            Some((remainder_start, remainder)) if *remainder_start as u64 <= range.start => {
-                let remainder_start = *remainder_start as u64;
-                let range_start = usize::try_from(range.start - remainder_start)?;
-                let range_end = usize::try_from(range.end - remainder_start)?;
-                remainder.slice(range_start..range_end)
-            }
-            // Note: this will potentially fetch data already in remainder, this keeps things simple
-            _ => self.fetch.fetch(range.start..range.end).await?,
-        };
-
-        // Sanity check
-        assert_eq!(data.len(), (range.end - range.start) as usize);
-        let offset = range.start;
-
-        if column_index {
-            let index = self
-                .metadata
-                .row_groups()
-                .iter()
-                .map(|x| {
-                    x.columns()
-                        .iter()
-                        .map(|c| match c.column_index_range() {
-                            Some(r) => {
-                                let r_start = usize::try_from(r.start - offset)?;
-                                let r_end = usize::try_from(r.end - offset)?;
-                                decode_column_index(&data[r_start..r_end], c.column_type())
-                            }
-                            None => Ok(Index::NONE),
-                        })
-                        .collect::<Result<Vec<_>>>()
-                })
-                .collect::<Result<Vec<_>>>()?;
-
-            self.metadata.set_column_index(Some(index));
-        }
-
-        if offset_index {
-            let index = self
-                .metadata
-                .row_groups()
-                .iter()
-                .map(|x| {
-                    x.columns()
-                        .iter()
-                        .map(|c| match c.offset_index_range() {
-                            Some(r) => {
-                                let r_start = usize::try_from(r.start - offset)?;
-                                let r_end = usize::try_from(r.end - offset)?;
-                                decode_offset_index(&data[r_start..r_end])
-                            }
-                            None => Err(general_err!("missing offset index")),
-                        })
-                        .collect::<Result<Vec<_>>>()
-                })
-                .collect::<Result<Vec<_>>>()?;
-
-            self.metadata.set_offset_index(Some(index));
-        }
-
-        Ok(())
-    }
-
-    /// Returns the finished [`ParquetMetaData`]
-    pub fn finish(self) -> ParquetMetaData {
-        self.metadata
-    }
-}
-
-struct MetadataFetchFn<F>(F);
-
-impl<F, Fut> MetadataFetch for MetadataFetchFn<F>
-where
-    F: FnMut(Range<usize>) -> Fut + Send,
-    Fut: Future<Output = Result<Bytes>> + Send,
-{
-    fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
-        async move { self.0(range.start.try_into()?..range.end.try_into()?).await }.boxed()
-    }
-}
-
-/// Fetches parquet metadata
-///
-/// Parameters:
-/// * fetch: an async function that can fetch byte ranges
-/// * file_size: the total size of the parquet file
-/// * footer_size_hint: footer prefetch size (see comments below)
-///
-/// The length of the parquet footer, which contains file metadata, is not
-/// known up front. Therefore this function will first issue a request to read
-/// the last 8 bytes to determine the footer's precise length, before
-/// issuing a second request to fetch the metadata bytes
-///
-/// If `prefetch` is `Some`, this will read the specified number of bytes
-/// in the first request, instead of 8, and only issue further requests
-/// if additional bytes are needed. Providing a `prefetch` hint can therefore
-/// significantly reduce the number of `fetch` requests, and consequently latency
-#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader")]
-pub async fn fetch_parquet_metadata<F, Fut>(
-    fetch: F,
-    file_size: usize,
-    prefetch: Option<usize>,
-) -> Result<ParquetMetaData>
-where
-    F: FnMut(Range<usize>) -> Fut + Send,
-    Fut: Future<Output = Result<Bytes>> + Send,
-{
-    let file_size = u64::try_from(file_size)?;
-    let fetch = MetadataFetchFn(fetch);
-    ParquetMetaDataReader::new()
-        .with_prefetch_hint(prefetch)
-        .load_and_finish(fetch, file_size)
-        .await
-}
-
-// these tests are all replicated in parquet::file::metadata::reader
-#[allow(deprecated)]
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::file::reader::{FileReader, Length, SerializedFileReader};
-    use crate::util::test_common::file_util::get_test_file;
-    use std::fs::File;
-    use std::io::{Read, Seek, SeekFrom};
-    use std::sync::atomic::{AtomicUsize, Ordering};
-
-    fn read_range(file: &mut File, range: Range<usize>) -> Result<Bytes> {
-        file.seek(SeekFrom::Start(range.start as _))?;
-        let len = range.end - range.start;
-        let mut buf = Vec::with_capacity(len);
-        file.take(len as _).read_to_end(&mut buf)?;
-        Ok(buf.into())
-    }
-
-    #[tokio::test]
-    async fn test_simple() {
-        let mut file = get_test_file("nulls.snappy.parquet");
-        let len = file.len() as usize;
-
-        let reader = SerializedFileReader::new(file.try_clone().unwrap()).unwrap();
-        let expected = reader.metadata().file_metadata().schema();
-        let fetch_count = AtomicUsize::new(0);
-
-        let mut fetch = |range| {
-            fetch_count.fetch_add(1, Ordering::SeqCst);
-            futures::future::ready(read_range(&mut file, range))
-        };
-
-        let actual = fetch_parquet_metadata(&mut fetch, len, None).await.unwrap();
-        assert_eq!(actual.file_metadata().schema(), expected);
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-
-        // Metadata hint too small - below footer size
-        fetch_count.store(0, Ordering::SeqCst);
-        let actual = fetch_parquet_metadata(&mut fetch, len, Some(7))
-            .await
-            .unwrap();
-        assert_eq!(actual.file_metadata().schema(), expected);
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-
-        // Metadata hint too small
-        fetch_count.store(0, Ordering::SeqCst);
-        let actual = fetch_parquet_metadata(&mut fetch, len, Some(10))
-            .await
-            .unwrap();
-        assert_eq!(actual.file_metadata().schema(), expected);
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-
-        // Metadata hint too large
-        fetch_count.store(0, Ordering::SeqCst);
-        let actual = fetch_parquet_metadata(&mut fetch, len, Some(500))
-            .await
-            .unwrap();
-        assert_eq!(actual.file_metadata().schema(), expected);
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-
-        // Metadata hint exactly correct
-        fetch_count.store(0, Ordering::SeqCst);
-        let actual = fetch_parquet_metadata(&mut fetch, len, Some(428))
-            .await
-            .unwrap();
-        assert_eq!(actual.file_metadata().schema(), expected);
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-
-        let err = fetch_parquet_metadata(&mut fetch, 4, None)
-            .await
-            .unwrap_err()
-            .to_string();
-        assert_eq!(err, "EOF: file size of 4 is less than footer");
-
-        let err = fetch_parquet_metadata(&mut fetch, 20, None)
-            .await
-            .unwrap_err()
-            .to_string();
-        assert_eq!(err, "Parquet error: Invalid Parquet file. Corrupt footer");
-    }
-
-    #[tokio::test]
-    async fn test_page_index() {
-        let mut file = get_test_file("alltypes_tiny_pages.parquet");
-        let len = file.len() as usize;
-        let fetch_count = AtomicUsize::new(0);
-        let mut fetch = |range| {
-            fetch_count.fetch_add(1, Ordering::SeqCst);
-            futures::future::ready(read_range(&mut file, range))
-        };
-
-        let f = MetadataFetchFn(&mut fetch);
-        let mut loader = MetadataLoader::load(f, len, None).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-        loader.load_page_index(true, true).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 3);
-        let metadata = loader.finish();
-        assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
-
-        // Prefetch just footer exactly
-        fetch_count.store(0, Ordering::SeqCst);
-        let f = MetadataFetchFn(&mut fetch);
-        let mut loader = MetadataLoader::load(f, len, Some(1729)).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-        loader.load_page_index(true, true).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-        let metadata = loader.finish();
-        assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
-
-        // Prefetch more than footer but not enough
-        fetch_count.store(0, Ordering::SeqCst);
-        let f = MetadataFetchFn(&mut fetch);
-        let mut loader = MetadataLoader::load(f, len, Some(130649)).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-        loader.load_page_index(true, true).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
-        let metadata = loader.finish();
-        assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
-
-        // Prefetch exactly enough
-        fetch_count.store(0, Ordering::SeqCst);
-        let f = MetadataFetchFn(&mut fetch);
-        let mut loader = MetadataLoader::load(f, len, Some(130650)).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-        loader.load_page_index(true, true).await.unwrap();
-        assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
-        let metadata = loader.finish();
-        assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
-    }
+    /// [`FutureExt::boxed`]: futures::FutureExt::boxed
+    fn fetch_suffix(&mut self, suffix: usize) -> BoxFuture<'_, Result<Bytes>>;
 }
diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs
index 611d6999e07e..60f2ca1615a3 100644
--- a/parquet/src/arrow/async_reader/mod.rs
+++ b/parquet/src/arrow/async_reader/mod.rs
@@ -21,7 +21,6 @@
 //!
 //! See example on [`ParquetRecordBatchStreamBuilder::new`]
 
-use std::collections::VecDeque;
 use std::fmt::Formatter;
 use std::io::SeekFrom;
 use std::ops::Range;
@@ -29,31 +28,24 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use bytes::{Buf, Bytes};
+use bytes::Bytes;
 use futures::future::{BoxFuture, FutureExt};
-use futures::ready;
 use futures::stream::Stream;
 use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
 
 use arrow_array::RecordBatch;
-use arrow_schema::{DataType, Fields, Schema, SchemaRef};
+use arrow_schema::{Schema, SchemaRef};
 
-use crate::arrow::array_reader::{ArrayReaderBuilder, RowGroups};
 use crate::arrow::arrow_reader::{
     ArrowReaderBuilder, ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReader,
-    RowFilter, RowSelection,
 };
-use crate::arrow::ProjectionMask;
 
+use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash};
 use crate::bloom_filter::{
-    chunk_read_bloom_filter_header_and_offset, Sbbf, SBBF_HEADER_SIZE_ESTIMATE,
+    SBBF_HEADER_SIZE_ESTIMATE, Sbbf, chunk_read_bloom_filter_header_and_offset,
 };
-use crate::column::page::{PageIterator, PageReader};
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
-use crate::file::page_index::offset_index::OffsetIndexMetaData;
-use crate::file::reader::{ChunkReader, Length, SerializedPageReader};
-use crate::format::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash};
+use crate::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
 
 mod metadata;
 pub use metadata::*;
@@ -61,8 +53,8 @@ pub use metadata::*;
 #[cfg(feature = "object_store")]
 mod store;
 
-use crate::arrow::arrow_reader::ReadPlanBuilder;
-use crate::arrow::schema::ParquetField;
+use crate::DecodeResult;
+use crate::arrow::push_decoder::{NoInput, ParquetPushDecoder, ParquetPushDecoderBuilder};
 #[cfg(feature = "object_store")]
 pub use store::*;
 
@@ -172,12 +164,16 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
         options: Option<&'a ArrowReaderOptions>,
     ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
         async move {
+            let metadata_opts = options.map(|o| o.metadata_options().clone());
             let metadata_reader = ParquetMetaDataReader::new()
-                .with_page_indexes(options.is_some_and(|o| o.page_index));
+                .with_page_index_policy(PageIndexPolicy::from(
+                    options.is_some_and(|o| o.page_index()),
+                ))
+                .with_metadata_options(metadata_opts);
 
             #[cfg(feature = "encryption")]
             let metadata_reader = metadata_reader.with_decryption_properties(
-                options.and_then(|o| o.file_decryption_properties.as_ref()),
+                options.and_then(|o| o.file_decryption_properties.as_ref().map(Arc::clone)),
             );
 
             let parquet_metadata = metadata_reader.load_via_suffix_and_finish(self).await?;
@@ -201,10 +197,10 @@ impl ArrowReaderMetadata {
 }
 
 #[doc(hidden)]
-/// A newtype used within [`ReaderOptionsBuilder`] to distinguish sync readers from async
+/// Newtype (wrapper) used within [`ArrowReaderBuilder`] to distinguish sync readers from async
 ///
-/// Allows sharing the same builder for both the sync and async versions, whilst also not
-/// breaking the pre-existing ParquetRecordBatchStreamBuilder API
+/// Allows sharing the same builder for different readers while keeping the same
+/// ParquetRecordBatchStreamBuilder API
 pub struct AsyncReader<T>(T);
 
 /// A builder for reading parquet files from an `async` source as  [`ParquetRecordBatchStream`]
@@ -446,17 +442,17 @@ impl<T: AsyncFileReader + Send + 'static> ParquetRecordBatchStreamBuilder<T> {
             chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?;
 
         match header.algorithm {
-            BloomFilterAlgorithm::BLOCK(_) => {
+            BloomFilterAlgorithm::BLOCK => {
                 // this match exists to future proof the singleton algorithm enum
             }
         }
         match header.compression {
-            BloomFilterCompression::UNCOMPRESSED(_) => {
+            BloomFilterCompression::UNCOMPRESSED => {
                 // this match exists to future proof the singleton compression enum
             }
         }
         match header.hash {
-            BloomFilterHash::XXHASH(_) => {
+            BloomFilterHash::XXHASH => {
                 // this match exists to future proof the singleton hash enum
             }
         }
@@ -483,211 +479,115 @@ impl<T: AsyncFileReader + Send + 'static> ParquetRecordBatchStreamBuilder<T> {
     ///
     /// See examples on [`ParquetRecordBatchStreamBuilder::new`]
     pub fn build(self) -> Result<ParquetRecordBatchStream<T>> {
-        let num_row_groups = self.metadata.row_groups().len();
-
-        let row_groups = match self.row_groups {
-            Some(row_groups) => {
-                if let Some(col) = row_groups.iter().find(|x| **x >= num_row_groups) {
-                    return Err(general_err!(
-                        "row group {} out of bounds 0..{}",
-                        col,
-                        num_row_groups
-                    ));
-                }
-                row_groups.into()
-            }
-            None => (0..self.metadata.row_groups().len()).collect(),
-        };
-
-        // Try to avoid allocate large buffer
-        let batch_size = self
-            .batch_size
-            .min(self.metadata.file_metadata().num_rows() as usize);
-        let reader_factory = ReaderFactory {
-            input: self.input.0,
-            filter: self.filter,
-            metadata: self.metadata.clone(),
-            fields: self.fields,
-            limit: self.limit,
-            offset: self.offset,
-        };
+        let Self {
+            input,
+            metadata,
+            schema,
+            fields,
+            batch_size,
+            row_groups,
+            projection,
+            filter,
+            selection,
+            row_selection_policy: selection_strategy,
+            limit,
+            offset,
+            metrics,
+            max_predicate_cache_size,
+        } = self;
 
         // Ensure schema of ParquetRecordBatchStream respects projection, and does
         // not store metadata (same as for ParquetRecordBatchReader and emitted RecordBatches)
-        let projected_fields = match reader_factory.fields.as_deref().map(|pf| &pf.arrow_type) {
-            Some(DataType::Struct(fields)) => {
-                fields.filter_leaves(|idx, _| self.projection.leaf_included(idx))
-            }
-            None => Fields::empty(),
-            _ => unreachable!("Must be Struct for root type"),
-        };
-        let schema = Arc::new(Schema::new(projected_fields));
-
-        Ok(ParquetRecordBatchStream {
-            metadata: self.metadata,
+        let projection_len = projection.mask.as_ref().map_or(usize::MAX, |m| m.len());
+        let projected_fields = schema
+            .fields
+            .filter_leaves(|idx, _| idx < projection_len && projection.leaf_included(idx));
+        let projected_schema = Arc::new(Schema::new(projected_fields));
+
+        let decoder = ParquetPushDecoderBuilder {
+            input: NoInput,
+            metadata,
+            schema,
+            fields,
+            projection,
+            filter,
+            selection,
+            row_selection_policy: selection_strategy,
             batch_size,
             row_groups,
-            projection: self.projection,
-            selection: self.selection,
-            schema,
-            reader_factory: Some(reader_factory),
-            state: StreamState::Init,
+            limit,
+            offset,
+            metrics,
+            max_predicate_cache_size,
+        }
+        .build()?;
+
+        let request_state = RequestState::None { input: input.0 };
+
+        Ok(ParquetRecordBatchStream {
+            schema: projected_schema,
+            decoder,
+            request_state,
         })
     }
 }
 
-/// Returns a [`ReaderFactory`] and an optional [`ParquetRecordBatchReader`] for the next row group
+/// State machine that tracks outstanding requests to fetch data
 ///
-/// Note: If all rows are filtered out in the row group (e.g by filters, limit or
-/// offset), returns `None` for the reader.
-type ReadResult<T> = Result<(ReaderFactory<T>, Option<ParquetRecordBatchReader>)>;
-
-/// [`ReaderFactory`] is used by [`ParquetRecordBatchStream`] to create
-/// [`ParquetRecordBatchReader`]
-struct ReaderFactory<T> {
-    metadata: Arc<ParquetMetaData>,
-
-    /// Top level parquet schema
-    fields: Option<Arc<ParquetField>>,
-
-    input: T,
-
-    /// Optional filter
-    filter: Option<RowFilter>,
-
-    /// Limit to apply to remaining row groups.  
-    limit: Option<usize>,
-
-    /// Offset to apply to the next
-    offset: Option<usize>,
+/// The parameter `T` is the input, typically an `AsyncFileReader`
+enum RequestState<T> {
+    /// No outstanding requests
+    None {
+        input: T,
+    },
+    /// There is an outstanding request for data
+    Outstanding {
+        /// Ranges that have been requested
+        ranges: Vec<Range<u64>>,
+        /// Future that will resolve to (input, data for the requested ranges)
+        ///
+        /// Note the future owns the reader while the request is outstanding
+        /// and returns it upon completion
+        future: BoxFuture<'static, Result<(T, Vec<Bytes>)>>,
+    },
+    Done,
 }
 
-impl<T> ReaderFactory<T>
+impl<T> RequestState<T>
 where
-    T: AsyncFileReader + Send,
+    T: AsyncFileReader + Unpin + Send + 'static,
 {
-    /// Reads the next row group with the provided `selection`, `projection` and `batch_size`
-    ///
-    /// Updates the `limit` and `offset` of the reader factory
-    ///
-    /// Note: this captures self so that the resulting future has a static lifetime
-    async fn read_row_group(
-        mut self,
-        row_group_idx: usize,
-        selection: Option<RowSelection>,
-        projection: ProjectionMask,
-        batch_size: usize,
-    ) -> ReadResult<T> {
-        // TODO: calling build_array multiple times is wasteful
-
-        let meta = self.metadata.row_group(row_group_idx);
-        let offset_index = self
-            .metadata
-            .offset_index()
-            // filter out empty offset indexes (old versions specified Some(vec![]) when no present)
-            .filter(|index| !index.is_empty())
-            .map(|x| x[row_group_idx].as_slice());
-
-        let mut row_group = InMemoryRowGroup {
-            // schema: meta.schema_descr_ptr(),
-            row_count: meta.num_rows() as usize,
-            column_chunks: vec![None; meta.columns().len()],
-            offset_index,
-            row_group_idx,
-            metadata: self.metadata.as_ref(),
-        };
-
-        let filter = self.filter.as_mut();
-        let mut plan_builder = ReadPlanBuilder::new(batch_size).with_selection(selection);
-
-        // Update selection based on any filters
-        if let Some(filter) = filter {
-            for predicate in filter.predicates.iter_mut() {
-                if !plan_builder.selects_any() {
-                    return Ok((self, None)); // ruled out entire row group
-                }
-
-                // (pre) Fetch only the columns that are selected by the predicate
-                let selection = plan_builder.selection();
-                row_group
-                    .fetch(&mut self.input, predicate.projection(), selection)
-                    .await?;
-
-                let array_reader = ArrayReaderBuilder::new(&row_group)
-                    .build_array_reader(self.fields.as_deref(), predicate.projection())?;
-
-                plan_builder = plan_builder.with_predicate(array_reader, predicate.as_mut())?;
-            }
-        }
-
-        // Compute the number of rows in the selection before applying limit and offset
-        let rows_before = plan_builder
-            .num_rows_selected()
-            .unwrap_or(row_group.row_count);
-
-        if rows_before == 0 {
-            return Ok((self, None)); // ruled out entire row group
-        }
-
-        // Apply any limit and offset
-        let plan_builder = plan_builder
-            .limited(row_group.row_count)
-            .with_offset(self.offset)
-            .with_limit(self.limit)
-            .build_limited();
-
-        let rows_after = plan_builder
-            .num_rows_selected()
-            .unwrap_or(row_group.row_count);
-
-        // Update running offset and limit for after the current row group is read
-        if let Some(offset) = &mut self.offset {
-            // Reduction is either because of offset or limit, as limit is applied
-            // after offset has been "exhausted" can just use saturating sub here
-            *offset = offset.saturating_sub(rows_before - rows_after)
-        }
-
-        if rows_after == 0 {
-            return Ok((self, None)); // ruled out entire row group
-        }
-
-        if let Some(limit) = &mut self.limit {
-            *limit -= rows_after;
+    /// Issue a request to fetch `ranges`, returning the Outstanding state
+    fn begin_request(mut input: T, ranges: Vec<Range<u64>>) -> Self {
+        let ranges_captured = ranges.clone();
+
+        // Note: the input must be moved *into* the future. The future
+        // returned by get_byte_ranges has a lifetime (i.e. it may hold
+        // references internally), so the async block must own the input
+        // while the request is outstanding.
+        let future = async move {
+            let data = input.get_byte_ranges(ranges_captured).await?;
+            Ok((input, data))
         }
-        // fetch the pages needed for decoding
-        row_group
-            .fetch(&mut self.input, &projection, plan_builder.selection())
-            .await?;
-
-        let plan = plan_builder.build();
-
-        let array_reader = ArrayReaderBuilder::new(&row_group)
-            .build_array_reader(self.fields.as_deref(), &projection)?;
-
-        let reader = ParquetRecordBatchReader::new(array_reader, plan);
-
-        Ok((self, Some(reader)))
+        .boxed();
+        RequestState::Outstanding { ranges, future }
     }
 }
 
-enum StreamState<T> {
-    /// At the start of a new row group, or the end of the parquet stream
-    Init,
-    /// Decoding a batch
-    Decoding(ParquetRecordBatchReader),
-    /// Reading data from input
-    Reading(BoxFuture<'static, ReadResult<T>>),
-    /// Error
-    Error,
-}
-
-impl<T> std::fmt::Debug for StreamState<T> {
+impl<T> std::fmt::Debug for RequestState<T> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
-            StreamState::Init => write!(f, "StreamState::Init"),
-            StreamState::Decoding(_) => write!(f, "StreamState::Decoding"),
-            StreamState::Reading(_) => write!(f, "StreamState::Reading"),
-            StreamState::Error => write!(f, "StreamState::Error"),
+            RequestState::None { input: _ } => f
+                .debug_struct("RequestState::None")
+                .field("input", &"...")
+                .finish(),
+            RequestState::Outstanding { ranges, .. } => f
+                .debug_struct("RequestState::Outstanding")
+                .field("ranges", &ranges)
+                .finish(),
+            RequestState::Done => {
+                write!(f, "RequestState::Done")
+            }
         }
     }
 }
@@ -707,35 +607,23 @@ impl<T> std::fmt::Debug for StreamState<T> {
 /// required, which is especially important for object stores, where IO operations
 /// have latencies in the hundreds of milliseconds
 ///
+/// See [`ParquetPushDecoderBuilder`] for an API with lower level control over
+/// buffering.
 ///
 /// [`Stream`]: https://docs.rs/futures/latest/futures/stream/trait.Stream.html
 pub struct ParquetRecordBatchStream<T> {
-    metadata: Arc<ParquetMetaData>,
-
+    /// Output schema of the stream
     schema: SchemaRef,
-
-    row_groups: VecDeque<usize>,
-
-    projection: ProjectionMask,
-
-    batch_size: usize,
-
-    selection: Option<RowSelection>,
-
-    /// This is an option so it can be moved into a future
-    reader_factory: Option<ReaderFactory<T>>,
-
-    state: StreamState<T>,
+    /// Input and Outstanding IO request, if any
+    request_state: RequestState<T>,
+    /// Decoding state machine (no IO)
+    decoder: ParquetPushDecoder,
 }
 
 impl<T> std::fmt::Debug for ParquetRecordBatchStream<T> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("ParquetRecordBatchStream")
-            .field("metadata", &self.metadata)
-            .field("schema", &self.schema)
-            .field("batch_size", &self.batch_size)
-            .field("projection", &self.projection)
-            .field("state", &self.state)
+            .field("request_state", &self.request_state)
             .finish()
     }
 }
@@ -769,45 +657,35 @@ where
     /// - `Ok(Some(reader))` which holds all the data for the row group.
     pub async fn next_row_group(&mut self) -> Result<Option<ParquetRecordBatchReader>> {
         loop {
-            match &mut self.state {
-                StreamState::Decoding(_) | StreamState::Reading(_) => {
-                    return Err(ParquetError::General(
-                        "Cannot combine the use of next_row_group with the Stream API".to_string(),
-                    ))
-                }
-                StreamState::Init => {
-                    let row_group_idx = match self.row_groups.pop_front() {
-                        Some(idx) => idx,
-                        None => return Ok(None),
-                    };
-
-                    let row_count = self.metadata.row_group(row_group_idx).num_rows() as usize;
-
-                    let selection = self.selection.as_mut().map(|s| s.split_off(row_count));
-
-                    let reader_factory = self.reader_factory.take().expect("lost reader factory");
-
-                    let (reader_factory, maybe_reader) = reader_factory
-                        .read_row_group(
-                            row_group_idx,
-                            selection,
-                            self.projection.clone(),
-                            self.batch_size,
-                        )
-                        .await
-                        .inspect_err(|_| {
-                            self.state = StreamState::Error;
-                        })?;
-                    self.reader_factory = Some(reader_factory);
-
-                    if let Some(reader) = maybe_reader {
-                        return Ok(Some(reader));
-                    } else {
-                        // All rows skipped, read next row group
-                        continue;
+            // Take ownership of request state to process, leaving self in a
+            // valid state
+            let request_state = std::mem::replace(&mut self.request_state, RequestState::Done);
+            match request_state {
+                // No outstanding requests, proceed to setup next row group
+                RequestState::None { input } => {
+                    match self.decoder.try_next_reader()? {
+                        DecodeResult::NeedsData(ranges) => {
+                            self.request_state = RequestState::begin_request(input, ranges);
+                            continue; // poll again (as the input might be ready immediately)
+                        }
+                        DecodeResult::Data(reader) => {
+                            self.request_state = RequestState::None { input };
+                            return Ok(Some(reader));
+                        }
+                        DecodeResult::Finished => return Ok(None),
                     }
                 }
-                StreamState::Error => return Ok(None), // Ends the stream as error happens.
+                RequestState::Outstanding { ranges, future } => {
+                    let (input, data) = future.await?;
+                    // Push the requested data to the decoder and try again
+                    self.decoder.push_ranges(ranges, data)?;
+                    self.request_state = RequestState::None { input };
+                    continue; // try and decode on next iteration
+                }
+                RequestState::Done => {
+                    self.request_state = RequestState::Done;
+                    return Ok(None);
+                }
             }
         }
     }
@@ -818,314 +696,101 @@ where
     T: AsyncFileReader + Unpin + Send + 'static,
 {
     type Item = Result<RecordBatch>;
-
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        loop {
-            match &mut self.state {
-                StreamState::Decoding(batch_reader) => match batch_reader.next() {
-                    Some(Ok(batch)) => {
-                        return Poll::Ready(Some(Ok(batch)));
-                    }
-                    Some(Err(e)) => {
-                        self.state = StreamState::Error;
-                        return Poll::Ready(Some(Err(ParquetError::ArrowError(e.to_string()))));
-                    }
-                    None => self.state = StreamState::Init,
-                },
-                StreamState::Init => {
-                    let row_group_idx = match self.row_groups.pop_front() {
-                        Some(idx) => idx,
-                        None => return Poll::Ready(None),
-                    };
-
-                    let reader = self.reader_factory.take().expect("lost reader factory");
-
-                    let row_count = self.metadata.row_group(row_group_idx).num_rows() as usize;
-
-                    let selection = self.selection.as_mut().map(|s| s.split_off(row_count));
-
-                    let fut = reader
-                        .read_row_group(
-                            row_group_idx,
-                            selection,
-                            self.projection.clone(),
-                            self.batch_size,
-                        )
-                        .boxed();
-
-                    self.state = StreamState::Reading(fut)
-                }
-                StreamState::Reading(f) => match ready!(f.poll_unpin(cx)) {
-                    Ok((reader_factory, maybe_reader)) => {
-                        self.reader_factory = Some(reader_factory);
-                        match maybe_reader {
-                            // Read records from [`ParquetRecordBatchReader`]
-                            Some(reader) => self.state = StreamState::Decoding(reader),
-                            // All rows skipped, read next row group
-                            None => self.state = StreamState::Init,
-                        }
-                    }
-                    Err(e) => {
-                        self.state = StreamState::Error;
-                        return Poll::Ready(Some(Err(e)));
-                    }
-                },
-                StreamState::Error => return Poll::Ready(None), // Ends the stream as error happens.
+        match self.poll_next_inner(cx) {
+            Ok(res) => {
+                // Decoded a batch, reached end of stream, or still pending (no error).
+                // convert Option<RecordBatch> to Option<Result<RecordBatch>>
+                res.map(|res| Ok(res).transpose())
+            }
+            Err(e) => {
+                self.request_state = RequestState::Done;
+                Poll::Ready(Some(Err(e)))
             }
         }
     }
 }
 
-/// An in-memory collection of column chunks
-struct InMemoryRowGroup<'a> {
-    offset_index: Option<&'a [OffsetIndexMetaData]>,
-    /// Column chunks for this row group
-    column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
-    row_count: usize,
-    row_group_idx: usize,
-    metadata: &'a ParquetMetaData,
-}
-
-impl InMemoryRowGroup<'_> {
-    /// Fetches any additional column data specified in `projection` that is not already
-    /// present in `self.column_chunks`.
+impl<T> ParquetRecordBatchStream<T>
+where
+    T: AsyncFileReader + Unpin + Send + 'static,
+{
+    /// Inner state machine
     ///
-    /// If `selection` is provided, only the pages required for the selection
-    /// are fetched. Otherwise, all pages are fetched.
-    async fn fetch<T: AsyncFileReader + Send>(
-        &mut self,
-        input: &mut T,
-        projection: &ProjectionMask,
-        selection: Option<&RowSelection>,
-    ) -> Result<()> {
-        let metadata = self.metadata.row_group(self.row_group_idx);
-        if let Some((selection, offset_index)) = selection.zip(self.offset_index) {
-            // If we have a `RowSelection` and an `OffsetIndex` then only fetch pages required for the
-            // `RowSelection`
-            let mut page_start_offsets: Vec<Vec<u64>> = vec![];
-
-            let fetch_ranges = self
-                .column_chunks
-                .iter()
-                .zip(metadata.columns())
-                .enumerate()
-                .filter(|&(idx, (chunk, _chunk_meta))| {
-                    chunk.is_none() && projection.leaf_included(idx)
-                })
-                .flat_map(|(idx, (_chunk, chunk_meta))| {
-                    // If the first page does not start at the beginning of the column,
-                    // then we need to also fetch a dictionary page.
-                    let mut ranges: Vec<Range<u64>> = vec![];
-                    let (start, _len) = chunk_meta.byte_range();
-                    match offset_index[idx].page_locations.first() {
-                        Some(first) if first.offset as u64 != start => {
-                            ranges.push(start..first.offset as u64);
+    /// Note this is separate from poll_next so we can use ? operator to check for errors
+    /// as it returns `Result<Poll<Option<RecordBatch>>>`
+    fn poll_next_inner(&mut self, cx: &mut Context<'_>) -> Result<Poll<Option<RecordBatch>>> {
+        loop {
+            let request_state = std::mem::replace(&mut self.request_state, RequestState::Done);
+            match request_state {
+                RequestState::None { input } => {
+                    // No outstanding requests, proceed to decode the next batch
+                    match self.decoder.try_decode()? {
+                        DecodeResult::NeedsData(ranges) => {
+                            self.request_state = RequestState::begin_request(input, ranges);
+                            continue; // poll again (as the input might be ready immediately)
+                        }
+                        DecodeResult::Data(batch) => {
+                            self.request_state = RequestState::None { input };
+                            return Ok(Poll::Ready(Some(batch)));
+                        }
+                        DecodeResult::Finished => {
+                            self.request_state = RequestState::Done;
+                            return Ok(Poll::Ready(None));
                         }
-                        _ => (),
                     }
-
-                    ranges.extend(selection.scan_ranges(&offset_index[idx].page_locations));
-                    page_start_offsets.push(ranges.iter().map(|range| range.start).collect());
-
-                    ranges
-                })
-                .collect();
-
-            let mut chunk_data = input.get_byte_ranges(fetch_ranges).await?.into_iter();
-            let mut page_start_offsets = page_start_offsets.into_iter();
-
-            for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
-                if chunk.is_some() || !projection.leaf_included(idx) {
-                    continue;
                 }
-
-                if let Some(offsets) = page_start_offsets.next() {
-                    let mut chunks = Vec::with_capacity(offsets.len());
-                    for _ in 0..offsets.len() {
-                        chunks.push(chunk_data.next().unwrap());
+                RequestState::Outstanding { ranges, mut future } => match future.poll_unpin(cx) {
+                    // Data was ready, push it to the decoder and continue
+                    Poll::Ready(result) => {
+                        let (input, data) = result?;
+                        // Push the requested data to the decoder
+                        self.decoder.push_ranges(ranges, data)?;
+                        self.request_state = RequestState::None { input };
+                        continue; // next iteration will try to decode the next batch
                     }
-
-                    *chunk = Some(Arc::new(ColumnChunkData::Sparse {
-                        length: metadata.column(idx).byte_range().1 as usize,
-                        data: offsets
-                            .into_iter()
-                            .map(|x| x as usize)
-                            .zip(chunks.into_iter())
-                            .collect(),
-                    }))
-                }
-            }
-        } else {
-            let fetch_ranges = self
-                .column_chunks
-                .iter()
-                .enumerate()
-                .filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx))
-                .map(|(idx, _chunk)| {
-                    let column = metadata.column(idx);
-                    let (start, length) = column.byte_range();
-                    start..(start + length)
-                })
-                .collect();
-
-            let mut chunk_data = input.get_byte_ranges(fetch_ranges).await?.into_iter();
-
-            for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
-                if chunk.is_some() || !projection.leaf_included(idx) {
-                    continue;
-                }
-
-                if let Some(data) = chunk_data.next() {
-                    *chunk = Some(Arc::new(ColumnChunkData::Dense {
-                        offset: metadata.column(idx).byte_range().0 as usize,
-                        data,
-                    }));
+                    Poll::Pending => {
+                        self.request_state = RequestState::Outstanding { ranges, future };
+                        return Ok(Poll::Pending);
+                    }
+                },
+                RequestState::Done => {
+                    // Stream is done (error or end), return None
+                    self.request_state = RequestState::Done;
+                    return Ok(Poll::Ready(None));
                 }
             }
         }
-
-        Ok(())
-    }
-}
-
-impl RowGroups for InMemoryRowGroup<'_> {
-    fn num_rows(&self) -> usize {
-        self.row_count
-    }
-
-    /// Return chunks for column i
-    fn column_chunks(&self, i: usize) -> Result<Box<dyn PageIterator>> {
-        match &self.column_chunks[i] {
-            None => Err(ParquetError::General(format!(
-                "Invalid column index {i}, column was not fetched"
-            ))),
-            Some(data) => {
-                let page_locations = self
-                    .offset_index
-                    // filter out empty offset indexes (old versions specified Some(vec![]) when no present)
-                    .filter(|index| !index.is_empty())
-                    .map(|index| index[i].page_locations.clone());
-                let column_chunk_metadata = self.metadata.row_group(self.row_group_idx).column(i);
-                let page_reader = SerializedPageReader::new(
-                    data.clone(),
-                    column_chunk_metadata,
-                    self.row_count,
-                    page_locations,
-                )?;
-                let page_reader = page_reader.add_crypto_context(
-                    self.row_group_idx,
-                    i,
-                    self.metadata,
-                    column_chunk_metadata,
-                )?;
-
-                let page_reader: Box<dyn PageReader> = Box::new(page_reader);
-
-                Ok(Box::new(ColumnChunkIterator {
-                    reader: Some(Ok(page_reader)),
-                }))
-            }
-        }
-    }
-}
-
-/// An in-memory column chunk
-#[derive(Clone)]
-enum ColumnChunkData {
-    /// Column chunk data representing only a subset of data pages
-    Sparse {
-        /// Length of the full column chunk
-        length: usize,
-        /// Subset of data pages included in this sparse chunk.
-        ///
-        /// Each element is a tuple of (page offset within file, page data).
-        /// Each entry is a complete page and the list is ordered by offset.
-        data: Vec<(usize, Bytes)>,
-    },
-    /// Full column chunk and the offset within the original file
-    Dense { offset: usize, data: Bytes },
-}
-
-impl ColumnChunkData {
-    /// Return the data for this column chunk at the given offset
-    fn get(&self, start: u64) -> Result<Bytes> {
-        match &self {
-            ColumnChunkData::Sparse { data, .. } => data
-                .binary_search_by_key(&start, |(offset, _)| *offset as u64)
-                .map(|idx| data[idx].1.clone())
-                .map_err(|_| {
-                    ParquetError::General(format!(
-                        "Invalid offset in sparse column chunk data: {start}"
-                    ))
-                }),
-            ColumnChunkData::Dense { offset, data } => {
-                let start = start as usize - *offset;
-                Ok(data.slice(start..))
-            }
-        }
-    }
-}
-
-impl Length for ColumnChunkData {
-    /// Return the total length of the full column chunk
-    fn len(&self) -> u64 {
-        match &self {
-            ColumnChunkData::Sparse { length, .. } => *length as u64,
-            ColumnChunkData::Dense { data, .. } => data.len() as u64,
-        }
-    }
-}
-
-impl ChunkReader for ColumnChunkData {
-    type T = bytes::buf::Reader<Bytes>;
-
-    fn get_read(&self, start: u64) -> Result<Self::T> {
-        Ok(self.get(start)?.reader())
-    }
-
-    fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes> {
-        Ok(self.get(start)?.slice(..length))
-    }
-}
-
-/// Implements [`PageIterator`] for a single column chunk, yielding a single [`PageReader`]
-struct ColumnChunkIterator {
-    reader: Option<Result<Box<dyn PageReader>>>,
-}
-
-impl Iterator for ColumnChunkIterator {
-    type Item = Result<Box<dyn PageReader>>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.reader.take()
     }
 }
 
-impl PageIterator for ColumnChunkIterator {}
-
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::arrow::arrow_reader::RowSelectionPolicy;
+    use crate::arrow::arrow_reader::tests::test_row_numbers_with_multiple_row_groups_helper;
     use crate::arrow::arrow_reader::{
-        ArrowPredicateFn, ParquetRecordBatchReaderBuilder, RowSelector,
+        ArrowPredicateFn, ParquetRecordBatchReaderBuilder, RowFilter, RowSelection, RowSelector,
     };
     use crate::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
-    use crate::arrow::schema::parquet_to_arrow_schema_and_fields;
-    use crate::arrow::ArrowWriter;
+    use crate::arrow::schema::virtual_type::RowNumber;
+    use crate::arrow::{ArrowWriter, AsyncArrowWriter, ProjectionMask};
     use crate::file::metadata::ParquetMetaDataReader;
     use crate::file::properties::WriterProperties;
     use arrow::compute::kernels::cmp::eq;
+    use arrow::compute::or;
     use arrow::error::Result as ArrowResult;
-    use arrow_array::builder::{ListBuilder, StringBuilder};
+    use arrow_array::builder::{Float32Builder, ListBuilder, StringBuilder};
     use arrow_array::cast::AsArray;
     use arrow_array::types::Int32Type;
     use arrow_array::{
-        Array, ArrayRef, Int32Array, Int8Array, RecordBatchReader, Scalar, StringArray,
-        StructArray, UInt64Array,
+        Array, ArrayRef, BooleanArray, Int8Array, Int32Array, Int64Array, RecordBatchReader,
+        Scalar, StringArray, StructArray, UInt64Array,
     };
     use arrow_schema::{DataType, Field, Schema};
+    use arrow_select::concat::concat_batches;
     use futures::{StreamExt, TryStreamExt};
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
     use std::collections::HashMap;
     use std::sync::{Arc, Mutex};
     use tempfile::tempfile;
@@ -1164,8 +829,9 @@ mod tests {
             &'a mut self,
             options: Option<&'a ArrowReaderOptions>,
         ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
-            let metadata_reader = ParquetMetaDataReader::new()
-                .with_page_indexes(options.is_some_and(|o| o.page_index));
+            let metadata_reader = ParquetMetaDataReader::new().with_page_index_policy(
+                PageIndexPolicy::from(options.is_some_and(|o| o.page_index())),
+            );
             self.metadata = Some(Arc::new(
                 metadata_reader.parse_and_finish(&self.data).unwrap(),
             ));
@@ -1548,6 +1214,82 @@ mod tests {
         assert_eq!(actual_rows, expected_rows);
     }
 
+    #[tokio::test]
+    async fn test_row_filter_full_page_skip_is_handled_async() {
+        let first_value: i64 = 1111;
+        let last_value: i64 = 9999;
+        let num_rows: usize = 12;
+
+        // Build data whose row selection has an average run length of 4 (12 rows / 3 runs).
+        // The result would be (1111 XXXX) ... (4 pages in the middle) ... (XXXX 9999)
+        // The row selection would be [1111, (skip 10), 9999]
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("key", DataType::Int64, false),
+            Field::new("value", DataType::Int64, false),
+        ]));
+
+        let mut int_values: Vec<i64> = (0..num_rows as i64).collect();
+        int_values[0] = first_value;
+        int_values[num_rows - 1] = last_value;
+        let keys = Int64Array::from(int_values.clone());
+        let values = Int64Array::from(int_values.clone());
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![Arc::new(keys) as ArrayRef, Arc::new(values) as ArrayRef],
+        )
+        .unwrap();
+
+        let props = WriterProperties::builder()
+            .set_write_batch_size(2)
+            .set_data_page_row_count_limit(2)
+            .build();
+
+        let mut buffer = Vec::new();
+        let mut writer = ArrowWriter::try_new(&mut buffer, schema, Some(props)).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+        let data = Bytes::from(buffer);
+
+        let builder = ParquetRecordBatchStreamBuilder::new_with_options(
+            TestReader::new(data.clone()),
+            ArrowReaderOptions::new().with_page_index(true),
+        )
+        .await
+        .unwrap();
+        let schema = builder.parquet_schema().clone();
+        let filter_mask = ProjectionMask::leaves(&schema, [0]);
+
+        let make_predicate = |mask: ProjectionMask| {
+            ArrowPredicateFn::new(mask, move |batch: RecordBatch| {
+                let column = batch.column(0);
+                let match_first = eq(column, &Int64Array::new_scalar(first_value))?;
+                let match_second = eq(column, &Int64Array::new_scalar(last_value))?;
+                or(&match_first, &match_second)
+            })
+        };
+
+        let predicate = make_predicate(filter_mask.clone());
+
+        // The batch size is set to 12 to read all rows in one go after filtering.
+        // If the reader handled the filter with a mask it might panic: the 4 middle pages may not be decoded.
+        let stream = ParquetRecordBatchStreamBuilder::new_with_options(
+            TestReader::new(data.clone()),
+            ArrowReaderOptions::new().with_page_index(true),
+        )
+        .await
+        .unwrap()
+        .with_row_filter(RowFilter::new(vec![Box::new(predicate)]))
+        .with_batch_size(12)
+        .with_row_selection_policy(RowSelectionPolicy::Auto { threshold: 32 })
+        .build()
+        .unwrap();
+
+        let schema = stream.schema().clone();
+        let batches: Vec<_> = stream.try_collect().await.unwrap();
+        let result = concat_batches(&schema, &batches).unwrap();
+        assert_eq!(result.num_rows(), 2);
+    }
+
     #[tokio::test]
     async fn test_row_filter() {
         let a = StringArray::from_iter_values(["a", "b", "b", "b", "c", "c"]);
@@ -1832,95 +1574,6 @@ mod tests {
         assert_eq!(total_rows, 730);
     }
 
-    #[tokio::test]
-    async fn test_in_memory_row_group_sparse() {
-        let testdata = arrow::util::test_util::parquet_test_data();
-        let path = format!("{testdata}/alltypes_tiny_pages.parquet");
-        let data = Bytes::from(std::fs::read(path).unwrap());
-
-        let metadata = ParquetMetaDataReader::new()
-            .with_page_indexes(true)
-            .parse_and_finish(&data)
-            .unwrap();
-
-        let offset_index = metadata.offset_index().expect("reading offset index")[0].clone();
-
-        let mut metadata_builder = metadata.into_builder();
-        let mut row_groups = metadata_builder.take_row_groups();
-        row_groups.truncate(1);
-        let row_group_meta = row_groups.pop().unwrap();
-
-        let metadata = metadata_builder
-            .add_row_group(row_group_meta)
-            .set_column_index(None)
-            .set_offset_index(Some(vec![offset_index.clone()]))
-            .build();
-
-        let metadata = Arc::new(metadata);
-
-        let num_rows = metadata.row_group(0).num_rows();
-
-        assert_eq!(metadata.num_row_groups(), 1);
-
-        let async_reader = TestReader::new(data.clone());
-
-        let requests = async_reader.requests.clone();
-        let (_, fields) = parquet_to_arrow_schema_and_fields(
-            metadata.file_metadata().schema_descr(),
-            ProjectionMask::all(),
-            None,
-        )
-        .unwrap();
-
-        let _schema_desc = metadata.file_metadata().schema_descr();
-
-        let projection = ProjectionMask::leaves(metadata.file_metadata().schema_descr(), vec![0]);
-
-        let reader_factory = ReaderFactory {
-            metadata,
-            fields: fields.map(Arc::new),
-            input: async_reader,
-            filter: None,
-            limit: None,
-            offset: None,
-        };
-
-        let mut skip = true;
-        let mut pages = offset_index[0].page_locations.iter().peekable();
-
-        // Setup `RowSelection` so that we can skip every other page, selecting the last page
-        let mut selectors = vec![];
-        let mut expected_page_requests: Vec<Range<usize>> = vec![];
-        while let Some(page) = pages.next() {
-            let num_rows = if let Some(next_page) = pages.peek() {
-                next_page.first_row_index - page.first_row_index
-            } else {
-                num_rows - page.first_row_index
-            };
-
-            if skip {
-                selectors.push(RowSelector::skip(num_rows as usize));
-            } else {
-                selectors.push(RowSelector::select(num_rows as usize));
-                let start = page.offset as usize;
-                let end = start + page.compressed_page_size as usize;
-                expected_page_requests.push(start..end);
-            }
-            skip = !skip;
-        }
-
-        let selection = RowSelection::from(selectors);
-
-        let (_factory, _reader) = reader_factory
-            .read_row_group(0, Some(selection), projection.clone(), 48)
-            .await
-            .expect("reading row group");
-
-        let requests = requests.lock().unwrap();
-
-        assert_eq!(&requests[..], &expected_page_requests)
-    }
-
     #[tokio::test]
     async fn test_batch_size_overallocate() {
         let testdata = arrow::util::test_util::parquet_test_data();
@@ -1936,13 +1589,16 @@ mod tests {
 
         let file_rows = builder.metadata().file_metadata().num_rows() as usize;
 
-        let stream = builder
+        let builder = builder
             .with_projection(ProjectionMask::all())
-            .with_batch_size(1024)
-            .build()
-            .unwrap();
+            .with_batch_size(1024);
+
+        // even though the batch size is set to 1024, it should adjust to the max
+        // number of rows in the file (8)
         assert_ne!(1024, file_rows);
-        assert_eq!(stream.batch_size, file_rows);
+        assert_eq!(builder.batch_size, file_rows);
+
+        let _stream = builder.build().unwrap();
     }
 
     #[tokio::test]
@@ -2287,6 +1943,7 @@ mod tests {
     }
 
     #[tokio::test]
+    #[allow(deprecated)]
     async fn empty_offset_index_doesnt_panic_in_read_row_group() {
         use tokio::fs::File;
         let testdata = arrow::util::test_util::parquet_test_data();
@@ -2312,6 +1969,7 @@ mod tests {
     }
 
     #[tokio::test]
+    #[allow(deprecated)]
     async fn non_empty_offset_index_doesnt_panic_in_read_row_group() {
         use tokio::fs::File;
         let testdata = arrow::util::test_util::parquet_test_data();
@@ -2336,6 +1994,7 @@ mod tests {
     }
 
     #[tokio::test]
+    #[allow(deprecated)]
     async fn empty_offset_index_doesnt_panic_in_column_chunks() {
         use tempfile::TempDir;
         use tokio::fs::File;
@@ -2386,4 +2045,267 @@ mod tests {
         let result = reader.try_collect::<Vec<_>>().await.unwrap();
         assert_eq!(result.len(), 1);
     }
+
+    #[tokio::test]
+    async fn test_cached_array_reader_sparse_offset_error() {
+        use futures::TryStreamExt;
+
+        use crate::arrow::arrow_reader::{ArrowPredicateFn, RowFilter, RowSelection, RowSelector};
+        use arrow_array::{BooleanArray, RecordBatch};
+
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
+        let data = Bytes::from(std::fs::read(path).unwrap());
+
+        let async_reader = TestReader::new(data);
+
+        // Enable page index so the fetch logic loads only required pages
+        let options = ArrowReaderOptions::new().with_page_index(true);
+        let builder = ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
+            .await
+            .unwrap();
+
+        // Skip the first 22 rows (entire first Parquet page) and then select the
+        // next 3 rows (22, 23, 24). This means the fetch step will not include
+        // the first page starting at file offset 0.
+        let selection = RowSelection::from(vec![RowSelector::skip(22), RowSelector::select(3)]);
+
+        // Trivial predicate on column 0 that always returns `true`. Using the
+        // same column in both predicate and projection activates the caching
+        // layer (Producer/Consumer pattern).
+        let parquet_schema = builder.parquet_schema();
+        let proj = ProjectionMask::leaves(parquet_schema, vec![0]);
+        let always_true = ArrowPredicateFn::new(proj.clone(), |batch: RecordBatch| {
+            Ok(BooleanArray::from(vec![true; batch.num_rows()]))
+        });
+        let filter = RowFilter::new(vec![Box::new(always_true)]);
+
+        // Build the stream with batch size 8 so the cache reads whole batches
+        // that straddle the requested row range (rows 0-7, 8-15, 16-23, …).
+        let stream = builder
+            .with_batch_size(8)
+            .with_projection(proj)
+            .with_row_selection(selection)
+            .with_row_filter(filter)
+            .build()
+            .unwrap();
+
+        // Collecting the stream must succeed (hence the `unwrap`): this setup
+        // reproduces the conditions for the sparse column chunk offset error.
+        let _result: Vec<_> = stream.try_collect().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_predicate_cache_disabled() {
+        let k = Int32Array::from_iter_values(0..10);
+        let data = RecordBatch::try_from_iter([("k", Arc::new(k) as ArrayRef)]).unwrap();
+
+        let mut buf = Vec::new();
+        // both the page row limit and batch size are set to 1 to create one page per row
+        let props = WriterProperties::builder()
+            .set_data_page_row_count_limit(1)
+            .set_write_batch_size(1)
+            .set_max_row_group_size(10)
+            .set_write_page_header_statistics(true)
+            .build();
+        let mut writer = ArrowWriter::try_new(&mut buf, data.schema(), Some(props)).unwrap();
+        writer.write(&data).unwrap();
+        writer.close().unwrap();
+
+        let data = Bytes::from(buf);
+        let metadata = ParquetMetaDataReader::new()
+            .with_page_index_policy(PageIndexPolicy::Required)
+            .parse_and_finish(&data)
+            .unwrap();
+        let parquet_schema = metadata.file_metadata().schema_descr_ptr();
+
+        // the filter is not clone-able, so we use a lambda to simplify
+        let build_filter = || {
+            let scalar = Int32Array::from_iter_values([5]);
+            let predicate = ArrowPredicateFn::new(
+                ProjectionMask::leaves(&parquet_schema, vec![0]),
+                move |batch| eq(batch.column(0), &Scalar::new(&scalar)),
+            );
+            RowFilter::new(vec![Box::new(predicate)])
+        };
+
+        // select only one of the pages
+        let selection = RowSelection::from(vec![RowSelector::skip(5), RowSelector::select(1)]);
+
+        let options = ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
+        let reader_metadata = ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
+
+        // using the predicate cache (default)
+        let reader_with_cache = TestReader::new(data.clone());
+        let requests_with_cache = reader_with_cache.requests.clone();
+        let stream = ParquetRecordBatchStreamBuilder::new_with_metadata(
+            reader_with_cache,
+            reader_metadata.clone(),
+        )
+        .with_batch_size(1000)
+        .with_row_selection(selection.clone())
+        .with_row_filter(build_filter())
+        .build()
+        .unwrap();
+        let batches_with_cache: Vec<_> = stream.try_collect().await.unwrap();
+
+        // disabling the predicate cache
+        let reader_without_cache = TestReader::new(data);
+        let requests_without_cache = reader_without_cache.requests.clone();
+        let stream = ParquetRecordBatchStreamBuilder::new_with_metadata(
+            reader_without_cache,
+            reader_metadata,
+        )
+        .with_batch_size(1000)
+        .with_row_selection(selection)
+        .with_row_filter(build_filter())
+        .with_max_predicate_cache_size(0) // disabling it by setting the limit to 0
+        .build()
+        .unwrap();
+        let batches_without_cache: Vec<_> = stream.try_collect().await.unwrap();
+
+        assert_eq!(batches_with_cache, batches_without_cache);
+
+        let requests_with_cache = requests_with_cache.lock().unwrap();
+        let requests_without_cache = requests_without_cache.lock().unwrap();
+
+        // fewer requests will be made without the predicate cache
+        assert_eq!(requests_with_cache.len(), 11);
+        assert_eq!(requests_without_cache.len(), 2);
+
+        // fewer bytes will be retrieved without the predicate cache
+        assert_eq!(
+            requests_with_cache.iter().map(|r| r.len()).sum::<usize>(),
+            433
+        );
+        assert_eq!(
+            requests_without_cache
+                .iter()
+                .map(|r| r.len())
+                .sum::<usize>(),
+            92
+        );
+    }
+
+    #[test]
+    fn test_row_numbers_with_multiple_row_groups() {
+        test_row_numbers_with_multiple_row_groups_helper(
+            false,
+            |path, selection, _row_filter, batch_size| {
+                let runtime = tokio::runtime::Builder::new_current_thread()
+                    .enable_all()
+                    .build()
+                    .expect("Could not create runtime");
+                runtime.block_on(async move {
+                    let file = tokio::fs::File::open(path).await.unwrap();
+                    let row_number_field = Arc::new(
+                        Field::new("row_number", DataType::Int64, false)
+                            .with_extension_type(RowNumber),
+                    );
+                    let options = ArrowReaderOptions::new()
+                        .with_virtual_columns(vec![row_number_field])
+                        .unwrap();
+                    let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options)
+                        .await
+                        .unwrap()
+                        .with_row_selection(selection)
+                        .with_batch_size(batch_size)
+                        .build()
+                        .expect("Could not create reader");
+                    reader.try_collect::<Vec<_>>().await.unwrap()
+                })
+            },
+        );
+    }
+
+    #[test]
+    fn test_row_numbers_with_multiple_row_groups_and_filter() {
+        test_row_numbers_with_multiple_row_groups_helper(
+            true,
+            |path, selection, row_filter, batch_size| {
+                let runtime = tokio::runtime::Builder::new_current_thread()
+                    .enable_all()
+                    .build()
+                    .expect("Could not create runtime");
+                runtime.block_on(async move {
+                    let file = tokio::fs::File::open(path).await.unwrap();
+                    let row_number_field = Arc::new(
+                        Field::new("row_number", DataType::Int64, false)
+                            .with_extension_type(RowNumber),
+                    );
+                    let options = ArrowReaderOptions::new()
+                        .with_virtual_columns(vec![row_number_field])
+                        .unwrap();
+                    let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options)
+                        .await
+                        .unwrap()
+                        .with_row_selection(selection)
+                        .with_row_filter(row_filter.expect("No row filter"))
+                        .with_batch_size(batch_size)
+                        .build()
+                        .expect("Could not create reader");
+                    reader.try_collect::<Vec<_>>().await.unwrap()
+                })
+            },
+        );
+    }
+
+    #[tokio::test]
+    async fn test_nested_lists() -> Result<()> {
+        // Test case for https://github.com/apache/arrow-rs/issues/8657
+        let list_inner_field = Arc::new(Field::new("item", DataType::Float32, true));
+        let table_schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("vector", DataType::List(list_inner_field.clone()), true),
+        ]));
+
+        let mut list_builder =
+            ListBuilder::new(Float32Builder::new()).with_field(list_inner_field.clone());
+        list_builder.values().append_slice(&[10.0, 10.0, 10.0]);
+        list_builder.append(true);
+        list_builder.values().append_slice(&[20.0, 20.0, 20.0]);
+        list_builder.append(true);
+        list_builder.values().append_slice(&[30.0, 30.0, 30.0]);
+        list_builder.append(true);
+        list_builder.values().append_slice(&[40.0, 40.0, 40.0]);
+        list_builder.append(true);
+        let list_array = list_builder.finish();
+
+        let data = vec![RecordBatch::try_new(
+            table_schema.clone(),
+            vec![
+                Arc::new(Int32Array::from(vec![1, 2, 3, 4])),
+                Arc::new(list_array),
+            ],
+        )?];
+
+        let mut buffer = Vec::new();
+        let mut writer = AsyncArrowWriter::try_new(&mut buffer, table_schema, None)?;
+
+        for batch in data {
+            writer.write(&batch).await?;
+        }
+
+        writer.close().await?;
+
+        let reader = TestReader::new(Bytes::from(buffer));
+        let builder = ParquetRecordBatchStreamBuilder::new(reader).await?;
+
+        let predicate = ArrowPredicateFn::new(ProjectionMask::all(), |batch| {
+            Ok(BooleanArray::from(vec![true; batch.num_rows()]))
+        });
+
+        let projection_mask = ProjectionMask::all();
+
+        let mut stream = builder
+            .with_row_filter(RowFilter::new(vec![Box::new(predicate)]))
+            .with_projection(projection_mask)
+            .build()?;
+
+        while let Some(batch) = stream.next().await {
+            let _ = batch.unwrap(); // ensure there is no panic
+        }
+
+        Ok(())
+    }
 }
diff --git a/parquet/src/arrow/async_reader/store.rs b/parquet/src/arrow/async_reader/store.rs
index 8eaf7183e822..f1e987081dcf 100644
--- a/parquet/src/arrow/async_reader/store.rs
+++ b/parquet/src/arrow/async_reader/store.rs
@@ -20,11 +20,11 @@ use std::{ops::Range, sync::Arc};
 use crate::arrow::arrow_reader::ArrowReaderOptions;
 use crate::arrow::async_reader::{AsyncFileReader, MetadataSuffixFetch};
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+use crate::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
 use bytes::Bytes;
-use futures::{future::BoxFuture, FutureExt, TryFutureExt};
-use object_store::{path::Path, ObjectStore};
+use futures::{FutureExt, TryFutureExt, future::BoxFuture};
 use object_store::{GetOptions, GetRange};
+use object_store::{ObjectStore, path::Path};
 use tokio::runtime::Handle;
 
 /// Reads Parquet files in object storage using [`ObjectStore`].
@@ -77,7 +77,7 @@ impl ParquetObjectReader {
     }
 
     /// Provide a hint as to the size of the parquet file's footer,
-    /// see [fetch_parquet_metadata](crate::arrow::async_reader::fetch_parquet_metadata)
+    /// see [`ParquetMetaDataReader::with_prefetch_hint`]
     pub fn with_footer_size_hint(self, hint: usize) -> Self {
         Self {
             metadata_size_hint: Some(hint),
@@ -101,7 +101,11 @@ impl ParquetObjectReader {
         }
     }
 
-    /// Load the Column Index as part of [`Self::get_metadata`]
+    /// Whether to load the Column Index as part of [`Self::get_metadata`]
+    ///
+    /// Note: This setting may be overridden by [`ArrowReaderOptions`] `page_index_policy`.
+    /// If `page_index_policy` is `Optional` or `Required`, it will take precedence
+    /// over this preload flag. When it is `Skip` (default), this flag is used.
     pub fn with_preload_column_index(self, preload_column_index: bool) -> Self {
         Self {
             preload_column_index,
@@ -109,7 +113,11 @@ impl ParquetObjectReader {
         }
     }
 
-    /// Load the Offset Index as part of [`Self::get_metadata`]
+    /// Whether to load the Offset Index as part of [`Self::get_metadata`]
+    ///
+    /// Note: This setting may be overridden by [`ArrowReaderOptions`] `page_index_policy`.
+    /// If `page_index_policy` is `Optional` or `Required`, it will take precedence
+    /// over this preload flag. When it is `Skip` (default), this flag is used.
     pub fn with_preload_offset_index(self, preload_offset_index: bool) -> Self {
         Self {
             preload_offset_index,
@@ -199,15 +207,28 @@ impl AsyncFileReader for ParquetObjectReader {
         options: Option<&'a ArrowReaderOptions>,
     ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
         Box::pin(async move {
+            let metadata_opts = options.map(|o| o.metadata_options().clone());
             let mut metadata = ParquetMetaDataReader::new()
-                .with_column_indexes(self.preload_column_index)
-                .with_offset_indexes(self.preload_offset_index)
+                .with_metadata_options(metadata_opts)
+                .with_column_index_policy(PageIndexPolicy::from(self.preload_column_index))
+                .with_offset_index_policy(PageIndexPolicy::from(self.preload_offset_index))
                 .with_prefetch_hint(self.metadata_size_hint);
 
             #[cfg(feature = "encryption")]
             if let Some(options) = options {
-                metadata = metadata
-                    .with_decryption_properties(options.file_decryption_properties.as_ref());
+                metadata = metadata.with_decryption_properties(
+                    options.file_decryption_properties.as_ref().map(Arc::clone),
+                );
+            }
+
+            // Override page index policies from ArrowReaderOptions if specified and not Skip.
+            // When page_index_policy is Skip (default), use the reader's preload flags.
+            // When page_index_policy is Optional or Required, override the preload flags
+            // to ensure the specified policy takes precedence.
+            if let Some(options) = options {
+                if options.page_index_policy != PageIndexPolicy::Skip {
+                    metadata = metadata.with_page_index_policy(options.page_index_policy);
+                }
             }
 
             let metadata = if let Some(file_size) = self.file_size {
@@ -223,15 +244,17 @@ impl AsyncFileReader for ParquetObjectReader {
 
 #[cfg(test)]
 mod tests {
+    use crate::arrow::async_reader::ArrowReaderOptions;
+    use crate::file::metadata::PageIndexPolicy;
     use std::sync::{
-        atomic::{AtomicUsize, Ordering},
         Arc,
+        atomic::{AtomicUsize, Ordering},
     };
 
     use futures::TryStreamExt;
 
-    use crate::arrow::async_reader::{AsyncFileReader, ParquetObjectReader};
     use crate::arrow::ParquetRecordBatchStreamBuilder;
+    use crate::arrow::async_reader::{AsyncFileReader, ParquetObjectReader};
     use crate::errors::ParquetError;
     use arrow::util::test_util::parquet_test_data;
     use futures::FutureExt;
@@ -251,6 +274,18 @@ mod tests {
         (meta, Arc::new(store) as Arc<dyn ObjectStore>)
     }
 
+    async fn get_meta_store_with_page_index() -> (ObjectMeta, Arc<dyn ObjectStore>) {
+        let res = parquet_test_data();
+        let store = LocalFileSystem::new_with_prefix(res).unwrap();
+
+        let meta = store
+            .head(&Path::from("alltypes_tiny_pages_plain.parquet"))
+            .await
+            .unwrap();
+
+        (meta, Arc::new(store) as Arc<dyn ObjectStore>)
+    }
+
     #[tokio::test]
     async fn test_simple() {
         let (meta, store) = get_meta_store().await;
@@ -292,10 +327,7 @@ mod tests {
             Ok(_) => panic!("expected failure"),
             Err(e) => {
                 let err = e.to_string();
-                assert!(
-                    err.contains("not found: No such file or directory (os error 2)"),
-                    "{err}",
-                );
+                assert!(err.contains("I don't exist.parquet not found:"), "{err}",);
             }
         }
     }
@@ -382,4 +414,97 @@ mod tests {
 
         assert!(err.to_string().contains("was cancelled"));
     }
+
+    #[tokio::test]
+    async fn test_page_index_policy_skip_uses_preload_true() {
+        let (meta, store) = get_meta_store_with_page_index().await;
+
+        // Create reader with preload flags set to true
+        let mut reader = ParquetObjectReader::new(store.clone(), meta.location.clone())
+            .with_file_size(meta.size)
+            .with_preload_column_index(true)
+            .with_preload_offset_index(true);
+
+        // Create options with page_index_policy set to Skip (default)
+        let mut options = ArrowReaderOptions::new();
+        options.page_index_policy = PageIndexPolicy::Skip;
+
+        // Get metadata - Skip means use reader's preload flags (true)
+        let metadata = reader.get_metadata(Some(&options)).await.unwrap();
+
+        // With preload=true, indexes should be loaded since the test file has them
+        assert!(metadata.column_index().is_some());
+    }
+
+    #[tokio::test]
+    async fn test_page_index_policy_optional_overrides_preload_false() {
+        let (meta, store) = get_meta_store_with_page_index().await;
+
+        // Create reader with preload flags set to false
+        let mut reader = ParquetObjectReader::new(store.clone(), meta.location.clone())
+            .with_file_size(meta.size)
+            .with_preload_column_index(false)
+            .with_preload_offset_index(false);
+
+        // Create options with page_index_policy set to Optional
+        let mut options = ArrowReaderOptions::new();
+        options.page_index_policy = PageIndexPolicy::Optional;
+
+        // Get metadata - Optional overrides preload flags and attempts to load indexes
+        let metadata = reader.get_metadata(Some(&options)).await.unwrap();
+
+        // With Optional policy, it will TRY to load indexes but won't fail if they don't exist
+        // The test file has page indexes, so they will be some
+        assert!(metadata.column_index().is_some());
+    }
+
+    #[tokio::test]
+    async fn test_page_index_policy_optional_vs_skip() {
+        let (meta, store) = get_meta_store_with_page_index().await;
+
+        // Test 1: preload=false + Skip policy -> uses preload flags (false)
+        let mut reader1 = ParquetObjectReader::new(store.clone(), meta.location.clone())
+            .with_file_size(meta.size)
+            .with_preload_column_index(false)
+            .with_preload_offset_index(false);
+
+        let mut options1 = ArrowReaderOptions::new();
+        options1.page_index_policy = PageIndexPolicy::Skip;
+        let metadata1 = reader1.get_metadata(Some(&options1)).await.unwrap();
+
+        // Test 2: preload=false + Optional policy -> overrides to try loading
+        let mut reader2 = ParquetObjectReader::new(store.clone(), meta.location.clone())
+            .with_file_size(meta.size)
+            .with_preload_column_index(false)
+            .with_preload_offset_index(false);
+
+        let mut options2 = ArrowReaderOptions::new();
+        options2.page_index_policy = PageIndexPolicy::Optional;
+        let metadata2 = reader2.get_metadata(Some(&options2)).await.unwrap();
+
+        // Both should succeed (no panic/error)
+        // metadata1 (Skip) uses preload=false -> Skip policy
+        // metadata2 (Optional) overrides preload=false -> Optional policy
+        assert!(metadata1.column_index().is_none());
+        assert!(metadata2.column_index().is_some());
+    }
+
+    #[tokio::test]
+    async fn test_page_index_policy_no_options_uses_preload() {
+        let (meta, store) = get_meta_store_with_page_index().await;
+
+        // Create reader with both preload flags (column and offset index) set to true
+        let mut reader = ParquetObjectReader::new(store, meta.location)
+            .with_file_size(meta.size)
+            .with_preload_column_index(true)
+            .with_preload_offset_index(true);
+
+        // Get metadata without options - should use reader's preload flags
+        let metadata = reader.get_metadata(None).await.unwrap();
+
+        // With no options provided, preload flags (true) should be respected
+        // and converted to Optional policy internally (preload=true -> Optional)
+        // The test file has page indexes, so both the column and offset index will be some
+        assert!(metadata.column_index().is_some() && metadata.offset_index().is_some());
+    }
 }
diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs
index 27bd2bf816cb..9018c09f2a89 100644
--- a/parquet/src/arrow/async_writer/mod.rs
+++ b/parquet/src/arrow/async_writer/mod.rs
@@ -61,17 +61,19 @@ mod store;
 pub use store::*;
 
 use crate::{
-    arrow::arrow_writer::ArrowWriterOptions,
     arrow::ArrowWriter,
+    arrow::arrow_writer::ArrowWriterOptions,
     errors::{ParquetError, Result},
-    file::{metadata::RowGroupMetaData, properties::WriterProperties},
-    format::{FileMetaData, KeyValue},
+    file::{
+        metadata::{KeyValue, ParquetMetaData, RowGroupMetaData},
+        properties::WriterProperties,
+    },
 };
 use arrow_array::RecordBatch;
 use arrow_schema::SchemaRef;
 use bytes::Bytes;
-use futures::future::BoxFuture;
 use futures::FutureExt;
+use futures::future::BoxFuture;
 use std::mem;
 use tokio::io::{AsyncWrite, AsyncWriteExt};
 
@@ -245,7 +247,7 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
     /// Unlike [`Self::close`] this does not consume self
     ///
     /// Attempting to write after calling finish will result in an error
-    pub async fn finish(&mut self) -> Result<FileMetaData> {
+    pub async fn finish(&mut self) -> Result<ParquetMetaData> {
         let metadata = self.sync_writer.finish()?;
 
         // Force to flush the remaining data.
@@ -258,7 +260,7 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
     /// Close and finalize the writer.
     ///
     /// All the data in the inner buffer will be force flushed.
-    pub async fn close(mut self) -> Result<FileMetaData> {
+    pub async fn close(mut self) -> Result<ParquetMetaData> {
         self.finish().await
     }
 
@@ -292,20 +294,18 @@ impl<W: AsyncFileWriter> AsyncArrowWriter<W> {
 
 #[cfg(test)]
 mod tests {
+    use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
     use arrow::datatypes::{DataType, Field, Schema};
     use arrow_array::{ArrayRef, BinaryArray, Int32Array, Int64Array, RecordBatchReader};
     use bytes::Bytes;
     use std::sync::Arc;
-    use tokio::pin;
-
-    use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder};
 
     use super::*;
 
     fn get_test_reader() -> ParquetRecordBatchReader {
         let testdata = arrow::util::test_util::parquet_test_data();
         // This test file is large enough to generate multiple row groups.
-        let path = format!("{}/alltypes_tiny_pages_plain.parquet", testdata);
+        let path = format!("{testdata}/alltypes_tiny_pages_plain.parquet");
         let original_data = Bytes::from(std::fs::read(path).unwrap());
         ParquetRecordBatchReaderBuilder::try_new(original_data)
             .unwrap()
@@ -365,49 +365,6 @@ mod tests {
         assert_eq!(sync_buffer, async_buffer);
     }
 
-    struct TestAsyncSink {
-        sink: Vec<u8>,
-        min_accept_bytes: usize,
-        expect_total_bytes: usize,
-    }
-
-    impl AsyncWrite for TestAsyncSink {
-        fn poll_write(
-            self: std::pin::Pin<&mut Self>,
-            cx: &mut std::task::Context<'_>,
-            buf: &[u8],
-        ) -> std::task::Poll<std::result::Result<usize, std::io::Error>> {
-            let written_bytes = self.sink.len();
-            if written_bytes + buf.len() < self.expect_total_bytes {
-                assert!(buf.len() >= self.min_accept_bytes);
-            } else {
-                assert_eq!(written_bytes + buf.len(), self.expect_total_bytes);
-            }
-
-            let sink = &mut self.get_mut().sink;
-            pin!(sink);
-            sink.poll_write(cx, buf)
-        }
-
-        fn poll_flush(
-            self: std::pin::Pin<&mut Self>,
-            cx: &mut std::task::Context<'_>,
-        ) -> std::task::Poll<std::result::Result<(), std::io::Error>> {
-            let sink = &mut self.get_mut().sink;
-            pin!(sink);
-            sink.poll_flush(cx)
-        }
-
-        fn poll_shutdown(
-            self: std::pin::Pin<&mut Self>,
-            cx: &mut std::task::Context<'_>,
-        ) -> std::task::Poll<std::result::Result<(), std::io::Error>> {
-            let sink = &mut self.get_mut().sink;
-            pin!(sink);
-            sink.poll_shutdown(cx)
-        }
-    }
-
     #[tokio::test]
     async fn test_async_writer_bytes_written() {
         let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
diff --git a/parquet/src/arrow/async_writer/store.rs b/parquet/src/arrow/async_writer/store.rs
index ad09eae4996f..b067e4d92785 100644
--- a/parquet/src/arrow/async_writer/store.rs
+++ b/parquet/src/arrow/async_writer/store.rs
@@ -21,9 +21,9 @@ use std::sync::Arc;
 
 use crate::arrow::async_writer::AsyncFileWriter;
 use crate::errors::{ParquetError, Result};
+use object_store::ObjectStore;
 use object_store::buffered::BufWriter;
 use object_store::path::Path;
-use object_store::ObjectStore;
 use tokio::io::AsyncWriteExt;
 
 /// [`ParquetObjectWriter`] for writing to parquet to [`ObjectStore`]
@@ -123,8 +123,8 @@ mod tests {
     use std::sync::Arc;
 
     use super::*;
-    use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
     use crate::arrow::AsyncArrowWriter;
+    use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 
     #[tokio::test]
     async fn test_async_writer() {
diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs
index 386177639356..71fb18917d9a 100644
--- a/parquet/src/arrow/buffer/dictionary_buffer.rs
+++ b/parquet/src/arrow/buffer/dictionary_buffer.rs
@@ -18,7 +18,14 @@
 use crate::arrow::buffer::offset_buffer::OffsetBuffer;
 use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::errors::{ParquetError, Result};
-use arrow_array::{make_array, Array, ArrayRef, OffsetSizeTrait};
+use arrow_array::{Array, GenericByteArray, downcast_integer};
+use arrow_array::{
+    ArrayRef, FixedSizeBinaryArray, OffsetSizeTrait,
+    builder::{FixedSizeBinaryDictionaryBuilder, GenericByteDictionaryBuilder},
+    cast::AsArray,
+    make_array,
+    types::{ArrowDictionaryKeyType, ByteArrayType},
+};
 use arrow_buffer::{ArrowNativeType, Buffer};
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::DataType as ArrowType;
@@ -158,7 +165,12 @@ impl<K: ArrowNativeType + Ord, V: OffsetSizeTrait> DictionaryBuffer<K, V> {
                     unreachable!()
                 };
                 let values = if let ArrowType::FixedSizeBinary(size) = **value_type {
-                    arrow_cast::cast(&values, &ArrowType::FixedSizeBinary(size)).unwrap()
+                    let binary = values.as_binary::<i32>();
+                    Arc::new(FixedSizeBinaryArray::new(
+                        size,
+                        binary.values().clone(),
+                        binary.nulls().cloned(),
+                    )) as _
                 } else {
                     values
                 };
@@ -177,17 +189,13 @@ impl<K: ArrowNativeType + Ord, V: OffsetSizeTrait> DictionaryBuffer<K, V> {
                 Ok(make_array(data))
             }
             Self::Values { values } => {
-                let value_type = match data_type {
-                    ArrowType::Dictionary(_, v) => v.as_ref().clone(),
+                let (key_type, value_type) = match data_type {
+                    ArrowType::Dictionary(k, v) => (k, v.as_ref().clone()),
                     _ => unreachable!(),
                 };
 
-                // This will compute a new dictionary
-                let array =
-                    arrow_cast::cast(&values.into_array(null_buffer, value_type), data_type)
-                        .expect("cast should be infallible");
-
-                Ok(array)
+                let array = values.into_array(null_buffer, value_type);
+                pack_values(key_type, &array)
             }
         }
     }
@@ -213,6 +221,60 @@ impl<K: ArrowNativeType, V: OffsetSizeTrait> ValuesBuffer for DictionaryBuffer<K
     }
 }
 
+macro_rules! dict_helper {
+    ($k:ty, $array:ident) => {
+        match $array.data_type() {
+            ArrowType::Utf8 => pack_values_impl::<$k, _>($array.as_string::<i32>()),
+            ArrowType::LargeUtf8 => pack_values_impl::<$k, _>($array.as_string::<i64>()),
+            ArrowType::Binary => pack_values_impl::<$k, _>($array.as_binary::<i32>()),
+            ArrowType::LargeBinary => pack_values_impl::<$k, _>($array.as_binary::<i64>()),
+            ArrowType::FixedSizeBinary(_) => {
+                pack_fixed_values_impl::<$k>($array.as_fixed_size_binary())
+            }
+            _ => unreachable!(),
+        }
+    };
+}
+
+fn pack_values(key_type: &ArrowType, values: &ArrayRef) -> Result<ArrayRef> {
+    downcast_integer! {
+        key_type => (dict_helper, values),
+            _ => unreachable!(),
+    }
+}
+
+/// Packs `array` into a dictionary with keys `K`; key overflow surfaces as `Err`, not a panic
+fn pack_values_impl<K: ArrowDictionaryKeyType, T: ByteArrayType>(
+    array: &GenericByteArray<T>,
+) -> Result<ArrayRef> {
+    let mut builder = GenericByteDictionaryBuilder::<K, T>::with_capacity(array.len(), 1024, 1024);
+    for x in array {
+        match x {
+            Some(x) => builder.append(x).map(|_| ())?,
+            None => builder.append_null(),
+        }
+    }
+    Ok(Arc::new(builder.finish()))
+}
+
+/// Packs `array` into a dictionary with keys `K`; key overflow surfaces as `Err`, not a panic
+fn pack_fixed_values_impl<K: ArrowDictionaryKeyType>(
+    array: &FixedSizeBinaryArray,
+) -> Result<ArrayRef> {
+    let mut builder = FixedSizeBinaryDictionaryBuilder::<K>::with_capacity(
+        array.len(),
+        1024,
+        array.value_length(),
+    );
+    for x in array {
+        match x {
+            Some(x) => builder.append(x).map(|_| ())?,
+            None => builder.append_null(),
+        }
+    }
+    Ok(Arc::new(builder.finish()))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 5051dce12b37..209ed4e5c15f 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -19,7 +19,7 @@ use crate::arrow::buffer::bit_util::iter_set_bits_rev;
 use crate::arrow::record_reader::buffer::ValuesBuffer;
 use crate::errors::{ParquetError, Result};
 use crate::util::utf8::check_valid_utf8;
-use arrow_array::{make_array, ArrayRef, OffsetSizeTrait};
+use arrow_array::{ArrayRef, OffsetSizeTrait, make_array};
 use arrow_buffer::{ArrowNativeType, Buffer};
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::DataType as ArrowType;
@@ -321,7 +321,7 @@ mod tests {
     #[test]
     fn test_pad_nulls_empty() {
         let mut buffer = OffsetBuffer::<i32>::default();
-        let valid_mask = Buffer::from_iter(std::iter::repeat(false).take(9));
+        let valid_mask = Buffer::from_iter(std::iter::repeat_n(false, 9));
         buffer.pad_nulls(0, 0, 9, valid_mask.as_slice());
 
         let array = buffer.into_array(Some(valid_mask), ArrowType::Utf8);
diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs
index fd7d6c213f04..2802f97f8f37 100644
--- a/parquet/src/arrow/buffer/view_buffer.rs
+++ b/parquet/src/arrow/buffer/view_buffer.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::arrow::record_reader::buffer::ValuesBuffer;
-use arrow_array::{builder::make_view, make_array, ArrayRef};
+use arrow_array::{ArrayRef, builder::make_view, make_array};
 use arrow_buffer::Buffer;
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::DataType as ArrowType;
@@ -49,9 +49,9 @@ impl ViewBuffer {
     /// - `offset` and `offset + len` are valid indices into the buffer
     /// - The `(offset, offset + len)` is valid value for the native type.
     pub unsafe fn append_view_unchecked(&mut self, block: u32, offset: u32, len: u32) {
-        let b = self.buffers.get_unchecked(block as usize);
+        let b = unsafe { self.buffers.get_unchecked(block as usize) };
         let end = offset.saturating_add(len);
-        let b = b.get_unchecked(offset as usize..end as usize);
+        let b = unsafe { b.get_unchecked(offset as usize..end as usize) };
 
         let view = make_view(b, block, offset);
 
@@ -91,7 +91,7 @@ impl ViewBuffer {
                 let array = unsafe { builder.build_unchecked() };
                 make_array(array)
             }
-            _ => panic!("Unsupported data type: {:?}", data_type),
+            _ => panic!("Unsupported data type: {data_type}"),
         }
     }
 }
diff --git a/parquet/src/arrow/decoder/dictionary_index.rs b/parquet/src/arrow/decoder/dictionary_index.rs
index 38f2b058360c..bb96f4bf98d6 100644
--- a/parquet/src/arrow/decoder/dictionary_index.rs
+++ b/parquet/src/arrow/decoder/dictionary_index.rs
@@ -42,18 +42,18 @@ pub struct DictIndexDecoder {
 impl DictIndexDecoder {
     /// Create a new [`DictIndexDecoder`] with the provided data page, the number of levels
     /// associated with this data page, and the number of non-null values (if known)
-    pub fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Self {
+    pub fn new(data: Bytes, num_levels: usize, num_values: Option<usize>) -> Result<Self> {
         let bit_width = data[0];
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(data.slice(1..));
+        decoder.set_data(data.slice(1..))?;
 
-        Self {
+        Ok(Self {
             decoder,
             index_buf: Box::new([0; 1024]),
             index_buf_len: 0,
             index_offset: 0,
             max_remaining_values: num_values.unwrap_or(num_levels),
-        }
+        })
     }
 
     /// Read up to `len` values, returning the number of values read
diff --git a/parquet/src/arrow/in_memory_row_group.rs b/parquet/src/arrow/in_memory_row_group.rs
new file mode 100644
index 000000000000..4baa8cf9de80
--- /dev/null
+++ b/parquet/src/arrow/in_memory_row_group.rs
@@ -0,0 +1,317 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::arrow::ProjectionMask;
+use crate::arrow::array_reader::RowGroups;
+use crate::arrow::arrow_reader::RowSelection;
+use crate::column::page::{PageIterator, PageReader};
+use crate::errors::ParquetError;
+use crate::file::metadata::{ParquetMetaData, RowGroupMetaData};
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::file::reader::{ChunkReader, Length, SerializedPageReader};
+use bytes::{Buf, Bytes};
+use std::ops::Range;
+use std::sync::Arc;
+
+/// An in-memory collection of column chunks
+#[derive(Debug)]
+pub(crate) struct InMemoryRowGroup<'a> {
+    pub(crate) offset_index: Option<&'a [OffsetIndexMetaData]>,
+    /// Column chunks for this row group
+    pub(crate) column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
+    pub(crate) row_count: usize,
+    pub(crate) row_group_idx: usize,
+    pub(crate) metadata: &'a ParquetMetaData,
+}
+
+/// What ranges to fetch for the columns in this row group
+#[derive(Debug)]
+pub(crate) struct FetchRanges {
+    /// The byte ranges to fetch
+    pub(crate) ranges: Vec<Range<u64>>,
+    /// If `Some`, the start offsets of each page for each column chunk
+    pub(crate) page_start_offsets: Option<Vec<Vec<u64>>>,
+}
+
+impl InMemoryRowGroup<'_> {
+    /// Returns the byte ranges to fetch for the columns specified in
+    /// `projection` and `selection`.
+    ///
+    /// `cache_mask` indicates which columns, if any, are being cached by
+    /// [`RowGroupCache`](crate::arrow::array_reader::RowGroupCache).
+    /// The `selection` for Cached columns is expanded to batch boundaries to simplify
+    /// accounting for what data is cached.
+    pub(crate) fn fetch_ranges(
+        &self,
+        projection: &ProjectionMask,
+        selection: Option<&RowSelection>,
+        batch_size: usize,
+        cache_mask: Option<&ProjectionMask>,
+    ) -> FetchRanges {
+        let metadata = self.metadata.row_group(self.row_group_idx);
+        if let Some((selection, offset_index)) = selection.zip(self.offset_index) {
+            let expanded_selection =
+                selection.expand_to_batch_boundaries(batch_size, self.row_count);
+
+            // If we have a `RowSelection` and an `OffsetIndex` then only fetch
+            // pages required for the `RowSelection`
+            // Consider preallocating outer vec: https://github.com/apache/arrow-rs/issues/8667
+            let mut page_start_offsets: Vec<Vec<u64>> = vec![];
+
+            let ranges = self
+                .column_chunks
+                .iter()
+                .zip(metadata.columns())
+                .enumerate()
+                .filter(|&(idx, (chunk, _chunk_meta))| {
+                    chunk.is_none() && projection.leaf_included(idx)
+                })
+                .flat_map(|(idx, (_chunk, chunk_meta))| {
+                    // If the first page does not start at the beginning of the column,
+                    // then we need to also fetch a dictionary page.
+                    let mut ranges: Vec<Range<u64>> = vec![];
+                    let (start, _len) = chunk_meta.byte_range();
+                    match offset_index[idx].page_locations.first() {
+                        Some(first) if first.offset as u64 != start => {
+                            ranges.push(start..first.offset as u64);
+                        }
+                        _ => (),
+                    }
+
+                    // Expand selection to batch boundaries if needed for caching
+                    // (see doc comment for this function for details on `cache_mask`)
+                    let use_expanded = cache_mask.map(|m| m.leaf_included(idx)).unwrap_or(false);
+                    if use_expanded {
+                        ranges.extend(
+                            expanded_selection.scan_ranges(&offset_index[idx].page_locations),
+                        );
+                    } else {
+                        ranges.extend(selection.scan_ranges(&offset_index[idx].page_locations));
+                    }
+                    page_start_offsets.push(ranges.iter().map(|range| range.start).collect());
+
+                    ranges
+                })
+                .collect();
+            FetchRanges {
+                ranges,
+                page_start_offsets: Some(page_start_offsets),
+            }
+        } else {
+            let ranges = self
+                .column_chunks
+                .iter()
+                .enumerate()
+                .filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx))
+                .map(|(idx, _chunk)| {
+                    let column = metadata.column(idx);
+                    let (start, length) = column.byte_range();
+                    start..(start + length)
+                })
+                .collect();
+            FetchRanges {
+                ranges,
+                page_start_offsets: None,
+            }
+        }
+    }
+
+    /// Fills in `self.column_chunks` with the data fetched from `chunk_data`.
+    ///
+    /// This function **must** be called with the data from the ranges returned by `fetch_ranges`
+    /// and the corresponding `page_start_offsets`, using the same `projection` and `selection`.
+    pub(crate) fn fill_column_chunks<I>(
+        &mut self,
+        projection: &ProjectionMask,
+        page_start_offsets: Option<Vec<Vec<u64>>>,
+        chunk_data: I,
+    ) where
+        I: IntoIterator<Item = Bytes>,
+    {
+        let mut chunk_data = chunk_data.into_iter();
+        let metadata = self.metadata.row_group(self.row_group_idx);
+        if let Some(page_start_offsets) = page_start_offsets {
+            // Per-column start offsets were supplied: consume one buffer from `chunk_data` per
+            // offset and store each column as a sparse chunk of (start offset, bytes) pairs
+            let mut page_start_offsets = page_start_offsets.into_iter();
+
+            for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
+                if chunk.is_some() || !projection.leaf_included(idx) {
+                    continue;
+                }
+
+                if let Some(offsets) = page_start_offsets.next() {
+                    let mut chunks = Vec::with_capacity(offsets.len());
+                    for _ in 0..offsets.len() {
+                        chunks.push(chunk_data.next().unwrap());
+                    }
+
+                    *chunk = Some(Arc::new(ColumnChunkData::Sparse {
+                        length: metadata.column(idx).byte_range().1 as usize,
+                        data: offsets
+                            .into_iter()
+                            .map(|x| x as usize)
+                            .zip(chunks.into_iter())
+                            .collect(),
+                    }))
+                }
+            }
+        } else {
+            for (idx, chunk) in self.column_chunks.iter_mut().enumerate() {
+                if chunk.is_some() || !projection.leaf_included(idx) {
+                    continue;
+                }
+
+                if let Some(data) = chunk_data.next() {
+                    *chunk = Some(Arc::new(ColumnChunkData::Dense {
+                        offset: metadata.column(idx).byte_range().0 as usize,
+                        data,
+                    }));
+                }
+            }
+        }
+    }
+}
+
+impl RowGroups for InMemoryRowGroup<'_> {
+    fn num_rows(&self) -> usize {
+        self.row_count
+    }
+
+    /// Return chunks for column i
+    fn column_chunks(&self, i: usize) -> crate::errors::Result<Box<dyn PageIterator>> {
+        match &self.column_chunks[i] {
+            None => Err(ParquetError::General(format!(
+                "Invalid column index {i}, column was not fetched"
+            ))),
+            Some(data) => {
+                let page_locations = self
+                    .offset_index
+                    // filter out empty offset indexes (old versions specified Some(vec![]) when no present)
+                    .filter(|index| !index.is_empty())
+                    .map(|index| index[i].page_locations.clone());
+                let column_chunk_metadata = self.metadata.row_group(self.row_group_idx).column(i);
+                let page_reader = SerializedPageReader::new(
+                    data.clone(),
+                    column_chunk_metadata,
+                    self.row_count,
+                    page_locations,
+                )?;
+                let page_reader = page_reader.add_crypto_context(
+                    self.row_group_idx,
+                    i,
+                    self.metadata,
+                    column_chunk_metadata,
+                )?;
+
+                let page_reader: Box<dyn PageReader> = Box::new(page_reader);
+
+                Ok(Box::new(ColumnChunkIterator {
+                    reader: Some(Ok(page_reader)),
+                }))
+            }
+        }
+    }
+
+    fn row_groups(&self) -> Box<dyn Iterator<Item = &RowGroupMetaData> + '_> {
+        Box::new(std::iter::once(self.metadata.row_group(self.row_group_idx)))
+    }
+
+    fn metadata(&self) -> &ParquetMetaData {
+        self.metadata
+    }
+}
+
+/// An in-memory column chunk.
+/// This allows us to hold either dense column chunks or sparse column chunks and easily
+/// access them by offset.
+#[derive(Clone, Debug)]
+pub(crate) enum ColumnChunkData {
+    /// Column chunk data representing only a subset of data pages.
+    /// For example if a row selection (possibly caused by a filter in a query) causes us to read only
+    /// a subset of the rows in the column.
+    Sparse {
+        /// Length of the full column chunk
+        length: usize,
+        /// Subset of data pages included in this sparse chunk.
+        ///
+        /// Each element is a tuple of (page offset within file, page data).
+        /// Each entry is a complete page and the list is ordered by offset.
+        data: Vec<(usize, Bytes)>,
+    },
+    /// Full column chunk and the offset within the original file
+    Dense { offset: usize, data: Bytes },
+}
+
+impl ColumnChunkData {
+    /// Return the bytes at file offset `start` (sparse: the page stored at `start`; dense: from `start` on)
+    fn get(&self, start: u64) -> crate::errors::Result<Bytes> {
+        match &self {
+            ColumnChunkData::Sparse { data, .. } => data
+                .binary_search_by_key(&start, |(offset, _)| *offset as u64)
+                .map(|idx| data[idx].1.clone())
+                .map_err(|_| {
+                    ParquetError::General(format!(
+                        "Invalid offset in sparse column chunk data: {start}, no matching page found. \
+                         If you are using a `SelectionStrategyPolicy::Mask`, ensure that the OffsetIndex is provided when \
+                         creating the InMemoryRowGroup."
+                    ))
+                }),
+            ColumnChunkData::Dense { offset, data } => {
+                let start = start as usize - *offset;
+                Ok(data.slice(start..))
+            }
+        }
+    }
+}
+
+impl Length for ColumnChunkData {
+    /// Length in bytes: the recorded full-chunk length when sparse, the buffer size when dense
+    fn len(&self) -> u64 {
+        match self {
+            Self::Sparse { length, .. } => *length as u64,
+            Self::Dense { data, .. } => data.len() as u64,
+        }
+    }
+}
+
+impl ChunkReader for ColumnChunkData {
+    type T = bytes::buf::Reader<Bytes>;
+
+    fn get_read(&self, start: u64) -> crate::errors::Result<Self::T> {
+        self.get(start).map(Buf::reader)
+    }
+
+    fn get_bytes(&self, start: u64, length: usize) -> crate::errors::Result<Bytes> {
+        self.get(start).map(|chunk| chunk.slice(..length))
+    }
+}
+
+/// Implements [`PageIterator`] for a single column chunk, yielding a single [`PageReader`]
+struct ColumnChunkIterator {
+    reader: Option<crate::errors::Result<Box<dyn PageReader>>>,
+}
+
+impl Iterator for ColumnChunkIterator {
+    type Item = crate::errors::Result<Box<dyn PageReader>>;
+    /// Hands out the stored page reader on the first call and `None` on every later call
+    fn next(&mut self) -> Option<Self::Item> {
+        std::mem::take(&mut self.reader)
+    }
+}
+
+impl PageIterator for ColumnChunkIterator {}
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index e33d6a05a757..52152988166f 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -190,25 +190,27 @@ pub mod async_reader;
 #[cfg(feature = "async")]
 pub mod async_writer;
 
+pub mod push_decoder;
+
+mod in_memory_row_group;
 mod record_reader;
+
 experimental!(mod schema);
 
-use std::sync::Arc;
+use std::fmt::Debug;
 
 pub use self::arrow_writer::ArrowWriter;
 #[cfg(feature = "async")]
 pub use self::async_reader::ParquetRecordBatchStreamBuilder;
 #[cfg(feature = "async")]
 pub use self::async_writer::AsyncArrowWriter;
-use crate::schema::types::{SchemaDescriptor, Type};
+use crate::schema::types::SchemaDescriptor;
 use arrow_schema::{FieldRef, Schema};
-// continue to export deprecated methods until they are removed
-#[allow(deprecated)]
-pub use self::schema::arrow_to_parquet_schema;
 
 pub use self::schema::{
-    add_encoded_arrow_schema_to_metadata, encode_arrow_schema, parquet_to_arrow_field_levels,
-    parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, ArrowSchemaConverter, FieldLevels,
+    ArrowSchemaConverter, FieldLevels, add_encoded_arrow_schema_to_metadata, encode_arrow_schema,
+    parquet_to_arrow_field_levels, parquet_to_arrow_field_levels_with_virtual,
+    parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, virtual_type::*,
 };
 
 /// Schema metadata key used to store serialized Arrow schema
@@ -263,7 +265,7 @@ pub struct ProjectionMask {
     /// A mask of `[true, false, true, false]` will result in a schema 2
     /// elements long:
     /// * `fields[0]`: `a`
-    /// * `fields[1]`: `c`    
+    /// * `fields[1]`: `c`
     ///
     /// A mask of `None` will result in a schema 4 elements long:
     /// * `fields[0]`: `a`
@@ -279,6 +281,13 @@ impl ProjectionMask {
         Self { mask: None }
     }
 
+    /// Create a [`ProjectionMask`] with `len` entries, none of which are selected
+    pub fn none(len: usize) -> Self {
+        // explicit all-`false` mask, as opposed to a `None` mask, which selects everything
+        let mask = vec![false; len];
+        Self { mask: Some(mask) }
+    }
+
     /// Create a [`ProjectionMask`] which selects only the specified leaf columns
     ///
     /// Note: repeated or out of order indices will not impact the final mask
@@ -314,21 +323,6 @@ impl ProjectionMask {
         Self { mask: Some(mask) }
     }
 
-    // Given a starting point in the schema, do a DFS for that node adding leaf paths to `paths`.
-    fn find_leaves(root: &Arc<Type>, parent: Option<&String>, paths: &mut Vec<String>) {
-        let path = parent
-            .map(|p| [p, root.name()].join("."))
-            .unwrap_or(root.name().to_string());
-        if root.is_group() {
-            for child in root.get_fields() {
-                Self::find_leaves(child, Some(&path), paths);
-            }
-        } else {
-            // Reached a leaf, add to paths
-            paths.push(path);
-        }
-    }
-
     /// Create a [`ProjectionMask`] which selects only the named columns
     ///
     /// All leaf columns that fall below a given name will be selected. For example, given
@@ -356,21 +350,24 @@ impl ProjectionMask {
     /// Note: repeated or out of order indices will not impact the final mask.
     ///
     /// i.e. `["b", "c"]` will construct the same mask as `["c", "b", "c"]`.
+    ///
+    /// Also, this will not produce the desired results if a column contains a '.' in its name.
+    /// Use [`Self::leaves`] or [`Self::roots`] in that case.
     pub fn columns<'a>(
         schema: &SchemaDescriptor,
         names: impl IntoIterator<Item = &'a str>,
     ) -> Self {
-        // first make vector of paths for leaf columns
-        let mut paths: Vec<String> = vec![];
-        for root in schema.root_schema().get_fields() {
-            Self::find_leaves(root, None, &mut paths);
-        }
-        assert_eq!(paths.len(), schema.num_columns());
-
         let mut mask = vec![false; schema.num_columns()];
         for name in names {
-            for idx in 0..schema.num_columns() {
-                if paths[idx].starts_with(name) {
+            let name_path: Vec<&str> = name.split('.').collect();
+            for (idx, col) in schema.columns().iter().enumerate() {
+                let path = col.path().parts();
+                // searching for "a.b.c" cannot match "a.b"
+                if name_path.len() > path.len() {
+                    continue;
+                }
+                // now path >= name_path, so check that each element in name_path matches
+                if name_path.iter().zip(path.iter()).all(|(a, b)| a == b) {
                     mask[idx] = true;
                 }
             }
@@ -422,6 +419,51 @@ impl ProjectionMask {
             }
         }
     }
+
+    /// Build a copy of this [`ProjectionMask`] restricted to leaf columns that are not
+    /// part of a nested type, such as struct, list, or map
+    ///
+    /// Returns `None` when no non-nested columns remain selected
+    pub(crate) fn without_nested_types(&self, schema: &SchemaDescriptor) -> Option<Self> {
+        let leaf_count = schema.num_columns();
+        let root_count = schema.root_schema().get_fields().len();
+
+        // Tally how many leaves hang off each root column
+        let mut leaves_per_root = vec![0usize; root_count];
+        for leaf in 0..leaf_count {
+            leaves_per_root[schema.get_column_root_idx(leaf)] += 1;
+        }
+
+        // A leaf survives only if it is selected by this mask, its root column owns
+        // exactly one leaf (treated as non-nested), and that root is not a LIST.
+        // LIST needs the explicit check because it is encoded as a wrapped logical
+        // type with a single leaf, e.g.
+        // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
+        //
+        // ```text
+        // // List<String> (list non-null, elements nullable)
+        // required group my_list (LIST) {
+        //   repeated group list {
+        //     optional binary element (STRING);
+        //   }
+        // }
+        // ```
+        let flat_leaves: Vec<usize> = (0..leaf_count)
+            .filter(|&leaf| self.leaf_included(leaf))
+            .filter(|&leaf| {
+                let root = schema.get_column_root(leaf);
+                let root_idx = schema.get_column_root_idx(leaf);
+                leaves_per_root[root_idx] == 1 && !root.is_list()
+            })
+            .collect();
+
+        // An empty selection is reported as `None`, not as an all-false mask
+        if flat_leaves.is_empty() {
+            None
+        } else {
+            Some(ProjectionMask::leaves(schema, flat_leaves))
+        }
+    }
 }
 
 /// Lookups up the parquet column by name
@@ -452,7 +494,9 @@ pub fn parquet_column<'a>(
 #[cfg(test)]
 mod test {
     use crate::arrow::ArrowWriter;
-    use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader, ParquetMetaDataWriter};
+    use crate::file::metadata::{
+        ParquetMetaData, ParquetMetaDataOptions, ParquetMetaDataReader, ParquetMetaDataWriter,
+    };
     use crate::file::properties::{EnabledStatistics, WriterProperties};
     use crate::schema::parser::parse_message_type;
     use crate::schema::types::SchemaDescriptor;
@@ -463,18 +507,23 @@ mod test {
     use super::ProjectionMask;
 
     #[test]
+    #[allow(deprecated)]
     // Reproducer for https://github.com/apache/arrow-rs/issues/6464
     fn test_metadata_read_write_partial_offset() {
         let parquet_bytes = create_parquet_file();
 
         // read the metadata from the file WITHOUT the page index structures
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let original_metadata = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .parse_and_finish(&parquet_bytes)
             .unwrap();
 
         // this should error because the page indexes are not present, but have offsets specified
         let metadata_bytes = metadata_to_bytes(&original_metadata);
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let err = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .with_page_indexes(true) // there are no page indexes in the metadata
             .parse_and_finish(&metadata_bytes)
             .err()
@@ -490,7 +539,9 @@ mod test {
         let parquet_bytes = create_parquet_file();
 
         // read the metadata from the file
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let original_metadata = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .parse_and_finish(&parquet_bytes)
             .unwrap();
 
@@ -502,7 +553,9 @@ mod test {
             "metadata is subset of parquet"
         );
 
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let roundtrip_metadata = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .parse_and_finish(&metadata_bytes)
             .unwrap();
 
@@ -510,19 +563,24 @@ mod test {
     }
 
     #[test]
+    #[allow(deprecated)]
     fn test_metadata_read_write_roundtrip_page_index() {
         let parquet_bytes = create_parquet_file();
 
         // read the metadata from the file including the page index structures
         // (which are stored elsewhere in the footer)
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let original_metadata = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .with_page_indexes(true)
             .parse_and_finish(&parquet_bytes)
             .unwrap();
 
         // read metadata back from the serialized bytes and ensure it is the same
         let metadata_bytes = metadata_to_bytes(&original_metadata);
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let roundtrip_metadata = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options))
             .with_page_indexes(true)
             .parse_and_finish(&metadata_bytes)
             .unwrap();
@@ -569,6 +627,7 @@ mod test {
         let batch = RecordBatch::try_from_iter(vec![("id", array)]).unwrap();
         let props = WriterProperties::builder()
             .set_statistics_enabled(EnabledStatistics::Page)
+            .set_write_page_header_statistics(true)
             .build();
 
         let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), Some(props)).unwrap();
@@ -590,7 +649,8 @@ mod test {
 
     #[test]
     fn test_mask_from_column_names() {
-        let message_type = "
+        let schema = parse_schema(
+            "
             message test_schema {
                 OPTIONAL group a (MAP) {
                     REPEATED group key_value {
@@ -606,9 +666,8 @@ mod test {
                 REQUIRED INT32 b;
                 REQUIRED DOUBLE c;
             }
-            ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+            ",
+        );
 
         let mask = ProjectionMask::columns(&schema, ["foo", "bar"]);
         assert_eq!(mask.mask.unwrap(), vec![false; 5]);
@@ -625,7 +684,8 @@ mod test {
         let mask = ProjectionMask::columns(&schema, ["a.key_value.value", "b"]);
         assert_eq!(mask.mask.unwrap(), [false, true, true, true, false]);
 
-        let message_type = "
+        let schema = parse_schema(
+            "
             message test_schema {
                 OPTIONAL group a (LIST) {
                     REPEATED group list {
@@ -642,9 +702,8 @@ mod test {
                 }
                 REQUIRED INT32 b;
             }
-            ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+            ",
+        );
 
         let mask = ProjectionMask::columns(&schema, ["a", "b"]);
         assert_eq!(mask.mask.unwrap(), [true, true]);
@@ -659,7 +718,8 @@ mod test {
         let mask = ProjectionMask::columns(&schema, ["b"]);
         assert_eq!(mask.mask.unwrap(), [false, true]);
 
-        let message_type = "
+        let schema = parse_schema(
+            "
             message test_schema {
                 OPTIONAL INT32 a;
                 OPTIONAL INT32 b;
@@ -667,9 +727,8 @@ mod test {
                 OPTIONAL INT32 d;
                 OPTIONAL INT32 e;
             }
-            ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+            ",
+        );
 
         let mask = ProjectionMask::columns(&schema, ["a", "b"]);
         assert_eq!(mask.mask.unwrap(), [true, true, false, false, false]);
@@ -677,7 +736,8 @@ mod test {
         let mask = ProjectionMask::columns(&schema, ["d", "b", "d"]);
         assert_eq!(mask.mask.unwrap(), [false, true, false, true, false]);
 
-        let message_type = "
+        let schema = parse_schema(
+            "
             message test_schema {
                 OPTIONAL INT32 a;
                 OPTIONAL INT32 b;
@@ -685,12 +745,23 @@ mod test {
                 OPTIONAL INT32 d;
                 OPTIONAL INT32 e;
             }
-            ";
-        let parquet_group_type = parse_message_type(message_type).unwrap();
-        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+            ",
+        );
 
         let mask = ProjectionMask::columns(&schema, ["a", "e"]);
         assert_eq!(mask.mask.unwrap(), [true, false, true, false, true]);
+
+        let schema = parse_schema(
+            "
+            message test_schema {
+                OPTIONAL INT32 a;
+                OPTIONAL INT32 aa;
+            }
+            ",
+        );
+
+        let mask = ProjectionMask::columns(&schema, ["a"]);
+        assert_eq!(mask.mask.unwrap(), [true, false]);
     }
 
     #[test]
@@ -754,4 +825,226 @@ mod test {
         mask1.intersect(&mask2);
         assert_eq!(mask1.mask, None);
     }
+
+    #[test]
+    fn test_projection_mask_without_nested_no_nested() {
+        // Schema with no nested types
+        let schema = parse_schema(
+            "
+            message test_schema {
+                OPTIONAL INT32 a;
+                OPTIONAL INT32 b;
+                REQUIRED DOUBLE d;
+            }
+            ",
+        );
+
+        let mask = ProjectionMask::all();
+        // All columns are non-nested, but without_nested_types returns a new mask
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [0, 1, 2])),
+            mask.without_nested_types(&schema)
+        );
+
+        // select b, d
+        let mask = ProjectionMask::leaves(&schema, [1, 2]);
+        assert_eq!(Some(mask.clone()), mask.without_nested_types(&schema));
+    }
+
+    #[test]
+    fn test_projection_mask_without_nested_nested() {
+        // Schema with nested types (a struct and a list)
+        let schema = parse_schema(
+            "
+            message test_schema {
+                OPTIONAL INT32 a;
+                OPTIONAL group b {
+                    REQUIRED INT32 b1;
+                    OPTIONAL INT64 b2;
+                }
+                OPTIONAL group c (LIST) {
+                    REPEATED group list {
+                        OPTIONAL INT32 element;
+                    }
+                }
+                REQUIRED DOUBLE d;
+            }
+            ",
+        );
+
+        // all leaves --> a, d
+        let mask = ProjectionMask::all();
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [0, 4])),
+            mask.without_nested_types(&schema)
+        );
+
+        // b1 --> empty (it is nested)
+        let mask = ProjectionMask::leaves(&schema, [1]);
+        assert_eq!(None, mask.without_nested_types(&schema));
+
+        // b2, d --> d (leaf order is a=0, b1=1, b2=2, element=3, d=4)
+        let mask = ProjectionMask::leaves(&schema, [2, 4]);
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [4])),
+            mask.without_nested_types(&schema)
+        );
+
+        // element --> empty (it is nested)
+        let mask = ProjectionMask::leaves(&schema, [3]);
+        assert_eq!(None, mask.without_nested_types(&schema));
+    }
+
+    #[test]
+    fn test_projection_mask_without_nested_map_only() {
+        // MAP example from https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+        let schema = parse_schema(
+            "
+            message test_schema {
+                required group my_map (MAP) {
+                    repeated group key_value {
+                        required binary key (STRING);
+                        optional int32 value;
+                    }
+                }
+            }
+            ",
+        );
+
+        let all_leaves = ProjectionMask::all();
+        assert_eq!(None, all_leaves.without_nested_types(&schema));
+
+        // only the map key is selected --> nothing left, the key is nested
+        let key_only = ProjectionMask::leaves(&schema, [0]);
+        assert_eq!(None, key_only.without_nested_types(&schema));
+
+        // only the map value is selected --> nothing left, the value is nested
+        let value_only = ProjectionMask::leaves(&schema, [1]);
+        assert_eq!(None, value_only.without_nested_types(&schema));
+    }
+
+    #[test]
+    fn test_projection_mask_without_nested_map_with_non_nested() {
+        // MAP example from https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+        // surrounded by two primitive (non-nested) fields
+        let schema = parse_schema(
+            "
+            message test_schema {
+                REQUIRED INT32 a;
+                required group my_map (MAP) {
+                    repeated group key_value {
+                        required binary key (STRING);
+                        optional int32 value;
+                    }
+                }
+                REQUIRED INT32 b;
+            }
+            ",
+        );
+
+        // everything selected --> only a and b survive, the map leaves are dropped
+        let all_leaves = ProjectionMask::all();
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [0, 3])),
+            all_leaves.without_nested_types(&schema)
+        );
+
+        // map key, map value and b selected --> only b survives
+        let map_and_b = ProjectionMask::leaves(&schema, [1, 2, 3]);
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [3])),
+            map_and_b.without_nested_types(&schema)
+        );
+
+        // map key and map value selected --> nothing survives
+        let map_leaves = ProjectionMask::leaves(&schema, [1, 2]);
+        assert_eq!(None, map_leaves.without_nested_types(&schema));
+    }
+
+    #[test]
+    fn test_projection_mask_without_nested_deeply_nested() {
+        // Map of Maps
+        let schema = parse_schema(
+            "
+            message test_schema {
+                OPTIONAL group a (MAP) {
+                    REPEATED group key_value {
+                        REQUIRED BYTE_ARRAY key (UTF8);
+                        OPTIONAL group value (MAP) {
+                            REPEATED group key_value {
+                                REQUIRED INT32 key;
+                                REQUIRED BOOLEAN value;
+                            }
+                        }
+                    }
+                }
+                REQUIRED INT32 b;
+                REQUIRED DOUBLE c;
+            }
+            ",
+        );
+        let mask = ProjectionMask::all();
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [3, 4])),
+            mask.without_nested_types(&schema)
+        );
+
+        // (first) key, c --> c (the only non-nested one)
+        let mask = ProjectionMask::leaves(&schema, [0, 4]);
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [4])),
+            mask.without_nested_types(&schema)
+        );
+
+        // (second) key, value, b --> b (the only non-nested one)
+        let mask = ProjectionMask::leaves(&schema, [1, 2, 3]);
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [3])),
+            mask.without_nested_types(&schema)
+        );
+
+        // (first) key --> NONE (it is nested)
+        let mask = ProjectionMask::leaves(&schema, [0]);
+        assert_eq!(None, mask.without_nested_types(&schema));
+    }
+
+    #[test]
+    fn test_projection_mask_without_nested_list() {
+        // Example from https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
+        let schema = parse_schema(
+            "
+            message test_schema {
+                required group my_list (LIST) {
+                    repeated group list {
+                        optional binary element (STRING);
+                    }
+                }
+                REQUIRED INT32 b;
+            }
+            ",
+        );
+
+        let mask = ProjectionMask::all();
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [1])),
+            mask.without_nested_types(&schema),
+        );
+
+        // element --> empty (it is nested)
+        let mask = ProjectionMask::leaves(&schema, [0]);
+        assert_eq!(None, mask.without_nested_types(&schema));
+
+        // element, b --> b (element is nested, b is the only non-nested one)
+        let mask = ProjectionMask::leaves(&schema, [0, 1]);
+        assert_eq!(
+            Some(ProjectionMask::leaves(&schema, [1])),
+            mask.without_nested_types(&schema),
+        );
+    }
+
+    /// Parses a Parquet message type string into a `SchemaDescriptor`,
+    /// panicking if the string cannot be parsed
+    fn parse_schema(schema: &str) -> SchemaDescriptor {
+        SchemaDescriptor::new(Arc::new(parse_message_type(schema).unwrap()))
+    }
 }
diff --git a/parquet/src/arrow/push_decoder/mod.rs b/parquet/src/arrow/push_decoder/mod.rs
new file mode 100644
index 000000000000..50451aee120e
--- /dev/null
+++ b/parquet/src/arrow/push_decoder/mod.rs
@@ -0,0 +1,1236 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`ParquetPushDecoder`]: decodes Parquet data with data provided by the
+//! caller (rather than from an underlying reader).
+
+mod reader_builder;
+mod remaining;
+
+use crate::DecodeResult;
+use crate::arrow::arrow_reader::{
+    ArrowReaderBuilder, ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReader,
+};
+use crate::errors::ParquetError;
+use crate::file::metadata::ParquetMetaData;
+use crate::util::push_buffers::PushBuffers;
+use arrow_array::RecordBatch;
+use bytes::Bytes;
+use reader_builder::RowGroupReaderBuilder;
+use remaining::RemainingRowGroups;
+use std::ops::Range;
+use std::sync::Arc;
+
+/// A builder for [`ParquetPushDecoder`].
+///
+/// To create a new decoder, use [`ParquetPushDecoderBuilder::try_new_decoder`].
+///
+/// You can decode the metadata from a Parquet file using either
+/// [`ParquetMetadataReader`] or [`ParquetMetaDataPushDecoder`].
+///
+/// [`ParquetMetadataReader`]: crate::file::metadata::ParquetMetaDataReader
+/// [`ParquetMetaDataPushDecoder`]: crate::file::metadata::ParquetMetaDataPushDecoder
+///
+/// Note the "input" type is [`NoInput`]: there is no underlying reader, and
+/// the caller instead supplies the data the decoder asks for via
+/// [`ParquetPushDecoder::push_ranges`].
+///
+/// # Example
+/// ```
+/// # use std::ops::Range;
+/// # use std::sync::Arc;
+/// # use bytes::Bytes;
+/// # use arrow_array::record_batch;
+/// # use parquet::DecodeResult;
+/// # use parquet::arrow::push_decoder::ParquetPushDecoderBuilder;
+/// # use parquet::arrow::ArrowWriter;
+/// # use parquet::file::metadata::ParquetMetaDataPushDecoder;
+/// # let file_bytes = {
+/// #   let mut buffer = vec![];
+/// #   let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+/// #   let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+/// #   writer.write(&batch).unwrap();
+/// #   writer.close().unwrap();
+/// #   Bytes::from(buffer)
+/// # };
+/// # // mimic IO by returning a function that returns the bytes for a given range
+/// # let get_range = |range: &Range<u64>| -> Bytes {
+/// #    let start = range.start as usize;
+/// #     let end = range.end as usize;
+/// #    file_bytes.slice(start..end)
+/// # };
+/// # let file_length = file_bytes.len() as u64;
+/// # let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_length).unwrap();
+/// # metadata_decoder.push_ranges(vec![0..file_length], vec![file_bytes.clone()]).unwrap();
+/// # let DecodeResult::Data(parquet_metadata) = metadata_decoder.try_decode().unwrap() else { panic!("failed to decode metadata") };
+/// # let parquet_metadata = Arc::new(parquet_metadata);
+/// // The Parquet metadata is required to create the decoder
+/// let mut decoder =
+///     ParquetPushDecoderBuilder::try_new_decoder(parquet_metadata)
+///       .unwrap()
+///       // Optionally configure the decoder, e.g. batch size
+///       .with_batch_size(1024)
+///       // Build the decoder
+///       .build()
+///       .unwrap();
+///
+///     // In a loop, ask the decoder what it needs next, and provide it with the required data
+///     loop {
+///         match decoder.try_decode().unwrap() {
+///             DecodeResult::NeedsData(ranges) => {
+///                 // The decoder needs more data. Fetch the data for the given ranges
+///                 let data = ranges.iter().map(|r| get_range(r)).collect::<Vec<_>>();
+///                 // Push the data to the decoder
+///                 decoder.push_ranges(ranges, data).unwrap();
+///                 // After pushing the data, we can try to decode again on the next iteration
+///             }
+///             DecodeResult::Data(batch) => {
+///                 // Successfully decoded a batch of data
+///                 assert!(batch.num_rows() > 0);
+///             }
+///             DecodeResult::Finished => {
+///                 // The decoder has finished decoding, exit the loop
+///                 break;
+///             }
+///         }
+///     }
+/// ```
+pub type ParquetPushDecoderBuilder = ArrowReaderBuilder<NoInput>;
+
+/// Type that represents "No input" for the [`ParquetPushDecoderBuilder`]
+///
+/// There is no "input" for the push decoder by design (the idea is that
+/// the caller pushes data to the decoder as needed).
+///
+/// However, [`ArrowReaderBuilder`] is shared with the sync and async readers,
+/// which DO have an `input`. To support reusing the same builder code for
+/// all three types of decoders, we define this `NoInput` for the push decoder to
+/// denote in the type system that there is no input.
+#[derive(Debug, Clone, Copy)]
+pub struct NoInput;
+
+/// Methods for building a [`ParquetPushDecoder`]. See the base [`ArrowReaderBuilder`] for
+/// more options that can be configured.
+impl ParquetPushDecoderBuilder {
+    /// Create a new `ParquetPushDecoderBuilder` to configure a decoder for the given file.
+    ///
+    /// See [`ParquetMetaDataPushDecoder`] for a decoder that can read the metadata from a Parquet file.
+    ///
+    /// [`ParquetMetaDataPushDecoder`]: crate::file::metadata::ParquetMetaDataPushDecoder
+    ///
+    /// Uses the default [`ArrowReaderOptions`]. See example on [`ParquetPushDecoderBuilder`]
+    pub fn try_new_decoder(parquet_metadata: Arc<ParquetMetaData>) -> Result<Self, ParquetError> {
+        Self::try_new_decoder_with_options(parquet_metadata, ArrowReaderOptions::default())
+    }
+
+    /// Create a new `ParquetPushDecoderBuilder` to configure a decoder for the
+    /// given file, using the supplied reader options.
+    ///
+    /// This is similar to [`Self::try_new_decoder`] but allows configuring
+    /// options such as Arrow schema. Returns an error if
+    /// [`ArrowReaderMetadata::try_new`] fails for the metadata and options.
+    pub fn try_new_decoder_with_options(
+        parquet_metadata: Arc<ParquetMetaData>,
+        arrow_reader_options: ArrowReaderOptions,
+    ) -> Result<Self, ParquetError> {
+        ArrowReaderMetadata::try_new(parquet_metadata, arrow_reader_options)
+            .map(Self::new_with_metadata)
+    }
+
+    /// Create a new `ParquetPushDecoderBuilder` given [`ArrowReaderMetadata`].
+    ///
+    /// See [`ArrowReaderMetadata::try_new`] for how to create the metadata from
+    /// the Parquet metadata and reader options.
+    pub fn new_with_metadata(arrow_reader_metadata: ArrowReaderMetadata) -> Self {
+        Self::new_builder(NoInput, arrow_reader_metadata)
+    }
+
+    /// Consume this builder, producing a [`ParquetPushDecoder`] with the configured options
+    pub fn build(self) -> Result<ParquetPushDecoder, ParquetError> {
+        let Self {
+            input: NoInput,
+            metadata: parquet_metadata,
+            schema: _,
+            fields,
+            batch_size,
+            row_groups,
+            projection,
+            filter,
+            selection,
+            limit,
+            offset,
+            metrics,
+            row_selection_policy,
+            max_predicate_cache_size,
+        } = self;
+
+        // Without an explicit selection, every row group in the file is read
+        let row_groups = match row_groups {
+            Some(selected) => selected,
+            None => (0..parquet_metadata.num_row_groups()).collect(),
+        };
+
+        // Prepare to build RowGroup readers (the buffer length is not used in push decoder)
+        let row_group_reader_builder = RowGroupReaderBuilder::new(
+            batch_size,
+            projection,
+            Arc::clone(&parquet_metadata),
+            fields,
+            filter,
+            limit,
+            offset,
+            metrics,
+            max_predicate_cache_size,
+            PushBuffers::new(0),
+            row_selection_policy,
+        );
+
+        // Initial decoder state, built from the configured options
+        let remaining_row_groups = Box::new(RemainingRowGroups::new(
+            parquet_metadata,
+            row_groups,
+            selection,
+            row_group_reader_builder,
+        ));
+
+        Ok(ParquetPushDecoder {
+            state: ParquetDecoderState::ReadingRowGroup {
+                remaining_row_groups,
+            },
+        })
+    }
+}
+
+/// A push based Parquet Decoder
+///
+/// See [`ParquetPushDecoderBuilder`] for an example of how to build and use the decoder.
+///
+/// [`ParquetPushDecoder`] is a low level API for decoding Parquet data without an
+/// underlying reader for performing IO, and thus offers fine grained control
+/// over how data is fetched and decoded.
+///
+/// When more data is needed to make progress, instead of reading data directly
+/// from a reader, the decoder returns [`DecodeResult`] indicating what ranges
+/// are needed. Once the caller provides the requested ranges via
+/// [`Self::push_ranges`], they can try to decode again by calling
+/// [`Self::try_decode`].
+///
+/// The decoder's internal state tracks what has been already decoded and what
+/// is needed next.
+#[derive(Debug)]
+pub struct ParquetPushDecoder {
+    /// The inner state.
+    ///
+    /// This state is consumed on every transition and a new state is produced
+    /// so the Rust compiler can ensure that the state is always valid and
+    /// transitions are not missed.
+    state: ParquetDecoderState,
+}
+
+impl ParquetPushDecoder {
+    /// Attempt to decode the next batch of data, or return what data is needed
+    ///
+    /// The decoder communicates the next state with a [`DecodeResult`]. If an
+    /// error is returned, the decoder is left in the finished state.
+    /// See the full example in [`ParquetPushDecoderBuilder`].
+    ///
+    /// ```no_run
+    /// # use parquet::arrow::push_decoder::ParquetPushDecoder;
+    /// use parquet::DecodeResult;
+    /// # fn get_decoder() -> ParquetPushDecoder { unimplemented!() }
+    /// # fn push_data(decoder: &mut ParquetPushDecoder, ranges: Vec<std::ops::Range<u64>>) { unimplemented!() }
+    /// let mut decoder = get_decoder();
+    /// loop {
+    ///    match decoder.try_decode().unwrap() {
+    ///       DecodeResult::NeedsData(ranges) => {
+    ///         // The decoder needs more data. Fetch the data for the given ranges
+    ///         // call decoder.push_ranges(ranges, data) and call again
+    ///         push_data(&mut decoder, ranges);
+    ///       }
+    ///       DecodeResult::Data(batch) => {
+    ///         // Successfully decoded the next batch of data
+    ///         println!("Got batch with {} rows", batch.num_rows());
+    ///       }
+    ///       DecodeResult::Finished => {
+    ///         // The decoder has finished decoding all data
+    ///         break;
+    ///       }
+    ///    }
+    /// }
+    ///```
+    pub fn try_decode(&mut self) -> Result<DecodeResult<RecordBatch>, ParquetError> {
+        let previous = std::mem::replace(&mut self.state, ParquetDecoderState::Finished);
+        let (next, output) = previous.try_next_batch()?;
+        self.state = next;
+        Ok(output)
+    }
+
+    /// Return a [`ParquetRecordBatchReader`] that reads the next set of rows, or
+    /// return what data is needed to produce it.
+    ///
+    /// This API can be used to get a reader for decoding one set of RecordBatches
+    /// (e.g. a row group) while fetching the data for the following set. If an
+    /// error is returned, the decoder is left in the finished state.
+    ///
+    /// Example
+    /// ```no_run
+    /// # use parquet::arrow::push_decoder::ParquetPushDecoder;
+    /// use parquet::DecodeResult;
+    /// # fn get_decoder() -> ParquetPushDecoder { unimplemented!() }
+    /// # fn push_data(decoder: &mut ParquetPushDecoder, ranges: Vec<std::ops::Range<u64>>) { unimplemented!() }
+    /// let mut decoder = get_decoder();
+    /// loop {
+    ///    match decoder.try_next_reader().unwrap() {
+    ///       DecodeResult::NeedsData(ranges) => {
+    ///         // The decoder needs more data. Fetch the data for the given ranges
+    ///         // call decoder.push_ranges(ranges, data) and call again
+    ///         push_data(&mut decoder, ranges);
+    ///       }
+    ///       DecodeResult::Data(reader) => {
+    ///          // spawn a thread to read the batches in parallel
+    ///          // with fetching the next row group / data
+    ///          std::thread::spawn(move || {
+    ///            for batch in reader {
+    ///              let batch = batch.unwrap();
+    ///              println!("Got batch with {} rows", batch.num_rows());
+    ///            }
+    ///         });
+    ///       }
+    ///       DecodeResult::Finished => {
+    ///         // The decoder has finished decoding all data
+    ///         break;
+    ///       }
+    ///    }
+    /// }
+    ///```
+    pub fn try_next_reader(
+        &mut self,
+    ) -> Result<DecodeResult<ParquetRecordBatchReader>, ParquetError> {
+        let previous = std::mem::replace(&mut self.state, ParquetDecoderState::Finished);
+        let (next, output) = previous.try_next_reader()?;
+        self.state = next;
+        Ok(output)
+    }
+
+    /// Push a single range of data into the decoder for processing
+    ///
+    /// This is a convenience wrapper around [`Self::push_ranges`] for pushing a
+    /// single range of data.
+    ///
+    /// Note this can be the entire file or just a part of it. If it is part of the file,
+    /// the range should correspond to a data range requested by the decoder.
+    ///
+    /// See example in [`ParquetPushDecoderBuilder`]
+    pub fn push_range(&mut self, range: Range<u64>, data: Bytes) -> Result<(), ParquetError> {
+        self.push_ranges(vec![range], vec![data])
+    }
+
+    /// Push data into the decoder for processing
+    ///
+    /// Should match the ranges the decoder requested. On error the decoder is left finished.
+    pub fn push_ranges(
+        &mut self,
+        ranges: Vec<Range<u64>>,
+        data: Vec<Bytes>,
+    ) -> Result<(), ParquetError> {
+        let previous = std::mem::replace(&mut self.state, ParquetDecoderState::Finished);
+        self.state = previous.push_data(ranges, data)?;
+        Ok(())
+    }
+
+    /// Returns the total number of buffered bytes in the decoder
+    ///
+    /// This is the sum of the sizes of all [`Bytes`] that have been pushed to the
+    /// decoder but not yet consumed.
+    ///
+    /// Note that this does not include any overhead of the internal data
+    /// structures and that since [`Bytes`] are ref counted memory, this may not
+    /// reflect additional memory usage.
+    ///
+    /// This can be used to monitor memory usage of the decoder.
+    pub fn buffered_bytes(&self) -> u64 {
+        self.state.buffered_bytes()
+    }
+}
+
+/// Internal state machine for the [`ParquetPushDecoder`]
+#[derive(Debug)]
+enum ParquetDecoderState {
+    /// Waiting for data needed to decode the next RowGroup
+    ReadingRowGroup {
+        remaining_row_groups: Box<RemainingRowGroups>,
+    },
+    /// The decoder is actively decoding a RowGroup
+    DecodingRowGroup {
+        /// Reader for the RowGroup that is currently being decoded
+        record_batch_reader: Box<ParquetRecordBatchReader>,
+        remaining_row_groups: Box<RemainingRowGroups>,
+    },
+    /// All data has been processed (also the placeholder state during a transition)
+    Finished,
+}
+
+impl ParquetDecoderState {
+    /// Drives the state machine until a [`ParquetRecordBatchReader`] is ready,
+    /// more data is needed, or decoding has finished.
+    fn try_next_reader(
+        self,
+    ) -> Result<(Self, DecodeResult<ParquetRecordBatchReader>), ParquetError> {
+        let mut state = self;
+        loop {
+            let (advanced, outcome) = state.transition()?;
+            // report missing data to the caller along with the state to resume from
+            if let DecodeResult::NeedsData(ranges) = outcome {
+                return Ok((advanced, DecodeResult::NeedsData(ranges)));
+            }
+            // `Data(())` and `Finished` carry no payload here, so what happens
+            // next depends only on the state that was reached
+            state = match advanced {
+                // no reader available yet, so keep transitioning
+                Self::ReadingRowGroup { .. } => advanced,
+                // a reader is ready: hand it to the caller and go back to
+                // `ReadingRowGroup` so the following call moves to the next group
+                Self::DecodingRowGroup {
+                    record_batch_reader,
+                    remaining_row_groups,
+                } => {
+                    let resume_from = Self::ReadingRowGroup {
+                        remaining_row_groups,
+                    };
+                    let reader = DecodeResult::Data(*record_batch_reader);
+                    return Ok((resume_from, reader));
+                }
+                // nothing left to decode
+                Self::Finished => {
+                    return Ok((Self::Finished, DecodeResult::Finished));
+                }
+            };
+        }
+    }
+
+    /// Advance the state machine until the next [`RecordBatch`] is decoded,
+    /// more data is required, or decoding is done.
+    ///
+    /// Returns the follow-up state together with the outcome:
+    /// * `NeedsData(ranges)`: the byte ranges that must be pushed first
+    /// * `Data(batch)`: the next batch; the state stays in `DecodingRowGroup`
+    ///   so that the same reader is polled again on the next call
+    /// * `Finished`: no RowGroups remain
+    ///
+    /// When the active reader is exhausted the state moves back to
+    /// `ReadingRowGroup` and the loop continues with the next RowGroup.
+    ///
+    /// # Errors
+    ///
+    /// Errors from `transition` are propagated, and an error yielded by the
+    /// reader is returned as [`ParquetError::ArrowError`].
+    fn try_next_batch(self) -> Result<(Self, DecodeResult<RecordBatch>), ParquetError> {
+        let mut state = self;
+        loop {
+            let (mut record_batch_reader, remaining_row_groups) = match state.transition()? {
+                // cannot make progress until the caller supplies these ranges
+                (blocked, DecodeResult::NeedsData(ranges)) => {
+                    return Ok((blocked, DecodeResult::NeedsData(ranges)));
+                }
+                // no reader yet: keep stepping the state machine
+                (pending @ Self::ReadingRowGroup { .. }, _) => {
+                    state = pending;
+                    continue;
+                }
+                (Self::Finished, _) => return Ok((Self::Finished, DecodeResult::Finished)),
+                // a reader is ready: decode its next batch below
+                (
+                    Self::DecodingRowGroup {
+                        record_batch_reader,
+                        remaining_row_groups,
+                    },
+                    _,
+                ) => (record_batch_reader, remaining_row_groups),
+            };
+            let Some(next_batch) = record_batch_reader.next() else {
+                // reader exhausted: move on to the next RowGroup
+                state = Self::ReadingRowGroup {
+                    remaining_row_groups,
+                };
+                continue;
+            };
+            // TODO: preserve ArrowError in ParquetError (rather than convert to a string)
+            let batch = next_batch.map_err(|e| ParquetError::ArrowError(e.to_string()))?;
+            let still_decoding = Self::DecodingRowGroup {
+                record_batch_reader,
+                remaining_row_groups,
+            };
+            return Ok((still_decoding, DecodeResult::Data(batch)));
+        }
+    }
+
+    /// Perform a single step of the state machine.
+    ///
+    /// * `ReadingRowGroup`: ask the remaining row groups for the next reader.
+    ///   If one is available move to `DecodingRowGroup` and report `Data(())`;
+    ///   if buffered data is missing stay in `ReadingRowGroup` and report the
+    ///   needed ranges; if nothing is left move to `Finished`.
+    /// * `DecodingRowGroup`: a reader already exists, so report `Data(())`
+    ///   without changing state.
+    /// * `Finished`: report `Finished` without changing state.
+    ///
+    /// Callers invoke this in a loop until the decoder is ready to return
+    /// data or is finished.
+    fn transition(self) -> Result<(Self, DecodeResult<()>), ParquetError> {
+        let mut remaining_row_groups = match self {
+            Self::ReadingRowGroup {
+                remaining_row_groups,
+            } => remaining_row_groups,
+            // a reader is already active: data can be produced right away
+            active @ Self::DecodingRowGroup { .. } => {
+                return Ok((active, DecodeResult::Data(())));
+            }
+            // once finished, always finished
+            Self::Finished => return Ok((Self::Finished, DecodeResult::Finished)),
+        };
+
+        let step = match remaining_row_groups.try_next_reader()? {
+            // a reader for the next row group could be built: start decoding it
+            DecodeResult::Data(record_batch_reader) => (
+                Self::DecodingRowGroup {
+                    record_batch_reader: Box::new(record_batch_reader),
+                    remaining_row_groups,
+                },
+                DecodeResult::Data(()),
+            ),
+            // more data is required: stay put and report the missing ranges
+            DecodeResult::NeedsData(ranges) => (
+                Self::ReadingRowGroup {
+                    remaining_row_groups,
+                },
+                DecodeResult::NeedsData(ranges),
+            ),
+            // no row groups left to read
+            DecodeResult::Finished => (Self::Finished, DecodeResult::Finished),
+        };
+        Ok(step)
+    }
+
+    /// Hand newly arrived data to the decoder.
+    ///
+    /// `ranges` and `data` are forwarded unchanged to `remaining_row_groups`.
+    /// They should correspond to the data ranges requested by the decoder.
+    ///
+    /// The variant of the state is never changed by this call: data may be
+    /// pushed both while waiting for a RowGroup and while one is being
+    /// decoded (that is, before the decoder asked for it).
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParquetError::General`] if the decoder is already finished.
+    pub fn push_data(
+        self,
+        ranges: Vec<Range<u64>>,
+        data: Vec<Bytes>,
+    ) -> Result<Self, ParquetError> {
+        let mut state = self;
+        let buffer = match &mut state {
+            Self::ReadingRowGroup {
+                remaining_row_groups,
+            }
+            | Self::DecodingRowGroup {
+                remaining_row_groups,
+                ..
+            } => remaining_row_groups,
+            Self::Finished => {
+                let message = "Cannot push data to a finished decoder".to_string();
+                return Err(ParquetError::General(message));
+            }
+        };
+        buffer.push_data(ranges, data);
+        Ok(state)
+    }
+
+    /// Number of bytes currently buffered (0 once the decoder is finished)
+    fn buffered_bytes(&self) -> u64 {
+        match self {
+            Self::Finished => 0,
+            Self::ReadingRowGroup {
+                remaining_row_groups,
+            }
+            | Self::DecodingRowGroup {
+                remaining_row_groups,
+                ..
+            } => remaining_row_groups.buffered_bytes(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::DecodeResult;
+    use crate::arrow::arrow_reader::{ArrowPredicateFn, RowFilter, RowSelection, RowSelector};
+    use crate::arrow::push_decoder::{ParquetPushDecoder, ParquetPushDecoderBuilder};
+    use crate::arrow::{ArrowWriter, ProjectionMask};
+    use crate::errors::ParquetError;
+    use crate::file::metadata::ParquetMetaDataPushDecoder;
+    use crate::file::properties::WriterProperties;
+    use arrow::compute::kernels::cmp::{gt, lt};
+    use arrow_array::cast::AsArray;
+    use arrow_array::types::Int64Type;
+    use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringViewArray};
+    use arrow_select::concat::concat_batches;
+    use bytes::Bytes;
+    use std::fmt::Debug;
+    use std::ops::Range;
+    use std::sync::{Arc, LazyLock};
+
+    /// Test decoder state size (it is moved on every transition, so it should stay small)
+    #[test]
+    fn test_decoder_size() {
+        let state_size = std::mem::size_of::<ParquetDecoderState>();
+        assert_eq!(state_size, 24);
+    }
+
+    /// Push the whole file up front and check that both row groups decode
+    /// without the decoder ever asking for more data
+    #[test]
+    fn test_decoder_all_data() {
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+        let mut decoder = builder.build().unwrap();
+
+        // hand over every byte of the file in a single range
+        let whole_file = TEST_FILE_DATA.clone();
+        decoder.push_range(test_file_range(), whole_file).unwrap();
+
+        let mut batches = Vec::with_capacity(2);
+        // first row group should be decoded without needing more data
+        batches.push(expect_data(decoder.try_decode()));
+        // second row group should be decoded without needing more data
+        batches.push(expect_data(decoder.try_decode()));
+        // nothing left after the two row groups
+        expect_finished(decoder.try_decode());
+
+        // the concatenated output must match the batch that was written
+        let schema = TEST_BATCH.schema();
+        let all_output = concat_batches(&schema, &batches).unwrap();
+        assert_eq!(all_output, *TEST_BATCH);
+    }
+
+    /// Decode the entire file incrementally, fetching data only when the decoder
+    /// asks for it, and check the buffered byte count after each step
+    #[test]
+    fn test_decoder_incremental() {
+        let mut decoder = ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata())
+            .unwrap()
+            .build()
+            .unwrap();
+
+        let mut results = vec![];
+
+        // First row group, expect a single request
+        let ranges = expect_needs_data(decoder.try_decode());
+        let num_bytes_requested: u64 = ranges.iter().map(|r| r.end - r.start).sum();
+        push_ranges_to_decoder(&mut decoder, ranges);
+        // The decoder should currently only store the data it needs to decode the first row group
+        assert_eq!(decoder.buffered_bytes(), num_bytes_requested);
+        results.push(expect_data(decoder.try_decode()));
+        // the decoder should have consumed the data for the first row group and freed it
+        assert_eq!(decoder.buffered_bytes(), 0);
+
+        // Second row group, again expect a single request
+        let ranges = expect_needs_data(decoder.try_decode());
+        let num_bytes_requested: u64 = ranges.iter().map(|r| r.end - r.start).sum();
+        push_ranges_to_decoder(&mut decoder, ranges);
+        // The decoder should currently only store the data it needs to decode the second row group
+        assert_eq!(decoder.buffered_bytes(), num_bytes_requested);
+        results.push(expect_data(decoder.try_decode()));
+        // the decoder should have consumed the data for the second row group and freed it
+        assert_eq!(decoder.buffered_bytes(), 0);
+        expect_finished(decoder.try_decode());
+
+        // Check that the output matches the input batch
+        let all_output = concat_batches(&TEST_BATCH.schema(), &results).unwrap();
+        assert_eq!(all_output, *TEST_BATCH);
+    }
+
+    /// Decode the entire file incrementally, pushing the second row group in two partial steps
+    #[test]
+    fn test_decoder_partial() {
+        let mut decoder = ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata())
+            .unwrap()
+            .build()
+            .unwrap();
+
+        // First row group, expect a single request for all data needed to read all columns
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(0, 200);
+        assert_eq!(batch1, expected1);
+
+        // Second row group, this time provide the data in two steps
+        let ranges = expect_needs_data(decoder.try_decode());
+        let (ranges1, ranges2) = ranges.split_at(ranges.len() / 2);
+        assert!(!ranges1.is_empty());
+        assert!(!ranges2.is_empty());
+        // push first half to simulate partial read
+        push_ranges_to_decoder(&mut decoder, ranges1.to_vec());
+
+        // still expect more data
+        let ranges = expect_needs_data(decoder.try_decode());
+        assert_eq!(ranges, ranges2); // should be the remaining ranges
+        // pushing an empty set of ranges should be a no-op
+        push_ranges_to_decoder(&mut decoder, vec![]);
+        let ranges = expect_needs_data(decoder.try_decode());
+        assert_eq!(ranges, ranges2); // should still be the remaining ranges
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch2 = expect_data(decoder.try_decode());
+        let expected2 = TEST_BATCH.slice(200, 200);
+        assert_eq!(batch2, expected2);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Decode multiple columns "a" and "b", expect that the decoder makes
+    /// only a single request per row group
+    #[test]
+    fn test_decoder_selection_does_one_request() {
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        let mut decoder = builder
+            .with_projection(
+                ProjectionMask::columns(&schema_descr, ["a", "b"]), // read "a", "b"
+            )
+            .build()
+            .unwrap();
+
+        // First row group, expect a single request for all data needed to read "a" and "b"
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(0, 200).project(&[0, 1]).unwrap();
+        assert_eq!(batch1, expected1);
+
+        // Second row group, similarly expect a single request for all data needed to read "a" and "b"
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch2 = expect_data(decoder.try_decode());
+        let expected2 = TEST_BATCH.slice(200, 200).project(&[0, 1]).unwrap();
+        assert_eq!(batch2, expected2);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Decode with a filter that requires multiple ranges, but only provide part
+    /// of the data needed for the filter at a time, simulating partial reads.
+    #[test]
+    fn test_decoder_single_filter_partial() {
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+
+        // Values in column "a" range 0..399
+        // Filter: "a" > 250  (no rows in Row Group 0, rows 251..399 in Row Group 1)
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        // a > 250
+        let row_filter_a = ArrowPredicateFn::new(
+            // claim to use both a and b so we get two range requests for the filter pages
+            ProjectionMask::columns(&schema_descr, ["a", "b"]),
+            |batch: RecordBatch| {
+                let scalar_250 = Int64Array::new_scalar(250);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                gt(column, &scalar_250)
+            },
+        );
+
+        let mut decoder = builder
+            .with_projection(
+                // read only column "a" to test that filter pages are reused
+                ProjectionMask::columns(&schema_descr, ["a"]), // read "a"
+            )
+            .with_row_filter(RowFilter::new(vec![Box::new(row_filter_a)]))
+            .build()
+            .unwrap();
+
+        // First row group, evaluating filters
+        let ranges = expect_needs_data(decoder.try_decode());
+        // only provide half the ranges
+        let (ranges1, ranges2) = ranges.split_at(ranges.len() / 2);
+        assert!(!ranges1.is_empty());
+        assert!(!ranges2.is_empty());
+        push_ranges_to_decoder(&mut decoder, ranges1.to_vec());
+        // still expect more data
+        let ranges = expect_needs_data(decoder.try_decode());
+        assert_eq!(ranges, ranges2); // should be the remaining ranges
+        let ranges = expect_needs_data(decoder.try_decode());
+        assert_eq!(ranges, ranges2); // asking again without pushing returns the same ranges
+        push_ranges_to_decoder(&mut decoder, ranges2.to_vec());
+
+        // Since no rows in the first row group pass the filter, there are no
+        // additional requests to read data pages for the first row group here
+
+        // Second row group
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch = expect_data(decoder.try_decode());
+        let expected = TEST_BATCH.slice(251, 149).project(&[0]).unwrap();
+        assert_eq!(batch, expected);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Decode with a filter where we also skip one of the RowGroups via a RowSelection
+    #[test]
+    fn test_decoder_single_filter_and_row_selection() {
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+
+        // Values in column "a" range 0..399
+        // Filter: "a" > 250  (no rows in Row Group 0, rows 251..399 in Row Group 1)
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        // a > 250
+        let row_filter_a = ArrowPredicateFn::new(
+            ProjectionMask::columns(&schema_descr, ["a"]),
+            |batch: RecordBatch| {
+                let scalar_250 = Int64Array::new_scalar(250);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                gt(column, &scalar_250)
+            },
+        );
+
+        let mut decoder = builder
+            .with_projection(
+                // read only column "b"; the filter column "a" is not part of the output
+                ProjectionMask::columns(&schema_descr, ["b"]), // read "b"
+            )
+            .with_row_filter(RowFilter::new(vec![Box::new(row_filter_a)]))
+            .with_row_selection(RowSelection::from(vec![
+                RowSelector::skip(200),   // skip first row group
+                RowSelector::select(100), // first 100 rows of second row group
+                RowSelector::skip(100),
+            ]))
+            .build()
+            .unwrap();
+
+        // expect the first row group to be filtered out (no filter is evaluated due to row selection)
+
+        // First request: presumably the filter pages for "a" in the second row group (TODO confirm)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // Second request: presumably the data pages for "b" in the second row group (TODO confirm)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        let batch = expect_data(decoder.try_decode());
+        let expected = TEST_BATCH.slice(251, 49).project(&[1]).unwrap();
+        assert_eq!(batch, expected);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Decode with multiple filters that require multiple requests
+    #[test]
+    fn test_decoder_multi_filters() {
+        // Create a decoder for decoding parquet data (note it does not have any IO / readers)
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+
+        // Values in column "a" range 0..399, values in column "b" range 400..799
+        // First filter: "a" > 175  (passes rows 176..399)
+        // Second filter: "b" < 625 (passes rows 0..224)
+        // Together the filters pass rows 176..224, which span both row groups
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        // a > 175
+        let row_filter_a = ArrowPredicateFn::new(
+            ProjectionMask::columns(&schema_descr, ["a"]),
+            |batch: RecordBatch| {
+                let scalar_175 = Int64Array::new_scalar(175);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                gt(column, &scalar_175)
+            },
+        );
+
+        // b < 625
+        let row_filter_b = ArrowPredicateFn::new(
+            ProjectionMask::columns(&schema_descr, ["b"]),
+            |batch: RecordBatch| {
+                let scalar_625 = Int64Array::new_scalar(625);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                lt(column, &scalar_625)
+            },
+        );
+
+        let mut decoder = builder
+            .with_projection(
+                ProjectionMask::columns(&schema_descr, ["c"]), // read "c"
+            )
+            .with_row_filter(RowFilter::new(vec![
+                Box::new(row_filter_a),
+                Box::new(row_filter_b),
+            ]))
+            .build()
+            .unwrap();
+
+        // First row group, first filter (a > 175)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // First row group, second filter (b < 625)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // First row group, data pages for "c"
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first batch to be decoded: rows 176..199, column "c"
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(176, 24).project(&[2]).unwrap();
+        assert_eq!(batch1, expected1);
+
+        // Second row group, first filter (a > 175)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // Second row group, second filter (b < 625)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // Second row group, data pages for "c"
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the second batch to be decoded: rows 200..224, column "c"
+        let batch2 = expect_data(decoder.try_decode());
+        let expected2 = TEST_BATCH.slice(200, 25).project(&[2]).unwrap();
+        assert_eq!(batch2, expected2);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Decode with a filter that uses a column that is also projected, and expect
+    /// that the filter pages are reused (not fetched a second time)
+    #[test]
+    fn test_decoder_reuses_filter_pages() {
+        // Create a decoder for decoding parquet data (note it does not have any IO / readers)
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+
+        // Values in column "a" range 0..399
+        // Filter: "a" > 250  (no rows in Row Group 0, rows 251..399 in Row Group 1)
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        // a > 250
+        let row_filter_a = ArrowPredicateFn::new(
+            ProjectionMask::columns(&schema_descr, ["a"]),
+            |batch: RecordBatch| {
+                let scalar_250 = Int64Array::new_scalar(250);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                gt(column, &scalar_250)
+            },
+        );
+
+        let mut decoder = builder
+            .with_projection(
+                // read only column "a" to test that filter pages are reused
+                ProjectionMask::columns(&schema_descr, ["a"]), // read "a"
+            )
+            .with_row_filter(RowFilter::new(vec![Box::new(row_filter_a)]))
+            .build()
+            .unwrap();
+
+        // First row group, first filter (a > 250)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first row group to be filtered out (no rows match)
+
+        // Second row group, first filter (a > 250)
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect that the second row group is decoded: rows 251..399, column "a"
+        // Note that the filter pages for "a" should be reused and no additional data
+        // should be requested
+        let batch = expect_data(decoder.try_decode());
+        let expected = TEST_BATCH.slice(251, 149).project(&[0]).unwrap();
+        assert_eq!(batch, expected);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    #[test]
+    fn test_decoder_empty_filters() {
+        let builder =
+            ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata()).unwrap();
+        let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+        // only read column "c", with a RowFilter that contains no predicates
+        let mut decoder = builder
+            .with_projection(
+                ProjectionMask::columns(&schema_descr, ["c"]), // read "c"
+            )
+            .with_row_filter(RowFilter::new(vec![
+                // an empty list of filters should be ignored
+            ]))
+            .build()
+            .unwrap();
+
+        // First row group, expect a single request
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first batch to be decoded: rows 0..199, column "c"
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(0, 200).project(&[2]).unwrap();
+        assert_eq!(batch1, expected1);
+
+        // Second row group, expect a single request
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the second batch to be decoded: rows 200..399, column "c"
+        let batch2 = expect_data(decoder.try_decode());
+        let expected2 = TEST_BATCH.slice(200, 200).project(&[2]).unwrap();
+
+        assert_eq!(batch2, expected2);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    #[test]
+    fn test_decoder_offset_limit() {
+        let mut decoder = ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata())
+            .unwrap()
+            // skip entire first row group (200 rows) and first 25 rows of second row group
+            .with_offset(225)
+            // and limit to 20 rows
+            .with_limit(20)
+            .build()
+            .unwrap();
+
+        // First row group should be skipped entirely, so the first request is for the second
+
+        // Second row group
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first and only batch to be decoded: rows 225..244
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(225, 20);
+        assert_eq!(batch1, expected1);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    #[test]
+    fn test_decoder_row_group_selection() {
+        // take only the second row group (index 1)
+        let mut decoder = ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata())
+            .unwrap()
+            .with_row_groups(vec![1])
+            .build()
+            .unwrap();
+
+        // First row group should be skipped entirely, so the first request is for the second
+
+        // Second row group
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first and only batch to be decoded: rows 200..399
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(200, 200);
+        assert_eq!(batch1, expected1);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    #[test]
+    fn test_decoder_row_selection() {
+        // skip the first row group and select 20 rows from the second
+        let mut decoder = ParquetPushDecoderBuilder::try_new_decoder(test_file_parquet_metadata())
+            .unwrap()
+            .with_row_selection(RowSelection::from(vec![
+                RowSelector::skip(225),  // skip first row group and 25 rows of second
+                RowSelector::select(20), // take 20 rows
+            ]))
+            .build()
+            .unwrap();
+
+        // First row group should be skipped entirely, so the first request is for the second
+
+        // Second row group
+        let ranges = expect_needs_data(decoder.try_decode());
+        push_ranges_to_decoder(&mut decoder, ranges);
+
+        // expect the first and only batch to be decoded: rows 225..244
+        let batch1 = expect_data(decoder.try_decode());
+        let expected1 = TEST_BATCH.slice(225, 20);
+        assert_eq!(batch1, expected1);
+
+        expect_finished(decoder.try_decode());
+    }
+
+    /// Returns a batch with 400 rows, with 3 columns: "a", "b", "c"
+    ///
+    /// Note "c" is a different type (so the data page sizes will be different)
+    static TEST_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
+        // even rows get a short string, odd rows one longer than 12 bytes
+        let c_values = (0..400).map(|i| match i % 2 {
+            0 => format!("string_{i}"),
+            _ => format!("A string larger than 12 bytes and thus not inlined {i}"),
+        });
+
+        let a: ArrayRef = Arc::new(Int64Array::from_iter_values(0..400));
+        let b: ArrayRef = Arc::new(Int64Array::from_iter_values(400..800));
+        let c: ArrayRef = Arc::new(StringViewArray::from_iter_values(c_values));
+        let columns = vec![("a", a), ("b", b), ("c", c)];
+        RecordBatch::try_from_iter(columns).unwrap()
+    });
+
+    /// In-memory parquet file used by the tests.
+    ///
+    /// See [`TEST_BATCH`] for the data in the file.
+    ///
+    /// The writer is limited to 200 rows per row group and 100 rows per data
+    /// page, so each column is expected to have two data pages per row group.
+    ///
+    /// The data is split across row groups like this
+    ///
+    /// Column | Values                    | Data Page | Row Group
+    /// -------|---------------------------|-----------|-----------
+    /// a      | 0..99                     | 1         | 0
+    /// a      | 100..199                  | 2         | 0
+    /// a      | 200..299                  | 1         | 1
+    /// a      | 300..399                  | 2         | 1
+    ///
+    /// b      | 400..499                  | 1         | 0
+    /// b      | 500..599                  | 2         | 0
+    /// b      | 600..699                  | 1         | 1
+    /// b      | 700..799                  | 2         | 1
+    ///
+    /// c      | strings for rows 0..99    | 1         | 0
+    /// c      | strings for rows 100..199 | 2         | 0
+    /// c      | strings for rows 200..299 | 1         | 1
+    /// c      | strings for rows 300..399 | 2         | 1
+    static TEST_FILE_DATA: LazyLock<Bytes> = LazyLock::new(|| {
+        let input_batch = &TEST_BATCH;
+        let props = WriterProperties::builder()
+            .set_max_row_group_size(200)
+            .set_data_page_row_count_limit(100)
+            .build();
+
+        let mut output = Vec::new();
+        let mut writer =
+            ArrowWriter::try_new(&mut output, input_batch.schema(), Some(props)).unwrap();
+
+        // since the limits are only enforced on batch boundaries, write the input
+        // batch in chunks of 50
+        const CHUNK_ROWS: usize = 50;
+        let total_rows = input_batch.num_rows();
+        for offset in (0..total_rows).step_by(CHUNK_ROWS) {
+            let len = CHUNK_ROWS.min(total_rows - offset);
+            let chunk = input_batch.slice(offset, len);
+            writer.write(&chunk).unwrap();
+        }
+        writer.close().unwrap();
+        Bytes::from(output)
+    });
+
+    /// Total size of [`TEST_FILE_DATA`], in bytes
+    fn test_file_len() -> u64 {
+        u64::try_from(TEST_FILE_DATA.len()).unwrap()
+    }
+
+    /// Return the byte range `0..test_file_len()`, which covers the entire [`TEST_FILE_DATA`]
+    fn test_file_range() -> Range<u64> {
+        0..test_file_len()
+    }
+
+    /// Return the bytes of the test file covered by `range`
+    pub fn test_file_slice(range: Range<u64>) -> Bytes {
+        let Range { start, end } = range;
+        let to_index = |offset: u64| usize::try_from(offset).unwrap();
+        TEST_FILE_DATA.slice(to_index(start)..to_index(end))
+    }
+
+    /// Decode the metadata of the test file by pushing the whole file into a
+    /// [`ParquetMetaDataPushDecoder`]; panics if decoding does not yield data
+    pub fn test_file_parquet_metadata() -> Arc<crate::file::metadata::ParquetMetaData> {
+        let mut decoder = ParquetMetaDataPushDecoder::try_new(test_file_len()).unwrap();
+        push_ranges_to_metadata_decoder(&mut decoder, vec![test_file_range()]);
+        match decoder.try_decode().unwrap() {
+            DecodeResult::Data(metadata) => Arc::new(metadata),
+            _ => panic!("Expected metadata to be decoded successfully"),
+        }
+    }
+
+    /// Push the test file bytes for `ranges` to the metadata decoder, simulating a file read
+    fn push_ranges_to_metadata_decoder(
+        metadata_decoder: &mut ParquetMetaDataPushDecoder,
+        ranges: Vec<Range<u64>>,
+    ) {
+        let mut data = Vec::with_capacity(ranges.len());
+        for range in &ranges {
+            data.push(test_file_slice(range.clone()));
+        }
+        metadata_decoder.push_ranges(ranges, data).unwrap();
+    }
+
+    /// Push the bytes of the test file for each of `ranges` to `decoder`,
+    /// simulating reading from a file; panics if the decoder rejects them
+    fn push_ranges_to_decoder(decoder: &mut ParquetPushDecoder, ranges: Vec<Range<u64>>) {
+        // `ranges` is moved into the decoder below, so clone each range here
+        let data: Vec<Bytes> = ranges.iter().cloned().map(test_file_slice).collect();
+        decoder.push_ranges(ranges, data).unwrap();
+    }
+
+    /// Return the payload of an `Ok(DecodeResult::Data(..))`; panics on `Err` or any other variant
+    fn expect_data<T: Debug>(result: Result<DecodeResult<T>, ParquetError>) -> T {
+        match result.expect("Expected Ok(DecodeResult::Data(T))") {
+            DecodeResult::Data(data) => data,
+            result => panic!("Expected DecodeResult::Data, got {result:?}"),
+        }
+    }
+
+    /// Return the ranges of an `Ok(DecodeResult::NeedsData(..))`; panics on `Err` or any other variant
+    fn expect_needs_data<T: Debug>(
+        result: Result<DecodeResult<T>, ParquetError>,
+    ) -> Vec<Range<u64>> {
+        match result.expect("Expected Ok(DecodeResult::NeedsData{ranges})") {
+            DecodeResult::NeedsData(ranges) => ranges,
+            result => panic!("Expected DecodeResult::NeedsData, got {result:?}"),
+        }
+    }
+
+    /// Panic unless `result` is `Ok(DecodeResult::Finished)`
+    fn expect_finished<T: Debug>(result: Result<DecodeResult<T>, ParquetError>) {
+        let result = result.expect("Expected Ok(DecodeResult::Finished)");
+        let is_finished = matches!(result, DecodeResult::Finished);
+        assert!(is_finished, "Expected DecodeResult::Finished, got {result:?}");
+    }
+}
diff --git a/parquet/src/arrow/push_decoder/reader_builder/data.rs b/parquet/src/arrow/push_decoder/reader_builder/data.rs
new file mode 100644
index 000000000000..6fbc2090b06e
--- /dev/null
+++ b/parquet/src/arrow/push_decoder/reader_builder/data.rs
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`DataRequest`] tracks and holds data needed to construct InMemoryRowGroups
+
+use crate::arrow::ProjectionMask;
+use crate::arrow::arrow_reader::RowSelection;
+use crate::arrow::in_memory_row_group::{ColumnChunkData, FetchRanges, InMemoryRowGroup};
+use crate::errors::ParquetError;
+use crate::file::metadata::ParquetMetaData;
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::file::reader::ChunkReader;
+use crate::util::push_buffers::PushBuffers;
+use bytes::Bytes;
+use std::ops::Range;
+use std::sync::Arc;
+
+/// In-progress state for constructing an `InMemoryRowGroup`
+///
+/// See [`DataRequestBuilder`] for creating new requests
+#[derive(Debug)]
+pub(super) struct DataRequest {
+    /// Any previously read column chunk data, moved into the `InMemoryRowGroup` when it is built
+    column_chunks: Vec<Option<Arc<ColumnChunkData>>>,
+    /// The byte ranges that must be present in the buffers to satisfy this request
+    ranges: Vec<Range<u64>>,
+    /// Optional page start offsets, as returned alongside `ranges` by `fetch_ranges`;
+    /// passed to `fill_column_chunks` when creating the `InMemoryRowGroup`
+    page_start_offsets: Option<Vec<Vec<u64>>>,
+}
+
+impl DataRequest {
+    /// Returns the ranges still needed to satisfy this request, or an empty vec
+    /// if every range is already present in `buffers`
+    pub fn needed_ranges(&self, buffers: &PushBuffers) -> Vec<Range<u64>> {
+        self.ranges
+            .iter()
+            .filter(|&range| !buffers.has_range(range))
+            .cloned()
+            .collect()
+    }
+
+    /// Returns the buffered bytes for each requested range, or an error if any cannot be read
+    fn get_chunks(&self, buffers: &PushBuffers) -> Result<Vec<Bytes>, ParquetError> {
+        self.ranges
+            .iter()
+            .map(|range| {
+                // a length that does not fit in usize is reported as an error, not a panic
+                let length = usize::try_from(range.end - range.start).map_err(|e| {
+                    ParquetError::General(format!("Invalid range {range:?}: {e}"))
+                })?;
+                buffers.get_bytes(range.start, length).map_err(|e| {
+                    ParquetError::General(format!(
+                        "Internal Error missing data for range {range:?} in buffers: {e}",
+                    ))
+                })
+            })
+            .collect()
+    }
+
+    /// Consume this request and create an InMemoryRowGroup filled with the requested data
+    ///
+    /// Returns an error if any requested range cannot be read from `buffers`.
+    /// On success the explicitly requested ranges are cleared from `buffers`.
+    pub fn try_into_in_memory_row_group<'a>(
+        self,
+        row_group_idx: usize,
+        row_count: usize,
+        parquet_metadata: &'a ParquetMetaData,
+        projection: &ProjectionMask,
+        buffers: &mut PushBuffers,
+    ) -> Result<InMemoryRowGroup<'a>, ParquetError> {
+        let chunks = self.get_chunks(buffers)?;
+
+        let Self {
+            column_chunks,
+            ranges,
+            page_start_offsets,
+        } = self;
+
+        // Create an InMemoryRowGroup to hold the column chunks, this is a
+        // temporary structure used to tell the ArrowReaders what pages are
+        // needed for decoding
+        let mut in_memory_row_group = InMemoryRowGroup {
+            row_count,
+            column_chunks,
+            offset_index: get_offset_index(parquet_metadata, row_group_idx),
+            row_group_idx,
+            metadata: parquet_metadata,
+        };
+
+        in_memory_row_group.fill_column_chunks(projection, page_start_offsets, chunks);
+
+        // Clear the ranges that were explicitly requested
+        buffers.clear_ranges(&ranges);
+
+        Ok(in_memory_row_group)
+    }
+}
+
+/// Builder for [`DataRequest`]
+pub(super) struct DataRequestBuilder<'a> {
+    /// The row group index
+    row_group_idx: usize,
+    /// The number of rows in the row group
+    row_count: usize,
+    /// The batch size, passed to `fetch_ranges` when computing the needed ranges
+    batch_size: usize,
+    /// The parquet metadata
+    parquet_metadata: &'a ParquetMetaData,
+    /// The projection mask (which columns to read)
+    projection: &'a ProjectionMask,
+    /// Optional row selection to apply
+    selection: Option<&'a RowSelection>,
+    /// Optional projection mask if using
+    /// [`RowGroupCache`](crate::arrow::array_reader::RowGroupCache)
+    /// for caching decoded columns.
+    cache_projection: Option<&'a ProjectionMask>,
+    /// Any previously read column chunks (`build` starts with all `None` if not provided)
+    column_chunks: Option<Vec<Option<Arc<ColumnChunkData>>>>,
+}
+
+impl<'a> DataRequestBuilder<'a> {
+    pub(super) fn new(
+        row_group_idx: usize,
+        row_count: usize,
+        batch_size: usize,
+        parquet_metadata: &'a ParquetMetaData,
+        projection: &'a ProjectionMask,
+    ) -> Self {
+        Self {
+            row_group_idx,
+            row_count,
+            batch_size,
+            parquet_metadata,
+            projection,
+            selection: None,
+            cache_projection: None,
+            column_chunks: None,
+        }
+    }
+
+    /// Set an optional row selection to apply
+    pub(super) fn with_selection(mut self, selection: Option<&'a RowSelection>) -> Self {
+        self.selection = selection;
+        self
+    }
+
+    /// Set the columns to cache, if any
+    pub(super) fn with_cache_projection(
+        mut self,
+        cache_projection: Option<&'a ProjectionMask>,
+    ) -> Self {
+        self.cache_projection = cache_projection;
+        self
+    }
+
+    /// Provide any previously read column chunks
+    pub(super) fn with_column_chunks(
+        mut self,
+        column_chunks: Option<Vec<Option<Arc<ColumnChunkData>>>>,
+    ) -> Self {
+        self.column_chunks = column_chunks;
+        self
+    }
+
+    pub(crate) fn build(self) -> DataRequest {
+        let Self {
+            row_group_idx,
+            row_count,
+            batch_size,
+            parquet_metadata,
+            projection,
+            selection,
+            cache_projection,
+            column_chunks,
+        } = self;
+
+        let row_group_meta_data = parquet_metadata.row_group(row_group_idx);
+
+        // If no previously read column chunks are provided, start with one empty slot per column
+        let column_chunks =
+            column_chunks.unwrap_or_else(|| vec![None; row_group_meta_data.columns().len()]);
+
+        // Create a temporary InMemoryRowGroup, used here only to compute
+        // which ranges (and page start offsets) need to be fetched for the
+        // projection and selection
+        let row_group = InMemoryRowGroup {
+            row_count,
+            column_chunks,
+            offset_index: get_offset_index(parquet_metadata, row_group_idx),
+            row_group_idx,
+            metadata: parquet_metadata,
+        };
+
+        let FetchRanges {
+            ranges,
+            page_start_offsets,
+        } = row_group.fetch_ranges(projection, selection, batch_size, cache_projection);
+
+        DataRequest {
+            // Hand back the column chunks held by the temporary row group
+            column_chunks: row_group.column_chunks,
+            ranges,
+            page_start_offsets,
+        }
+    }
+}
+
+/// Offset index for `row_group_idx`; `None` if absent or empty (old versions used `Some(vec![])` for absent)
+fn get_offset_index(
+    parquet_metadata: &ParquetMetaData,
+    row_group_idx: usize,
+) -> Option<&[OffsetIndexMetaData]> {
+    match parquet_metadata.offset_index() {
+        Some(index) if !index.is_empty() => Some(index[row_group_idx].as_slice()),
+        _ => None,
+    }
+}
diff --git a/parquet/src/arrow/push_decoder/reader_builder/filter.rs b/parquet/src/arrow/push_decoder/reader_builder/filter.rs
new file mode 100644
index 000000000000..4a3c38e95947
--- /dev/null
+++ b/parquet/src/arrow/push_decoder/reader_builder/filter.rs
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`FilterInfo`] state machine for evaluating row filters
+
+use crate::arrow::ProjectionMask;
+use crate::arrow::array_reader::{CacheOptionsBuilder, RowGroupCache};
+use crate::arrow::arrow_reader::{ArrowPredicate, RowFilter};
+use std::num::NonZeroUsize;
+use std::sync::{Arc, Mutex};
+
+/// State machine for evaluating a sequence of predicates.
+///
+/// The `FilterInfo` owns the [`RowFilter`] being evaluated and tracks the current
+/// predicate to evaluate.
+#[derive(Debug)]
+pub(super) struct FilterInfo {
+    /// The predicates to evaluate, in order
+    ///
+    /// RowFilter is owned by `FilterInfo` because the predicates may be mutated as
+    /// part of evaluation. Specifically, [`ArrowPredicate`] requires &mut self for
+    /// evaluation.
+    filter: RowFilter,
+    /// One more than the index of the current predicate (`current` reads `next_predicate - 1`)
+    next_predicate: NonZeroUsize,
+    /// The cache projection and row group cache used while evaluating the predicates
+    cache_info: CacheInfo,
+}
+
+/// Predicate cache
+///
+/// Note this is basically the same as CacheOptionsBuilder
+/// but it owns the ProjectionMask and RowGroupCache
+#[derive(Debug)]
+pub(super) struct CacheInfo {
+    /// The columns to cache in the predicate cache. Normally these are the columns
+    /// that filters may look at: for a filter like `(a + 10 > 5) AND (a + b = 0)`,
+    /// `a` is cached to avoid re-reading it between evaluating the two predicates.
+    cache_projection: ProjectionMask,
+    row_group_cache: Arc<Mutex<RowGroupCache>>,
+}
+
+impl CacheInfo {
+    pub(super) fn new(
+        cache_projection: ProjectionMask,
+        row_group_cache: Arc<Mutex<RowGroupCache>>,
+    ) -> Self {
+        Self {
+            cache_projection,
+            row_group_cache,
+        }
+    }
+
+    pub(super) fn builder(&self) -> CacheOptionsBuilder<'_> {
+        CacheOptionsBuilder::new(&self.cache_projection, &self.row_group_cache)
+    }
+}
+
+pub(super) enum AdvanceResult {
+    /// Advanced to the next predicate; carries the updated `FilterInfo`
+    Continue(FilterInfo),
+    /// No more predicates; carries the row filter and cache info
+    Done(RowFilter, CacheInfo),
+}
+
+impl FilterInfo {
+    /// Create a new FilterInfo positioned at the first predicate of `filter`
+    pub(super) fn new(filter: RowFilter, cache_info: CacheInfo) -> Self {
+        Self {
+            filter,
+            next_predicate: NonZeroUsize::new(1).expect("1 is always non-zero"),
+            cache_info,
+        }
+    }
+
+    /// Advance to the next predicate
+    ///
+    /// Returns
+    /// * [`AdvanceResult::Continue`] returning the `FilterInfo` if there are
+    ///   more predicates to evaluate.
+    /// * [`AdvanceResult::Done`] with the inner [`RowFilter`] and [`CacheInfo`]
+    ///   if there are no more predicates
+    pub(super) fn advance(mut self) -> AdvanceResult {
+        if self.next_predicate.get() >= self.filter.predicates.len() {
+            AdvanceResult::Done(self.filter, self.cache_info)
+        } else {
+            self.next_predicate = self
+                .next_predicate
+                .checked_add(1)
+                .expect("no usize overflow");
+            AdvanceResult::Continue(self)
+        }
+    }
+
+    /// Return a mutable reference to the current predicate
+    pub(super) fn current_mut(&mut self) -> &mut dyn ArrowPredicate {
+        self.filter
+            .predicates
+            .get_mut(self.next_predicate.get() - 1)
+            // in bounds for a non-empty filter: advance never moves past the last predicate
+            .unwrap()
+            .as_mut()
+    }
+
+    /// Return the current predicate to evaluate
+    pub(super) fn current(&self) -> &dyn ArrowPredicate {
+        self.filter
+            .predicates
+            .get(self.next_predicate.get() - 1)
+            // in bounds for a non-empty filter: advance never moves past the last predicate
+            .unwrap()
+            .as_ref()
+    }
+
+    /// Return a reference to the cache projection
+    pub(super) fn cache_projection(&self) -> &ProjectionMask {
+        &self.cache_info.cache_projection
+    }
+
+    /// Return a cache builder to save the results of predicate evaluation
+    pub(super) fn cache_builder(&self) -> CacheOptionsBuilder<'_> {
+        self.cache_info.builder()
+    }
+
+    /// Returns the inner filter, consuming this FilterInfo
+    pub(super) fn into_filter(self) -> RowFilter {
+        self.filter
+    }
+}
diff --git a/parquet/src/arrow/push_decoder/reader_builder/mod.rs b/parquet/src/arrow/push_decoder/reader_builder/mod.rs
new file mode 100644
index 000000000000..61a244589c6d
--- /dev/null
+++ b/parquet/src/arrow/push_decoder/reader_builder/mod.rs
@@ -0,0 +1,717 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+mod data;
+mod filter;
+
+use crate::DecodeResult;
+use crate::arrow::ProjectionMask;
+use crate::arrow::array_reader::{ArrayReaderBuilder, RowGroupCache};
+use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+use crate::arrow::arrow_reader::selection::RowSelectionStrategy;
+use crate::arrow::arrow_reader::{
+    ParquetRecordBatchReader, ReadPlanBuilder, RowFilter, RowSelection, RowSelectionPolicy,
+};
+use crate::arrow::in_memory_row_group::ColumnChunkData;
+use crate::arrow::push_decoder::reader_builder::data::DataRequestBuilder;
+use crate::arrow::push_decoder::reader_builder::filter::CacheInfo;
+use crate::arrow::schema::ParquetField;
+use crate::errors::ParquetError;
+use crate::file::metadata::ParquetMetaData;
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::util::push_buffers::PushBuffers;
+use bytes::Bytes;
+use data::DataRequest;
+use filter::AdvanceResult;
+use filter::FilterInfo;
+use std::ops::Range;
+use std::sync::{Arc, Mutex};
+
+/// The row group being read (index and row count) and the builder for its read plan
+#[derive(Debug)]
+struct RowGroupInfo {
+    row_group_idx: usize,
+    row_count: usize,
+    plan_builder: ReadPlanBuilder,
+}
+
+/// This is the inner state machine for reading a single row group.
+#[derive(Debug)]
+enum RowGroupDecoderState {
+    Start {
+        row_group_info: RowGroupInfo,
+    },
+    /// Planning filters, but haven't yet requested data to evaluate them
+    Filters {
+        row_group_info: RowGroupInfo,
+        /// Any previously read column chunk data from prior filters
+        column_chunks: Option<Vec<Option<Arc<ColumnChunkData>>>>,
+        filter_info: FilterInfo,
+    },
+    /// Needs data to evaluate current filter
+    WaitingOnFilterData {
+        row_group_info: RowGroupInfo,
+        filter_info: FilterInfo,
+        data_request: DataRequest,
+    },
+    /// All predicates (if any) have been evaluated; ready to plan the final data read
+    StartData {
+        row_group_info: RowGroupInfo,
+        /// Any previously read column chunk data from the filtering phase
+        column_chunks: Option<Vec<Option<Arc<ColumnChunkData>>>>,
+        /// Any cached filter results
+        cache_info: Option<CacheInfo>,
+    },
+    /// Needs data to proceed with reading the output
+    WaitingOnData {
+        row_group_info: RowGroupInfo,
+        data_request: DataRequest,
+        /// Any cached filter results
+        cache_info: Option<CacheInfo>,
+    },
+    /// Finished (or not yet started) reading a row group; `next_row_group` requires this state
+    Finished,
+}
+
+/// Result of a state transition: the state to store next and an optional result to return
+#[derive(Debug)]
+struct NextState {
+    next_state: RowGroupDecoderState,
+    /// result to return, if any
+    ///
+    /// * `Some`: the processing should stop and return the result
+    /// * `None`: processing should continue
+    result: Option<DecodeResult<ParquetRecordBatchReader>>,
+}
+
+impl NextState {
+    /// Create a NextState with no result.
+    ///
+    /// This indicates processing should continue
+    fn again(next_state: RowGroupDecoderState) -> Self {
+        Self {
+            next_state,
+            result: None,
+        }
+    }
+
+    /// Create a NextState with a result that should be returned
+    fn result(
+        next_state: RowGroupDecoderState,
+        result: DecodeResult<ParquetRecordBatchReader>,
+    ) -> Self {
+        Self {
+            next_state,
+            result: Some(result),
+        }
+    }
+}
+
+/// Builder for [`ParquetRecordBatchReader`] for a single row group
+///
+/// This struct drives the main state machine for decoding each row group -- it
+/// determines what data is needed, and then assembles the
+/// `ParquetRecordBatchReader` when all data is available.
+#[derive(Debug)]
+pub(crate) struct RowGroupReaderBuilder {
+    /// The output batch size
+    batch_size: usize,
+
+    /// What columns to project (produce in each output batch)
+    projection: ProjectionMask,
+
+    /// The Parquet file metadata
+    metadata: Arc<ParquetMetaData>,
+
+    /// Top level parquet schema and arrow schema mapping
+    fields: Option<Arc<ParquetField>>,
+
+    /// Optional filter
+    filter: Option<RowFilter>,
+
+    /// Limit to apply to remaining row groups (decremented as rows are read)
+    limit: Option<usize>,
+
+    /// Offset to apply to remaining row groups (decremented as rows are read)
+    offset: Option<usize>,
+
+    /// The size in bytes of the predicate cache to use
+    ///
+    /// See [`RowGroupCache`] for details.
+    max_predicate_cache_size: usize,
+
+    /// The metrics collector
+    metrics: ArrowReaderMetrics,
+
+    /// Strategy for materialising row selections
+    row_selection_policy: RowSelectionPolicy,
+
+    /// Current state of the decoder.
+    ///
+    /// It is taken (leaving `None`) while processing and must be put back
+    /// before returning; a missing state is reported as an internal error.
+    state: Option<RowGroupDecoderState>,
+
+    /// The underlying data store
+    buffers: PushBuffers,
+}
+
+impl RowGroupReaderBuilder {
+    /// Create a new RowGroupReaderBuilder, initially in the `Finished` state
+    #[expect(clippy::too_many_arguments)]
+    pub(crate) fn new(
+        batch_size: usize,
+        projection: ProjectionMask,
+        metadata: Arc<ParquetMetaData>,
+        fields: Option<Arc<ParquetField>>,
+        filter: Option<RowFilter>,
+        limit: Option<usize>,
+        offset: Option<usize>,
+        metrics: ArrowReaderMetrics,
+        max_predicate_cache_size: usize,
+        buffers: PushBuffers,
+        row_selection_policy: RowSelectionPolicy,
+    ) -> Self {
+        Self {
+            batch_size,
+            projection,
+            metadata,
+            fields,
+            filter,
+            limit,
+            offset,
+            metrics,
+            max_predicate_cache_size,
+            row_selection_policy,
+            state: Some(RowGroupDecoderState::Finished),
+            buffers,
+        }
+    }
+
+    /// Push new data (`ranges` and their `buffers`) to the underlying store to satisfy pending requests
+    pub fn push_data(&mut self, ranges: Vec<Range<u64>>, buffers: Vec<Bytes>) {
+        self.buffers.push_ranges(ranges, buffers);
+    }
+
+    /// Returns the total number of buffered bytes, as reported by the underlying [`PushBuffers`]
+    pub fn buffered_bytes(&self) -> u64 {
+        self.buffers.buffered_bytes()
+    }
+
+    /// Take the current state, leaving None in its place.
+    ///
+    /// Returns an error if the state wasn't put back after the previous
+    /// call to [`Self::take_state`].
+    ///
+    /// Any code that calls this method must ensure that the state is put back
+    /// before returning, otherwise the reader will error next time it is called
+    fn take_state(&mut self) -> Result<RowGroupDecoderState, ParquetError> {
+        self.state.take().ok_or_else(|| {
+            ParquetError::General(String::from(
+                "Internal Error: RowGroupReader in invalid state",
+            ))
+        })
+    }
+
+    /// Set up this reader to read the next row group; errors unless the state is `Finished`
+    pub(crate) fn next_row_group(
+        &mut self,
+        row_group_idx: usize,
+        row_count: usize,
+        selection: Option<RowSelection>,
+    ) -> Result<(), ParquetError> {
+        let state = self.take_state()?;
+        if !matches!(state, RowGroupDecoderState::Finished) {
+            return Err(ParquetError::General(format!(
+                "Internal Error: next_row_group called while still reading a row group. Expected Finished state, got {state:?}"
+            )));
+        }
+        let plan_builder = ReadPlanBuilder::new(self.batch_size)
+            .with_selection(selection)
+            .with_row_selection_policy(self.row_selection_policy);
+
+        let row_group_info = RowGroupInfo {
+            row_group_idx,
+            row_count,
+            plan_builder,
+        };
+
+        self.state = Some(RowGroupDecoderState::Start { row_group_info });
+        Ok(())
+    }
+
+    /// Try to build the next `ParquetRecordBatchReader` from this RowGroupReader.
+    ///
+    /// Repeatedly applies [`Self::try_transition`] to the current state until a
+    /// transition produces a result, which is then returned:
+    ///
+    /// * [`DecodeResult::NeedsData`] with the ranges of data needed to proceed.
+    /// * `DecodeResult::Data` with the [`ParquetRecordBatchReader`], once ready.
+    /// * [`DecodeResult::Finished`], e.g. when the row group is ruled out entirely.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the state was not put back by a previous call, or if
+    /// a transition fails (in which case this method does not put the state back).
+    pub(crate) fn try_build(
+        &mut self,
+    ) -> Result<DecodeResult<ParquetRecordBatchReader>, ParquetError> {
+        loop {
+            let current_state = self.take_state()?;
+            // Try to transition the decoder.
+            let NextState {
+                next_state,
+                result,
+            } = self.try_transition(current_state)?;
+
+            // Always put back the next state, whether or not there is a result
+            self.state = Some(next_state);
+
+            // `Some`: either produced a batch reader, needed input, or finished.
+            // `None`: completed one internal state, maybe can proceed further.
+            if let Some(result) = result {
+                return Ok(result);
+            }
+        }
+    }
+
+    /// Current state --> next state + optional output
+    ///
+    /// This is the main state transition function for the row group reader
+    /// and encodes the row group decoding state machine.
+    ///
+    /// # Notes
+    ///
+    /// This structure is used to reduce the indentation level of the main loop
+    /// in try_build
+    fn try_transition(
+        &mut self,
+        current_state: RowGroupDecoderState,
+    ) -> Result<NextState, ParquetError> {
+        let result = match current_state {
+            RowGroupDecoderState::Start { row_group_info } => {
+                let column_chunks = None; // no prior column chunks
+
+                let Some(filter) = self.filter.take() else {
+                    // no filter, start trying to read data immediately
+                    return Ok(NextState::again(RowGroupDecoderState::StartData {
+                        row_group_info,
+                        column_chunks,
+                        cache_info: None,
+                    }));
+                };
+                // no predicates in filter, so start reading immediately
+                if filter.predicates.is_empty() {
+                    return Ok(NextState::again(RowGroupDecoderState::StartData {
+                        row_group_info,
+                        column_chunks,
+                        cache_info: None,
+                    }));
+                };
+
+                // we have predicates to evaluate
+                let cache_projection =
+                    self.compute_cache_projection(row_group_info.row_group_idx, &filter);
+
+                let cache_info = CacheInfo::new(
+                    cache_projection,
+                    Arc::new(Mutex::new(RowGroupCache::new(
+                        self.batch_size,
+                        self.max_predicate_cache_size,
+                    ))),
+                );
+
+                let filter_info = FilterInfo::new(filter, cache_info);
+                NextState::again(RowGroupDecoderState::Filters {
+                    row_group_info,
+                    filter_info,
+                    column_chunks,
+                })
+            }
+            // need to evaluate filters
+            RowGroupDecoderState::Filters {
+                row_group_info,
+                column_chunks,
+                filter_info,
+            } => {
+                let RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                } = row_group_info;
+
+                // If nothing is selected, we are done with this row group
+                if !plan_builder.selects_any() {
+                    // ruled out entire row group
+                    self.filter = Some(filter_info.into_filter());
+                    return Ok(NextState::result(
+                        RowGroupDecoderState::Finished,
+                        DecodeResult::Finished,
+                    ));
+                }
+
+                // Make a request for the data needed to evaluate the current predicate
+                let predicate = filter_info.current();
+
+                // need to fetch pages the column needs for decoding, figure
+                // that out based on the current selection and projection
+                let data_request = DataRequestBuilder::new(
+                    row_group_idx,
+                    row_count,
+                    self.batch_size,
+                    &self.metadata,
+                    predicate.projection(), // use the predicate's projection
+                )
+                .with_selection(plan_builder.selection())
+                // Fetch predicate columns; expand selection only for cached predicate columns
+                .with_cache_projection(Some(filter_info.cache_projection()))
+                .with_column_chunks(column_chunks)
+                .build();
+
+                let row_group_info = RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                };
+
+                NextState::again(RowGroupDecoderState::WaitingOnFilterData {
+                    row_group_info,
+                    filter_info,
+                    data_request,
+                })
+            }
+            RowGroupDecoderState::WaitingOnFilterData {
+                row_group_info,
+                data_request,
+                mut filter_info,
+            } => {
+                // figure out what ranges we still need
+                let needed_ranges = data_request.needed_ranges(&self.buffers);
+                if !needed_ranges.is_empty() {
+                    // still need data
+                    return Ok(NextState::result(
+                        RowGroupDecoderState::WaitingOnFilterData {
+                            row_group_info,
+                            filter_info,
+                            data_request,
+                        },
+                        DecodeResult::NeedsData(needed_ranges),
+                    ));
+                }
+
+                // otherwise we have all the data we need to evaluate the predicate
+                let RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    mut plan_builder,
+                } = row_group_info;
+
+                let predicate = filter_info.current();
+
+                let row_group = data_request.try_into_in_memory_row_group(
+                    row_group_idx,
+                    row_count,
+                    &self.metadata,
+                    predicate.projection(),
+                    &mut self.buffers,
+                )?;
+
+                let cache_options = filter_info.cache_builder().producer();
+
+                let array_reader = ArrayReaderBuilder::new(&row_group, &self.metrics)
+                    .with_cache_options(Some(&cache_options))
+                    .with_parquet_metadata(&self.metadata)
+                    .build_array_reader(self.fields.as_deref(), predicate.projection())?;
+
+                plan_builder =
+                    plan_builder.with_predicate(array_reader, filter_info.current_mut())?;
+
+                let row_group_info = RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                };
+
+                // Take back the column chunks that were read
+                let column_chunks = Some(row_group.column_chunks);
+
+                // advance to the next predicate, if any
+                match filter_info.advance() {
+                    AdvanceResult::Continue(filter_info) => {
+                        NextState::again(RowGroupDecoderState::Filters {
+                            row_group_info,
+                            column_chunks,
+                            filter_info,
+                        })
+                    }
+                    // done with predicates, proceed to reading data
+                    AdvanceResult::Done(filter, cache_info) => {
+                        // remember we need to put back the filter
+                        assert!(self.filter.is_none());
+                        self.filter = Some(filter);
+                        NextState::again(RowGroupDecoderState::StartData {
+                            row_group_info,
+                            column_chunks,
+                            cache_info: Some(cache_info),
+                        })
+                    }
+                }
+            }
+            RowGroupDecoderState::StartData {
+                row_group_info,
+                column_chunks,
+                cache_info,
+            } => {
+                let RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                } = row_group_info;
+
+                // Compute the number of rows in the selection before applying limit and offset
+                let rows_before = plan_builder.num_rows_selected().unwrap_or(row_count);
+
+                if rows_before == 0 {
+                    // ruled out entire row group
+                    return Ok(NextState::result(
+                        RowGroupDecoderState::Finished,
+                        DecodeResult::Finished,
+                    ));
+                }
+
+                // Apply any limit and offset
+                let mut plan_builder = plan_builder
+                    .limited(row_count)
+                    .with_offset(self.offset)
+                    .with_limit(self.limit)
+                    .build_limited();
+
+                let rows_after = plan_builder.num_rows_selected().unwrap_or(row_count);
+
+                // Update running offset and limit for after the current row group is read
+                if let Some(offset) = &mut self.offset {
+                    // Reduction is either because of offset or limit, as limit is applied
+                    // after offset has been "exhausted" can just use saturating sub here
+                    *offset = offset.saturating_sub(rows_before - rows_after)
+                }
+
+                if rows_after == 0 {
+                    // no rows left after applying limit/offset
+                    return Ok(NextState::result(
+                        RowGroupDecoderState::Finished,
+                        DecodeResult::Finished,
+                    ));
+                }
+
+                if let Some(limit) = &mut self.limit {
+                    *limit -= rows_after;
+                }
+
+                let data_request = DataRequestBuilder::new(
+                    row_group_idx,
+                    row_count,
+                    self.batch_size,
+                    &self.metadata,
+                    &self.projection,
+                )
+                .with_selection(plan_builder.selection())
+                .with_column_chunks(column_chunks)
+                // Final projection fetch shouldn't expand selection for cache
+                // so don't call with_cache_projection here
+                .build();
+
+                plan_builder = plan_builder.with_row_selection_policy(self.row_selection_policy);
+
+                plan_builder = override_selector_strategy_if_needed(
+                    plan_builder,
+                    &self.projection,
+                    self.row_group_offset_index(row_group_idx),
+                );
+
+                let row_group_info = RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                };
+
+                NextState::again(RowGroupDecoderState::WaitingOnData {
+                    row_group_info,
+                    data_request,
+                    cache_info,
+                })
+            }
+            // Waiting on data to proceed with reading the output
+            RowGroupDecoderState::WaitingOnData {
+                row_group_info,
+                data_request,
+                cache_info,
+            } => {
+                let needed_ranges = data_request.needed_ranges(&self.buffers);
+                if !needed_ranges.is_empty() {
+                    // still need data
+                    return Ok(NextState::result(
+                        RowGroupDecoderState::WaitingOnData {
+                            row_group_info,
+                            data_request,
+                            cache_info,
+                        },
+                        DecodeResult::NeedsData(needed_ranges),
+                    ));
+                }
+
+                // otherwise we have all the data we need to proceed
+                let RowGroupInfo {
+                    row_group_idx,
+                    row_count,
+                    plan_builder,
+                } = row_group_info;
+
+                let row_group = data_request.try_into_in_memory_row_group(
+                    row_group_idx,
+                    row_count,
+                    &self.metadata,
+                    &self.projection,
+                    &mut self.buffers,
+                )?;
+
+                let plan = plan_builder.build();
+
+                // if we have any cached results, connect them up
+                let array_reader_builder = ArrayReaderBuilder::new(&row_group, &self.metrics)
+                    .with_parquet_metadata(&self.metadata);
+                let array_reader = if let Some(cache_info) = cache_info.as_ref() {
+                    let cache_options = cache_info.builder().consumer();
+                    array_reader_builder
+                        .with_cache_options(Some(&cache_options))
+                        .build_array_reader(self.fields.as_deref(), &self.projection)
+                } else {
+                    array_reader_builder
+                        .build_array_reader(self.fields.as_deref(), &self.projection)
+                }?;
+
+                let reader = ParquetRecordBatchReader::new(array_reader, plan);
+                NextState::result(RowGroupDecoderState::Finished, DecodeResult::Data(reader))
+            }
+            RowGroupDecoderState::Finished => {
+                // nothing left to read
+                NextState::result(RowGroupDecoderState::Finished, DecodeResult::Finished)
+            }
+        };
+        Ok(result)
+    }
+
+    /// Projection mask of the columns whose values should be cached.
+    ///
+    /// These are the columns read by the filters that also appear in the final
+    /// projection, minus nested columns; `ProjectionMask::none` if there are none.
+    fn compute_cache_projection(&self, row_group_idx: usize, filter: &RowFilter) -> ProjectionMask {
+        // Row group metadata is looked up up front, independent of the outcome below
+        let meta = self.metadata.row_group(row_group_idx);
+        // Nothing cacheable: fall back to a mask built from the column count alone
+        self.compute_cache_projection_inner(filter)
+            .unwrap_or_else(|| ProjectionMask::none(meta.columns().len()))
+    }
+
+    fn compute_cache_projection_inner(&self, filter: &RowFilter) -> Option<ProjectionMask> {
+        // A predicate cache of size zero means caching is disabled: skip the work
+        if self.max_predicate_cache_size == 0 {
+            return None;
+        }
+        let mut mask = filter.predicates.first()?.projection().clone();
+        for predicate in &filter.predicates {
+            mask.union(predicate.projection());
+        }
+        mask.intersect(&self.projection);
+        self.exclude_nested_columns_from_cache(&mask)
+    }
+
+    /// Remove nested columns (leaves of roots spanning multiple parquet leaves) per the file schema
+    fn exclude_nested_columns_from_cache(&self, mask: &ProjectionMask) -> Option<ProjectionMask> {
+        mask.without_nested_types(self.metadata.file_metadata().schema_descr())
+    }
+
+    /// Offset index entries for the columns of the given row group, if present
+    fn row_group_offset_index(&self, row_group_idx: usize) -> Option<&[OffsetIndexMetaData]> {
+        let index = self.metadata.offset_index()?;
+        if index.is_empty() {
+            return None;
+        }
+        index.get(row_group_idx).map(|columns| columns.as_slice())
+    }
+}
+
+/// Downgrade an `Auto` selection policy from masks to selectors when required.
+///
+/// While a row group is being assembled, pages that the selection never
+/// touches may be skipped entirely, so their data is never loaded and their
+/// definition/repetition levels are never read. Selector-based reads cope
+/// with this because `skip_records()` steps over such pages.
+///
+/// Mask-based reads, in contrast, decode every value first and apply the
+/// mask afterwards. If a page was skipped while assembling the row group,
+/// its values are not available and decoding cannot proceed.
+///
+/// For example:
+/// * page size is 2 and the mask is 100001, i.e. read(1) skip(4) read(1)
+/// * the `ColumnChunkData` holds page1(10), page2(skipped), page3(01)
+///
+/// Because skip(4) covers page2 completely, that page is never fetched.
+/// Applying the bit mask would need all 6 values decoded, which is
+/// impossible without page2, so selectors must be used instead. Explicit
+/// `Mask` / `Selectors` policies are respected and returned unchanged.
+fn override_selector_strategy_if_needed(
+    plan_builder: ReadPlanBuilder,
+    projection_mask: &ProjectionMask,
+    offset_index: Option<&[OffsetIndexMetaData]>,
+) -> ReadPlanBuilder {
+    // An explicit Mask or Selectors policy is the caller's choice and is left
+    // untouched; only the Auto policy gets resolved here
+    let policy = plan_builder.row_selection_policy();
+    if !matches!(policy, RowSelectionPolicy::Auto { .. }) {
+        return plan_builder;
+    }
+
+    // Start from the strategy the plan builder would pick on its own
+    let new_policy = match plan_builder.resolve_selection_strategy() {
+        RowSelectionStrategy::Selectors => RowSelectionPolicy::Selectors,
+        RowSelectionStrategy::Mask => {
+            // A mask is only kept when the selection does not force selectors
+            let must_use_selectors = plan_builder
+                .selection()
+                .is_some_and(|s| s.should_force_selectors(projection_mask, offset_index));
+            if must_use_selectors {
+                RowSelectionPolicy::Selectors
+            } else {
+                RowSelectionPolicy::Mask
+            }
+        }
+    };
+
+    // Pin the resolved policy on the plan builder
+    plan_builder.with_row_selection_policy(new_policy)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    // Guard against RowGroupDecoderState growing unexpectedly: 200 is its expected size in bytes
+    fn test_structure_size() {
+        assert_eq!(std::mem::size_of::<RowGroupDecoderState>(), 200);
+    }
+}
diff --git a/parquet/src/arrow/push_decoder/remaining.rs b/parquet/src/arrow/push_decoder/remaining.rs
new file mode 100644
index 000000000000..4613fda08749
--- /dev/null
+++ b/parquet/src/arrow/push_decoder/remaining.rs
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::DecodeResult;
+use crate::arrow::arrow_reader::{ParquetRecordBatchReader, RowSelection};
+use crate::arrow::push_decoder::reader_builder::RowGroupReaderBuilder;
+use crate::errors::ParquetError;
+use crate::file::metadata::ParquetMetaData;
+use bytes::Bytes;
+use std::collections::VecDeque;
+use std::ops::Range;
+use std::sync::Arc;
+
+/// State machine that tracks which high level chunks (row groups) of
+/// Parquet data are still left to read.
+///
+/// The unit of work is currently a row group, but the author aspires to extend
+/// the pattern to data boundaries other than RowGroups in the future.
+#[derive(Debug)]
+pub(crate) struct RemainingRowGroups {
+    /// The underlying Parquet metadata; consulted for each row group's row count
+    parquet_metadata: Arc<ParquetMetaData>,
+
+    /// Indexes of the row groups not yet started, consumed front to back
+    row_groups: VecDeque<usize>,
+
+    /// Remaining selection; `row_count` rows are split off for each row group
+    selection: Option<RowSelection>,
+
+    /// State for building the reader for the current row group
+    row_group_reader_builder: RowGroupReaderBuilder,
+}
+
+impl RemainingRowGroups {
+    /// Creates the state machine; `row_groups` are visited in the order given
+    pub fn new(
+        parquet_metadata: Arc<ParquetMetaData>,
+        row_groups: Vec<usize>,
+        selection: Option<RowSelection>,
+        row_group_reader_builder: RowGroupReaderBuilder,
+    ) -> Self {
+        // Held as a queue so that `try_next_reader` can pop from the front
+        let row_groups = VecDeque::from(row_groups);
+        Self {
+            parquet_metadata,
+            row_groups,
+            selection,
+            row_group_reader_builder,
+        }
+    }
+
+    /// Hands newly available data buffers (and the byte ranges they cover) to
+    /// the row group reader builder, where they can satisfy pending requests
+    pub fn push_data(&mut self, ranges: Vec<Range<u64>>, buffers: Vec<Bytes>) {
+        self.row_group_reader_builder.push_data(ranges, buffers);
+    }
+
+    /// Total number of bytes buffered so far, as reported by the reader builder
+    pub fn buffered_bytes(&self) -> u64 {
+        self.row_group_reader_builder.buffered_bytes()
+    }
+
+    /// Advances to the next readable group of rows.
+    ///
+    /// Returns one of:
+    /// * [`DecodeResult::Data`]: a [`ParquetRecordBatchReader`] for the next
+    ///   group of rows
+    /// * [`DecodeResult::NeedsData`]: the data ranges still needed to proceed
+    /// * [`DecodeResult::Finished`]: no row groups are left to read
+    pub fn try_next_reader(
+        &mut self,
+    ) -> Result<DecodeResult<ParquetRecordBatchReader>, ParquetError> {
+        loop {
+            // Ask the builder whether the current row group can be read yet
+            match self.row_group_reader_builder.try_build()? {
+                // More input is required, or a reader is ready: either way
+                // the caller has to act, so hand the result straight back
+                result @ (DecodeResult::NeedsData(_) | DecodeResult::Data(_)) => {
+                    return Ok(result);
+                }
+                // No current reader, which also happens when the row group
+                // was completely filtered out: continue with the next one
+                DecodeResult::Finished => {}
+            }
+
+            // Take the next row group; once none are left, decoding is done
+            let Some(row_group_idx) = self.row_groups.pop_front() else {
+                return Ok(DecodeResult::Finished);
+            };
+
+            // The row count reported by the metadata has to fit into a `usize`
+            let num_rows = self.parquet_metadata.row_group(row_group_idx).num_rows();
+            let row_count = usize::try_from(num_rows)
+                .map_err(|e| ParquetError::General(format!("Row count overflow: {e}")))?;
+
+            // Split this row group's rows off the remaining selection and
+            // start building a reader for it
+            let selection = self.selection.as_mut().map(|s| s.split_off(row_count));
+            self.row_group_reader_builder
+                .next_row_group(row_group_idx, row_count, selection)?;
+            // the next loop iteration tries to build the reader for this row group
+        }
+    }
+}
diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs
index a90b3c4ec795..8fe26a9b5234 100644
--- a/parquet/src/arrow/record_reader/definition_levels.rs
+++ b/parquet/src/arrow/record_reader/definition_levels.rs
@@ -16,8 +16,8 @@
 // under the License.
 
 use arrow_array::builder::BooleanBufferBuilder;
-use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
 use arrow_buffer::Buffer;
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
 use bytes::Bytes;
 
 use crate::arrow::buffer::bit_util::count_set_bits;
@@ -131,11 +131,12 @@ impl DefinitionLevelBufferDecoder {
 impl ColumnLevelDecoder for DefinitionLevelBufferDecoder {
     type Buffer = DefinitionLevelBuffer;
 
-    fn set_data(&mut self, encoding: Encoding, data: Bytes) {
+    fn set_data(&mut self, encoding: Encoding, data: Bytes) -> Result<()> {
         match &mut self.decoder {
             MaybePacked::Packed(d) => d.set_data(encoding, data),
-            MaybePacked::Fallback(d) => d.set_data(encoding, data),
-        }
+            MaybePacked::Fallback(d) => d.set_data(encoding, data)?,
+        };
+        Ok(())
     }
 }
 
@@ -351,7 +352,7 @@ mod tests {
     use super::*;
 
     use crate::encodings::rle::RleEncoder;
-    use rand::{rng, Rng};
+    use rand::{Rng, rng};
 
     #[test]
     fn test_packed_decoder() {
diff --git a/parquet/src/arrow/record_reader/mod.rs b/parquet/src/arrow/record_reader/mod.rs
index fbcb1069e49c..758aea6ede36 100644
--- a/parquet/src/arrow/record_reader/mod.rs
+++ b/parquet/src/arrow/record_reader/mod.rs
@@ -25,8 +25,8 @@ use crate::column::reader::decoder::RepetitionLevelDecoderImpl;
 use crate::column::{
     page::PageReader,
     reader::{
-        decoder::{ColumnValueDecoder, ColumnValueDecoderImpl},
         GenericColumnReader,
+        decoder::{ColumnValueDecoder, ColumnValueDecoderImpl},
     },
 };
 use crate::data_type::DataType;
diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs
index 16d46bd852dc..8b85cac479c1 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -18,13 +18,15 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 
+use crate::arrow::schema::extension::try_add_extension_type;
 use crate::arrow::schema::primitive::convert_primitive;
-use crate::arrow::{ProjectionMask, PARQUET_FIELD_ID_META_KEY};
+use crate::arrow::schema::virtual_type::RowNumber;
+use crate::arrow::{PARQUET_FIELD_ID_META_KEY, ProjectionMask};
 use crate::basic::{ConvertedType, Repetition};
 use crate::errors::ParquetError;
 use crate::errors::Result;
 use crate::schema::types::{SchemaDescriptor, Type, TypePtr};
-use arrow_schema::{DataType, Field, Fields, SchemaBuilder};
+use arrow_schema::{DataType, Field, Fields, SchemaBuilder, extension::ExtensionType};
 
 fn get_repetition(t: &Type) -> Repetition {
     let info = t.get_basic_info();
@@ -76,10 +78,18 @@ impl ParquetField {
         match &self.field_type {
             ParquetFieldType::Primitive { .. } => None,
             ParquetFieldType::Group { children } => Some(children),
+            ParquetFieldType::Virtual(_) => None,
         }
     }
 }
 
+/// Types of virtual columns, i.e. columns computed at read time instead of stored in the file
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum VirtualColumnType {
+    /// Row number within the file
+    RowNumber,
+}
+
 #[derive(Debug, Clone)]
 pub enum ParquetFieldType {
     Primitive {
@@ -91,6 +101,9 @@ pub enum ParquetFieldType {
     Group {
         children: Vec<ParquetField>,
     },
+    /// Virtual column that doesn't exist in the parquet file
+    /// but is computed at read time (e.g., row_number)
+    Virtual(VirtualColumnType),
 }
 
 /// Encodes the context of the parent of the field currently under consideration
@@ -172,7 +185,7 @@ impl Visitor {
 
         let parquet_fields = struct_type.get_fields();
 
-        // Extract the arrow fields
+        // Extract any arrow fields from the hints
         let arrow_fields = match &context.data_type {
             Some(DataType::Struct(fields)) => {
                 if fields.len() != parquet_fields.len() {
@@ -188,7 +201,7 @@ impl Visitor {
                 return Err(arrow_err!(
                     "incompatible arrow schema, expected struct got {}",
                     d
-                ))
+                ));
             }
             None => None,
         };
@@ -220,10 +233,10 @@ impl Visitor {
                 data_type,
             };
 
-            if let Some(child) = self.dispatch(parquet_field, child_ctx)? {
+            if let Some(mut child) = self.dispatch(parquet_field, child_ctx)? {
                 // The child type returned may be different from what is encoded in the arrow
                 // schema in the event of a mismatch or a projection
-                child_fields.push(convert_field(parquet_field, &child, arrow_field));
+                child_fields.push(convert_field(parquet_field, &mut child, arrow_field)?);
                 children.push(child);
             }
         }
@@ -325,7 +338,7 @@ impl Visitor {
                 return Err(arrow_err!(
                     "incompatible arrow schema, expected map got {}",
                     d
-                ))
+                ));
             }
             None => (None, None, None, false),
         };
@@ -352,13 +365,13 @@ impl Visitor {
 
         // Need both columns to be projected
         match (maybe_key, maybe_value) {
-            (Some(key), Some(value)) => {
+            (Some(mut key), Some(mut value)) => {
                 let key_field = Arc::new(
-                    convert_field(map_key, &key, arrow_key)
+                    convert_field(map_key, &mut key, arrow_key)?
                         // The key is always non-nullable (#5630)
                         .with_nullable(false),
                 );
-                let value_field = Arc::new(convert_field(map_value, &value, arrow_value));
+                let value_field = Arc::new(convert_field(map_value, &mut value, arrow_value)?);
                 let field_metadata = match arrow_map {
                     Some(field) => field.metadata().clone(),
                     _ => HashMap::default(),
@@ -425,7 +438,7 @@ impl Visitor {
                 return Err(arrow_err!(
                     "incompatible arrow schema, expected list got {}",
                     d
-                ))
+                ));
             }
             None => None,
         };
@@ -495,8 +508,8 @@ impl Visitor {
         };
 
         match self.dispatch(item_type, new_context) {
-            Ok(Some(item)) => {
-                let item_field = Arc::new(convert_field(item_type, &item, arrow_field));
+            Ok(Some(mut item)) => {
+                let item_field = Arc::new(convert_field(item_type, &mut item, arrow_field)?);
 
                 // Use arrow type as hint for index size
                 let arrow_type = match context.data_type {
@@ -540,11 +553,64 @@ impl Visitor {
     }
 }
 
-/// Computes the [`Field`] for a child column
+/// Builds the [`ParquetField`] describing a virtual Arrow [`Field`]
+///
+/// A virtual field has no backing data in the parquet file; its values are
+/// computed at read time (e.g., row_number)
+///
+/// Levels are derived from the parent context:
+/// - def_level = parent_def_level + 1 for a nullable field, else parent_def_level
+/// - rep_level = parent_rep_level (virtual fields are not repeated)
+/// - errors if the extension type name is missing or not a known virtual type
+pub(super) fn convert_virtual_field(
+    arrow_field: &Field,
+    parent_rep_level: i16,
+    parent_def_level: i16,
+) -> Result<ParquetField> {
+    let nullable = arrow_field.is_nullable();
+    // Nullability costs exactly one additional definition level
+    let def_level = parent_def_level + i16::from(nullable);
+
+    // The kind of virtual column is identified by the extension type name,
+    // so a field without one cannot be a virtual column
+    let Some(extension_name) = arrow_field.extension_type_name() else {
+        return Err(ParquetError::ArrowError(format!(
+            "virtual column field '{}' must have an extension type",
+            arrow_field.name()
+        )));
+    };
+
+    // Map the extension name onto a known virtual column type
+    let virtual_type = match extension_name {
+        RowNumber::NAME => Ok(VirtualColumnType::RowNumber),
+        other => Err(ParquetError::ArrowError(format!(
+            "unsupported virtual column type '{}' for field '{}'",
+            other,
+            arrow_field.name()
+        ))),
+    }?;
+
+    // Virtual fields are never repeated, so the parent's repetition level
+    // carries over unchanged
+    let field = ParquetField {
+        rep_level: parent_rep_level,
+        def_level,
+        nullable,
+        arrow_type: arrow_field.data_type().clone(),
+        field_type: ParquetFieldType::Virtual(virtual_type),
+    };
+    Ok(field)
+}
+
+/// Computes the Arrow [`Field`] for a child column
 ///
-/// The resulting [`Field`] will have the type dictated by `field`, a name
+/// The resulting Arrow [`Field`] will have the type dictated by the Parquet `field`, a name
 /// dictated by the `parquet_type`, and any metadata from `arrow_hint`
-fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&Field>) -> Field {
+fn convert_field(
+    parquet_type: &Type,
+    field: &mut ParquetField,
+    arrow_hint: Option<&Field>,
+) -> Result<Field, ParquetError> {
     let name = parquet_type.name();
     let data_type = field.arrow_type.clone();
     let nullable = field.nullable;
@@ -562,7 +628,7 @@ fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&
                 _ => Field::new(name, data_type, nullable),
             };
 
-            field.with_metadata(hint.metadata().clone())
+            Ok(field.with_metadata(hint.metadata().clone()))
         }
         None => {
             let mut ret = Field::new(name, data_type, nullable);
@@ -575,7 +641,7 @@ fn convert_field(parquet_type: &Type, field: &ParquetField, arrow_hint: Option<&
                 );
                 ret.set_metadata(meta);
             }
-            ret
+            try_add_extension_type(ret, parquet_type)
         }
     }
 }
diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs
new file mode 100644
index 000000000000..247101964668
--- /dev/null
+++ b/parquet/src/arrow/schema/extension.rs
@@ -0,0 +1,187 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Arrow Extension Type Support for Parquet
+//!
+//! This module contains mapping code to map Parquet [`LogicalType`]s to/from
+//! Arrow [`ExtensionType`]s.
+//!
+//! Extension types are represented using the metadata from Arrow [`Field`]s
+//! with the key "ARROW:extension:name".
+
+use crate::basic::LogicalType;
+use crate::errors::ParquetError;
+use crate::schema::types::Type;
+use arrow_schema::Field;
+use arrow_schema::extension::ExtensionType;
+
+/// Adds extension type metadata, if necessary, based on the Parquet field's
+/// [`LogicalType`]; the field is returned unchanged when no mapping applies.
+///
+/// Some Parquet logical types, such as Variant, do not map directly to an
+/// Arrow DataType, and are instead represented by an Arrow ExtensionType,
+/// attached to the Field via metadata. Each mapping is feature-gated.
+pub(crate) fn try_add_extension_type(
+    mut arrow_field: Field,
+    parquet_type: &Type,
+) -> Result<Field, ParquetError> {
+    let Some(parquet_logical_type) = parquet_type.get_basic_info().logical_type_ref() else {
+        return Ok(arrow_field);
+    };
+    match parquet_logical_type {
+        #[cfg(feature = "variant_experimental")]
+        LogicalType::Variant { .. } => {
+            arrow_field.try_with_extension_type(parquet_variant_compute::VariantType)?;
+        }
+        #[cfg(feature = "arrow_canonical_extension_types")]
+        LogicalType::Uuid => {
+            arrow_field.try_with_extension_type(arrow_schema::extension::Uuid)?;
+        }
+        #[cfg(feature = "arrow_canonical_extension_types")]
+        LogicalType::Json => {
+            arrow_field.try_with_extension_type(arrow_schema::extension::Json::default())?;
+        }
+        #[cfg(feature = "geospatial")]
+        LogicalType::Geometry { crs } => {
+            let md = parquet_geospatial::WkbMetadata::new(crs.as_deref(), None);
+            arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?;
+        }
+        #[cfg(feature = "geospatial")]
+        LogicalType::Geography { crs, algorithm } => {
+            let algorithm = algorithm.map(|a| a.try_as_edges()).transpose()?;
+            let md = parquet_geospatial::WkbMetadata::new(crs.as_deref(), algorithm);
+            arrow_field.try_with_extension_type(parquet_geospatial::WkbType::new(Some(md)))?;
+        }
+        _ => {}
+    };
+    Ok(arrow_field)
+}
+
+/// Reports whether [`try_add_extension_type`] would attach an extension type
+/// to the Arrow field created for the specified Parquet field.
+///
+/// Callers use this to preallocate the metadata hashmap size
+pub(crate) fn has_extension_type(parquet_type: &Type) -> bool {
+    // Every arm is gated on the same feature as its counterpart in
+    // `try_add_extension_type`. A missing logical type, or one without a
+    // mapping, falls through to the wildcard arm.
+    match parquet_type.get_basic_info().logical_type_ref() {
+        #[cfg(feature = "variant_experimental")]
+        Some(LogicalType::Variant { .. }) => true,
+        #[cfg(feature = "arrow_canonical_extension_types")]
+        Some(LogicalType::Uuid) => true,
+        #[cfg(feature = "arrow_canonical_extension_types")]
+        Some(LogicalType::Json) => true,
+        #[cfg(feature = "geospatial")]
+        Some(LogicalType::Geometry { .. }) => true,
+        #[cfg(feature = "geospatial")]
+        Some(LogicalType::Geography { .. }) => true,
+        _ => false,
+    }
+}
+
+/// Parquet logical type for an Arrow Struct field carrying the Variant extension type, if any.
+#[cfg(feature = "variant_experimental")]
+pub(crate) fn logical_type_for_struct(field: &Field) -> Option<LogicalType> {
+    use parquet_variant_compute::VariantType;
+    // Cheap name comparison first, so non-matching fields never allocate an ArrowError String
+    if field.extension_type_name()? != VariantType::NAME {
+        return None;
+    }
+    // Not expected to fail after the name check; if it does, the error is ignored
+    field
+        .try_extension_type::<VariantType>()
+        .ok()
+        .map(|_| LogicalType::Variant {
+            specification_version: None,
+        })
+}
+
+/// Without the `variant_experimental` feature no Struct field maps to a logical type.
+#[cfg(not(feature = "variant_experimental"))]
+pub(crate) fn logical_type_for_struct(_field: &Field) -> Option<LogicalType> {
+    None
+}
+
+/// Return the Parquet logical type (Uuid) for an Arrow fixed size binary field, if any.
+#[cfg(feature = "arrow_canonical_extension_types")]
+pub(crate) fn logical_type_for_fixed_size_binary(field: &Field) -> Option<LogicalType> {
+    use arrow_schema::extension::Uuid;
+    match field.try_extension_type::<Uuid>() {
+        Ok(_) => Some(LogicalType::Uuid),
+        Err(_) => None,
+    }
+}
+
+/// Without `arrow_canonical_extension_types` no fixed size binary field maps to a logical type.
+#[cfg(not(feature = "arrow_canonical_extension_types"))]
+pub(crate) fn logical_type_for_fixed_size_binary(_field: &Field) -> Option<LogicalType> {
+    None
+}
+
+/// Parquet logical type for an Arrow string field (Utf8, LargeUtf8): Json or (by default) String.
+#[cfg(feature = "arrow_canonical_extension_types")]
+pub(crate) fn logical_type_for_string(field: &Field) -> Option<LogicalType> {
+    use arrow_schema::extension::Json;
+    Some(match field.try_extension_type::<Json>() {
+        Ok(_) => LogicalType::Json,
+        Err(_) => LogicalType::String,
+    })
+}
+
+/// Without `arrow_canonical_extension_types`, string fields always map to String.
+#[cfg(not(feature = "arrow_canonical_extension_types"))]
+pub(crate) fn logical_type_for_string(_field: &Field) -> Option<LogicalType> {
+    Some(LogicalType::String)
+}
+
+/// Return the Parquet logical type for an Arrow binary field with the WKB extension type, if any.
+#[cfg(feature = "geospatial")]
+pub(crate) fn logical_type_for_binary(field: &Field) -> Option<LogicalType> {
+    use parquet_geospatial::WkbType;
+    use parquet_geospatial::WkbTypeHint;
+
+    // Compare the extension name first; only a match goes on to `try_extension_type`
+    if field.extension_type_name()? != WkbType::NAME {
+        return None;
+    }
+    let wkb_type = field.try_extension_type::<WkbType>().ok()?;
+    let crs = wkb_type.metadata().crs.as_ref().map(|c| c.to_string());
+    match wkb_type.metadata().type_hint() {
+        WkbTypeHint::Geometry => Some(LogicalType::Geometry { crs }),
+        WkbTypeHint::Geography => Some(LogicalType::Geography {
+            crs,
+            algorithm: wkb_type.metadata().algorithm.map(|a| a.into()),
+        }),
+    }
+}
+
+/// Without the `geospatial` feature no binary field maps to a logical type.
+#[cfg(not(feature = "geospatial"))]
+pub(crate) fn logical_type_for_binary(_field: &Field) -> Option<LogicalType> {
+    None
+}
+
+/// Return the Parquet logical type to use for the specified Arrow binary view field, if any.
+///
+/// Binary view fields follow the same mapping as binary fields, so this simply
+/// delegates to [`logical_type_for_binary`]. That function already has a
+/// `geospatial` and a non-`geospatial` variant (the latter returns `None`), so
+/// no feature gate is needed here and no parameter is left unused.
+pub(crate) fn logical_type_for_binary_view(field: &Field) -> Option<LogicalType> {
+    logical_type_for_binary(field)
+}
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 975b48dd04a3..b33f9c14dde5 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -17,15 +17,13 @@
 
 //! Converting Parquet schema <--> Arrow schema: [`ArrowSchemaConverter`] and [parquet_to_arrow_schema]
 
-use base64::prelude::BASE64_STANDARD;
 use base64::Engine;
+use base64::prelude::BASE64_STANDARD;
 use std::collections::HashMap;
 use std::sync::Arc;
 
 use arrow_ipc::writer;
-#[cfg(feature = "arrow_canonical_extension_types")]
-use arrow_schema::extension::{Json, Uuid};
-use arrow_schema::{DataType, Field, Fields, Schema, TimeUnit};
+use arrow_schema::{DataType, Field, FieldRef, Fields, Schema, TimeUnit};
 
 use crate::basic::{
     ConvertedType, LogicalType, Repetition, TimeUnit as ParquetTimeUnit, Type as PhysicalType,
@@ -35,12 +33,18 @@ use crate::file::{metadata::KeyValue, properties::WriterProperties};
 use crate::schema::types::{ColumnDescriptor, SchemaDescriptor, Type};
 
 mod complex;
+mod extension;
 mod primitive;
-
-use crate::arrow::ProjectionMask;
-pub(crate) use complex::{ParquetField, ParquetFieldType};
+pub mod virtual_type;
 
 use super::PARQUET_FIELD_ID_META_KEY;
+use crate::arrow::ProjectionMask;
+use crate::arrow::schema::extension::{
+    has_extension_type, logical_type_for_binary, logical_type_for_binary_view,
+    logical_type_for_fixed_size_binary, logical_type_for_string, logical_type_for_struct,
+    try_add_extension_type,
+};
+pub(crate) use complex::{ParquetField, ParquetFieldType, VirtualColumnType};
 
 /// Convert Parquet schema to Arrow schema including optional metadata
 ///
@@ -60,14 +64,19 @@ pub fn parquet_to_arrow_schema_by_columns(
     mask: ProjectionMask,
     key_value_metadata: Option<&Vec<KeyValue>>,
 ) -> Result<Schema> {
-    Ok(parquet_to_arrow_schema_and_fields(parquet_schema, mask, key_value_metadata)?.0)
+    Ok(parquet_to_arrow_schema_and_fields(parquet_schema, mask, key_value_metadata, &[])?.0)
 }
 
-/// Extracts the arrow metadata
+/// Determines the Arrow Schema from a Parquet schema
+///
+/// Looks for an Arrow schema metadata "hint" (see
+/// [`parquet_to_arrow_field_levels`]), and uses it if present to ensure
+/// lossless round trips.
 pub(crate) fn parquet_to_arrow_schema_and_fields(
     parquet_schema: &SchemaDescriptor,
     mask: ProjectionMask,
     key_value_metadata: Option<&Vec<KeyValue>>,
+    virtual_columns: &[FieldRef],
 ) -> Result<(Schema, Option<ParquetField>)> {
     let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default();
     let maybe_schema = metadata
@@ -83,7 +92,8 @@ pub(crate) fn parquet_to_arrow_schema_and_fields(
     }
 
     let hint = maybe_schema.as_ref().map(|s| s.fields());
-    let field_levels = parquet_to_arrow_field_levels(parquet_schema, mask, hint)?;
+    let field_levels =
+        parquet_to_arrow_field_levels_with_virtual(parquet_schema, mask, hint, virtual_columns)?;
     let schema = Schema::new_with_metadata(field_levels.fields, metadata);
     Ok((schema, field_levels.levels))
 }
@@ -126,18 +136,123 @@ pub fn parquet_to_arrow_field_levels(
     mask: ProjectionMask,
     hint: Option<&Fields>,
 ) -> Result<FieldLevels> {
-    match complex::convert_schema(schema, mask, hint)? {
-        Some(field) => match &field.arrow_type {
-            DataType::Struct(fields) => Ok(FieldLevels {
-                fields: fields.clone(),
-                levels: Some(field),
-            }),
-            _ => unreachable!(),
-        },
-        None => Ok(FieldLevels {
-            fields: Fields::empty(),
-            levels: None,
+    parquet_to_arrow_field_levels_with_virtual(schema, mask, hint, &[])
+}
+
+/// Convert a parquet [`SchemaDescriptor`] to [`FieldLevels`] with support for virtual columns
+///
+/// Columns not included within [`ProjectionMask`] will be ignored.
+///
+/// The optional `hint` parameter is the desired Arrow schema. See the
+/// [`arrow`] module documentation for more information.
+///
+/// [`arrow`]: crate::arrow
+///
+/// # Arguments
+/// * `schema` - The Parquet schema descriptor
+/// * `mask` - Projection mask to select which columns to include
+/// * `hint` - Optional hint for Arrow field types to use instead of defaults
+/// * `virtual_columns` - Virtual columns to append (e.g., row numbers); any other field is an error
+///
+/// # Notes:
+/// Where a field type in `hint` is compatible with the corresponding parquet type in `schema`, it
+/// will be used, otherwise the default arrow type for the given parquet column type will be used.
+///
+/// This is to accommodate arrow types that cannot be round-tripped through parquet natively.
+/// Depending on the parquet writer, this can lead to a mismatch between a file's parquet schema
+/// and its embedded arrow schema. The parquet `schema` must be treated as authoritative in such
+/// an event. See [#1663](https://github.com/apache/arrow-rs/issues/1663) for more information
+///
+/// Virtual columns are columns that don't exist in the Parquet file but are generated during reading.
+/// They must have extension type names starting with "parquet.virtual.".
+///
+/// Note: this is a low-level API, most users will want to make use of the higher-level
+/// [`parquet_to_arrow_schema`] for decoding metadata from a parquet file.
+pub fn parquet_to_arrow_field_levels_with_virtual(
+    schema: &SchemaDescriptor,
+    mask: ProjectionMask,
+    hint: Option<&Fields>,
+    virtual_columns: &[FieldRef],
+) -> Result<FieldLevels> {
+    // Validate that all fields are virtual columns
+    for field in virtual_columns {
+        if !virtual_type::is_virtual_column(field) {
+            return Err(ParquetError::General(format!(
+                "Field '{}' is not a virtual column. Virtual columns must have extension type names starting with 'parquet.virtual.'",
+                field.name()
+            )));
+        }
+    }
+
+    // Convert the regular schema first
+    let mut parquet_field = match complex::convert_schema(schema, mask, hint)? {
+        Some(field) => field,
+        None if virtual_columns.is_empty() => {
+            return Ok(FieldLevels {
+                fields: Fields::empty(),
+                levels: None,
+            });
+        }
+        None => {
+            // No regular fields, but we have virtual columns - create empty root struct
+            ParquetField {
+                rep_level: 0,
+                def_level: 0,
+                nullable: false,
+                arrow_type: DataType::Struct(Fields::empty()),
+                field_type: ParquetFieldType::Group {
+                    children: Vec::new(),
+                },
+            }
+        }
+    };
+
+    // Append virtual columns if any
+    if !virtual_columns.is_empty() {
+        match &mut parquet_field.field_type {
+            ParquetFieldType::Group { children } => {
+                // Get the mutable fields from the struct type
+                let DataType::Struct(ref mut fields) = parquet_field.arrow_type else {
+                    unreachable!("Root field must be a struct");
+                };
+
+                // Convert to mutable Vec to append
+                let mut fields_vec: Vec<FieldRef> = fields.iter().cloned().collect();
+
+                // Append each virtual column
+                for virtual_column in virtual_columns {
+                    // Virtual columns can only be added at the root level
+                    assert_eq!(
+                        parquet_field.rep_level, 0,
+                        "Virtual columns can only be added at rep level 0"
+                    );
+                    assert_eq!(
+                        parquet_field.def_level, 0,
+                        "Virtual columns can only be added at def level 0"
+                    );
+
+                    fields_vec.push(virtual_column.clone());
+                    let virtual_parquet_field = complex::convert_virtual_field(
+                        virtual_column,
+                        parquet_field.rep_level,
+                        parquet_field.def_level,
+                    )?;
+                    children.push(virtual_parquet_field);
+                }
+
+                // Update the fields
+                parquet_field.arrow_type = DataType::Struct(Fields::from(fields_vec));
+            }
+            _ => unreachable!("Root field must be a group"),
+        }
+    }
+
+    match &parquet_field.arrow_type {
+        DataType::Struct(fields) => Ok(FieldLevels {
+            fields: fields.clone(),
+            levels: Some(parquet_field),
         }),
+        _ => unreachable!(),
     }
 }
 
@@ -180,9 +295,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result<Schema> {
 /// Encodes the Arrow schema into the IPC format, and base64 encodes it
 pub fn encode_arrow_schema(schema: &Schema) -> String {
     let options = writer::IpcWriteOptions::default();
-    #[allow(deprecated)]
-    let mut dictionary_tracker =
-        writer::DictionaryTracker::new_with_preserve_dict_id(true, options.preserve_dict_id());
+    let mut dictionary_tracker = writer::DictionaryTracker::new(true);
     let data_gen = writer::IpcDataGenerator::default();
     let mut serialized_schema =
         data_gen.schema_to_bytes_with_dictionary_tracker(schema, &mut dictionary_tracker, &options);
@@ -358,15 +471,6 @@ impl<'a> ArrowSchemaConverter<'a> {
     }
 }
 
-/// Convert arrow schema to parquet schema
-///
-/// The name of the root schema element defaults to `"arrow_schema"`, this can be
-/// overridden with [`ArrowSchemaConverter`]
-#[deprecated(since = "54.0.0", note = "Use `ArrowSchemaConverter` instead")]
-pub fn arrow_to_parquet_schema(schema: &Schema) -> Result<SchemaDescriptor> {
-    ArrowSchemaConverter::new().convert(schema)
-}
-
 fn parse_key_value_metadata(
     key_value_metadata: Option<&Vec<KeyValue>>,
 ) -> Option<HashMap<String, String>> {
@@ -381,11 +485,7 @@ fn parse_key_value_metadata(
                 })
                 .collect();
 
-            if map.is_empty() {
-                None
-            } else {
-                Some(map)
-            }
+            if map.is_empty() { None } else { Some(map) }
         }
         None => None,
     }
@@ -396,31 +496,27 @@ pub fn parquet_to_arrow_field(parquet_column: &ColumnDescriptor) -> Result<Field
     let field = complex::convert_type(&parquet_column.self_type_ptr())?;
     let mut ret = Field::new(parquet_column.name(), field.arrow_type, field.nullable);
 
-    let basic_info = parquet_column.self_type().get_basic_info();
-    let mut meta = HashMap::with_capacity(if cfg!(feature = "arrow_canonical_extension_types") {
-        2
-    } else {
-        1
-    });
+    let parquet_type = parquet_column.self_type();
+    let basic_info = parquet_type.get_basic_info();
+
+    let mut hash_map_size = 0;
+    if basic_info.has_id() {
+        hash_map_size += 1;
+    }
+    if has_extension_type(parquet_type) {
+        hash_map_size += 1;
+    }
+    if hash_map_size == 0 {
+        return Ok(ret);
+    }
+    ret.set_metadata(HashMap::with_capacity(hash_map_size));
     if basic_info.has_id() {
-        meta.insert(
+        ret.metadata_mut().insert(
             PARQUET_FIELD_ID_META_KEY.to_string(),
             basic_info.id().to_string(),
         );
     }
-    #[cfg(feature = "arrow_canonical_extension_types")]
-    if let Some(logical_type) = basic_info.logical_type() {
-        match logical_type {
-            LogicalType::Uuid => ret.try_with_extension_type(Uuid)?,
-            LogicalType::Json => ret.try_with_extension_type(Json::default())?,
-            _ => {}
-        }
-    }
-    if !meta.is_empty() {
-        ret.set_metadata(meta);
-    }
-
-    Ok(ret)
+    try_add_extension_type(ret, parquet_column.self_type())
 }
 
 pub fn decimal_length_from_precision(precision: u8) -> usize {
@@ -543,9 +639,9 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
                     is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_ref().is_empty()),
                     unit: match time_unit {
                         TimeUnit::Second => unreachable!(),
-                        TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()),
-                        TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
-                        TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
+                        TimeUnit::Millisecond => ParquetTimeUnit::MILLIS,
+                        TimeUnit::Microsecond => ParquetTimeUnit::MICROS,
+                        TimeUnit::Nanosecond => ParquetTimeUnit::NANOS,
                     },
                 }))
                 .with_repetition(repetition)
@@ -582,7 +678,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
             .with_logical_type(Some(LogicalType::Time {
                 is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"),
                 unit: match unit {
-                    TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()),
+                    TimeUnit::Millisecond => ParquetTimeUnit::MILLIS,
                     u => unreachable!("Invalid unit for Time32: {:?}", u),
                 },
             }))
@@ -593,8 +689,8 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
             .with_logical_type(Some(LogicalType::Time {
                 is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"),
                 unit: match unit {
-                    TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()),
-                    TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()),
+                    TimeUnit::Microsecond => ParquetTimeUnit::MICROS,
+                    TimeUnit::Nanosecond => ParquetTimeUnit::NANOS,
                     u => unreachable!("Invalid unit for Time64: {:?}", u),
                 },
             }))
@@ -617,6 +713,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
             Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
                 .with_repetition(repetition)
                 .with_id(id)
+                .with_logical_type(logical_type_for_binary(field))
                 .build()
         }
         DataType::FixedSizeBinary(length) => {
@@ -624,23 +721,18 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
                 .with_repetition(repetition)
                 .with_id(id)
                 .with_length(*length)
-                .with_logical_type(
-                    #[cfg(feature = "arrow_canonical_extension_types")]
-                    // If set, map arrow uuid extension type to parquet uuid logical type.
-                    field
-                        .try_extension_type::<Uuid>()
-                        .ok()
-                        .map(|_| LogicalType::Uuid),
-                    #[cfg(not(feature = "arrow_canonical_extension_types"))]
-                    None,
-                )
+                .with_logical_type(logical_type_for_fixed_size_binary(field))
                 .build()
         }
         DataType::BinaryView => Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
             .with_repetition(repetition)
             .with_id(id)
+            .with_logical_type(logical_type_for_binary_view(field))
             .build(),
-        DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => {
+        DataType::Decimal32(precision, scale)
+        | DataType::Decimal64(precision, scale)
+        | DataType::Decimal128(precision, scale)
+        | DataType::Decimal256(precision, scale) => {
             // Decimal precision determines the Parquet physical type to use.
             // Following the: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
             let (physical_type, length) = if *precision > 1 && *precision <= 9 {
@@ -667,35 +759,13 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
         }
         DataType::Utf8 | DataType::LargeUtf8 => {
             Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
-                .with_logical_type({
-                    #[cfg(feature = "arrow_canonical_extension_types")]
-                    {
-                        // Use the Json logical type if the canonical Json
-                        // extension type is set on this field.
-                        field
-                            .try_extension_type::<Json>()
-                            .map_or(Some(LogicalType::String), |_| Some(LogicalType::Json))
-                    }
-                    #[cfg(not(feature = "arrow_canonical_extension_types"))]
-                    Some(LogicalType::String)
-                })
+                .with_logical_type(logical_type_for_string(field))
                 .with_repetition(repetition)
                 .with_id(id)
                 .build()
         }
         DataType::Utf8View => Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY)
-            .with_logical_type({
-                #[cfg(feature = "arrow_canonical_extension_types")]
-                {
-                    // Use the Json logical type if the canonical Json
-                    // extension type is set on this field.
-                    field
-                        .try_extension_type::<Json>()
-                        .map_or(Some(LogicalType::String), |_| Some(LogicalType::Json))
-                }
-                #[cfg(not(feature = "arrow_canonical_extension_types"))]
-                Some(LogicalType::String)
-            })
+            .with_logical_type(logical_type_for_string(field))
             .with_repetition(repetition)
             .with_id(id)
             .build(),
@@ -736,6 +806,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
                 .with_fields(fields)
                 .with_repetition(repetition)
                 .with_id(id)
+                .with_logical_type(logical_type_for_struct(field))
                 .build()
         }
         DataType::Map(field, _) => {
@@ -777,7 +848,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result<Type> {
             }
         }
         DataType::Union(_, _) => unimplemented!("See ARROW-8817."),
-        DataType::Dictionary(_, ref value) => {
+        DataType::Dictionary(_, value) => {
             // Dictionary encoding not handled at the schema level
             let dict_field = field.clone().with_data_type(value.as_ref().clone());
             arrow_to_parquet_type(&dict_field, coerce_types)
@@ -799,15 +870,14 @@ mod tests {
 
     use std::{collections::HashMap, sync::Arc};
 
-    use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
-
     use crate::arrow::PARQUET_FIELD_ID_META_KEY;
     use crate::file::metadata::KeyValue;
     use crate::file::reader::FileReader;
     use crate::{
-        arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter},
+        arrow::{ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder},
         schema::{parser::parse_message_type, types::SchemaDescriptor},
     };
+    use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
 
     #[test]
     fn test_flat_primitives() {
@@ -846,12 +916,26 @@ mod tests {
             Field::new("float16", DataType::Float16, true),
             Field::new("string", DataType::Utf8, true),
             Field::new("string_2", DataType::Utf8, true),
-            Field::new("json", DataType::Utf8, true),
+            json_field(),
         ]);
 
         assert_eq!(&arrow_fields, converted_arrow_schema.fields());
     }
 
+    /// Return the expected Field for a Parquet column annotated with
+    /// the JSON logical type.
+    fn json_field() -> Field {
+        #[cfg(feature = "arrow_canonical_extension_types")]
+        {
+            Field::new("json", DataType::Utf8, true)
+                .with_extension_type(arrow_schema::extension::Json::default())
+        }
+        #[cfg(not(feature = "arrow_canonical_extension_types"))]
+        {
+            Field::new("json", DataType::Utf8, true)
+        }
+    }
+
     #[test]
     fn test_decimal_fields() {
         let message_type = "
@@ -1867,7 +1951,7 @@ mod tests {
                 // This is because the Arrow conversion always sets logical type,
                 // even if there wasn't originally one.
                 // This is not an issue, but is an inconvenience for this test.
-                match a.logical_type() {
+                match a.logical_type_ref() {
                     Some(_) => {
                         assert_eq!(a, b)
                     }
@@ -2079,6 +2163,8 @@ mod tests {
                     false, // fails to roundtrip keys_sorted
                     false,
                 ),
+                Field::new("c42", DataType::Decimal32(5, 2), false),
+                Field::new("c43", DataType::Decimal64(18, 12), true),
             ],
             meta(&[("Key", "Value")]),
         );
@@ -2233,23 +2319,20 @@ mod tests {
     #[test]
     #[cfg(feature = "arrow_canonical_extension_types")]
     fn arrow_uuid_to_parquet_uuid() -> Result<()> {
-        let arrow_schema = Schema::new(vec![Field::new(
-            "uuid",
-            DataType::FixedSizeBinary(16),
-            false,
-        )
-        .with_extension_type(Uuid)]);
+        use arrow_schema::extension::Uuid;
+        let arrow_schema = Schema::new(vec![
+            Field::new("uuid", DataType::FixedSizeBinary(16), false).with_extension_type(Uuid),
+        ]);
 
         let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;
 
         assert_eq!(
-            parquet_schema.column(0).logical_type(),
-            Some(LogicalType::Uuid)
+            parquet_schema.column(0).logical_type_ref(),
+            Some(&LogicalType::Uuid)
         );
 
-        // TODO: roundtrip
-        // let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
-        // assert_eq!(arrow_schema.field(0).try_extension_type::<Uuid>()?, Uuid);
+        let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
+        assert_eq!(arrow_schema.field(0).try_extension_type::<Uuid>()?, Uuid);
 
         Ok(())
     }
@@ -2257,25 +2340,52 @@ mod tests {
     #[test]
     #[cfg(feature = "arrow_canonical_extension_types")]
     fn arrow_json_to_parquet_json() -> Result<()> {
+        use arrow_schema::extension::Json;
         let arrow_schema = Schema::new(vec![
-            Field::new("json", DataType::Utf8, false).with_extension_type(Json::default())
+            Field::new("json", DataType::Utf8, false).with_extension_type(Json::default()),
         ]);
 
         let parquet_schema = ArrowSchemaConverter::new().convert(&arrow_schema)?;
 
         assert_eq!(
-            parquet_schema.column(0).logical_type(),
-            Some(LogicalType::Json)
+            parquet_schema.column(0).logical_type_ref(),
+            Some(&LogicalType::Json)
         );
 
-        // TODO: roundtrip
-        // https://github.com/apache/arrow-rs/issues/7063
-        // let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
-        // assert_eq!(
-        //     arrow_schema.field(0).try_extension_type::<Json>()?,
-        //     Json::default()
-        // );
+        let arrow_schema = parquet_to_arrow_schema(&parquet_schema, None)?;
+        assert_eq!(
+            arrow_schema.field(0).try_extension_type::<Json>()?,
+            Json::default()
+        );
 
         Ok(())
     }
+
+    #[test]
+    fn test_parquet_to_arrow_field_levels_with_virtual_rejects_non_virtual() {
+        let message_type = "
+        message test_schema {
+            REQUIRED INT32 id;
+        }
+        ";
+        let parquet_schema = Arc::new(parse_message_type(message_type).unwrap());
+        let descriptor = SchemaDescriptor::new(parquet_schema);
+
+        // Try to pass a regular field (not a virtual column)
+        let regular_field = Arc::new(Field::new("regular_column", DataType::Int64, false));
+        let result = parquet_to_arrow_field_levels_with_virtual(
+            &descriptor,
+            ProjectionMask::all(),
+            None,
+            &[regular_field],
+        );
+
+        assert!(result.is_err());
+        assert!(
+            result
+                .unwrap_err()
+                .to_string()
+                .contains("is not a virtual column")
+        );
+    }
 }
diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs
index cc276eb611b0..8959081bcb41 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -18,7 +18,7 @@
 use crate::basic::{ConvertedType, LogicalType, TimeUnit as ParquetTimeUnit, Type as PhysicalType};
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::{BasicTypeInfo, Type};
-use arrow_schema::{DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION};
+use arrow_schema::{DECIMAL128_MAX_PRECISION, DataType, IntervalUnit, TimeUnit};
 
 /// Converts [`Type`] to [`DataType`] with an optional `arrow_type_hint`
 /// provided by the arrow schema
@@ -85,7 +85,9 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType {
         // Determine interval time unit (#1666)
         (DataType::Interval(_), DataType::Interval(_)) => hint,
 
-        // Promote to Decimal256
+        // Promote to Decimal256 or narrow to Decimal32 or Decimal64
+        (DataType::Decimal128(_, _), DataType::Decimal32(_, _)) => hint,
+        (DataType::Decimal128(_, _), DataType::Decimal64(_, _)) => hint,
         (DataType::Decimal128(_, _), DataType::Decimal256(_, _)) => hint,
 
         // Potentially preserve dictionary encoding
@@ -162,7 +164,7 @@ fn decimal_256_type(scale: i32, precision: i32) -> Result<DataType> {
 }
 
 fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
-    match (info.logical_type(), info.converted_type()) {
+    match (info.logical_type_ref(), info.converted_type()) {
         (None, ConvertedType::NONE) => Ok(DataType::Int32),
         (
             Some(
@@ -181,10 +183,12 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
             (32, false) => Ok(DataType::UInt32),
             _ => Err(arrow_err!("Cannot create INT32 physical type from {:?}", t)),
         },
-        (Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
+        (Some(LogicalType::Decimal { scale, precision }), _) => {
+            decimal_128_type(*scale, *precision)
+        }
         (Some(LogicalType::Date), _) => Ok(DataType::Date32),
         (Some(LogicalType::Time { unit, .. }), _) => match unit {
-            ParquetTimeUnit::MILLIS(_) => Ok(DataType::Time32(TimeUnit::Millisecond)),
+            ParquetTimeUnit::MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)),
             _ => Err(arrow_err!(
                 "Cannot create INT32 physical type from {:?}",
                 unit
@@ -210,7 +214,7 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
 }
 
 fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataType> {
-    match (info.logical_type(), info.converted_type()) {
+    match (info.logical_type_ref(), info.converted_type()) {
         (None, ConvertedType::NONE) => Ok(DataType::Int64),
         (
             Some(LogicalType::Integer {
@@ -223,11 +227,11 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
             false => Ok(DataType::UInt64),
         },
         (Some(LogicalType::Time { unit, .. }), _) => match unit {
-            ParquetTimeUnit::MILLIS(_) => {
+            ParquetTimeUnit::MILLIS => {
                 Err(arrow_err!("Cannot create INT64 from MILLIS time unit",))
             }
-            ParquetTimeUnit::MICROS(_) => Ok(DataType::Time64(TimeUnit::Microsecond)),
-            ParquetTimeUnit::NANOS(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)),
+            ParquetTimeUnit::MICROS => Ok(DataType::Time64(TimeUnit::Microsecond)),
+            ParquetTimeUnit::NANOS => Ok(DataType::Time64(TimeUnit::Nanosecond)),
         },
         (
             Some(LogicalType::Timestamp {
@@ -237,11 +241,11 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
             _,
         ) => Ok(DataType::Timestamp(
             match unit {
-                ParquetTimeUnit::MILLIS(_) => TimeUnit::Millisecond,
-                ParquetTimeUnit::MICROS(_) => TimeUnit::Microsecond,
-                ParquetTimeUnit::NANOS(_) => TimeUnit::Nanosecond,
+                ParquetTimeUnit::MILLIS => TimeUnit::Millisecond,
+                ParquetTimeUnit::MICROS => TimeUnit::Microsecond,
+                ParquetTimeUnit::NANOS => TimeUnit::Nanosecond,
             },
-            if is_adjusted_to_u_t_c {
+            if *is_adjusted_to_u_t_c {
                 Some("UTC".into())
             } else {
                 None
@@ -258,7 +262,9 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
             TimeUnit::Microsecond,
             Some("UTC".into()),
         )),
-        (Some(LogicalType::Decimal { scale, precision }), _) => decimal_128_type(scale, precision),
+        (Some(LogicalType::Decimal { scale, precision }), _) => {
+            decimal_128_type(*scale, *precision)
+        }
         (None, ConvertedType::DECIMAL) => decimal_128_type(scale, precision),
         (logical, converted) => Err(arrow_err!(
             "Unable to convert parquet INT64 logical type {:?} or converted type {}",
@@ -269,11 +275,14 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result<DataTy
 }
 
 fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<DataType> {
-    match (info.logical_type(), info.converted_type()) {
+    match (info.logical_type_ref(), info.converted_type()) {
         (Some(LogicalType::String), _) => Ok(DataType::Utf8),
         (Some(LogicalType::Json), _) => Ok(DataType::Utf8),
         (Some(LogicalType::Bson), _) => Ok(DataType::Binary),
         (Some(LogicalType::Enum), _) => Ok(DataType::Binary),
+        (Some(LogicalType::Geometry { .. }), _) => Ok(DataType::Binary),
+        (Some(LogicalType::Geography { .. }), _) => Ok(DataType::Binary),
+        (Some(LogicalType::_Unknown { .. }), _) => Ok(DataType::Binary),
         (None, ConvertedType::NONE) => Ok(DataType::Binary),
         (None, ConvertedType::JSON) => Ok(DataType::Utf8),
         (None, ConvertedType::BSON) => Ok(DataType::Binary),
@@ -285,7 +294,7 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result<D
                 precision: p,
             }),
             _,
-        ) => decimal_type(s, p),
+        ) => decimal_type(*s, *p),
         (None, ConvertedType::DECIMAL) => decimal_type(scale, precision),
         (logical, converted) => Err(arrow_err!(
             "Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}",
@@ -302,12 +311,12 @@ fn from_fixed_len_byte_array(
     type_length: i32,
 ) -> Result<DataType> {
     // TODO: This should check the type length for the decimal and interval types
-    match (info.logical_type(), info.converted_type()) {
+    match (info.logical_type_ref(), info.converted_type()) {
         (Some(LogicalType::Decimal { scale, precision }), _) => {
             if type_length <= 16 {
-                decimal_128_type(scale, precision)
+                decimal_128_type(*scale, *precision)
             } else {
-                decimal_256_type(scale, precision)
+                decimal_256_type(*scale, *precision)
             }
         }
         (None, ConvertedType::DECIMAL) => {
diff --git a/parquet/src/arrow/schema/virtual_type.rs b/parquet/src/arrow/schema/virtual_type.rs
new file mode 100644
index 000000000000..d3092a3bd53f
--- /dev/null
+++ b/parquet/src/arrow/schema/virtual_type.rs
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
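+//! Arrow extension types that mark virtual columns, such as [`RowNumber`],
+//! plus [`is_virtual_column`] for detecting them on a [`Field`].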
+
+use arrow_schema::{ArrowError, DataType, Field, extension::ExtensionType};
+
+/// Prefix for virtual column extension type names (a macro so that `concat!` can use it as a literal).
+macro_rules! VIRTUAL_PREFIX {
+    () => {
+        "parquet.virtual."
+    };
+}
+
+/// The extension type for row numbers.
+///
+/// Extension name: `parquet.virtual.row_number`.
+///
+/// This virtual column has storage type `Int64` and uses empty string metadata.
+#[derive(Debug, Default, Clone, Copy, PartialEq)]
+pub struct RowNumber;
+
+impl ExtensionType for RowNumber {
+    const NAME: &'static str = concat!(VIRTUAL_PREFIX!(), "row_number");
+    type Metadata = &'static str;
+
+    fn metadata(&self) -> &Self::Metadata {
+        &""
+    }
+
+    fn serialize_metadata(&self) -> Option<String> {
+        Some(String::default())
+    }
+
+    fn deserialize_metadata(metadata: Option<&str>) -> Result<Self::Metadata, ArrowError> {
+        if metadata.is_some_and(str::is_empty) {
+            Ok("")
+        } else {
+            Err(ArrowError::InvalidArgumentError(
+                "Virtual column extension type expects an empty string as metadata".to_owned(),
+            ))
+        }
+    }
+
+    fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> {
+        match data_type {
+            DataType::Int64 => Ok(()),
+            data_type => Err(ArrowError::InvalidArgumentError(format!(
+                "Virtual column data type mismatch, expected Int64, found {data_type}"
+            ))),
+        }
+    }
+
+    fn try_new(data_type: &DataType, _metadata: Self::Metadata) -> Result<Self, ArrowError> {
+        Self.supports_data_type(data_type).map(|_| Self)
+    }
+}
+
+/// Returns `true` if the field is a virtual column.
+///
+/// Virtual columns have extension type names starting with `parquet.virtual.`.
+pub fn is_virtual_column(field: &Field) -> bool {
+    field
+        .extension_type_name()
+        .is_some_and(|name| name.starts_with(VIRTUAL_PREFIX!()))
+}
+
+#[cfg(test)]
+mod tests {
+    use arrow_schema::{
+        ArrowError, DataType, Field,
+        extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY},
+    };
+
+    use super::*;
+
+    #[test]
+    fn valid() -> Result<(), ArrowError> {
+        let mut field = Field::new("", DataType::Int64, false);
+        field.try_with_extension_type(RowNumber)?;
+        field.try_extension_type::<RowNumber>()?;
+
+        Ok(())
+    }
+
+    #[test]
+    #[should_panic(expected = "Field extension type name missing")]
+    fn missing_name() {
+        let field = Field::new("", DataType::Int64, false).with_metadata(
+            [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "".to_owned())]
+                .into_iter()
+                .collect(),
+        );
+        field.extension_type::<RowNumber>();
+    }
+
+    #[test]
+    #[should_panic(expected = "expected Int64, found Int32")]
+    fn invalid_type() {
+        Field::new("", DataType::Int32, false).with_extension_type(RowNumber);
+    }
+
+    #[test]
+    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
+    fn missing_metadata() {
+        let field = Field::new("", DataType::Int64, false).with_metadata(
+            [(
+                EXTENSION_TYPE_NAME_KEY.to_owned(),
+                RowNumber::NAME.to_owned(),
+            )]
+            .into_iter()
+            .collect(),
+        );
+        field.extension_type::<RowNumber>();
+    }
+
+    #[test]
+    #[should_panic(expected = "Virtual column extension type expects an empty string as metadata")]
+    fn invalid_metadata() {
+        let field = Field::new("", DataType::Int64, false).with_metadata(
+            [
+                (
+                    EXTENSION_TYPE_NAME_KEY.to_owned(),
+                    RowNumber::NAME.to_owned(),
+                ),
+                (
+                    EXTENSION_TYPE_METADATA_KEY.to_owned(),
+                    "non-empty".to_owned(),
+                ),
+            ]
+            .into_iter()
+            .collect(),
+        );
+        field.extension_type::<RowNumber>();
+    }
+}
diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs
index 99f122fe4c3e..ba8ffc2e92c3 100644
--- a/parquet/src/basic.rs
+++ b/parquet/src/basic.rs
@@ -15,59 +15,56 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Contains Rust mappings for Thrift definition.
-//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) file to see raw definitions.
+//! Contains Rust mappings for Thrift definition. This module contains only mappings for thrift
+//! enums and unions. Thrift structs are handled elsewhere.
+//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift)
+//! file to see raw definitions.
 
+use std::io::Write;
 use std::str::FromStr;
 use std::{fmt, str};
 
 pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel};
-use crate::format as parquet;
+use crate::file::metadata::HeapSize;
+use crate::parquet_thrift::{
+    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
+    WriteThrift, WriteThriftField,
+};
+use crate::{thrift_enum, thrift_struct, thrift_union_all_empty, write_thrift_field};
 
 use crate::errors::{ParquetError, Result};
 
-// Re-export crate::format types used in this module
-pub use crate::format::{
-    BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType,
-    StringType, TimeType, TimeUnit, TimestampType, UUIDType,
-};
-
 // ----------------------------------------------------------------------
 // Types from the Thrift definition
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::Type`
+// Mirrors thrift enum `Type`
 
+thrift_enum!(
 /// Types supported by Parquet.
 ///
 /// These physical types are intended to be used in combination with the encodings to
 /// control the on disk storage format.
 /// For example INT16 is not included as a type since a good encoding of INT32
 /// would handle this.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-#[allow(non_camel_case_types)]
-pub enum Type {
-    /// A boolean value.
-    BOOLEAN,
-    /// 32-bit signed integer.
-    INT32,
-    /// 64-bit signed integer.
-    INT64,
-    /// 96-bit signed integer for timestamps.
-    INT96,
-    /// IEEE 754 single-precision floating point value.
-    FLOAT,
-    /// IEEE 754 double-precision floating point value.
-    DOUBLE,
-    /// Arbitrary length byte array.
-    BYTE_ARRAY,
-    /// Fixed length byte array.
-    FIXED_LEN_BYTE_ARRAY,
+enum Type {
+  BOOLEAN = 0;
+  INT32 = 1;
+  INT64 = 2;
+  INT96 = 3;  // deprecated, only used by legacy implementations.
+  FLOAT = 4;
+  DOUBLE = 5;
+  BYTE_ARRAY = 6;
+  FIXED_LEN_BYTE_ARRAY = 7;
 }
+);
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::ConvertedType`
+// Mirrors thrift enum `ConvertedType`
 
+// TODO(ets): Adding the `NONE` variant to this enum is a bit awkward. We should
+// look into removing it and using `Option<ConvertedType>` instead.
+thrift_enum!(
 /// Common types (converted types) used by frameworks when using Parquet.
 ///
 /// This helps map between types in those frameworks to the base types in Parquet.
@@ -75,103 +72,166 @@ pub enum Type {
 ///
 /// This struct was renamed from `LogicalType` in version 4.0.0.
 /// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-#[allow(non_camel_case_types)]
-pub enum ConvertedType {
-    /// No type conversion.
-    NONE,
-    /// A BYTE_ARRAY actually contains UTF8 encoded chars.
-    UTF8,
-
-    /// A map is converted as an optional field containing a repeated key/value pair.
-    MAP,
-
-    /// A key/value pair is converted into a group of two fields.
-    MAP_KEY_VALUE,
-
-    /// A list is converted into an optional field containing a repeated field for its
-    /// values.
-    LIST,
-
-    /// An enum is converted into a binary field
-    ENUM,
-
-    /// A decimal value.
-    /// This may be used to annotate binary or fixed primitive types. The
-    /// underlying byte array stores the unscaled value encoded as two's
-    /// complement using big-endian byte order (the most significant byte is the
-    /// zeroth element).
-    ///
-    /// This must be accompanied by a (maximum) precision and a scale in the
-    /// SchemaElement. The precision specifies the number of digits in the decimal
-    /// and the scale stores the location of the decimal point. For example 1.23
-    /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
-    /// 2 digits over).
-    DECIMAL,
-
-    /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
-    DATE,
-
-    /// The total number of milliseconds since midnight. The value is stored as an INT32
-    /// physical type.
-    TIME_MILLIS,
-
-    /// The total number of microseconds since midnight. The value is stored as an INT64
-    /// physical type.
-    TIME_MICROS,
-
-    /// Date and time recorded as milliseconds since the Unix epoch.
-    /// Recorded as a physical type of INT64.
-    TIMESTAMP_MILLIS,
-
-    /// Date and time recorded as microseconds since the Unix epoch.
-    /// The value is stored as an INT64 physical type.
-    TIMESTAMP_MICROS,
-
-    /// An unsigned 8 bit integer value stored as INT32 physical type.
-    UINT_8,
+enum ConvertedType {
+  /// Not defined in the spec, used internally to indicate no type conversion
+  NONE = -1;
+
+  /// A BYTE_ARRAY actually contains UTF8 encoded chars.
+  UTF8 = 0;
+
+  /// A map is converted as an optional field containing a repeated key/value pair.
+  MAP = 1;
+
+  /// A key/value pair is converted into a group of two fields.
+  MAP_KEY_VALUE = 2;
+
+  /// A list is converted into an optional field containing a repeated field for its
+  /// values.
+  LIST = 3;
+
+  /// An enum is converted into a BYTE_ARRAY field
+  ENUM = 4;
+
+  /// A decimal value.
+  ///
+  /// This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive
+  /// types. The underlying byte array stores the unscaled value encoded as two's
+  /// complement using big-endian byte order (the most significant byte is the
+  /// zeroth element). The value of the decimal is the value * 10^{-scale}.
+  ///
+  /// This must be accompanied by a (maximum) precision and a scale in the
+  /// SchemaElement. The precision specifies the number of digits in the decimal
+  /// and the scale stores the location of the decimal point. For example 1.23
+  /// would have precision 3 (3 total digits) and scale 2 (the decimal point is
+  /// 2 digits over).
+  DECIMAL = 5;
+
+  /// A date stored as days since Unix epoch, encoded as the INT32 physical type.
+  DATE = 6;
+
+  /// The total number of milliseconds since midnight. The value is stored as an INT32
+  /// physical type.
+  TIME_MILLIS = 7;
+
+  /// The total number of microseconds since midnight. The value is stored as an INT64
+  /// physical type.
+  TIME_MICROS = 8;
+
+  /// Date and time recorded as milliseconds since the Unix epoch.
+  /// Recorded as a physical type of INT64.
+  TIMESTAMP_MILLIS = 9;
+
+  /// Date and time recorded as microseconds since the Unix epoch.
+  /// The value is stored as an INT64 physical type.
+  TIMESTAMP_MICROS = 10;
+
+  /// An unsigned 8 bit integer value stored as INT32 physical type.
+  UINT_8 = 11;
+
+  /// An unsigned 16 bit integer value stored as INT32 physical type.
+  UINT_16 = 12;
+
+  /// An unsigned 32 bit integer value stored as INT32 physical type.
+  UINT_32 = 13;
+
+  /// An unsigned 64 bit integer value stored as INT64 physical type.
+  UINT_64 = 14;
+
+  /// A signed 8 bit integer value stored as INT32 physical type.
+  INT_8 = 15;
+
+  /// A signed 16 bit integer value stored as INT32 physical type.
+  INT_16 = 16;
+
+  /// A signed 32 bit integer value stored as INT32 physical type.
+  INT_32 = 17;
+
+  /// A signed 64 bit integer value stored as INT64 physical type.
+  INT_64 = 18;
+
+  /// A JSON document embedded within a single UTF8 column.
+  JSON = 19;
+
+  /// A BSON document embedded within a single BINARY column.
+  BSON = 20;
+
+  /// An interval of time
+  ///
+  /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
+  /// This data is composed of three separate little endian unsigned integers.
+  /// Each stores a component of a duration of time. The first integer identifies
+  /// the number of months associated with the duration, the second identifies
+  /// the number of days associated with the duration and the third identifies
+  /// the number of milliseconds associated with the provided duration.
+  /// This duration of time is independent of any particular timezone or date.
+  INTERVAL = 21;
+}
+);
 
-    /// An unsigned 16 bit integer value stored as INT32 physical type.
-    UINT_16,
+// ----------------------------------------------------------------------
+// Mirrors thrift union `TimeUnit`
+
+thrift_union_all_empty!(
+/// Time unit for `Time` and `Timestamp` logical types.
+union TimeUnit {
+  1: MilliSeconds MILLIS
+  2: MicroSeconds MICROS
+  3: NanoSeconds NANOS
+}
+);
 
-    /// An unsigned 32 bit integer value stored as INT32 physical type.
-    UINT_32,
+// ----------------------------------------------------------------------
+// Mirrors thrift union `LogicalType`
 
-    /// An unsigned 64 bit integer value stored as INT64 physical type.
-    UINT_64,
+// private structs for decoding logical type
 
-    /// A signed 8 bit integer value stored as INT32 physical type.
-    INT_8,
+thrift_struct!(
+struct DecimalType {
+  1: required i32 scale
+  2: required i32 precision
+}
+);
 
-    /// A signed 16 bit integer value stored as INT32 physical type.
-    INT_16,
+thrift_struct!(
+struct TimestampType {
+  1: required bool is_adjusted_to_u_t_c
+  2: required TimeUnit unit
+}
+);
 
-    /// A signed 32 bit integer value stored as INT32 physical type.
-    INT_32,
+// `TimeType` has the same fields as `TimestampType`, so reuse that struct
+use TimestampType as TimeType;
 
-    /// A signed 64 bit integer value stored as INT64 physical type.
-    INT_64,
+thrift_struct!(
+struct IntType {
+  1: required i8 bit_width
+  2: required bool is_signed
+}
+);
 
-    /// A JSON document embedded within a single UTF8 column.
-    JSON,
+thrift_struct!(
+struct VariantType {
+  // The version of the variant specification that the variant was
+  // written with.
+  1: optional i8 specification_version
+}
+);
 
-    /// A BSON document embedded within a single BINARY column.
-    BSON,
+thrift_struct!(
+struct GeometryType<'a> {
+  1: optional string<'a> crs;
+}
+);
 
-    /// An interval of time.
-    ///
-    /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
-    /// This data is composed of three separate little endian unsigned integers.
-    /// Each stores a component of a duration of time. The first integer identifies
-    /// the number of months associated with the duration, the second identifies
-    /// the number of days associated with the duration and the third identifies
-    /// the number of milliseconds associated with the provided duration.
-    /// This duration of time is independent of any particular timezone or date.
-    INTERVAL,
+thrift_struct!(
+struct GeographyType<'a> {
+  1: optional string<'a> crs;
+  2: optional EdgeInterpolationAlgorithm algorithm;
 }
+);
 
-// ----------------------------------------------------------------------
-// Mirrors `parquet::LogicalType`
+// TODO(ets): should we switch to tuple variants so we can use
+// the thrift macros?
 
 /// Logical types used by version 2.4.0+ of the Parquet format.
 ///
@@ -228,26 +288,272 @@ pub enum LogicalType {
     Uuid,
     /// A 16-bit floating point number.
     Float16,
+    /// A Variant value.
+    Variant {
+        /// The version of the variant specification that the variant was written with.
+        specification_version: Option<i8>,
+    },
+    /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation.
+    Geometry {
+        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`, which means that the geometries
+        /// must be stored in longitude, latitude based on the WGS84 datum.
+        crs: Option<String>,
+    },
+    /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation.
+    Geography {
+        /// A custom CRS. If unset, the CRS defaults to `OGC:CRS84`.
+        crs: Option<String>,
+        /// An optional algorithm can be set to correctly interpret edges interpolation
+        /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`.
+        algorithm: Option<EdgeInterpolationAlgorithm>,
+    },
+    /// For forward compatibility; used when an unknown union value is encountered.
+    _Unknown {
+        /// The field id encountered when parsing the unknown logical type.
+        field_id: i16,
+    },
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let field_ident = prot.read_field_begin(0)?;
+        if field_ident.field_type == FieldType::Stop {
+            return Err(general_err!("received empty union from remote LogicalType"));
+        }
+        let ret = match field_ident.id {
+            1 => {
+                prot.skip_empty_struct()?;
+                Self::String
+            }
+            2 => {
+                prot.skip_empty_struct()?;
+                Self::Map
+            }
+            3 => {
+                prot.skip_empty_struct()?;
+                Self::List
+            }
+            4 => {
+                prot.skip_empty_struct()?;
+                Self::Enum
+            }
+            5 => {
+                let val = DecimalType::read_thrift(&mut *prot)?;
+                Self::Decimal {
+                    scale: val.scale,
+                    precision: val.precision,
+                }
+            }
+            6 => {
+                prot.skip_empty_struct()?;
+                Self::Date
+            }
+            7 => {
+                let val = TimeType::read_thrift(&mut *prot)?;
+                Self::Time {
+                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
+                    unit: val.unit,
+                }
+            }
+            8 => {
+                let val = TimestampType::read_thrift(&mut *prot)?;
+                Self::Timestamp {
+                    is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
+                    unit: val.unit,
+                }
+            }
+            10 => {
+                let val = IntType::read_thrift(&mut *prot)?;
+                Self::Integer {
+                    is_signed: val.is_signed,
+                    bit_width: val.bit_width,
+                }
+            }
+            11 => {
+                prot.skip_empty_struct()?;
+                Self::Unknown
+            }
+            12 => {
+                prot.skip_empty_struct()?;
+                Self::Json
+            }
+            13 => {
+                prot.skip_empty_struct()?;
+                Self::Bson
+            }
+            14 => {
+                prot.skip_empty_struct()?;
+                Self::Uuid
+            }
+            15 => {
+                prot.skip_empty_struct()?;
+                Self::Float16
+            }
+            16 => {
+                let val = VariantType::read_thrift(&mut *prot)?;
+                Self::Variant {
+                    specification_version: val.specification_version,
+                }
+            }
+            17 => {
+                let val = GeometryType::read_thrift(&mut *prot)?;
+                Self::Geometry {
+                    crs: val.crs.map(|s| s.to_owned()),
+                }
+            }
+            18 => {
+                let val = GeographyType::read_thrift(&mut *prot)?;
+                // unset algorithm means SPHERICAL, per the spec:
+                // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#geography
+                let algorithm = val
+                    .algorithm
+                    .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
+                Self::Geography {
+                    crs: val.crs.map(|s| s.to_owned()),
+                    algorithm: Some(algorithm),
+                }
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?;
+                Self::_Unknown {
+                    field_id: field_ident.id,
+                }
+            }
+        };
+        let field_ident = prot.read_field_begin(field_ident.id)?;
+        if field_ident.field_type != FieldType::Stop {
+            return Err(general_err!(
+                "Received multiple fields for union from remote LogicalType"
+            ));
+        }
+        Ok(ret)
+    }
+}
+
+impl WriteThrift for LogicalType {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        match self {
+            Self::String => {
+                writer.write_empty_struct(1, 0)?;
+            }
+            Self::Map => {
+                writer.write_empty_struct(2, 0)?;
+            }
+            Self::List => {
+                writer.write_empty_struct(3, 0)?;
+            }
+            Self::Enum => {
+                writer.write_empty_struct(4, 0)?;
+            }
+            Self::Decimal { scale, precision } => {
+                DecimalType {
+                    scale: *scale,
+                    precision: *precision,
+                }
+                .write_thrift_field(writer, 5, 0)?;
+            }
+            Self::Date => {
+                writer.write_empty_struct(6, 0)?;
+            }
+            Self::Time {
+                is_adjusted_to_u_t_c,
+                unit,
+            } => {
+                TimeType {
+                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
+                    unit: *unit,
+                }
+                .write_thrift_field(writer, 7, 0)?;
+            }
+            Self::Timestamp {
+                is_adjusted_to_u_t_c,
+                unit,
+            } => {
+                TimestampType {
+                    is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c,
+                    unit: *unit,
+                }
+                .write_thrift_field(writer, 8, 0)?;
+            }
+            Self::Integer {
+                bit_width,
+                is_signed,
+            } => {
+                IntType {
+                    bit_width: *bit_width,
+                    is_signed: *is_signed,
+                }
+                .write_thrift_field(writer, 10, 0)?;
+            }
+            Self::Unknown => {
+                writer.write_empty_struct(11, 0)?;
+            }
+            Self::Json => {
+                writer.write_empty_struct(12, 0)?;
+            }
+            Self::Bson => {
+                writer.write_empty_struct(13, 0)?;
+            }
+            Self::Uuid => {
+                writer.write_empty_struct(14, 0)?;
+            }
+            Self::Float16 => {
+                writer.write_empty_struct(15, 0)?;
+            }
+            Self::Variant {
+                specification_version,
+            } => {
+                VariantType {
+                    specification_version: *specification_version,
+                }
+                .write_thrift_field(writer, 16, 0)?;
+            }
+            Self::Geometry { crs } => {
+                GeometryType {
+                    crs: crs.as_ref().map(|s| s.as_str()),
+                }
+                .write_thrift_field(writer, 17, 0)?;
+            }
+            Self::Geography { crs, algorithm } => {
+                GeographyType {
+                    crs: crs.as_ref().map(|s| s.as_str()),
+                    algorithm: *algorithm,
+                }
+                .write_thrift_field(writer, 18, 0)?;
+            }
+            _ => return Err(nyi_err!("logical type")),
+        }
+        writer.write_struct_end()
+    }
 }
 
+write_thrift_field!(LogicalType, FieldType::Struct);
+
 // ----------------------------------------------------------------------
-// Mirrors `parquet::FieldRepetitionType`
+// Mirrors thrift enum `FieldRepetitionType`
+//
 
+thrift_enum!(
 /// Representation of field types in schema.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-#[allow(non_camel_case_types)]
-pub enum Repetition {
-    /// Field is required (can not be null) and each record has exactly 1 value.
-    REQUIRED,
-    /// Field is optional (can be null) and each record has 0 or 1 values.
-    OPTIONAL,
-    /// Field is repeated and can contain 0 or more values.
-    REPEATED,
+enum FieldRepetitionType {
+  /// This field is required (can not be null) and each row has exactly 1 value.
+  REQUIRED = 0;
+  /// The field is optional (can be null) and each row has 0 or 1 values.
+  OPTIONAL = 1;
+  /// The field is repeated and can contain 0 or more values.
+  REPEATED = 2;
 }
+);
+
+/// Type alias for thrift `FieldRepetitionType`
+pub type Repetition = FieldRepetitionType;
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::Encoding`
+// Mirrors thrift enum `Encoding`
 
+thrift_enum!(
 /// Encodings supported by Parquet.
 ///
 /// Not all encodings are valid for all types. These enums are also used to specify the
@@ -264,80 +570,72 @@ pub enum Repetition {
 /// performance impact when evaluating these encodings.
 ///
 /// [WriterVersion]: crate::file::properties::WriterVersion
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
-#[allow(non_camel_case_types)]
-pub enum Encoding {
-    /// Default byte encoding.
-    /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true.
-    /// - INT32 - 4 bytes per value, stored as little-endian.
-    /// - INT64 - 8 bytes per value, stored as little-endian.
-    /// - FLOAT - 4 bytes per value, stored as little-endian.
-    /// - DOUBLE - 8 bytes per value, stored as little-endian.
-    /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
-    /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored.
-    PLAIN,
-
-    /// **Deprecated** dictionary encoding.
-    ///
-    /// The values in the dictionary are encoded using PLAIN encoding.
-    /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
-    /// PLAIN encoding is used for dictionary page.
-    PLAIN_DICTIONARY,
-
-    /// Group packed run length encoding.
-    ///
-    /// Usable for definition/repetition levels encoding and boolean values.
-    RLE,
-
-    /// **Deprecated** Bit-packed encoding.
-    ///
-    /// This can only be used if the data has a known max width.
-    /// Usable for definition/repetition levels encoding.
-    ///
-    /// There are compatibility issues with files using this encoding.
-    /// The parquet standard specifies the bits to be packed starting from the
-    /// most-significant bit, several implementations do not follow this bit order.
-    /// Several other implementations also have issues reading this encoding
-    /// because of incorrect assumptions about the length of the encoded data.
-    ///
-    /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
-    #[deprecated(
-        since = "51.0.0",
-        note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
-    )]
-    BIT_PACKED,
-
-    /// Delta encoding for integers, either INT32 or INT64.
-    ///
-    /// Works best on sorted data.
-    DELTA_BINARY_PACKED,
-
-    /// Encoding for byte arrays to separate the length values and the data.
-    ///
-    /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
-    DELTA_LENGTH_BYTE_ARRAY,
-
-    /// Incremental encoding for byte arrays.
-    ///
-    /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
-    /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
-    DELTA_BYTE_ARRAY,
-
-    /// Dictionary encoding.
-    ///
-    /// The ids are encoded using the RLE encoding.
-    RLE_DICTIONARY,
-
-    /// Encoding for fixed-width data.
-    ///
-    /// K byte-streams are created where K is the size in bytes of the data type.
-    /// The individual bytes of a value are scattered to the corresponding stream and
-    /// the streams are concatenated.
-    /// This itself does not reduce the size of the data but can lead to better compression
-    /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
-    /// perform poorly for large values of N.
-    BYTE_STREAM_SPLIT,
+enum Encoding {
+  /// Default encoding.
+  /// - BOOLEAN - 1 bit per value. 0 is false; 1 is true.
+  /// - INT32 - 4 bytes per value.  Stored as little-endian.
+  /// - INT64 - 8 bytes per value.  Stored as little-endian.
+  /// - FLOAT - 4 bytes per value.  IEEE. Stored as little-endian.
+  /// - DOUBLE - 8 bytes per value.  IEEE. Stored as little-endian.
+  /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes.
+  /// - FIXED_LEN_BYTE_ARRAY - Just the bytes.
+  PLAIN = 0;
+  //  GROUP_VAR_INT = 1;
+  /// **Deprecated** dictionary encoding.
+  ///
+  /// The values in the dictionary are encoded using PLAIN encoding.
+  /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and
+  /// PLAIN encoding is used for dictionary page.
+  PLAIN_DICTIONARY = 2;
+  /// Group packed run length encoding.
+  ///
+  /// Usable for definition/repetition levels encoding and boolean values.
+  RLE = 3;
+  /// **Deprecated** Bit-packed encoding.
+  ///
+  /// This can only be used if the data has a known max width.
+  /// Usable for definition/repetition levels encoding.
+  ///
+  /// There are compatibility issues with files using this encoding.
+  /// The parquet standard specifies the bits to be packed starting from the
+  /// most-significant bit, several implementations do not follow this bit order.
+  /// Several other implementations also have issues reading this encoding
+  /// because of incorrect assumptions about the length of the encoded data.
+  ///
+  /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead.
+  #[deprecated(
+      since = "51.0.0",
+      note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead"
+  )]
+  BIT_PACKED = 4;
+  /// Delta encoding for integers, either INT32 or INT64.
+  ///
+  /// Works best on sorted data.
+  DELTA_BINARY_PACKED = 5;
+  /// Encoding for byte arrays to separate the length values and the data.
+  ///
+  /// The lengths are encoded using DELTA_BINARY_PACKED encoding.
+  DELTA_LENGTH_BYTE_ARRAY = 6;
+  /// Incremental encoding for byte arrays.
+  ///
+  /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding.
+  /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding.
+  DELTA_BYTE_ARRAY = 7;
+  /// Dictionary encoding.
+  ///
+  /// The ids are encoded using the RLE encoding.
+  RLE_DICTIONARY = 8;
+  /// Encoding for fixed-width data.
+  ///
+  /// K byte-streams are created where K is the size in bytes of the data type.
+  /// The individual bytes of a value are scattered to the corresponding stream and
+  /// the streams are concatenated.
+  /// This itself does not reduce the size of the data but can lead to better compression
+  /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
+  /// perform poorly for large values of N.
+  BYTE_STREAM_SPLIT = 9;
 }
+);
 
 impl FromStr for Encoding {
     type Err = ParquetError;
@@ -361,8 +659,144 @@ impl FromStr for Encoding {
     }
 }
 
+/// A bitmask representing the [`Encoding`]s employed while encoding a Parquet column chunk.
+///
+/// The Parquet [`ColumnMetaData`] struct contains an array that indicates what encodings were
+/// used when writing that column chunk. For memory and performance reasons, this crate reduces
+/// that array to a bitmask, where each bit position represents a different [`Encoding`]. This
+/// struct contains that bitmask, and provides methods to interact with the data.
+///
+/// # Example
+/// ```no_run
+/// # use parquet::file::metadata::ParquetMetaDataReader;
+/// # use parquet::basic::Encoding;
+/// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); }
+/// // read parquet metadata from a file
+/// let file = open_parquet_file("some_path.parquet");
+/// let mut reader = ParquetMetaDataReader::new();
+/// reader.try_parse(&file).unwrap();
+/// let metadata = reader.finish().unwrap();
+///
+/// // find the encodings used by the first column chunk in the first row group
+/// let col_meta = metadata.row_group(0).column(0);
+/// let encodings = col_meta.encodings_mask();
+///
+/// // check to see if a particular encoding was used
+/// let used_rle = encodings.is_set(Encoding::RLE);
+///
+/// // check to see if all of a set of encodings were used
+/// let used_all = encodings.all_set([Encoding::RLE, Encoding::PLAIN].iter());
+///
+/// // convert mask to a Vec<Encoding>
+/// let encodings_vec = encodings.encodings().collect::<Vec<_>>();
+/// ```
+///
+/// [`ColumnMetaData`]: https://github.com/apache/parquet-format/blob/9fd57b59e0ce1a82a69237dcf8977d3e72a2965d/src/main/thrift/parquet.thrift#L875
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct EncodingMask(i32);
+
+impl EncodingMask {
+    /// Highest valued discriminant in the [`Encoding`] enum
+    const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT;
+    /// Bits that must be clear in a valid mask (despite the name, these are the disallowed bits):
+    /// every position above `MAX_ENCODING`, plus bit 1 for the never used GROUP_VAR_INT encoding.
+    const ALLOWED_MASK: u32 =
+        !(1u32 << (EncodingMask::MAX_ENCODING as u32 + 1)).wrapping_sub(1) | 1 << 1;
+
+    /// Attempt to create a new `EncodingMask` from an integer.
+    ///
+    /// Returns an error if `val` has any bit set that is also set in `ALLOWED_MASK`.
+    pub fn try_new(val: i32) -> Result<Self> {
+        if val as u32 & Self::ALLOWED_MASK != 0 { // `as` binds first: (val as u32) & mask
+            return Err(general_err!("Attempt to create invalid mask: 0x{:x}", val));
+        }
+        Ok(Self(val))
+    }
+
+    /// Return an integer representation of this `EncodingMask`.
+    pub fn as_i32(&self) -> i32 {
+        self.0
+    }
+
+    /// Create a new `EncodingMask` from a collection of [`Encoding`]s.
+    pub fn new_from_encodings<'a>(encodings: impl Iterator<Item = &'a Encoding>) -> Self {
+        let mut mask = 0;
+        for &e in encodings {
+            mask |= 1 << (e as i32); // bit position is the enum discriminant
+        }
+        Self(mask)
+    }
+
+    /// Mark the given [`Encoding`] as present in this mask.
+    pub fn insert(&mut self, val: Encoding) {
+        self.0 |= 1 << (val as i32);
+    }
+
+    /// Test if a given [`Encoding`] is present in this mask.
+    pub fn is_set(&self, val: Encoding) -> bool {
+        self.0 & (1 << (val as i32)) != 0
+    }
+
+    /// Test if this mask has only the bit for the given [`Encoding`] set.
+    pub fn is_only(&self, val: Encoding) -> bool {
+        self.0 == (1 << (val as i32))
+    }
+
+    /// Test if all [`Encoding`]s in a given set are present in this mask.
+    pub fn all_set<'a>(&self, mut encodings: impl Iterator<Item = &'a Encoding>) -> bool {
+        encodings.all(|&e| self.is_set(e)) // `all` on an empty iterator yields true
+    }
+
+    /// Return an iterator over all [`Encoding`]s present in this mask.
+    pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
+        Self::mask_to_encodings_iter(self.0)
+    }
+
+    fn mask_to_encodings_iter(mask: i32) -> impl Iterator<Item = Encoding> {
+        (0..=Self::MAX_ENCODING) // candidate bit positions, lowest first
+            .filter(move |i| mask & (1 << i) != 0) // keep only the positions whose bit is set
+            .map(i32_to_encoding)
+    }
+}
+
+impl HeapSize for EncodingMask {
+    fn heap_size(&self) -> usize {
+        0 // the mask wraps a single inline `i32`, so there are no heap allocations
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EncodingMask {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let mut mask = 0;
+
+        // This reads a Thrift `list<Encoding>` and turns it into a bitmask
+        let list_ident = prot.read_list_begin()?;
+        for _ in 0..list_ident.size {
+            let val = Encoding::read_thrift(prot)?; // an element that fails to parse aborts the read
+            mask |= 1 << val as i32; // `as` binds tighter than `<<`: sets bit number `val as i32`
+        }
+        Ok(Self(mask))
+    }
+}
+
+#[allow(deprecated)] // the match below names the deprecated `Encoding::BIT_PACKED`
+fn i32_to_encoding(val: i32) -> Encoding {
+    match val {
+        0 => Encoding::PLAIN,
+        2 => Encoding::PLAIN_DICTIONARY,
+        3 => Encoding::RLE,
+        4 => Encoding::BIT_PACKED,
+        5 => Encoding::DELTA_BINARY_PACKED,
+        6 => Encoding::DELTA_LENGTH_BYTE_ARRAY,
+        7 => Encoding::DELTA_BYTE_ARRAY,
+        8 => Encoding::RLE_DICTIONARY,
+        9 => Encoding::BYTE_STREAM_SPLIT,
+        _ => panic!("Impossible encoding {val}"), // covers 1 (no arm above) and anything outside 0..=9
+    }
+}
+
 // ----------------------------------------------------------------------
-// Mirrors `parquet::CompressionCodec`
+// Mirrors thrift enum `CompressionCodec`
 
 /// Supported block compression algorithms.
 ///
@@ -400,11 +834,51 @@ pub enum Compression {
     LZ4_RAW,
 }
 
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let val = prot.read_i32()?; // the codec arrives as a single i32 id
+        Ok(match val {
+            0 => Self::UNCOMPRESSED,
+            1 => Self::SNAPPY,
+            2 => Self::GZIP(Default::default()), // only the id is read; the payload is defaulted
+            3 => Self::LZO,
+            4 => Self::BROTLI(Default::default()), // payload defaulted, as for GZIP
+            5 => Self::LZ4,
+            6 => Self::ZSTD(Default::default()), // payload defaulted, as for GZIP
+            7 => Self::LZ4_RAW,
+            _ => return Err(general_err!("Unexpected CompressionCodec {}", val)),
+        })
+    }
+}
+
+// TODO(ets): explore replacing this with a thrift_enum!(ThriftCompression) for the serialization
+// and then provide `From` impls to convert back and forth. This is necessary due to the addition
+// of compression level to some variants.
+impl WriteThrift for Compression {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        let id: i32 = match *self { // same ids that `read_thrift` for `Compression` accepts
+            Self::UNCOMPRESSED => 0,
+            Self::SNAPPY => 1,
+            Self::GZIP(_) => 2, // the variant payload is ignored; only the id is written
+            Self::LZO => 3,
+            Self::BROTLI(_) => 4, // payload ignored, as for GZIP
+            Self::LZ4 => 5,
+            Self::ZSTD(_) => 6, // payload ignored, as for GZIP
+            Self::LZ4_RAW => 7,
+        };
+        writer.write_i32(id)
+    }
+}
+
+write_thrift_field!(Compression, FieldType::I32);
+
 impl Compression {
     /// Returns the codec type of this compression setting as a string, without the compression
     /// level.
     pub(crate) fn codec_to_string(self) -> String {
-        format!("{:?}", self).split('(').next().unwrap().to_owned()
+        format!("{self:?}").split('(').next().unwrap().to_owned()
     }
 }
 
@@ -416,7 +890,7 @@ fn split_compression_string(str_setting: &str) -> Result<(&str, Option<u32>), Pa
             let level = &level_str[..level_str.len() - 1]
                 .parse::<u32>()
                 .map_err(|_| {
-                    ParquetError::General(format!("invalid compression level: {}", level_str))
+                    ParquetError::General(format!("invalid compression level: {level_str}"))
                 })?;
             Ok((codec, Some(*level)))
         }
@@ -436,8 +910,7 @@ fn check_level_is_none(level: &Option<u32>) -> Result<(), ParquetError> {
 
 fn require_level(codec: &str, level: Option<u32>) -> Result<u32, ParquetError> {
     level.ok_or(ParquetError::General(format!(
-        "{} requires a compression level",
-        codec
+        "{codec} requires a compression level",
     )))
 }
 
@@ -492,25 +965,182 @@ impl FromStr for Compression {
 }
 
 // ----------------------------------------------------------------------
-/// Mirrors [parquet::PageType]
-///
+// Mirrors thrift enum `PageType`
+
+thrift_enum!(
 /// Available data pages for Parquet file format.
 /// Note that some of the page types may not be supported.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-#[allow(non_camel_case_types)]
-pub enum PageType {
-    /// Data page Parquet 1.0
-    DATA_PAGE,
-    /// Index page
-    INDEX_PAGE,
-    /// Dictionary page
-    DICTIONARY_PAGE,
-    /// Data page Parquet 2.0
-    DATA_PAGE_V2,
+enum PageType {
+  DATA_PAGE = 0;
+  INDEX_PAGE = 1;
+  DICTIONARY_PAGE = 2;
+  DATA_PAGE_V2 = 3;
+}
+);
+
+// ----------------------------------------------------------------------
+// Mirrors thrift enum `BoundaryOrder`
+
+thrift_enum!(
+/// Enum to annotate whether lists of min/max elements inside ColumnIndex
+/// are ordered and if so, in which direction.
+enum BoundaryOrder {
+  UNORDERED = 0;
+  ASCENDING = 1;
+  DESCENDING = 2;
+}
+);
+
+// ----------------------------------------------------------------------
+// Mirrors thrift enum `EdgeInterpolationAlgorithm`
+
+// This enum is hand-coded so that it can carry the `_Unknown` variant, which keeps it forward compatible.
+
+/// Edge interpolation algorithm for [`LogicalType::Geography`]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[repr(i32)]
+#[derive(Default)]
+pub enum EdgeInterpolationAlgorithm {
+    /// Edges are interpolated as geodesics on a sphere. This is the default algorithm.
+    #[default]
+    SPHERICAL = 0,
+    /// <https://en.wikipedia.org/wiki/Vincenty%27s_formulae>
+    VINCENTY = 1,
+    /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970
+    THOMAS = 2,
+    /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965.
+    ANDOYER = 3,
+    /// Karney, Charles FF. "Algorithms for geodesics." Journal of Geodesy 87 (2013): 43-55
+    KARNEY = 4,
+    /// Unknown algorithm; holds the raw `i32` value that did not match any variant above.
+    _Unknown(i32),
+}
+
+#[cfg(feature = "geospatial")]
+impl EdgeInterpolationAlgorithm {
+    /// Converts an [`EdgeInterpolationAlgorithm`] into its corresponding algorithm defined by
+    /// [`parquet_geospatial::WkbEdges`].
+    ///
+    /// This method will only return an Err if the [`EdgeInterpolationAlgorithm`] is the `_Unknown`
+    /// variant.
+    pub fn try_as_edges(&self) -> Result<parquet_geospatial::WkbEdges> {
+        match &self {
+            Self::SPHERICAL => Ok(parquet_geospatial::WkbEdges::Spherical),
+            Self::VINCENTY => Ok(parquet_geospatial::WkbEdges::Vincenty),
+            Self::THOMAS => Ok(parquet_geospatial::WkbEdges::Thomas),
+            Self::ANDOYER => Ok(parquet_geospatial::WkbEdges::Andoyer),
+            Self::KARNEY => Ok(parquet_geospatial::WkbEdges::Karney),
+            unknown => Err(general_err!( // only `_Unknown(_)` can reach this arm
+                "Unknown edge interpolation algorithm: {}",
+                unknown
+            )),
+        }
+    }
+}
+
+impl fmt::Display for EdgeInterpolationAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{self:?}") // Display renders exactly what the derived `Debug` renders
+    }
+}
+
+#[cfg(feature = "geospatial")]
+impl From<parquet_geospatial::WkbEdges> for EdgeInterpolationAlgorithm {
+    fn from(value: parquet_geospatial::WkbEdges) -> Self {
+        match value { // no wildcard arm: each listed `WkbEdges` variant maps to its namesake
+            parquet_geospatial::WkbEdges::Spherical => Self::SPHERICAL,
+            parquet_geospatial::WkbEdges::Vincenty => Self::VINCENTY,
+            parquet_geospatial::WkbEdges::Thomas => Self::THOMAS,
+            parquet_geospatial::WkbEdges::Andoyer => Self::ANDOYER,
+            parquet_geospatial::WkbEdges::Karney => Self::KARNEY,
+        }
+    }
+}
+
+impl FromStr for EdgeInterpolationAlgorithm {
+    type Err = ParquetError;
+
+    fn from_str(s: &str) -> Result<Self> {
+        match s.to_ascii_uppercase().as_str() { // upper-cased first, so ASCII case is ignored
+            "SPHERICAL" => Ok(EdgeInterpolationAlgorithm::SPHERICAL),
+            "VINCENTY" => Ok(EdgeInterpolationAlgorithm::VINCENTY),
+            "THOMAS" => Ok(EdgeInterpolationAlgorithm::THOMAS),
+            "ANDOYER" => Ok(EdgeInterpolationAlgorithm::ANDOYER),
+            "KARNEY" => Ok(EdgeInterpolationAlgorithm::KARNEY),
+            unknown => Err(general_err!( // `unknown` is the upper-cased text, not the original `s`
+                "Unknown edge interpolation algorithm: {}",
+                unknown
+            )),
+        }
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let val = prot.read_i32()?; // the algorithm arrives as a single i32
+        match val {
+            0 => Ok(Self::SPHERICAL),
+            1 => Ok(Self::VINCENTY),
+            2 => Ok(Self::THOMAS),
+            3 => Ok(Self::ANDOYER),
+            4 => Ok(Self::KARNEY),
+            _ => Ok(Self::_Unknown(val)), // unrecognized values are kept, not rejected
+        }
+    }
+}
+
+impl WriteThrift for EdgeInterpolationAlgorithm {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        let val: i32 = match *self { // same numbering as the explicit enum discriminants
+            Self::SPHERICAL => 0,
+            Self::VINCENTY => 1,
+            Self::THOMAS => 2,
+            Self::ANDOYER => 3,
+            Self::KARNEY => 4,
+            Self::_Unknown(i) => i, // the stored raw value is written back unchanged
+        };
+        writer.write_i32(val)
+    }
+}
+
+write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `BloomFilterAlgorithm`
+
+thrift_union_all_empty!(
+/// The algorithm used in Bloom filter.
+union BloomFilterAlgorithm {
+  /// Block-based Bloom filter.
+  1: SplitBlockAlgorithm BLOCK;
 }
+);
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::ColumnOrder`
+// Mirrors thrift union `BloomFilterHash`
+
+thrift_union_all_empty!(
+/// The hash function used in Bloom filter. This function takes the hash of a column value
+/// using plain encoding.
+union BloomFilterHash {
+  /// xxHash Strategy.
+  1: XxHash XXHASH;
+}
+);
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `BloomFilterCompression`
+
+thrift_union_all_empty!(
+/// The compression used in the Bloom filter.
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+);
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `ColumnOrder`
 
 /// Sort order for page and column statistics.
 ///
@@ -549,19 +1179,35 @@ pub enum ColumnOrder {
     /// Column uses the order defined by its logical or physical type
     /// (if there is no logical type), parquet-format 2.4.0+.
     TYPE_DEFINED_ORDER(SortOrder),
+    // The following are not defined in the Parquet spec and should always be last.
     /// Undefined column order, means legacy behaviour before parquet-format 2.4.0.
     /// Sort order is always SIGNED.
     UNDEFINED,
+    /// An unknown but present ColumnOrder. Statistics with an unknown `ColumnOrder`
+    /// will be ignored.
+    UNKNOWN,
 }
 
 impl ColumnOrder {
     /// Returns sort order for a physical/logical type.
+    #[deprecated(
+        since = "57.1.0",
+        note = "use `ColumnOrder::sort_order_for_type` instead"
+    )]
     pub fn get_sort_order(
         logical_type: Option<LogicalType>,
         converted_type: ConvertedType,
         physical_type: Type,
     ) -> SortOrder {
-        // TODO: Should this take converted and logical type, for compatibility?
+        Self::sort_order_for_type(logical_type.as_ref(), converted_type, physical_type)
+    }
+
+    /// Returns sort order for a physical/logical type.
+    pub fn sort_order_for_type(
+        logical_type: Option<&LogicalType>,
+        converted_type: ConvertedType,
+        physical_type: Type,
+    ) -> SortOrder {
         match logical_type {
             Some(logical) => match logical {
                 LogicalType::String | LogicalType::Enum | LogicalType::Json | LogicalType::Bson => {
@@ -579,6 +1225,10 @@ impl ColumnOrder {
                 LogicalType::Unknown => SortOrder::UNDEFINED,
                 LogicalType::Uuid => SortOrder::UNSIGNED,
                 LogicalType::Float16 => SortOrder::SIGNED,
+                LogicalType::Variant { .. }
+                | LogicalType::Geometry { .. }
+                | LogicalType::Geography { .. }
+                | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED,
             },
             // Fall back to converted type
             None => Self::get_converted_sort_order(converted_type, physical_type),
@@ -648,33 +1298,56 @@ impl ColumnOrder {
         match *self {
             ColumnOrder::TYPE_DEFINED_ORDER(order) => order,
             ColumnOrder::UNDEFINED => SortOrder::SIGNED,
+            ColumnOrder::UNKNOWN => SortOrder::UNDEFINED,
         }
     }
 }
 
-impl fmt::Display for Type {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{self:?}")
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let field_ident = prot.read_field_begin(0)?; // NOTE(review): argument looks like the previous field id — confirm
+        if field_ident.field_type == FieldType::Stop {
+            return Err(general_err!("Received empty union from remote ColumnOrder"));
+        }
+        let ret = match field_ident.id {
+            1 => {
+                // NOTE: the sort order needs to be set correctly after parsing.
+                prot.skip_empty_struct()?;
+                Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED) // SIGNED is only a placeholder, per the note above
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?; // unrecognized union member: skip its payload
+                Self::UNKNOWN
+            }
+        };
+        let field_ident = prot.read_field_begin(field_ident.id)?; // next header must be Stop: one field per union
+        if field_ident.field_type != FieldType::Stop {
+            return Err(general_err!(
+                "Received multiple fields for union from remote ColumnOrder"
+            ));
+        }
+        Ok(ret)
     }
 }
 
-impl fmt::Display for ConvertedType {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{self:?}")
-    }
-}
+impl WriteThrift for ColumnOrder {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
 
-impl fmt::Display for Repetition {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{self:?}")
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        match *self {
+            Self::TYPE_DEFINED_ORDER(_) => {
+                writer.write_field_begin(FieldType::Struct, 1, 0)?;
+                writer.write_struct_end()?;
+            }
+            _ => return Err(general_err!("Attempt to write undefined ColumnOrder")),
+        }
+        // write end of struct for this union
+        writer.write_struct_end()
     }
 }
 
-impl fmt::Display for Encoding {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{self:?}")
-    }
-}
+// ----------------------------------------------------------------------
+// Display handlers
 
 impl fmt::Display for Compression {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -682,210 +1355,18 @@ impl fmt::Display for Compression {
     }
 }
 
-impl fmt::Display for PageType {
+impl fmt::Display for SortOrder {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{self:?}")
     }
 }
 
-impl fmt::Display for SortOrder {
+impl fmt::Display for ColumnOrder {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{self:?}")
     }
 }
 
-impl fmt::Display for ColumnOrder {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{self:?}")
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::Type <=> Type conversion
-
-impl TryFrom<parquet::Type> for Type {
-    type Error = ParquetError;
-
-    fn try_from(value: parquet::Type) -> Result<Self> {
-        Ok(match value {
-            parquet::Type::BOOLEAN => Type::BOOLEAN,
-            parquet::Type::INT32 => Type::INT32,
-            parquet::Type::INT64 => Type::INT64,
-            parquet::Type::INT96 => Type::INT96,
-            parquet::Type::FLOAT => Type::FLOAT,
-            parquet::Type::DOUBLE => Type::DOUBLE,
-            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
-            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
-            _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
-        })
-    }
-}
-
-impl From<Type> for parquet::Type {
-    fn from(value: Type) -> Self {
-        match value {
-            Type::BOOLEAN => parquet::Type::BOOLEAN,
-            Type::INT32 => parquet::Type::INT32,
-            Type::INT64 => parquet::Type::INT64,
-            Type::INT96 => parquet::Type::INT96,
-            Type::FLOAT => parquet::Type::FLOAT,
-            Type::DOUBLE => parquet::Type::DOUBLE,
-            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
-            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::ConvertedType <=> ConvertedType conversion
-
-impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
-    type Error = ParquetError;
-
-    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
-        Ok(match option {
-            None => ConvertedType::NONE,
-            Some(value) => match value {
-                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
-                parquet::ConvertedType::MAP => ConvertedType::MAP,
-                parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
-                parquet::ConvertedType::LIST => ConvertedType::LIST,
-                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
-                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
-                parquet::ConvertedType::DATE => ConvertedType::DATE,
-                parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
-                parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
-                parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
-                parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
-                parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
-                parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
-                parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
-                parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
-                parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
-                parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
-                parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
-                parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
-                parquet::ConvertedType::JSON => ConvertedType::JSON,
-                parquet::ConvertedType::BSON => ConvertedType::BSON,
-                parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
-                _ => {
-                    return Err(general_err!(
-                        "unexpected parquet converted type: {}",
-                        value.0
-                    ))
-                }
-            },
-        })
-    }
-}
-
-impl From<ConvertedType> for Option<parquet::ConvertedType> {
-    fn from(value: ConvertedType) -> Self {
-        match value {
-            ConvertedType::NONE => None,
-            ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),
-            ConvertedType::MAP => Some(parquet::ConvertedType::MAP),
-            ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE),
-            ConvertedType::LIST => Some(parquet::ConvertedType::LIST),
-            ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM),
-            ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL),
-            ConvertedType::DATE => Some(parquet::ConvertedType::DATE),
-            ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS),
-            ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS),
-            ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
-            ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS),
-            ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8),
-            ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16),
-            ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32),
-            ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64),
-            ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8),
-            ConvertedType::INT_16 => Some(parquet::ConvertedType::INT_16),
-            ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32),
-            ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64),
-            ConvertedType::JSON => Some(parquet::ConvertedType::JSON),
-            ConvertedType::BSON => Some(parquet::ConvertedType::BSON),
-            ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL),
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::LogicalType <=> LogicalType conversion
-
-impl From<parquet::LogicalType> for LogicalType {
-    fn from(value: parquet::LogicalType) -> Self {
-        match value {
-            parquet::LogicalType::STRING(_) => LogicalType::String,
-            parquet::LogicalType::MAP(_) => LogicalType::Map,
-            parquet::LogicalType::LIST(_) => LogicalType::List,
-            parquet::LogicalType::ENUM(_) => LogicalType::Enum,
-            parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
-                scale: t.scale,
-                precision: t.precision,
-            },
-            parquet::LogicalType::DATE(_) => LogicalType::Date,
-            parquet::LogicalType::TIME(t) => LogicalType::Time {
-                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
-                unit: t.unit,
-            },
-            parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
-                is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
-                unit: t.unit,
-            },
-            parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
-                bit_width: t.bit_width,
-                is_signed: t.is_signed,
-            },
-            parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
-            parquet::LogicalType::JSON(_) => LogicalType::Json,
-            parquet::LogicalType::BSON(_) => LogicalType::Bson,
-            parquet::LogicalType::UUID(_) => LogicalType::Uuid,
-            parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
-        }
-    }
-}
-
-impl From<LogicalType> for parquet::LogicalType {
-    fn from(value: LogicalType) -> Self {
-        match value {
-            LogicalType::String => parquet::LogicalType::STRING(Default::default()),
-            LogicalType::Map => parquet::LogicalType::MAP(Default::default()),
-            LogicalType::List => parquet::LogicalType::LIST(Default::default()),
-            LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()),
-            LogicalType::Decimal { scale, precision } => {
-                parquet::LogicalType::DECIMAL(DecimalType { scale, precision })
-            }
-            LogicalType::Date => parquet::LogicalType::DATE(Default::default()),
-            LogicalType::Time {
-                is_adjusted_to_u_t_c,
-                unit,
-            } => parquet::LogicalType::TIME(TimeType {
-                is_adjusted_to_u_t_c,
-                unit,
-            }),
-            LogicalType::Timestamp {
-                is_adjusted_to_u_t_c,
-                unit,
-            } => parquet::LogicalType::TIMESTAMP(TimestampType {
-                is_adjusted_to_u_t_c,
-                unit,
-            }),
-            LogicalType::Integer {
-                bit_width,
-                is_signed,
-            } => parquet::LogicalType::INTEGER(IntType {
-                bit_width,
-                is_signed,
-            }),
-            LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()),
-            LogicalType::Json => parquet::LogicalType::JSON(Default::default()),
-            LogicalType::Bson => parquet::LogicalType::BSON(Default::default()),
-            LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()),
-            LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()),
-        }
-    }
-}
-
 // ----------------------------------------------------------------------
 // LogicalType <=> ConvertedType conversion
 
@@ -906,14 +1387,14 @@ impl From<Option<LogicalType>> for ConvertedType {
                 LogicalType::Decimal { .. } => ConvertedType::DECIMAL,
                 LogicalType::Date => ConvertedType::DATE,
                 LogicalType::Time { unit, .. } => match unit {
-                    TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS,
-                    TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS,
-                    TimeUnit::NANOS(_) => ConvertedType::NONE,
+                    TimeUnit::MILLIS => ConvertedType::TIME_MILLIS,
+                    TimeUnit::MICROS => ConvertedType::TIME_MICROS,
+                    TimeUnit::NANOS => ConvertedType::NONE,
                 },
                 LogicalType::Timestamp { unit, .. } => match unit {
-                    TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS,
-                    TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS,
-                    TimeUnit::NANOS(_) => ConvertedType::NONE,
+                    TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS,
+                    TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS,
+                    TimeUnit::NANOS => ConvertedType::NONE,
                 },
                 LogicalType::Integer {
                     bit_width,
@@ -927,159 +1408,25 @@ impl From<Option<LogicalType>> for ConvertedType {
                     (16, false) => ConvertedType::UINT_16,
                     (32, false) => ConvertedType::UINT_32,
                     (64, false) => ConvertedType::UINT_64,
-                    t => panic!("Integer type {t:?} is not supported"),
+                    (bit_width, is_signed) => panic!(
+                        "Integer type bit_width={bit_width}, signed={is_signed} is not supported"
+                    ),
                 },
                 LogicalType::Json => ConvertedType::JSON,
                 LogicalType::Bson => ConvertedType::BSON,
-                LogicalType::Uuid | LogicalType::Float16 | LogicalType::Unknown => {
-                    ConvertedType::NONE
-                }
+                LogicalType::Uuid
+                | LogicalType::Float16
+                | LogicalType::Variant { .. }
+                | LogicalType::Geometry { .. }
+                | LogicalType::Geography { .. }
+                | LogicalType::_Unknown { .. }
+                | LogicalType::Unknown => ConvertedType::NONE,
             },
             None => ConvertedType::NONE,
         }
     }
 }
 
-// ----------------------------------------------------------------------
-// parquet::FieldRepetitionType <=> Repetition conversion
-
-impl TryFrom<parquet::FieldRepetitionType> for Repetition {
-    type Error = ParquetError;
-
-    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
-        Ok(match value {
-            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
-            _ => {
-                return Err(general_err!(
-                    "unexpected parquet repetition type: {}",
-                    value.0
-                ))
-            }
-        })
-    }
-}
-
-impl From<Repetition> for parquet::FieldRepetitionType {
-    fn from(value: Repetition) -> Self {
-        match value {
-            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
-            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
-            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::Encoding <=> Encoding conversion
-
-impl TryFrom<parquet::Encoding> for Encoding {
-    type Error = ParquetError;
-
-    fn try_from(value: parquet::Encoding) -> Result<Self> {
-        Ok(match value {
-            parquet::Encoding::PLAIN => Encoding::PLAIN,
-            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
-            parquet::Encoding::RLE => Encoding::RLE,
-            #[allow(deprecated)]
-            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
-            parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED,
-            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
-            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
-            parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT,
-            _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)),
-        })
-    }
-}
-
-impl From<Encoding> for parquet::Encoding {
-    fn from(value: Encoding) -> Self {
-        match value {
-            Encoding::PLAIN => parquet::Encoding::PLAIN,
-            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
-            Encoding::RLE => parquet::Encoding::RLE,
-            #[allow(deprecated)]
-            Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
-            Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED,
-            Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
-            Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
-            Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::CompressionCodec <=> Compression conversion
-
-impl TryFrom<parquet::CompressionCodec> for Compression {
-    type Error = ParquetError;
-
-    fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
-        Ok(match value {
-            parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED,
-            parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
-            parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()),
-            parquet::CompressionCodec::LZO => Compression::LZO,
-            parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()),
-            parquet::CompressionCodec::LZ4 => Compression::LZ4,
-            parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()),
-            parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW,
-            _ => {
-                return Err(general_err!(
-                    "unexpected parquet compression codec: {}",
-                    value.0
-                ))
-            }
-        })
-    }
-}
-
-impl From<Compression> for parquet::CompressionCodec {
-    fn from(value: Compression) -> Self {
-        match value {
-            Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
-            Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
-            Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
-            Compression::LZO => parquet::CompressionCodec::LZO,
-            Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
-            Compression::LZ4 => parquet::CompressionCodec::LZ4,
-            Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
-            Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
-        }
-    }
-}
-
-// ----------------------------------------------------------------------
-// parquet::PageType <=> PageType conversion
-
-impl TryFrom<parquet::PageType> for PageType {
-    type Error = ParquetError;
-
-    fn try_from(value: parquet::PageType) -> Result<Self> {
-        Ok(match value {
-            parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
-            parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
-            parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
-            parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
-            _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
-        })
-    }
-}
-
-impl From<PageType> for parquet::PageType {
-    fn from(value: PageType) -> Self {
-        match value {
-            PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
-            PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
-            PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
-            PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
-        }
-    }
-}
-
 // ----------------------------------------------------------------------
 // String conversions for schema parsing.
 
@@ -1167,11 +1514,11 @@ impl str::FromStr for LogicalType {
             "DATE" => Ok(LogicalType::Date),
             "TIME" => Ok(LogicalType::Time {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
+                unit: TimeUnit::MILLIS,
             }),
             "TIMESTAMP" => Ok(LogicalType::Timestamp {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(parquet::MilliSeconds {}),
+                unit: TimeUnit::MILLIS,
             }),
             "STRING" => Ok(LogicalType::String),
             "JSON" => Ok(LogicalType::Json),
@@ -1181,337 +1528,148 @@ impl str::FromStr for LogicalType {
             "INTERVAL" => Err(general_err!(
                 "Interval parquet logical type not yet supported"
             )),
-            "FLOAT16" => Ok(LogicalType::Float16),
-            other => Err(general_err!("Invalid parquet logical type {}", other)),
-        }
-    }
-}
-
-#[cfg(test)]
-#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_display_type() {
-        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
-        assert_eq!(Type::INT32.to_string(), "INT32");
-        assert_eq!(Type::INT64.to_string(), "INT64");
-        assert_eq!(Type::INT96.to_string(), "INT96");
-        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
-        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
-        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
-        assert_eq!(
-            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
-            "FIXED_LEN_BYTE_ARRAY"
-        );
-    }
-
-    #[test]
-    fn test_from_type() {
-        assert_eq!(
-            Type::try_from(parquet::Type::BOOLEAN).unwrap(),
-            Type::BOOLEAN
-        );
-        assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32);
-        assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64);
-        assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96);
-        assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT);
-        assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE);
-        assert_eq!(
-            Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(),
-            Type::BYTE_ARRAY
-        );
-        assert_eq!(
-            Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(),
-            Type::FIXED_LEN_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_into_type() {
-        assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into());
-        assert_eq!(parquet::Type::INT32, Type::INT32.into());
-        assert_eq!(parquet::Type::INT64, Type::INT64.into());
-        assert_eq!(parquet::Type::INT96, Type::INT96.into());
-        assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into());
-        assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into());
-        assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into());
-        assert_eq!(
-            parquet::Type::FIXED_LEN_BYTE_ARRAY,
-            Type::FIXED_LEN_BYTE_ARRAY.into()
-        );
-    }
-
-    #[test]
-    fn test_from_string_into_type() {
-        assert_eq!(
-            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
-            Type::BOOLEAN
-        );
-        assert_eq!(
-            Type::INT32.to_string().parse::<Type>().unwrap(),
-            Type::INT32
-        );
-        assert_eq!(
-            Type::INT64.to_string().parse::<Type>().unwrap(),
-            Type::INT64
-        );
-        assert_eq!(
-            Type::INT96.to_string().parse::<Type>().unwrap(),
-            Type::INT96
-        );
-        assert_eq!(
-            Type::FLOAT.to_string().parse::<Type>().unwrap(),
-            Type::FLOAT
-        );
-        assert_eq!(
-            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
-            Type::DOUBLE
-        );
-        assert_eq!(
-            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
-            Type::BYTE_ARRAY
-        );
-        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
-        assert_eq!(
-            Type::FIXED_LEN_BYTE_ARRAY
-                .to_string()
-                .parse::<Type>()
-                .unwrap(),
-            Type::FIXED_LEN_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_display_converted_type() {
-        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
-        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
-        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
-        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
-        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
-        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
-        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
-        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
-        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
-        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
-        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MILLIS.to_string(),
-            "TIMESTAMP_MILLIS"
-        );
-        assert_eq!(
-            ConvertedType::TIMESTAMP_MICROS.to_string(),
-            "TIMESTAMP_MICROS"
-        );
-        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
-        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
-        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
-        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
-        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
-        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
-        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
-        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
-        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
-        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
-        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
-        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
-    }
-
-    #[test]
-    fn test_from_converted_type() {
-        let parquet_conv_none: Option<parquet::ConvertedType> = None;
-        assert_eq!(
-            ConvertedType::try_from(parquet_conv_none).unwrap(),
-            ConvertedType::NONE
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(),
-            ConvertedType::UTF8
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(),
-            ConvertedType::MAP
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(),
-            ConvertedType::MAP_KEY_VALUE
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(),
-            ConvertedType::LIST
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(),
-            ConvertedType::ENUM
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
-            ConvertedType::DECIMAL
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(),
-            ConvertedType::DATE
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(),
-            ConvertedType::TIME_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(),
-            ConvertedType::TIME_MICROS
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(),
-            ConvertedType::TIMESTAMP_MILLIS
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(),
-            ConvertedType::TIMESTAMP_MICROS
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(),
-            ConvertedType::UINT_8
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(),
-            ConvertedType::UINT_16
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(),
-            ConvertedType::UINT_32
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(),
-            ConvertedType::UINT_64
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(),
-            ConvertedType::INT_8
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(),
-            ConvertedType::INT_16
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(),
-            ConvertedType::INT_32
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(),
-            ConvertedType::INT_64
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(),
-            ConvertedType::JSON
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(),
-            ConvertedType::BSON
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(),
-            ConvertedType::INTERVAL
-        );
-        assert_eq!(
-            ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(),
-            ConvertedType::DECIMAL
-        )
+            "FLOAT16" => Ok(LogicalType::Float16),
+            "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
+            "GEOGRAPHY" => Ok(LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
+            }),
+            other => Err(general_err!("Invalid parquet logical type {}", other)),
+        }
     }
+}
+
+#[cfg(test)]
+#[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module
+mod tests {
+    use super::*;
+    use crate::parquet_thrift::{ThriftSliceInputProtocol, tests::test_roundtrip};
 
     #[test]
-    fn test_into_converted_type() {
-        let converted_type: Option<parquet::ConvertedType> = None;
-        assert_eq!(converted_type, ConvertedType::NONE.into());
-        assert_eq!(
-            Some(parquet::ConvertedType::UTF8),
-            ConvertedType::UTF8.into()
-        );
-        assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into());
-        assert_eq!(
-            Some(parquet::ConvertedType::MAP_KEY_VALUE),
-            ConvertedType::MAP_KEY_VALUE.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::LIST),
-            ConvertedType::LIST.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::ENUM),
-            ConvertedType::ENUM.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::DECIMAL),
-            ConvertedType::DECIMAL.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::DATE),
-            ConvertedType::DATE.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TIME_MILLIS),
-            ConvertedType::TIME_MILLIS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TIME_MICROS),
-            ConvertedType::TIME_MICROS.into()
-        );
-        assert_eq!(
-            Some(parquet::ConvertedType::TIMESTAMP_MILLIS),
-            ConvertedType::TIMESTAMP_MILLIS.into()
-        );
+    fn test_display_type() {
+        assert_eq!(Type::BOOLEAN.to_string(), "BOOLEAN");
+        assert_eq!(Type::INT32.to_string(), "INT32");
+        assert_eq!(Type::INT64.to_string(), "INT64");
+        assert_eq!(Type::INT96.to_string(), "INT96");
+        assert_eq!(Type::FLOAT.to_string(), "FLOAT");
+        assert_eq!(Type::DOUBLE.to_string(), "DOUBLE");
+        assert_eq!(Type::BYTE_ARRAY.to_string(), "BYTE_ARRAY");
         assert_eq!(
-            Some(parquet::ConvertedType::TIMESTAMP_MICROS),
-            ConvertedType::TIMESTAMP_MICROS.into()
+            Type::FIXED_LEN_BYTE_ARRAY.to_string(),
+            "FIXED_LEN_BYTE_ARRAY"
         );
+    }
+
+    #[test]
+    fn test_from_string_into_type() {
         assert_eq!(
-            Some(parquet::ConvertedType::UINT_8),
-            ConvertedType::UINT_8.into()
+            Type::BOOLEAN.to_string().parse::<Type>().unwrap(),
+            Type::BOOLEAN
         );
         assert_eq!(
-            Some(parquet::ConvertedType::UINT_16),
-            ConvertedType::UINT_16.into()
+            Type::INT32.to_string().parse::<Type>().unwrap(),
+            Type::INT32
         );
         assert_eq!(
-            Some(parquet::ConvertedType::UINT_32),
-            ConvertedType::UINT_32.into()
+            Type::INT64.to_string().parse::<Type>().unwrap(),
+            Type::INT64
         );
         assert_eq!(
-            Some(parquet::ConvertedType::UINT_64),
-            ConvertedType::UINT_64.into()
+            Type::INT96.to_string().parse::<Type>().unwrap(),
+            Type::INT96
         );
         assert_eq!(
-            Some(parquet::ConvertedType::INT_8),
-            ConvertedType::INT_8.into()
+            Type::FLOAT.to_string().parse::<Type>().unwrap(),
+            Type::FLOAT
         );
         assert_eq!(
-            Some(parquet::ConvertedType::INT_16),
-            ConvertedType::INT_16.into()
+            Type::DOUBLE.to_string().parse::<Type>().unwrap(),
+            Type::DOUBLE
         );
         assert_eq!(
-            Some(parquet::ConvertedType::INT_32),
-            ConvertedType::INT_32.into()
+            Type::BYTE_ARRAY.to_string().parse::<Type>().unwrap(),
+            Type::BYTE_ARRAY
         );
+        assert_eq!("BINARY".parse::<Type>().unwrap(), Type::BYTE_ARRAY);
         assert_eq!(
-            Some(parquet::ConvertedType::INT_64),
-            ConvertedType::INT_64.into()
+            Type::FIXED_LEN_BYTE_ARRAY
+                .to_string()
+                .parse::<Type>()
+                .unwrap(),
+            Type::FIXED_LEN_BYTE_ARRAY
         );
+    }
+
+    #[test]
+    fn test_converted_type_roundtrip() {
+        test_roundtrip(ConvertedType::UTF8);
+        test_roundtrip(ConvertedType::MAP);
+        test_roundtrip(ConvertedType::MAP_KEY_VALUE);
+        test_roundtrip(ConvertedType::LIST);
+        test_roundtrip(ConvertedType::ENUM);
+        test_roundtrip(ConvertedType::DECIMAL);
+        test_roundtrip(ConvertedType::DATE);
+        test_roundtrip(ConvertedType::TIME_MILLIS);
+        test_roundtrip(ConvertedType::TIME_MICROS);
+        test_roundtrip(ConvertedType::TIMESTAMP_MILLIS);
+        test_roundtrip(ConvertedType::TIMESTAMP_MICROS);
+        test_roundtrip(ConvertedType::UINT_8);
+        test_roundtrip(ConvertedType::UINT_16);
+        test_roundtrip(ConvertedType::UINT_32);
+        test_roundtrip(ConvertedType::UINT_64);
+        test_roundtrip(ConvertedType::INT_8);
+        test_roundtrip(ConvertedType::INT_16);
+        test_roundtrip(ConvertedType::INT_32);
+        test_roundtrip(ConvertedType::INT_64);
+        test_roundtrip(ConvertedType::JSON);
+        test_roundtrip(ConvertedType::BSON);
+        test_roundtrip(ConvertedType::INTERVAL);
+    }
+
+    #[test]
+    fn test_read_invalid_converted_type() {
+        let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]);
+        let res = ConvertedType::read_thrift(&mut prot);
+        assert!(res.is_err());
         assert_eq!(
-            Some(parquet::ConvertedType::JSON),
-            ConvertedType::JSON.into()
+            res.unwrap_err().to_string(),
+            "Parquet error: Unexpected ConvertedType 63"
         );
+    }
+
+    #[test]
+    fn test_display_converted_type() {
+        assert_eq!(ConvertedType::NONE.to_string(), "NONE");
+        assert_eq!(ConvertedType::UTF8.to_string(), "UTF8");
+        assert_eq!(ConvertedType::MAP.to_string(), "MAP");
+        assert_eq!(ConvertedType::MAP_KEY_VALUE.to_string(), "MAP_KEY_VALUE");
+        assert_eq!(ConvertedType::LIST.to_string(), "LIST");
+        assert_eq!(ConvertedType::ENUM.to_string(), "ENUM");
+        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL");
+        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
+        assert_eq!(ConvertedType::TIME_MILLIS.to_string(), "TIME_MILLIS");
+        assert_eq!(ConvertedType::DATE.to_string(), "DATE");
+        assert_eq!(ConvertedType::TIME_MICROS.to_string(), "TIME_MICROS");
         assert_eq!(
-            Some(parquet::ConvertedType::BSON),
-            ConvertedType::BSON.into()
+            ConvertedType::TIMESTAMP_MILLIS.to_string(),
+            "TIMESTAMP_MILLIS"
         );
         assert_eq!(
-            Some(parquet::ConvertedType::INTERVAL),
-            ConvertedType::INTERVAL.into()
+            ConvertedType::TIMESTAMP_MICROS.to_string(),
+            "TIMESTAMP_MICROS"
         );
-        assert_eq!(
-            Some(parquet::ConvertedType::DECIMAL),
-            ConvertedType::DECIMAL.into()
-        )
+        assert_eq!(ConvertedType::UINT_8.to_string(), "UINT_8");
+        assert_eq!(ConvertedType::UINT_16.to_string(), "UINT_16");
+        assert_eq!(ConvertedType::UINT_32.to_string(), "UINT_32");
+        assert_eq!(ConvertedType::UINT_64.to_string(), "UINT_64");
+        assert_eq!(ConvertedType::INT_8.to_string(), "INT_8");
+        assert_eq!(ConvertedType::INT_16.to_string(), "INT_16");
+        assert_eq!(ConvertedType::INT_32.to_string(), "INT_32");
+        assert_eq!(ConvertedType::INT_64.to_string(), "INT_64");
+        assert_eq!(ConvertedType::JSON.to_string(), "JSON");
+        assert_eq!(ConvertedType::BSON.to_string(), "BSON");
+        assert_eq!(ConvertedType::INTERVAL.to_string(), "INTERVAL");
+        assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL")
     }
 
     #[test]
@@ -1715,42 +1873,42 @@ mod tests {
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Time {
-                unit: TimeUnit::MILLIS(Default::default()),
+                unit: TimeUnit::MILLIS,
                 is_adjusted_to_u_t_c: true,
             })),
             ConvertedType::TIME_MILLIS
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Time {
-                unit: TimeUnit::MICROS(Default::default()),
+                unit: TimeUnit::MICROS,
                 is_adjusted_to_u_t_c: true,
             })),
             ConvertedType::TIME_MICROS
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Time {
-                unit: TimeUnit::NANOS(Default::default()),
+                unit: TimeUnit::NANOS,
                 is_adjusted_to_u_t_c: false,
             })),
             ConvertedType::NONE
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Timestamp {
-                unit: TimeUnit::MILLIS(Default::default()),
+                unit: TimeUnit::MILLIS,
                 is_adjusted_to_u_t_c: true,
             })),
             ConvertedType::TIMESTAMP_MILLIS
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Timestamp {
-                unit: TimeUnit::MICROS(Default::default()),
+                unit: TimeUnit::MICROS,
                 is_adjusted_to_u_t_c: false,
             })),
             ConvertedType::TIMESTAMP_MICROS
         );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Timestamp {
-                unit: TimeUnit::NANOS(Default::default()),
+                unit: TimeUnit::NANOS,
                 is_adjusted_to_u_t_c: false,
             })),
             ConvertedType::NONE
@@ -1831,12 +1989,106 @@ mod tests {
             ConvertedType::from(Some(LogicalType::Float16)),
             ConvertedType::NONE
         );
+        assert_eq!(
+            ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
+            ConvertedType::NONE
+        );
+        assert_eq!(
+            ConvertedType::from(Some(LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::default()),
+            })),
+            ConvertedType::NONE
+        );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Unknown)),
             ConvertedType::NONE
         );
     }
 
+    #[test]
+    fn test_logical_type_roundtrip() {
+        test_roundtrip(LogicalType::String);
+        test_roundtrip(LogicalType::Map);
+        test_roundtrip(LogicalType::List);
+        test_roundtrip(LogicalType::Enum);
+        test_roundtrip(LogicalType::Decimal {
+            scale: 0,
+            precision: 20,
+        });
+        test_roundtrip(LogicalType::Date);
+        test_roundtrip(LogicalType::Time {
+            is_adjusted_to_u_t_c: true,
+            unit: TimeUnit::MICROS,
+        });
+        test_roundtrip(LogicalType::Time {
+            is_adjusted_to_u_t_c: false,
+            unit: TimeUnit::MILLIS,
+        });
+        test_roundtrip(LogicalType::Time {
+            is_adjusted_to_u_t_c: false,
+            unit: TimeUnit::NANOS,
+        });
+        test_roundtrip(LogicalType::Timestamp {
+            is_adjusted_to_u_t_c: false,
+            unit: TimeUnit::MICROS,
+        });
+        test_roundtrip(LogicalType::Timestamp {
+            is_adjusted_to_u_t_c: true,
+            unit: TimeUnit::MILLIS,
+        });
+        test_roundtrip(LogicalType::Timestamp {
+            is_adjusted_to_u_t_c: true,
+            unit: TimeUnit::NANOS,
+        });
+        test_roundtrip(LogicalType::Integer {
+            bit_width: 8,
+            is_signed: true,
+        });
+        test_roundtrip(LogicalType::Integer {
+            bit_width: 16,
+            is_signed: false,
+        });
+        test_roundtrip(LogicalType::Integer {
+            bit_width: 32,
+            is_signed: true,
+        });
+        test_roundtrip(LogicalType::Integer {
+            bit_width: 64,
+            is_signed: false,
+        });
+        test_roundtrip(LogicalType::Json);
+        test_roundtrip(LogicalType::Bson);
+        test_roundtrip(LogicalType::Uuid);
+        test_roundtrip(LogicalType::Float16);
+        test_roundtrip(LogicalType::Variant {
+            specification_version: Some(1),
+        });
+        test_roundtrip(LogicalType::Variant {
+            specification_version: None,
+        });
+        test_roundtrip(LogicalType::Geometry {
+            crs: Some("foo".to_owned()),
+        });
+        test_roundtrip(LogicalType::Geometry { crs: None });
+        test_roundtrip(LogicalType::Geography {
+            crs: Some("foo".to_owned()),
+            algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
+        });
+        test_roundtrip(LogicalType::Geography {
+            crs: None,
+            algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
+        });
+        test_roundtrip(LogicalType::Geography {
+            crs: Some("foo".to_owned()),
+            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
+        });
+        test_roundtrip(LogicalType::Geography {
+            crs: None,
+            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
+        });
+    }
+
     #[test]
     fn test_display_repetition() {
         assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED");
@@ -1844,38 +2096,6 @@ mod tests {
         assert_eq!(Repetition::REPEATED.to_string(), "REPEATED");
     }
 
-    #[test]
-    fn test_from_repetition() {
-        assert_eq!(
-            Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(),
-            Repetition::REQUIRED
-        );
-        assert_eq!(
-            Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(),
-            Repetition::OPTIONAL
-        );
-        assert_eq!(
-            Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(),
-            Repetition::REPEATED
-        );
-    }
-
-    #[test]
-    fn test_into_repetition() {
-        assert_eq!(
-            parquet::FieldRepetitionType::REQUIRED,
-            Repetition::REQUIRED.into()
-        );
-        assert_eq!(
-            parquet::FieldRepetitionType::OPTIONAL,
-            Repetition::OPTIONAL.into()
-        );
-        assert_eq!(
-            parquet::FieldRepetitionType::REPEATED,
-            Repetition::REPEATED.into()
-        );
-    }
-
     #[test]
     fn test_from_string_into_repetition() {
         assert_eq!(
@@ -1919,61 +2139,6 @@ mod tests {
         assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
     }
 
-    #[test]
-    fn test_from_encoding() {
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::PLAIN).unwrap(),
-            Encoding::PLAIN
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(),
-            Encoding::PLAIN_DICTIONARY
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::RLE).unwrap(),
-            Encoding::RLE
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(),
-            Encoding::BIT_PACKED
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(),
-            Encoding::DELTA_BINARY_PACKED
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(),
-            Encoding::DELTA_LENGTH_BYTE_ARRAY
-        );
-        assert_eq!(
-            Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(),
-            Encoding::DELTA_BYTE_ARRAY
-        );
-    }
-
-    #[test]
-    fn test_into_encoding() {
-        assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into());
-        assert_eq!(
-            parquet::Encoding::PLAIN_DICTIONARY,
-            Encoding::PLAIN_DICTIONARY.into()
-        );
-        assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into());
-        assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into());
-        assert_eq!(
-            parquet::Encoding::DELTA_BINARY_PACKED,
-            Encoding::DELTA_BINARY_PACKED.into()
-        );
-        assert_eq!(
-            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            Encoding::DELTA_LENGTH_BYTE_ARRAY.into()
-        );
-        assert_eq!(
-            parquet::Encoding::DELTA_BYTE_ARRAY,
-            Encoding::DELTA_BYTE_ARRAY.into()
-        );
-    }
-
     #[test]
     fn test_compression_codec_to_string() {
         assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED");
@@ -2003,64 +2168,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn test_from_compression() {
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(),
-            Compression::UNCOMPRESSED
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(),
-            Compression::SNAPPY
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(),
-            Compression::GZIP(Default::default())
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::LZO).unwrap(),
-            Compression::LZO
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(),
-            Compression::BROTLI(Default::default())
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(),
-            Compression::LZ4
-        );
-        assert_eq!(
-            Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(),
-            Compression::ZSTD(Default::default())
-        );
-    }
-
-    #[test]
-    fn test_into_compression() {
-        assert_eq!(
-            parquet::CompressionCodec::UNCOMPRESSED,
-            Compression::UNCOMPRESSED.into()
-        );
-        assert_eq!(
-            parquet::CompressionCodec::SNAPPY,
-            Compression::SNAPPY.into()
-        );
-        assert_eq!(
-            parquet::CompressionCodec::GZIP,
-            Compression::GZIP(Default::default()).into()
-        );
-        assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into());
-        assert_eq!(
-            parquet::CompressionCodec::BROTLI,
-            Compression::BROTLI(Default::default()).into()
-        );
-        assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into());
-        assert_eq!(
-            parquet::CompressionCodec::ZSTD,
-            Compression::ZSTD(Default::default()).into()
-        );
-    }
-
     #[test]
     fn test_display_page_type() {
         assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE");
@@ -2069,40 +2176,6 @@ mod tests {
         assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2");
     }
 
-    #[test]
-    fn test_from_page_type() {
-        assert_eq!(
-            PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(),
-            PageType::DATA_PAGE
-        );
-        assert_eq!(
-            PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(),
-            PageType::INDEX_PAGE
-        );
-        assert_eq!(
-            PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(),
-            PageType::DICTIONARY_PAGE
-        );
-        assert_eq!(
-            PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(),
-            PageType::DATA_PAGE_V2
-        );
-    }
-
-    #[test]
-    fn test_into_page_type() {
-        assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into());
-        assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into());
-        assert_eq!(
-            parquet::PageType::DICTIONARY_PAGE,
-            PageType::DICTIONARY_PAGE.into()
-        );
-        assert_eq!(
-            parquet::PageType::DATA_PAGE_V2,
-            PageType::DATA_PAGE_V2.into()
-        );
-    }
-
     #[test]
     fn test_display_sort_order() {
         assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED");
@@ -2127,6 +2200,12 @@ mod tests {
         assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED");
     }
 
+    #[test]
+    fn test_column_order_roundtrip() {
+        // SortOrder::SIGNED is the default on read.
+        test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED))
+    }
+
     #[test]
     fn test_column_order_get_logical_type_sort_order() {
         // Helper to check the order in a list of values.
@@ -2191,34 +2270,42 @@ mod tests {
             LogicalType::Date,
             LogicalType::Time {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(Default::default()),
+                unit: TimeUnit::MILLIS,
             },
             LogicalType::Time {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MICROS(Default::default()),
+                unit: TimeUnit::MICROS,
             },
             LogicalType::Time {
                 is_adjusted_to_u_t_c: true,
-                unit: TimeUnit::NANOS(Default::default()),
+                unit: TimeUnit::NANOS,
             },
             LogicalType::Timestamp {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MILLIS(Default::default()),
+                unit: TimeUnit::MILLIS,
             },
             LogicalType::Timestamp {
                 is_adjusted_to_u_t_c: false,
-                unit: TimeUnit::MICROS(Default::default()),
+                unit: TimeUnit::MICROS,
             },
             LogicalType::Timestamp {
                 is_adjusted_to_u_t_c: true,
-                unit: TimeUnit::NANOS(Default::default()),
+                unit: TimeUnit::NANOS,
             },
             LogicalType::Float16,
         ];
         check_sort_order(signed, SortOrder::SIGNED);
 
         // Undefined comparison
-        let undefined = vec![LogicalType::List, LogicalType::Map];
+        let undefined = vec![
+            LogicalType::List,
+            LogicalType::Map,
+            LogicalType::Geometry { crs: None },
+            LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::default()),
+            },
+        ];
         check_sort_order(undefined, SortOrder::UNDEFINED);
     }
 
@@ -2359,7 +2446,7 @@ mod tests {
         // test unknown string
         match "plain_xxx".parse::<Encoding>() {
             Ok(e) => {
-                panic!("Should not be able to parse {:?}", e);
+                panic!("Should not be able to parse {e:?}");
             }
             Err(e) => {
                 assert_eq!(e.to_string(), "Parquet error: unknown encoding: plain_xxx");
@@ -2407,4 +2494,117 @@ mod tests {
             "Parquet error: unknown encoding: gzip(-10)"
         );
     }
+
+    #[test]
+    fn test_display_boundary_order() {
+        assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING");
+        assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING");
+        assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED");
+    }
+
+    #[test]
+    fn test_display_edge_algo() {
+        assert_eq!(
+            EdgeInterpolationAlgorithm::SPHERICAL.to_string(),
+            "SPHERICAL"
+        );
+        assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY");
+        assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS");
+        assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER");
+        assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY");
+    }
+
+    #[test]
+    fn test_from_str_edge_algo() {
+        assert_eq!(
+            "spHErical".parse::<EdgeInterpolationAlgorithm>().unwrap(),
+            EdgeInterpolationAlgorithm::SPHERICAL
+        );
+        assert_eq!(
+            "vinceNTY".parse::<EdgeInterpolationAlgorithm>().unwrap(),
+            EdgeInterpolationAlgorithm::VINCENTY
+        );
+        assert_eq!(
+            "tHOmas".parse::<EdgeInterpolationAlgorithm>().unwrap(),
+            EdgeInterpolationAlgorithm::THOMAS
+        );
+        assert_eq!(
+            "anDOYEr".parse::<EdgeInterpolationAlgorithm>().unwrap(),
+            EdgeInterpolationAlgorithm::ANDOYER
+        );
+        assert_eq!(
+            "kaRNey".parse::<EdgeInterpolationAlgorithm>().unwrap(),
+            EdgeInterpolationAlgorithm::KARNEY
+        );
+        assert!(
+            "does not exist"
+                .parse::<EdgeInterpolationAlgorithm>()
+                .is_err()
+        );
+    }
+
+    fn encodings_roundtrip(mut encodings: Vec<Encoding>) {
+        encodings.sort(); // the assert_eq! below compares against this sorted order
+        let mask = EncodingMask::new_from_encodings(encodings.iter());
+        assert!(mask.all_set(encodings.iter())); // every input encoding must be set
+        let v = mask.encodings().collect::<Vec<_>>();
+        assert_eq!(v, encodings); // mask must yield exactly the sorted input back
+    }
+
+    #[test]
+    fn test_encoding_roundtrip() {
+        encodings_roundtrip(
+            [
+                Encoding::RLE,
+                Encoding::PLAIN,
+                Encoding::DELTA_BINARY_PACKED,
+            ]
+            .into(),
+        );
+        encodings_roundtrip([Encoding::RLE_DICTIONARY, Encoding::PLAIN_DICTIONARY].into());
+        encodings_roundtrip([].into());
+        let encodings = [
+            Encoding::PLAIN,
+            Encoding::BIT_PACKED,
+            Encoding::RLE,
+            Encoding::DELTA_BINARY_PACKED,
+            Encoding::DELTA_BYTE_ARRAY,
+            Encoding::DELTA_LENGTH_BYTE_ARRAY,
+            Encoding::PLAIN_DICTIONARY,
+            Encoding::RLE_DICTIONARY,
+            Encoding::BYTE_STREAM_SPLIT,
+        ];
+        encodings_roundtrip(encodings.into());
+    }
+
+    #[test]
+    fn test_invalid_encoding_mask() {
+        // any set bits higher than the max should trigger an error
+        let res = EncodingMask::try_new(-1);
+        assert!(res.is_err());
+        let err = res.unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Attempt to create invalid mask: 0xffffffff"
+        );
+
+        // test that GROUP_VAR_INT is disallowed
+        let res = EncodingMask::try_new(2);
+        assert!(res.is_err());
+        let err = res.unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Attempt to create invalid mask: 0x2"
+        );
+    }
+
+    #[test]
+    fn test_encoding_mask_is_only() {
+        let mask = EncodingMask::new_from_encodings([Encoding::PLAIN].iter());
+        assert!(mask.is_only(Encoding::PLAIN));
+
+        let mask =
+            EncodingMask::new_from_encodings([Encoding::PLAIN, Encoding::PLAIN_DICTIONARY].iter());
+        assert!(!mask.is_only(Encoding::PLAIN));
+    }
 }
diff --git a/parquet/src/bin/parquet-concat.rs b/parquet/src/bin/parquet-concat.rs
index e8ce4ca1dbed..a6f1aef78110 100644
--- a/parquet/src/bin/parquet-concat.rs
+++ b/parquet/src/bin/parquet-concat.rs
@@ -37,10 +37,12 @@
 //!
 
 use clap::Parser;
+use parquet::bloom_filter::Sbbf;
 use parquet::column::writer::ColumnCloseResult;
 use parquet::errors::{ParquetError, Result};
-use parquet::file::metadata::ParquetMetaDataReader;
+use parquet::file::metadata::{ColumnChunkMetaData, PageIndexPolicy, ParquetMetaDataReader};
 use parquet::file::properties::WriterProperties;
+use parquet::file::reader::ChunkReader;
 use parquet::file::writer::SerializedFileWriter;
 use std::fs::File;
 use std::sync::Arc;
@@ -56,6 +58,10 @@ struct Args {
     input: Vec<String>,
 }
 
+fn read_bloom_filter<R: ChunkReader>(column: &ColumnChunkMetaData, input: &R) -> Option<Sbbf> {
+    Sbbf::read_from_column_chunk(column, input).ok().flatten() // Err is discarded -> None
+}
+
 impl Args {
     fn run(&self) -> Result<()> {
         if self.input.is_empty() {
@@ -71,7 +77,10 @@ impl Args {
             .iter()
             .map(|x| {
                 let reader = File::open(x)?;
-                let metadata = ParquetMetaDataReader::new().parse_and_finish(&reader)?;
+                // Enable reading page indexes if present
+                let metadata = ParquetMetaDataReader::new()
+                    .with_page_index_policy(PageIndexPolicy::Optional)
+                    .parse_and_finish(&reader)?;
                 Ok((reader, metadata))
             })
             .collect::<Result<Vec<_>>>()?;
@@ -91,16 +100,26 @@ impl Args {
         let mut writer = SerializedFileWriter::new(output, schema, props)?;
 
         for (input, metadata) in inputs {
-            for rg in metadata.row_groups() {
+            let column_indexes = metadata.column_index();
+            let offset_indexes = metadata.offset_index();
+
+            for (rg_idx, rg) in metadata.row_groups().iter().enumerate() {
+                let rg_column_indexes = column_indexes.and_then(|ci| ci.get(rg_idx));
+                let rg_offset_indexes = offset_indexes.and_then(|oi| oi.get(rg_idx));
                 let mut rg_out = writer.next_row_group()?;
-                for column in rg.columns() {
+                for (col_idx, column) in rg.columns().iter().enumerate() {
+                    let bloom_filter = read_bloom_filter(column, &input);
+                    let column_index = rg_column_indexes.and_then(|row| row.get(col_idx)).cloned();
+
+                    let offset_index = rg_offset_indexes.and_then(|row| row.get(col_idx)).cloned();
+
                     let result = ColumnCloseResult {
                         bytes_written: column.compressed_size() as _,
                         rows_written: rg.num_rows() as _,
                         metadata: column.clone(),
-                        bloom_filter: None,
-                        column_index: None,
-                        offset_index: None,
+                        bloom_filter,
+                        column_index,
+                        offset_index,
                     };
                     rg_out.append_column(&input, result)?;
                 }
diff --git a/parquet/src/bin/parquet-fromcsv.rs b/parquet/src/bin/parquet-fromcsv.rs
index 6f8c47c62829..bba07b9b056e 100644
--- a/parquet/src/bin/parquet-fromcsv.rs
+++ b/parquet/src/bin/parquet-fromcsv.rs
@@ -72,7 +72,7 @@
 
 use std::{
     fmt::Display,
-    fs::{read_to_string, File},
+    fs::{File, read_to_string},
     io::Read,
     path::{Path, PathBuf},
     sync::Arc,
@@ -83,7 +83,7 @@ use arrow_schema::{ArrowError, Schema};
 use clap::{Parser, ValueEnum};
 use parquet::arrow::arrow_writer::ArrowWriterOptions;
 use parquet::{
-    arrow::{parquet_to_arrow_schema, ArrowWriter},
+    arrow::{ArrowWriter, parquet_to_arrow_schema},
     basic::Compression,
     errors::ParquetError,
     file::properties::{WriterProperties, WriterVersion},
@@ -224,9 +224,9 @@ fn compression_from_str(cmp: &str) -> Result<Compression, String> {
         "BROTLI" => Ok(Compression::BROTLI(Default::default())),
         "LZ4" => Ok(Compression::LZ4),
         "ZSTD" => Ok(Compression::ZSTD(Default::default())),
-        v => Err(
-            format!("Unknown compression {v} : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD \n\nFor more information try --help")
-        )
+        v => Err(format!(
+            "Unknown compression {v} : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD \n\nFor more information try --help"
+        )),
     }
 }
 
diff --git a/parquet/src/bin/parquet-index.rs b/parquet/src/bin/parquet-index.rs
index 1a9b74dd78fb..397a75c76ae4 100644
--- a/parquet/src/bin/parquet-index.rs
+++ b/parquet/src/bin/parquet-index.rs
@@ -35,12 +35,14 @@
 //! [page index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 
 use clap::Parser;
+use parquet::data_type::ByteArray;
 use parquet::errors::{ParquetError, Result};
-use parquet::file::page_index::index::{Index, PageIndex};
-use parquet::file::page_index::offset_index::OffsetIndexMetaData;
+use parquet::file::page_index::column_index::{
+    ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
+};
+use parquet::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation};
 use parquet::file::reader::{FileReader, SerializedFileReader};
 use parquet::file::serialized_reader::ReadOptionsBuilder;
-use parquet::format::PageLocation;
 use std::fs::File;
 
 #[derive(Debug, Parser)]
@@ -97,16 +99,20 @@ impl Args {
             let row_counts =
                 compute_row_counts(offset_index.page_locations.as_slice(), row_group.num_rows());
             match &column_indices[column_idx] {
-                Index::NONE => println!("NO INDEX"),
-                Index::BOOLEAN(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::INT32(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::INT64(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::INT96(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::FLOAT(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::DOUBLE(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::BYTE_ARRAY(v) => print_index(&v.indexes, offset_index, &row_counts)?,
-                Index::FIXED_LEN_BYTE_ARRAY(v) => {
-                    print_index(&v.indexes, offset_index, &row_counts)?
+                ColumnIndexMetaData::NONE => println!("NO INDEX"),
+                ColumnIndexMetaData::BOOLEAN(v) => {
+                    print_index::<bool>(v, offset_index, &row_counts)?
+                }
+                ColumnIndexMetaData::INT32(v) => print_index(v, offset_index, &row_counts)?,
+                ColumnIndexMetaData::INT64(v) => print_index(v, offset_index, &row_counts)?,
+                ColumnIndexMetaData::INT96(v) => print_index(v, offset_index, &row_counts)?,
+                ColumnIndexMetaData::FLOAT(v) => print_index(v, offset_index, &row_counts)?,
+                ColumnIndexMetaData::DOUBLE(v) => print_index(v, offset_index, &row_counts)?,
+                ColumnIndexMetaData::BYTE_ARRAY(v) => {
+                    print_bytes_index(v, offset_index, &row_counts)?
+                }
+                ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(v) => {
+                    print_bytes_index(v, offset_index, &row_counts)?
                 }
             }
         }
@@ -132,20 +138,21 @@ fn compute_row_counts(offset_index: &[PageLocation], rows: i64) -> Vec<i64> {
 
 /// Prints index information for a single column chunk
 fn print_index<T: std::fmt::Display>(
-    column_index: &[PageIndex<T>],
+    column_index: &PrimitiveColumnIndex<T>,
     offset_index: &OffsetIndexMetaData,
     row_counts: &[i64],
 ) -> Result<()> {
-    if column_index.len() != offset_index.page_locations.len() {
+    if column_index.num_pages() as usize != offset_index.page_locations.len() {
         return Err(ParquetError::General(format!(
             "Index length mismatch, got {} and {}",
-            column_index.len(),
+            column_index.num_pages(),
             offset_index.page_locations.len()
         )));
     }
 
-    for (idx, ((c, o), row_count)) in column_index
-        .iter()
+    for (idx, (((min, max), o), row_count)) in column_index
+        .min_values_iter()
+        .zip(column_index.max_values_iter())
         .zip(offset_index.page_locations())
         .zip(row_counts)
         .enumerate()
@@ -154,12 +161,12 @@ fn print_index<T: std::fmt::Display>(
             "Page {:>5} at offset {:#010x} with length {:>10} and row count {:>10}",
             idx, o.offset, o.compressed_page_size, row_count
         );
-        match &c.min {
+        match min {
             Some(m) => print!(", min {m:>10}"),
             None => print!(", min {:>10}", "NONE"),
         }
 
-        match &c.max {
+        match max {
             Some(m) => print!(", max {m:>10}"),
             None => print!(", max {:>10}", "NONE"),
         }
@@ -169,6 +176,51 @@ fn print_index<T: std::fmt::Display>(
     Ok(())
 }
 
+/// Prints index information for a single byte array column chunk
+///
+/// Valid UTF-8 bounds print as text; errors if the indexes' page counts differ.
+fn print_bytes_index(
+    column_index: &ByteArrayColumnIndex,
+    offset_index: &OffsetIndexMetaData,
+    row_counts: &[i64],
+) -> Result<()> {
+    if column_index.num_pages() as usize != offset_index.page_locations.len() {
+        return Err(ParquetError::General(format!(
+            "Index length mismatch, got {} and {}",
+            column_index.num_pages(),
+            offset_index.page_locations.len()
+        )));
+    }
+    for (idx, (((min, max), o), row_count)) in column_index
+        .min_values_iter()
+        .zip(column_index.max_values_iter())
+        .zip(offset_index.page_locations())
+        .zip(row_counts)
+        .enumerate()
+    {
+        print!(
+            "Page {:>5} at offset {:#010x} with length {:>10} and row count {:>10}",
+            idx, o.offset, o.compressed_page_size, row_count
+        );
+        match min {
+            Some(m) => match String::from_utf8(m.to_vec()) {
+                Ok(s) => print!(", min {s:>10}"),
+                Err(_) => print!(", min {:>10}", ByteArray::from(m)),
+            },
+            None => print!(", min {:>10}", "NONE"),
+        }
+        match max {
+            Some(m) => match String::from_utf8(m.to_vec()) {
+                Ok(s) => print!(", max {s:>10}"),
+                Err(_) => print!(", max {:>10}", ByteArray::from(m)),
+            },
+            None => print!(", max {:>10}", "NONE"),
+        }
+        println!()
+    }
+    Ok(())
+}
+
 fn main() -> Result<()> {
     Args::parse().run()
 }
diff --git a/parquet/src/bin/parquet-layout.rs b/parquet/src/bin/parquet-layout.rs
index 46a231a7d02b..007f93517d96 100644
--- a/parquet/src/bin/parquet-layout.rs
+++ b/parquet/src/bin/parquet-layout.rs
@@ -17,6 +17,10 @@
 
 //! Binary that prints the physical layout of a parquet file
 //!
+//! NOTE: due to this binary's use of the deprecated [`parquet::format`] module, it
+//! will no longer be maintained, and will likely be removed in the future.
+//! Alternatives to this include [`parquet-cli`] and [`parquet-viewer`].
+//!
 //! # Install
 //!
 //! `parquet-layout` can be installed using `cargo`:
@@ -32,6 +36,9 @@
 //! ```
 //! cargo run --features=cli --bin parquet-layout XYZ.parquet
 //! ```
+//!
+//! [`parquet-cli`]: https://github.com/apache/parquet-java/tree/master/parquet-cli
+//! [`parquet-viewer`]: https://github.com/xiangpenghao/parquet-viewer
 
 use std::fs::File;
 use std::io::Read;
@@ -41,15 +48,28 @@ use parquet::file::metadata::ParquetMetaDataReader;
 use serde::Serialize;
 use thrift::protocol::TCompactInputProtocol;
 
-use parquet::basic::{Compression, Encoding};
+use parquet::basic::Compression;
 use parquet::errors::Result;
 use parquet::file::reader::ChunkReader;
+#[allow(deprecated)]
 use parquet::format::PageHeader;
 use parquet::thrift::TSerializable;
 
+#[derive(Serialize, Debug)]
+struct Index {
+    offset: i64,
+    length: Option<i32>,
+}
+
+#[derive(Serialize, Debug)]
+struct Footer {
+    metadata_size: Option<usize>,
+}
+
 #[derive(Serialize, Debug)]
 struct ParquetFile {
     row_groups: Vec<RowGroup>,
+    footer: Footer,
 }
 
 #[derive(Serialize, Debug)]
@@ -64,6 +84,9 @@ struct ColumnChunk {
     has_offset_index: bool,
     has_column_index: bool,
     has_bloom_filter: bool,
+    offset_index: Option<Index>,
+    column_index: Option<Index>,
+    bloom_filter: Option<Index>,
     pages: Vec<Page>,
 }
 
@@ -79,8 +102,12 @@ struct Page {
     num_values: i32,
 }
 
+#[allow(deprecated)]
 fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
-    let metadata = ParquetMetaDataReader::new().parse_and_finish(reader)?;
+    let mut metadata_reader = ParquetMetaDataReader::new();
+    metadata_reader.try_parse(reader)?;
+    let metadata_size = metadata_reader.metadata_size();
+    let metadata = metadata_reader.finish()?;
     let schema = metadata.file_metadata().schema_descr();
 
     let row_groups = (0..metadata.num_row_groups())
@@ -105,7 +132,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                         if let Some(dictionary) = header.dictionary_page_header {
                             pages.push(Page {
                                 compression,
-                                encoding: encoding(dictionary.encoding),
+                                encoding: encoding(dictionary.encoding.0),
                                 page_type: "dictionary",
                                 offset: start,
                                 compressed_bytes: header.compressed_page_size,
@@ -116,7 +143,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                         } else if let Some(data_page) = header.data_page_header {
                             pages.push(Page {
                                 compression,
-                                encoding: encoding(data_page.encoding),
+                                encoding: encoding(data_page.encoding.0),
                                 page_type: "data_page_v1",
                                 offset: start,
                                 compressed_bytes: header.compressed_page_size,
@@ -129,7 +156,7 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
 
                             pages.push(Page {
                                 compression: compression.filter(|_| is_compressed),
-                                encoding: encoding(data_page.encoding),
+                                encoding: encoding(data_page.encoding.0),
                                 page_type: "data_page_v2",
                                 offset: start,
                                 compressed_bytes: header.compressed_page_size,
@@ -146,6 +173,18 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
                         has_offset_index: column.offset_index_offset().is_some(),
                         has_column_index: column.column_index_offset().is_some(),
                         has_bloom_filter: column.bloom_filter_offset().is_some(),
+                        offset_index: column.offset_index_offset().map(|offset| Index {
+                            offset,
+                            length: column.offset_index_length(),
+                        }),
+                        column_index: column.column_index_offset().map(|offset| Index {
+                            offset,
+                            length: column.column_index_length(),
+                        }),
+                        bloom_filter: column.bloom_filter_offset().map(|offset| Index {
+                            offset,
+                            length: column.bloom_filter_length(),
+                        }),
                         pages,
                     })
                 })
@@ -158,11 +197,15 @@ fn do_layout<C: ChunkReader>(reader: &C) -> Result<ParquetFile> {
         })
         .collect::<Result<Vec<_>>>()?;
 
-    Ok(ParquetFile { row_groups })
+    Ok(ParquetFile {
+        row_groups,
+        footer: Footer { metadata_size },
+    })
 }
 
 /// Reads the page header at `offset` from `reader`, returning
 /// both the `PageHeader` and its length in bytes
+#[allow(deprecated)]
 fn read_page_header<C: ChunkReader>(reader: &C, offset: u64) -> Result<(usize, PageHeader)> {
     struct TrackedRead<R>(R, usize);
 
@@ -196,19 +239,19 @@ fn compression(compression: Compression) -> Option<&'static str> {
 }
 
 /// Returns a string representation for a given encoding
-fn encoding(encoding: parquet::format::Encoding) -> &'static str {
-    match Encoding::try_from(encoding) {
-        Ok(Encoding::PLAIN) => "plain",
-        Ok(Encoding::PLAIN_DICTIONARY) => "plain_dictionary",
-        Ok(Encoding::RLE) => "rle",
+fn encoding(encoding: i32) -> &'static str {
+    match encoding {
+        0 => "plain",
+        2 => "plain_dictionary",
+        3 => "rle",
         #[allow(deprecated)]
-        Ok(Encoding::BIT_PACKED) => "bit_packed",
-        Ok(Encoding::DELTA_BINARY_PACKED) => "delta_binary_packed",
-        Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY) => "delta_length_byte_array",
-        Ok(Encoding::DELTA_BYTE_ARRAY) => "delta_byte_array",
-        Ok(Encoding::RLE_DICTIONARY) => "rle_dictionary",
-        Ok(Encoding::BYTE_STREAM_SPLIT) => "byte_stream_split",
-        Err(_) => "unknown",
+        4 => "bit_packed",
+        5 => "delta_binary_packed",
+        6 => "delta_length_byte_array",
+        7 => "delta_byte_array",
+        8 => "rle_dictionary",
+        9 => "byte_stream_split",
+        _ => "unknown",
     }
 }
 
diff --git a/parquet/src/bin/parquet-rewrite.rs b/parquet/src/bin/parquet-rewrite.rs
index 6bf7246f5629..31058e552d15 100644
--- a/parquet/src/bin/parquet-rewrite.rs
+++ b/parquet/src/bin/parquet-rewrite.rs
@@ -36,10 +36,10 @@
 use std::fs::File;
 
 use arrow_array::RecordBatchReader;
-use clap::{builder::PossibleValue, Parser, ValueEnum};
+use clap::{Parser, ValueEnum, builder::PossibleValue};
 use parquet::{
-    arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter},
-    basic::{Compression, Encoding},
+    arrow::{ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder},
+    basic::{BrotliLevel, Compression, Encoding, GzipLevel, ZstdLevel},
     file::{
         properties::{BloomFilterPosition, EnabledStatistics, WriterProperties, WriterVersion},
         reader::FileReader,
@@ -74,18 +74,31 @@ enum CompressionArgs {
     Lz4Raw,
 }
 
-impl From<CompressionArgs> for Compression {
-    fn from(value: CompressionArgs) -> Self {
-        match value {
-            CompressionArgs::None => Self::UNCOMPRESSED,
-            CompressionArgs::Snappy => Self::SNAPPY,
-            CompressionArgs::Gzip => Self::GZIP(Default::default()),
-            CompressionArgs::Lzo => Self::LZO,
-            CompressionArgs::Brotli => Self::BROTLI(Default::default()),
-            CompressionArgs::Lz4 => Self::LZ4,
-            CompressionArgs::Zstd => Self::ZSTD(Default::default()),
-            CompressionArgs::Lz4Raw => Self::LZ4_RAW,
-        }
+/// Builds the writer [`Compression`] for `codec`, applying `level` when given.
+///
+/// `level` only affects gzip, brotli and zstd and is ignored by the other
+/// codecs. Panics if `level` is out of range for the selected codec.
+fn compression_from_args(codec: CompressionArgs, level: Option<u32>) -> Compression {
+    match (codec, level) {
+        (CompressionArgs::None, _) => Compression::UNCOMPRESSED,
+        (CompressionArgs::Snappy, _) => Compression::SNAPPY,
+        (CompressionArgs::Lzo, _) => Compression::LZO,
+        (CompressionArgs::Lz4, _) => Compression::LZ4,
+        (CompressionArgs::Lz4Raw, _) => Compression::LZ4_RAW,
+        (CompressionArgs::Gzip, None) => Compression::GZIP(Default::default()),
+        (CompressionArgs::Brotli, None) => Compression::BROTLI(Default::default()),
+        (CompressionArgs::Zstd, None) => Compression::ZSTD(Default::default()),
+        (CompressionArgs::Gzip, Some(lvl)) => {
+            Compression::GZIP(GzipLevel::try_new(lvl).expect("invalid gzip compression level"))
+        }
+        (CompressionArgs::Brotli, Some(lvl)) => Compression::BROTLI(
+            BrotliLevel::try_new(lvl).expect("invalid brotli compression level"),
+        ),
+        (CompressionArgs::Zstd, Some(lvl)) => {
+            // Convert without wrapping so huge values are rejected, not reinterpreted.
+            let lvl = i32::try_from(lvl).expect("invalid zstd compression level");
+            Compression::ZSTD(ZstdLevel::try_new(lvl).expect("invalid zstd compression level"))
+        }
     }
 }
 
@@ -219,6 +232,10 @@ struct Args {
     #[clap(long, value_enum)]
     compression: Option<CompressionArgs>,
 
+    /// Compression level for gzip/brotli/zstd; ignored for other codecs.
+    #[clap(long)]
+    compression_level: Option<u32>,
+
     /// Encoding used for all columns, if dictionary is not enabled.
     #[clap(long, value_enum)]
     encoding: Option<EncodingArgs>,
@@ -243,11 +260,24 @@ struct Args {
     #[clap(long)]
     data_page_size_limit: Option<usize>,
 
-    /// Sets max statistics size for all columns.
+    /// Sets the max length of min/max statistics in row group and data page
+    /// header statistics for all columns.
+    ///
+    /// Applicable only if statistics are enabled.
+    #[clap(long)]
+    statistics_truncate_length: Option<usize>,
+
+    /// Sets the max length of min/max statistics in the column index.
     ///
     /// Applicable only if statistics are enabled.
     #[clap(long)]
-    max_statistics_size: Option<usize>,
+    column_index_truncate_length: Option<usize>,
+
+    /// Write statistics to the data page headers?
+    ///
+    /// Setting this true will also enable page level statistics.
+    #[clap(long)]
+    write_page_header_statistics: Option<bool>,
 
     /// Sets whether bloom filter is enabled for all columns.
     #[clap(long)]
@@ -273,6 +303,10 @@ struct Args {
     #[clap(long)]
     writer_version: Option<WriterVersionArgs>,
 
+    /// Sets write batch size.
+    #[clap(long)]
+    write_batch_size: Option<usize>,
+
     /// Sets whether to coerce Arrow types to match Parquet specification
     #[clap(long)]
     coerce_types: Option<bool>,
@@ -300,8 +334,10 @@ fn main() {
     .expect("parquet open");
 
     let mut writer_properties_builder = WriterProperties::builder().set_key_value_metadata(kv_md);
+
     if let Some(value) = args.compression {
-        writer_properties_builder = writer_properties_builder.set_compression(value.into());
+        let compression = compression_from_args(value, args.compression_level);
+        writer_properties_builder = writer_properties_builder.set_compression(compression);
     }
 
     // setup encoding
@@ -324,9 +360,16 @@ fn main() {
     if let Some(value) = args.data_page_size_limit {
         writer_properties_builder = writer_properties_builder.set_data_page_size_limit(value);
     }
-    #[allow(deprecated)]
-    if let Some(value) = args.max_statistics_size {
-        writer_properties_builder = writer_properties_builder.set_max_statistics_size(value);
+    if let Some(value) = args.dictionary_page_size_limit {
+        writer_properties_builder = writer_properties_builder.set_dictionary_page_size_limit(value);
+    }
+    if let Some(value) = args.statistics_truncate_length {
+        writer_properties_builder =
+            writer_properties_builder.set_statistics_truncate_length(Some(value));
+    }
+    if let Some(value) = args.column_index_truncate_length {
+        writer_properties_builder =
+            writer_properties_builder.set_column_index_truncate_length(Some(value));
     }
     if let Some(value) = args.bloom_filter_enabled {
         writer_properties_builder = writer_properties_builder.set_bloom_filter_enabled(value);
@@ -347,12 +390,24 @@ fn main() {
     if let Some(value) = args.statistics_enabled {
         writer_properties_builder = writer_properties_builder.set_statistics_enabled(value.into());
     }
+    // set this after statistics_enabled
+    if let Some(value) = args.write_page_header_statistics {
+        writer_properties_builder =
+            writer_properties_builder.set_write_page_header_statistics(value);
+        if value {
+            writer_properties_builder =
+                writer_properties_builder.set_statistics_enabled(EnabledStatistics::Page);
+        }
+    }
     if let Some(value) = args.writer_version {
         writer_properties_builder = writer_properties_builder.set_writer_version(value.into());
     }
     if let Some(value) = args.coerce_types {
         writer_properties_builder = writer_properties_builder.set_coerce_types(value);
     }
+    if let Some(value) = args.write_batch_size {
+        writer_properties_builder = writer_properties_builder.set_write_batch_size(value);
+    }
     let writer_properties = writer_properties_builder.build();
     let mut parquet_writer = ArrowWriter::try_new(
         File::create(&args.output).expect("Unable to open output file"),
diff --git a/parquet/src/bin/parquet-show-bloom-filter.rs b/parquet/src/bin/parquet-show-bloom-filter.rs
index 41e3ac9b5233..2b052a45f2f4 100644
--- a/parquet/src/bin/parquet-show-bloom-filter.rs
+++ b/parquet/src/bin/parquet-show-bloom-filter.rs
@@ -49,7 +49,9 @@ use std::{fs::File, path::Path};
 struct Args {
     #[clap(help("Path to the parquet file"))]
     file_name: String,
-    #[clap(help("Check the bloom filter indexes for the given column. Only string typed columns or columns with an Int32 or Int64 physical type are supported"))]
+    #[clap(help(
+        "Check the bloom filter indexes for the given column. Only string typed columns or columns with an Int32 or Int64 physical type are supported"
+    ))]
     column: String,
     #[clap(
         help(
@@ -128,13 +130,13 @@ fn check_filter(sbbf: &Sbbf, value: &String, column: &ColumnChunkMetaData) -> Re
         Type::INT32 => {
             let value: i32 = value
                 .parse()
-                .map_err(|e| format!("Unable to parse value '{}' to i32: {}", value, e))?;
+                .map_err(|e| format!("Unable to parse value '{value}' to i32: {e}"))?;
             Ok(sbbf.check(&value))
         }
         Type::INT64 => {
             let value: i64 = value
                 .parse()
-                .map_err(|e| format!("Unable to parse value '{}' to i64: {}", value, e))?;
+                .map_err(|e| format!("Unable to parse value '{value}' to i64: {e}"))?;
             Ok(sbbf.check(&value))
         }
         Type::BYTE_ARRAY => Ok(sbbf.check(&value.as_str())),
diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs
index 69ef4538baa1..1f77e492ccf1 100644
--- a/parquet/src/bloom_filter/mod.rs
+++ b/parquet/src/bloom_filter/mod.rs
@@ -72,18 +72,18 @@
 //! [sbbf-paper]: https://arxiv.org/pdf/2101.01719
 //! [bf-formulae]: http://tfk.mit.edu/pdf/bloom.pdf
 
+use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash};
 use crate::data_type::AsBytes;
-use crate::errors::ParquetError;
+use crate::errors::{ParquetError, Result};
 use crate::file::metadata::ColumnChunkMetaData;
 use crate::file::reader::ChunkReader;
-use crate::format::{
-    BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash, BloomFilterHeader,
-    SplitBlockAlgorithm, Uncompressed, XxHash,
+use crate::parquet_thrift::{
+    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
+    ThriftSliceInputProtocol, WriteThrift, WriteThriftField,
 };
-use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
+use crate::thrift_struct;
 use bytes::Bytes;
 use std::io::Write;
-use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol};
 use twox_hash::XxHash64;
 
 /// Salt as defined in the [spec](https://github.com/apache/parquet-format/blob/master/BloomFilter.md#technical-approach).
@@ -98,9 +98,26 @@ const SALT: [u32; 8] = [
     0x5c6bfb31_u32,
 ];
 
+thrift_struct!(
+/// Bloom filter header is stored at beginning of Bloom filter data of each column
+/// and followed by its bitset.
+///
+pub struct BloomFilterHeader {
+  /// The size of bitset in bytes
+  1: required i32 num_bytes;
+  /// The algorithm for setting bits.
+  2: required BloomFilterAlgorithm algorithm;
+  /// The hash function used for Bloom filter
+  3: required BloomFilterHash hash;
+  /// The compression used in the Bloom filter
+  4: required BloomFilterCompression compression;
+}
+);
+
 /// Each block is 256 bits, broken up into eight contiguous "words", each consisting of 32 bits.
 /// Each word is thought of as an array of bits; each bit is either "set" or "not set".
 #[derive(Debug, Copy, Clone)]
+#[repr(transparent)]
 struct Block([u32; 8]);
 impl Block {
     const ZERO: Block = Block([0; 8]);
@@ -119,9 +136,10 @@ impl Block {
     }
 
     #[inline]
-    #[cfg(target_endian = "little")]
-    fn to_le_bytes(self) -> [u8; 32] {
-        self.to_ne_bytes()
+    #[cfg(not(target_endian = "little"))]
+    fn to_ne_bytes(self) -> [u8; 32] {
+        // SAFETY: [u32; 8] and [u8; 32] have the same size and neither has invalid bit patterns.
+        unsafe { std::mem::transmute(self.0) }
     }
 
     #[inline]
@@ -130,12 +148,6 @@ impl Block {
         self.swap_bytes().to_ne_bytes()
     }
 
-    #[inline]
-    fn to_ne_bytes(self) -> [u8; 32] {
-        // SAFETY: [u32; 8] and [u8; 32] have the same size and neither has invalid bit patterns.
-        unsafe { std::mem::transmute(self.0) }
-    }
-
     #[inline]
     #[cfg(not(target_endian = "little"))]
     fn swap_bytes(mut self) -> Self {
@@ -203,10 +215,19 @@ pub(crate) fn chunk_read_bloom_filter_header_and_offset(
 #[inline]
 pub(crate) fn read_bloom_filter_header_and_length(
     buffer: Bytes,
+) -> Result<(BloomFilterHeader, u64), ParquetError> {
+    read_bloom_filter_header_and_length_from_bytes(buffer.as_ref())
+}
+
+/// Given a byte slice, try to read out a bloom filter header and return both the header and
+/// length of the header.
+#[inline]
+fn read_bloom_filter_header_and_length_from_bytes(
+    buffer: &[u8],
 ) -> Result<(BloomFilterHeader, u64), ParquetError> {
     let total_length = buffer.len();
-    let mut prot = TCompactSliceInputProtocol::new(buffer.as_ref());
-    let header = BloomFilterHeader::read_from_in_protocol(&mut prot)
+    let mut prot = ThriftSliceInputProtocol::new(buffer);
+    let header = BloomFilterHeader::read_thrift(&mut prot)
         .map_err(|e| ParquetError::General(format!("Could not read bloom filter header: {e}")))?;
     Ok((header, (total_length - prot.as_slice().len()) as u64))
 }
@@ -248,8 +269,10 @@ impl Sbbf {
     /// to the next power of two bounded by [BITSET_MIN_LENGTH] and [BITSET_MAX_LENGTH].
     pub(crate) fn new_with_num_of_bytes(num_bytes: usize) -> Self {
         let num_bytes = optimal_num_of_bytes(num_bytes);
-        let bitset = vec![0_u8; num_bytes];
-        Self::new(&bitset)
+        assert_eq!(num_bytes % size_of::<Block>(), 0);
+        let num_blocks = num_bytes / size_of::<Block>();
+        let bitset = vec![Block::ZERO; num_blocks];
+        Self(bitset)
     }
 
     pub(crate) fn new(bitset: &[u8]) -> Self {
@@ -269,18 +292,18 @@ impl Sbbf {
     /// Write the bloom filter data (header and then bitset) to the output. This doesn't
     /// flush the writer in order to boost performance of bulk writing all blocks. Caller
     /// must remember to flush the writer.
-    pub(crate) fn write<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
-        let mut protocol = TCompactOutputProtocol::new(&mut writer);
-        let header = self.header();
-        header.write_to_out_protocol(&mut protocol).map_err(|e| {
+    /// This method is usually used in conjunction with [`Self::from_bytes`] for serialization/deserialization.
+    pub fn write<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
+        let mut protocol = ThriftCompactOutputProtocol::new(&mut writer);
+        self.header().write_thrift(&mut protocol).map_err(|e| {
             ParquetError::General(format!("Could not write bloom filter header: {e}"))
         })?;
-        protocol.flush()?;
         self.write_bitset(&mut writer)?;
         Ok(())
     }
 
     /// Write the bitset in serialized form to the writer.
+    #[cfg(not(target_endian = "little"))]
     fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
         for block in &self.0 {
             writer
@@ -292,19 +315,35 @@ impl Sbbf {
         Ok(())
     }
 
+    /// Write the bitset in serialized form to the writer.
+    #[cfg(target_endian = "little")]
+    fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
+        // SAFETY: `Block` is `repr(transparent)` over `[u32; 8]`, so the vector's storage is
+        // `len * size_of::<Block>()` contiguous initialized bytes, and `u8` has alignment 1.
+        let slice = unsafe {
+            std::slice::from_raw_parts(
+                self.0.as_ptr() as *const u8,
+                self.0.len() * size_of::<Block>(),
+            )
+        };
+        writer.write_all(slice).map_err(|e| {
+            ParquetError::General(format!("Could not write bloom filter bit set: {e}"))
+        })
+    }
+
     /// Create and populate [`BloomFilterHeader`] from this bitset for writing to serialized form
     fn header(&self) -> BloomFilterHeader {
         BloomFilterHeader {
             // 8 i32 per block, 4 bytes per i32
             num_bytes: self.0.len() as i32 * 4 * 8,
-            algorithm: BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm {}),
-            hash: BloomFilterHash::XXHASH(XxHash {}),
-            compression: BloomFilterCompression::UNCOMPRESSED(Uncompressed {}),
+            algorithm: BloomFilterAlgorithm::BLOCK,
+            hash: BloomFilterHash::XXHASH,
+            compression: BloomFilterCompression::UNCOMPRESSED,
         }
     }
 
     /// Read a new bloom filter from the given offset in the given reader.
-    pub(crate) fn read_from_column_chunk<R: ChunkReader>(
+    pub fn read_from_column_chunk<R: ChunkReader>(
         column_metadata: &ColumnChunkMetaData,
         reader: &R,
     ) -> Result<Option<Self>, ParquetError> {
@@ -325,17 +364,17 @@ impl Sbbf {
             chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?;
 
         match header.algorithm {
-            BloomFilterAlgorithm::BLOCK(_) => {
+            BloomFilterAlgorithm::BLOCK => {
                 // this match exists to future proof the singleton algorithm enum
             }
         }
         match header.compression {
-            BloomFilterCompression::UNCOMPRESSED(_) => {
+            BloomFilterCompression::UNCOMPRESSED => {
                 // this match exists to future proof the singleton compression enum
             }
         }
         match header.hash {
-            BloomFilterHash::XXHASH(_) => {
+            BloomFilterHash::XXHASH => {
                 // this match exists to future proof the singleton hash enum
             }
         }
@@ -388,6 +427,54 @@ impl Sbbf {
     pub(crate) fn estimated_memory_size(&self) -> usize {
         self.0.capacity() * std::mem::size_of::<Block>()
     }
+
+    /// Reads an [`Sbbf`] from its serialized form: a Thrift encoded
+    /// `BloomFilterHeader` immediately followed by the bitset, as emitted by [`Self::write`].
+    ///
+    /// Returns an error if the header cannot be decoded or if `bytes` is not exactly
+    /// the header plus the `num_bytes` of bitset that the header declares.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use parquet::errors::Result;
+    /// # use parquet::bloom_filter::Sbbf;
+    /// # fn main() -> Result<()> {
+    /// // In a real application, you would read serialized bloom filter bytes from a cache.
+    /// // This example demonstrates the deserialization process.
+    /// // Assuming you have bloom filter bytes from a Parquet file:
+    /// # let serialized_bytes: Vec<u8> = vec![];
+    /// let bloom_filter = Sbbf::from_bytes(&serialized_bytes)?;
+    /// // Now you can use the bloom filter to check for values
+    /// if bloom_filter.check(&"some_value") {
+    ///     println!("Value might be present (or false positive)");
+    /// } else {
+    ///     println!("Value is definitely not present");
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn from_bytes(bytes: &[u8]) -> Result<Self, ParquetError> {
+        let (header, header_len) = read_bloom_filter_header_and_length_from_bytes(bytes)?;
+
+        // A negative `num_bytes` cannot be a length, so reject it here.
+        let bitset_length: u64 = header
+            .num_bytes
+            .try_into()
+            .map_err(|_| ParquetError::General("Bloom filter length is invalid".to_string()))?;
+
+        // The input must be exactly header + bitset: neither truncated nor padded.
+        let expected_len = header_len + bitset_length;
+        if expected_len != bytes.len() as u64 {
+            return Err(ParquetError::General(format!(
+                "Bloom filter data length mismatch: expected {expected_len} total bytes, got {}",
+                bytes.len()
+            )));
+        }
+
+        // The check above guarantees `header_len <= bytes.len()`, so this slice is in bounds.
+        Ok(Self::new(&bytes[header_len as usize..]))
+    }
 }
 
 // per spec we use xxHash with seed=0
@@ -463,15 +550,9 @@ mod tests {
             read_length,
         ) = read_bloom_filter_header_and_length(Bytes::copy_from_slice(buffer)).unwrap();
         assert_eq!(read_length, 15);
-        assert_eq!(
-            algorithm,
-            BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm {})
-        );
-        assert_eq!(
-            compression,
-            BloomFilterCompression::UNCOMPRESSED(Uncompressed {})
-        );
-        assert_eq!(hash, BloomFilterHash::XXHASH(XxHash {}));
+        assert_eq!(algorithm, BloomFilterAlgorithm::BLOCK);
+        assert_eq!(compression, BloomFilterCompression::UNCOMPRESSED);
+        assert_eq!(hash, BloomFilterHash::XXHASH);
         assert_eq!(num_bytes, 32_i32);
         assert_eq!(20, SBBF_HEADER_SIZE_ESTIMATE);
     }
@@ -518,4 +599,43 @@ mod tests {
             assert_eq!(*num_bits, num_of_bits_from_ndv_fpp(*ndv, *fpp) as u64);
         }
     }
+
+    #[test]
+    fn test_sbbf_write_round_trip() {
+        // Create a bloom filter with a 32-byte bitset (minimum size)
+        let bitset_bytes = vec![0u8; 32];
+        let mut original = Sbbf::new(&bitset_bytes);
+
+        // Insert some test values
+        let test_values = ["hello", "world", "rust", "parquet", "bloom", "filter"];
+        for value in &test_values {
+            original.insert(value);
+        }
+
+        // Serialize to bytes
+        let mut output = Vec::new();
+        original.write(&mut output).unwrap();
+
+        // Validate header was written correctly
+        let mut protocol = ThriftSliceInputProtocol::new(&output);
+        let header = BloomFilterHeader::read_thrift(&mut protocol).unwrap();
+        assert_eq!(header.num_bytes, bitset_bytes.len() as i32);
+        assert_eq!(header.algorithm, BloomFilterAlgorithm::BLOCK);
+        assert_eq!(header.hash, BloomFilterHash::XXHASH);
+        assert_eq!(header.compression, BloomFilterCompression::UNCOMPRESSED);
+        // Everything after the header must be exactly the bitset
+        assert_eq!(protocol.as_slice().len(), bitset_bytes.len());
+
+        // Deserialize using from_bytes
+        let reconstructed = Sbbf::from_bytes(&output).unwrap();
+
+        // Bloom filters may report false positives but never false negatives, so only
+        // membership of the inserted values can be asserted after the round trip.
+        for value in &test_values {
+            assert!(
+                reconstructed.check(value),
+                "Value '{value}' should be present after round-trip"
+            );
+        }
+    }
 }
diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs
index 1dabe6794f07..f18b296c1c65 100644
--- a/parquet/src/column/page.rs
+++ b/parquet/src/column/page.rs
@@ -21,15 +21,17 @@ use bytes::Bytes;
 
 use crate::basic::{Encoding, PageType};
 use crate::errors::{ParquetError, Result};
-use crate::file::statistics::Statistics;
-use crate::format::PageHeader;
+use crate::file::metadata::thrift::{
+    DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, PageHeader,
+};
+use crate::file::statistics::{Statistics, page_stats_to_thrift};
 
 /// Parquet Page definition.
 ///
 /// List of supported pages.
 /// These are 1-to-1 mapped from the equivalent Thrift definitions, except `buf` which
 /// used to store uncompressed bytes of the page.
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub enum Page {
     /// Data page Parquet format v1.
     DataPage {
@@ -103,9 +105,9 @@ impl Page {
     /// Returns internal byte buffer reference for this page.
     pub fn buffer(&self) -> &Bytes {
         match self {
-            Page::DataPage { ref buf, .. } => buf,
-            Page::DataPageV2 { ref buf, .. } => buf,
-            Page::DictionaryPage { ref buf, .. } => buf,
+            Page::DataPage { buf, .. } => buf,
+            Page::DataPageV2 { buf, .. } => buf,
+            Page::DictionaryPage { buf, .. } => buf,
         }
     }
 
@@ -130,8 +132,8 @@ impl Page {
     /// Returns optional [`Statistics`].
     pub fn statistics(&self) -> Option<&Statistics> {
         match self {
-            Page::DataPage { ref statistics, .. } => statistics.as_ref(),
-            Page::DataPageV2 { ref statistics, .. } => statistics.as_ref(),
+            Page::DataPage { statistics, .. } => statistics.as_ref(),
+            Page::DataPageV2 { statistics, .. } => statistics.as_ref(),
             Page::DictionaryPage { .. } => None,
         }
     }
@@ -196,15 +198,27 @@ impl CompressedPage {
     }
 
     /// Returns the thrift page header
-    pub(crate) fn to_thrift_header(&self) -> PageHeader {
+    pub(crate) fn to_thrift_header(&self) -> Result<PageHeader> {
         let uncompressed_size = self.uncompressed_size();
         let compressed_size = self.compressed_size();
+        if uncompressed_size > i32::MAX as usize {
+            return Err(general_err!(
+                "Page uncompressed size overflow: {}",
+                uncompressed_size
+            ));
+        }
+        if compressed_size > i32::MAX as usize {
+            return Err(general_err!(
+                "Page compressed size overflow: {}",
+                compressed_size
+            ));
+        }
         let num_values = self.num_values();
         let encoding = self.encoding();
         let page_type = self.page_type();
 
         let mut page_header = PageHeader {
-            type_: page_type.into(),
+            r#type: page_type,
             uncompressed_page_size: uncompressed_size as i32,
             compressed_page_size: compressed_size as i32,
             // TODO: Add support for crc checksum
@@ -222,12 +236,12 @@ impl CompressedPage {
                 ref statistics,
                 ..
             } => {
-                let data_page_header = crate::format::DataPageHeader {
+                let data_page_header = DataPageHeader {
                     num_values: num_values as i32,
-                    encoding: encoding.into(),
-                    definition_level_encoding: def_level_encoding.into(),
-                    repetition_level_encoding: rep_level_encoding.into(),
-                    statistics: crate::file::statistics::to_thrift(statistics.as_ref()),
+                    encoding,
+                    definition_level_encoding: def_level_encoding,
+                    repetition_level_encoding: rep_level_encoding,
+                    statistics: page_stats_to_thrift(statistics.as_ref()),
                 };
                 page_header.data_page_header = Some(data_page_header);
             }
@@ -240,28 +254,28 @@ impl CompressedPage {
                 ref statistics,
                 ..
             } => {
-                let data_page_header_v2 = crate::format::DataPageHeaderV2 {
+                let data_page_header_v2 = DataPageHeaderV2 {
                     num_values: num_values as i32,
                     num_nulls: num_nulls as i32,
                     num_rows: num_rows as i32,
-                    encoding: encoding.into(),
+                    encoding,
                     definition_levels_byte_length: def_levels_byte_len as i32,
                     repetition_levels_byte_length: rep_levels_byte_len as i32,
                     is_compressed: Some(is_compressed),
-                    statistics: crate::file::statistics::to_thrift(statistics.as_ref()),
+                    statistics: page_stats_to_thrift(statistics.as_ref()),
                 };
                 page_header.data_page_header_v2 = Some(data_page_header_v2);
             }
             Page::DictionaryPage { is_sorted, .. } => {
-                let dictionary_page_header = crate::format::DictionaryPageHeader {
+                let dictionary_page_header = DictionaryPageHeader {
                     num_values: num_values as i32,
-                    encoding: encoding.into(),
+                    encoding,
                     is_sorted: Some(is_sorted),
                 };
                 page_header.dictionary_page_header = Some(dictionary_page_header);
             }
         }
-        page_header
+        Ok(page_header)
     }
 
     /// Update the compressed buffer for a page.
@@ -331,12 +345,14 @@ pub struct PageMetadata {
     pub is_dict: bool,
 }
 
-impl TryFrom<&PageHeader> for PageMetadata {
+impl TryFrom<&crate::file::metadata::thrift::PageHeader> for PageMetadata {
     type Error = ParquetError;
 
-    fn try_from(value: &PageHeader) -> std::result::Result<Self, Self::Error> {
-        match value.type_ {
-            crate::format::PageType::DATA_PAGE => {
+    fn try_from(
+        value: &crate::file::metadata::thrift::PageHeader,
+    ) -> std::result::Result<Self, Self::Error> {
+        match value.r#type {
+            PageType::DATA_PAGE => {
                 let header = value.data_page_header.as_ref().unwrap();
                 Ok(PageMetadata {
                     num_rows: None,
@@ -344,12 +360,12 @@ impl TryFrom<&PageHeader> for PageMetadata {
                     is_dict: false,
                 })
             }
-            crate::format::PageType::DICTIONARY_PAGE => Ok(PageMetadata {
+            PageType::DICTIONARY_PAGE => Ok(PageMetadata {
                 num_rows: None,
                 num_levels: None,
                 is_dict: true,
             }),
-            crate::format::PageType::DATA_PAGE_V2 => {
+            PageType::DATA_PAGE_V2 => {
                 let header = value.data_page_header_v2.as_ref().unwrap();
                 Ok(PageMetadata {
                     num_rows: Some(header.num_rows as _),
@@ -491,4 +507,28 @@ mod tests {
         assert_eq!(cpage.encoding(), Encoding::PLAIN);
         assert_eq!(cpage.data(), &[0, 1, 2]);
     }
+
+    #[test]
+    fn test_compressed_page_uncompressed_size_overflow() {
+        // Test that to_thrift_header fails when uncompressed size exceeds i32::MAX
+        let data_page = Page::DataPage {
+            buf: Bytes::from(vec![0, 1, 2]),
+            num_values: 10,
+            encoding: Encoding::PLAIN,
+            def_level_encoding: Encoding::RLE,
+            rep_level_encoding: Encoding::RLE,
+            statistics: None,
+        };
+
+        // Create a CompressedPage with uncompressed size larger than i32::MAX
+        let uncompressed_size = (i32::MAX as usize) + 1;
+        let cpage = CompressedPage::new(data_page, uncompressed_size);
+
+        // Verify that to_thrift_header returns an error
+        let result = cpage.to_thrift_header();
+        assert!(result.is_err());
+
+        let error_msg = result.unwrap_err().to_string();
+        assert!(error_msg.contains("Page uncompressed size overflow"));
+    }
 }
diff --git a/parquet/src/column/page_encryption.rs b/parquet/src/column/page_encryption.rs
index 0fb7c8942675..26df75900ce7 100644
--- a/parquet/src/column/page_encryption.rs
+++ b/parquet/src/column/page_encryption.rs
@@ -15,14 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::basic::PageType;
 use crate::column::page::CompressedPage;
 use crate::encryption::ciphers::BlockEncryptor;
-use crate::encryption::encrypt::{encrypt_object, FileEncryptor};
-use crate::encryption::modules::{create_module_aad, ModuleType};
+use crate::encryption::encrypt::{FileEncryptor, encrypt_thrift_object};
+use crate::encryption::modules::{ModuleType, create_module_aad};
 use crate::errors::ParquetError;
 use crate::errors::Result;
-use crate::format::PageHeader;
-use crate::format::PageType;
+use crate::file::metadata::thrift::PageHeader;
 use bytes::Bytes;
 use std::io::Write;
 use std::sync::Arc;
@@ -95,15 +95,15 @@ impl PageEncryptor {
         page_header: &PageHeader,
         sink: &mut W,
     ) -> Result<()> {
-        let module_type = match page_header.type_ {
+        let module_type = match page_header.r#type {
             PageType::DATA_PAGE => ModuleType::DataPageHeader,
             PageType::DATA_PAGE_V2 => ModuleType::DataPageHeader,
             PageType::DICTIONARY_PAGE => ModuleType::DictionaryPageHeader,
             _ => {
                 return Err(general_err!(
                     "Unsupported page type for page header encryption: {:?}",
-                    page_header.type_
-                ))
+                    page_header.r#type
+                ));
             }
         };
         let aad = create_module_aad(
@@ -114,6 +114,6 @@ impl PageEncryptor {
             Some(self.page_index),
         )?;
 
-        encrypt_object(page_header, &mut self.block_encryptor, sink, &aad)
+        encrypt_thrift_object(page_header, &mut self.block_encryptor, sink, &aad)
     }
 }
diff --git a/parquet/src/column/page_encryption_disabled.rs b/parquet/src/column/page_encryption_disabled.rs
index e85b0281168a..71f25862cc34 100644
--- a/parquet/src/column/page_encryption_disabled.rs
+++ b/parquet/src/column/page_encryption_disabled.rs
@@ -17,7 +17,7 @@
 
 use crate::column::page::CompressedPage;
 use crate::errors::Result;
-use crate::format::PageHeader;
+use crate::file::metadata::thrift::PageHeader;
 use std::io::Write;
 
 #[derive(Debug)]
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index b6998057845d..387a0602a60d 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -252,7 +252,9 @@ where
                     let (values_read, levels_read) = reader.read_def_levels(out, levels_to_read)?;
 
                     if levels_read != levels_to_read {
-                        return Err(general_err!("insufficient definition levels read from column - expected {levels_to_read}, got {levels_read}"));
+                        return Err(general_err!(
+                            "insufficient definition levels read from column - expected {levels_to_read}, got {levels_read}"
+                        ));
                     }
 
                     values_read
@@ -449,7 +451,7 @@ where
                                 self.rep_level_decoder
                                     .as_mut()
                                     .unwrap()
-                                    .set_data(rep_level_encoding, level_data);
+                                    .set_data(rep_level_encoding, level_data)?;
                             }
 
                             if max_def_level > 0 {
@@ -464,7 +466,7 @@ where
                                 self.def_level_decoder
                                     .as_mut()
                                     .unwrap()
-                                    .set_data(def_level_encoding, level_data);
+                                    .set_data(def_level_encoding, level_data)?;
                             }
 
                             self.values_decoder.set_data(
@@ -488,7 +490,11 @@ where
                             statistics: _,
                         } => {
                             if num_nulls > num_values {
-                                return Err(general_err!("more nulls than values in page, contained {} values and {} nulls", num_values, num_nulls));
+                                return Err(general_err!(
+                                    "more nulls than values in page, contained {} values and {} nulls",
+                                    num_values,
+                                    num_nulls
+                                ));
                             }
 
                             self.num_buffered_values = num_values as _;
@@ -506,7 +512,7 @@ where
                                 self.rep_level_decoder.as_mut().unwrap().set_data(
                                     Encoding::RLE,
                                     buf.slice(..rep_levels_byte_len as usize),
-                                );
+                                )?;
                             }
 
                             // DataPage v2 only supports RLE encoding for definition
@@ -518,7 +524,7 @@ where
                                         rep_levels_byte_len as usize
                                             ..(rep_levels_byte_len + def_levels_byte_len) as usize,
                                     ),
-                                );
+                                )?;
                             }
 
                             self.values_decoder.set_data(
@@ -563,11 +569,16 @@ fn parse_v1_level(
     match encoding {
         Encoding::RLE => {
             let i32_size = std::mem::size_of::<i32>();
-            let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
-            Ok((
-                i32_size + data_size,
-                buf.slice(i32_size..i32_size + data_size),
-            ))
+            if i32_size <= buf.len() {
+                let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
+                // Errors are built only on failure; `checked_add` rejects overflowing lengths.
+                match i32_size.checked_add(data_size) {
+                    Some(end) if end <= buf.len() => return Ok((end, buf.slice(i32_size..end))),
+                    Some(_) => {}
+                    None => return Err(general_err!("invalid level length")),
+                }
+            }
+            Err(general_err!("not enough data to read levels"))
         }
         #[allow(deprecated)]
         Encoding::BIT_PACKED => {
@@ -591,6 +602,25 @@ mod tests {
     use crate::util::test_common::page_util::InMemoryPageReader;
     use crate::util::test_common::rand_gen::make_pages;
 
+    /// A truncated RLE level buffer must be reported as an error.
+    #[test]
+    fn test_parse_v1_level_invalid_length() {
+        let truncated_inputs: [&[u8]; 2] = [
+            // Length prefix announces 10 bytes of levels, but nothing follows the prefix
+            &[10, 0, 0, 0],
+            // Only 3 bytes, too short to even hold the 4-byte length prefix
+            &[4, 0, 0],
+        ];
+        for input in truncated_inputs {
+            let buf = Bytes::from(input.to_vec());
+            let err = parse_v1_level(1, 100, Encoding::RLE, buf).unwrap_err();
+            assert_eq!(
+                err.to_string(),
+                "Parquet error: not enough data to read levels"
+            );
+        }
+    }
+
     const NUM_LEVELS: usize = 128;
     const NUM_PAGES: usize = 2;
     const MAX_DEF_LEVEL: i16 = 5;
diff --git a/parquet/src/column/reader/decoder.rs b/parquet/src/column/reader/decoder.rs
index a8766e82114b..053db813ce5d 100644
--- a/parquet/src/column/reader/decoder.rs
+++ b/parquet/src/column/reader/decoder.rs
@@ -15,26 +15,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::collections::HashMap;
-
 use bytes::Bytes;
 
-use crate::basic::Encoding;
+use crate::basic::{Encoding, EncodingMask};
 use crate::data_type::DataType;
 use crate::encodings::{
-    decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder},
+    decoding::{Decoder, DictDecoder, PlainDecoder, get_decoder},
     rle::RleDecoder,
 };
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
-use crate::util::bit_util::{num_required_bits, BitReader};
+use crate::util::bit_util::{BitReader, num_required_bits};
 
 /// Decodes level data
 pub trait ColumnLevelDecoder {
     type Buffer;
 
     /// Set data for this [`ColumnLevelDecoder`]
-    fn set_data(&mut self, encoding: Encoding, data: Bytes);
+    fn set_data(&mut self, encoding: Encoding, data: Bytes) -> Result<()>;
 }
 
 pub trait RepetitionLevelDecoder: ColumnLevelDecoder {
@@ -68,9 +66,9 @@ pub trait RepetitionLevelDecoder: ColumnLevelDecoder {
 }
 
 pub trait DefinitionLevelDecoder: ColumnLevelDecoder {
-    /// Read up to `num_levels` definition levels into `out`
+    /// Read up to `num_levels` definition levels into `out`.
     ///
-    /// Returns the number of values skipped, and the number of levels skipped
+    /// Returns the number of values read, and the number of levels read.
     ///
     /// # Panics
     ///
@@ -81,9 +79,9 @@ pub trait DefinitionLevelDecoder: ColumnLevelDecoder {
         num_levels: usize,
     ) -> Result<(usize, usize)>;
 
-    /// Skips over `num_levels` definition levels
+    /// Skips over `num_levels` definition levels.
     ///
-    /// Returns the number of values skipped, and the number of levels skipped
+    /// Returns the number of values skipped, and the number of levels skipped.
     fn skip_def_levels(&mut self, num_levels: usize) -> Result<(usize, usize)>;
 }
 
@@ -136,14 +134,22 @@ pub trait ColumnValueDecoder {
     fn skip_values(&mut self, num_values: usize) -> Result<usize>;
 }
 
+/// Number of slots in the decoder cache: one for each `Encoding` discriminant value
+/// from zero up to `Encoding::MAX_DISCRIMINANT`, so the cache can be indexed directly.
+/// This replaces `HashMap` lookups with direct indexing to avoid hashing overhead in the
+/// hot decoding paths.
+const ENCODING_SLOTS: usize = Encoding::MAX_DISCRIMINANT as usize + 1;
+
 /// An implementation of [`ColumnValueDecoder`] for `[T::T]`
 pub struct ColumnValueDecoderImpl<T: DataType> {
     descr: ColumnDescPtr,
 
     current_encoding: Option<Encoding>,
 
-    // Cache of decoders for existing encodings
-    decoders: HashMap<Encoding, Box<dyn Decoder<T>>>,
+    /// Records which encodings currently have a decoder stored in `decoders`.
+    decoder_mask: EncodingMask,
+    /// Cache of decoders for existing encodings, indexed by encoding discriminant.
+    decoders: [Option<Box<dyn Decoder<T>>>; ENCODING_SLOTS],
 }
 
 impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
@@ -153,7 +159,8 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
         Self {
             descr: descr.clone(),
             current_encoding: None,
-            decoders: Default::default(),
+            decoder_mask: EncodingMask::default(),
+            decoders: std::array::from_fn(|_| None),
         }
     }
 
@@ -168,7 +175,7 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
             encoding = Encoding::RLE_DICTIONARY
         }
 
-        if self.decoders.contains_key(&encoding) {
+        if self.decoder_mask.is_set(encoding) {
             return Err(general_err!("Column cannot have more than one dictionary"));
         }
 
@@ -178,7 +185,8 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
 
             let mut decoder = DictDecoder::new();
             decoder.set_dict(Box::new(dictionary))?;
-            self.decoders.insert(encoding, Box::new(decoder));
+            self.decoders[encoding as usize] = Some(Box::new(decoder));
+            self.decoder_mask.insert(encoding);
             Ok(())
         } else {
             Err(nyi_err!(
@@ -195,25 +203,24 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
         num_levels: usize,
         num_values: Option<usize>,
     ) -> Result<()> {
-        use std::collections::hash_map::Entry;
-
         if encoding == Encoding::PLAIN_DICTIONARY {
             encoding = Encoding::RLE_DICTIONARY;
         }
 
         let decoder = if encoding == Encoding::RLE_DICTIONARY {
-            self.decoders
-                .get_mut(&encoding)
+            self.decoders[encoding as usize]
+                .as_mut()
                 .expect("Decoder for dict should have been set")
         } else {
-            // Search cache for data page decoder
-            match self.decoders.entry(encoding) {
-                Entry::Occupied(e) => e.into_mut(),
-                Entry::Vacant(v) => {
-                    let data_decoder = get_decoder::<T>(self.descr.clone(), encoding)?;
-                    v.insert(data_decoder)
-                }
+            let slot = encoding as usize;
+            if self.decoders[slot].is_none() {
+                let data_decoder = get_decoder::<T>(self.descr.clone(), encoding)?;
+                self.decoders[slot] = Some(data_decoder);
+                self.decoder_mask.insert(encoding);
             }
+            self.decoders[slot]
+                .as_mut()
+                .expect("decoder should have been inserted")
         };
 
         decoder.set_data(data, num_values.unwrap_or(num_levels))?;
@@ -226,9 +233,8 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
             .current_encoding
             .expect("current_encoding should be set");
 
-        let current_decoder = self
-            .decoders
-            .get_mut(&encoding)
+        let current_decoder = self.decoders[encoding as usize]
+            .as_mut()
             .unwrap_or_else(|| panic!("decoder for encoding {encoding} should be set"));
 
         // TODO: Push vec into decoder (#5177)
@@ -244,9 +250,8 @@ impl<T: DataType> ColumnValueDecoder for ColumnValueDecoderImpl<T> {
             .current_encoding
             .expect("current_encoding should be set");
 
-        let current_decoder = self
-            .decoders
-            .get_mut(&encoding)
+        let current_decoder = self.decoders[encoding as usize]
+            .as_mut()
             .unwrap_or_else(|| panic!("decoder for encoding {encoding} should be set"));
 
         current_decoder.skip(num_values)
@@ -261,15 +266,15 @@ enum LevelDecoder {
 }
 
 impl LevelDecoder {
-    fn new(encoding: Encoding, data: Bytes, bit_width: u8) -> Self {
+    fn new(encoding: Encoding, data: Bytes, bit_width: u8) -> Result<Self> {
         match encoding {
             Encoding::RLE => {
                 let mut decoder = RleDecoder::new(bit_width);
-                decoder.set_data(data);
-                Self::Rle(decoder)
+                decoder.set_data(data)?;
+                Ok(Self::Rle(decoder))
             }
             #[allow(deprecated)]
-            Encoding::BIT_PACKED => Self::Packed(BitReader::new(data), bit_width),
+            Encoding::BIT_PACKED => Ok(Self::Packed(BitReader::new(data), bit_width)),
             _ => unreachable!("invalid level encoding: {}", encoding),
         }
     }
@@ -305,8 +310,9 @@ impl DefinitionLevelDecoderImpl {
 impl ColumnLevelDecoder for DefinitionLevelDecoderImpl {
     type Buffer = Vec<i16>;
 
-    fn set_data(&mut self, encoding: Encoding, data: Bytes) {
-        self.decoder = Some(LevelDecoder::new(encoding, data, self.bit_width))
+    fn set_data(&mut self, encoding: Encoding, data: Bytes) -> Result<()> {
+        self.decoder = Some(LevelDecoder::new(encoding, data, self.bit_width)?);
+        Ok(())
     }
 }
 
@@ -408,10 +414,11 @@ impl RepetitionLevelDecoderImpl {
 impl ColumnLevelDecoder for RepetitionLevelDecoderImpl {
     type Buffer = Vec<i16>;
 
-    fn set_data(&mut self, encoding: Encoding, data: Bytes) {
-        self.decoder = Some(LevelDecoder::new(encoding, data, self.bit_width));
+    fn set_data(&mut self, encoding: Encoding, data: Bytes) -> Result<()> {
+        self.decoder = Some(LevelDecoder::new(encoding, data, self.bit_width)?);
         self.buffer_len = 0;
         self.buffer_offset = 0;
+        Ok(())
     }
 }
 
@@ -494,14 +501,14 @@ mod tests {
         let data = Bytes::from(encoder.consume());
 
         let mut decoder = RepetitionLevelDecoderImpl::new(1);
-        decoder.set_data(Encoding::RLE, data.clone());
+        decoder.set_data(Encoding::RLE, data.clone()).unwrap();
         let (_, levels) = decoder.skip_rep_levels(100, 4).unwrap();
         assert_eq!(levels, 4);
 
         // The length of the final bit packed run is ambiguous, so without the correct
         // levels limit, it will decode zero padding
         let mut decoder = RepetitionLevelDecoderImpl::new(1);
-        decoder.set_data(Encoding::RLE, data);
+        decoder.set_data(Encoding::RLE, data).unwrap();
         let (_, levels) = decoder.skip_rep_levels(100, 6).unwrap();
         assert_eq!(levels, 6);
     }
@@ -520,7 +527,7 @@ mod tests {
             let data = Bytes::from(encoder.consume());
 
             let mut decoder = RepetitionLevelDecoderImpl::new(5);
-            decoder.set_data(Encoding::RLE, data);
+            decoder.set_data(Encoding::RLE, data).unwrap();
 
             let total_records = encoded.iter().filter(|x| **x == 0).count();
             let mut remaining_records = total_records;
diff --git a/parquet/src/column/writer/encoder.rs b/parquet/src/column/writer/encoder.rs
index 7371c72a5896..11d4f3142a20 100644
--- a/parquet/src/column/writer/encoder.rs
+++ b/parquet/src/column/writer/encoder.rs
@@ -23,11 +23,13 @@ use crate::bloom_filter::Sbbf;
 use crate::column::writer::{
     compare_greater, fallback_encoding, has_dictionary_support, is_nan, update_max, update_min,
 };
-use crate::data_type::private::ParquetValueType;
 use crate::data_type::DataType;
-use crate::encodings::encoding::{get_encoder, DictEncoder, Encoder};
+use crate::data_type::private::ParquetValueType;
+use crate::encodings::encoding::{DictEncoder, Encoder, get_encoder};
 use crate::errors::{ParquetError, Result};
 use crate::file::properties::{EnabledStatistics, WriterProperties};
+use crate::geospatial::accumulator::{GeoStatsAccumulator, try_new_geo_stats_accumulator};
+use crate::geospatial::statistics::GeospatialStatistics;
 use crate::schema::types::{ColumnDescPtr, ColumnDescriptor};
 
 /// A collection of [`ParquetValueType`] encoded by a [`ColumnValueEncoder`]
@@ -121,6 +123,10 @@ pub trait ColumnValueEncoder {
     /// will *not* be tracked by the bloom filter as it is empty since. This should be called once
     /// near the end of encoding.
     fn flush_bloom_filter(&mut self) -> Option<Sbbf>;
+
+    /// Computes [`GeospatialStatistics`], if any, and resets internal state such that any internal
+    /// accumulator is prepared to accumulate statistics for the next column chunk.
+    fn flush_geospatial_statistics(&mut self) -> Option<Box<GeospatialStatistics>>;
 }
 
 pub struct ColumnValueEncoderImpl<T: DataType> {
@@ -133,6 +139,7 @@ pub struct ColumnValueEncoderImpl<T: DataType> {
     max_value: Option<T::T>,
     bloom_filter: Option<Sbbf>,
     variable_length_bytes: Option<i64>,
+    geo_stats_accumulator: Option<Box<dyn GeoStatsAccumulator>>,
 }
 
 impl<T: DataType> ColumnValueEncoderImpl<T> {
@@ -145,10 +152,12 @@ impl<T: DataType> ColumnValueEncoderImpl<T> {
 
     fn write_slice(&mut self, slice: &[T::T]) -> Result<()> {
         if self.statistics_enabled != EnabledStatistics::None
-            // INTERVAL has undefined sort order, so don't write min/max stats for it
+            // INTERVAL, Geometry, and Geography have undefined sort order, so don't write min/max stats for them
             && self.descr.converted_type() != ConvertedType::INTERVAL
         {
-            if let Some((min, max)) = self.min_max(slice, None) {
+            if let Some(accumulator) = self.geo_stats_accumulator.as_deref_mut() {
+                update_geo_stats_accumulator(accumulator, slice.iter());
+            } else if let Some((min, max)) = self.min_max(slice, None) {
                 update_min(&self.descr, &min, &mut self.min_value);
                 update_max(&self.descr, &max, &mut self.max_value);
             }
@@ -201,6 +210,8 @@ impl<T: DataType> ColumnValueEncoder for ColumnValueEncoderImpl<T> {
             .map(|props| Sbbf::new_with_ndv_fpp(props.ndv, props.fpp))
             .transpose()?;
 
+        let geo_stats_accumulator = try_new_geo_stats_accumulator(descr);
+
         Ok(Self {
             encoder,
             dict_encoder,
@@ -211,6 +222,7 @@ impl<T: DataType> ColumnValueEncoder for ColumnValueEncoderImpl<T> {
             min_value: None,
             max_value: None,
             variable_length_bytes: None,
+            geo_stats_accumulator,
         })
     }
 
@@ -307,6 +319,10 @@ impl<T: DataType> ColumnValueEncoder for ColumnValueEncoderImpl<T> {
             variable_length_bytes: self.variable_length_bytes.take(),
         })
     }
+
+    fn flush_geospatial_statistics(&mut self) -> Option<Box<GeospatialStatistics>> {
+        self.geo_stats_accumulator.as_mut().and_then(|a| a.finish())
+    }
 }
 
 fn get_min_max<'a, T, I>(descr: &ColumnDescriptor, mut iter: I) -> Option<(T, T)>
@@ -359,7 +375,7 @@ fn replace_zero<T: ParquetValueType>(val: &T, descr: &ColumnDescriptor, replace:
             T::try_from_le_slice(&f64::to_le_bytes(replace as f64)).unwrap()
         }
         Type::FIXED_LEN_BYTE_ARRAY
-            if descr.logical_type() == Some(LogicalType::Float16)
+            if descr.logical_type_ref() == Some(LogicalType::Float16).as_ref()
                 && f16::from_le_bytes(val.as_bytes().try_into().unwrap()) == f16::NEG_ZERO =>
         {
             T::try_from_le_slice(&f16::to_le_bytes(f16::from_f32(replace))).unwrap()
@@ -367,3 +383,15 @@ fn replace_zero<T: ParquetValueType>(val: &T, descr: &ColumnDescriptor, replace:
         _ => val.clone(),
     }
 }
+
+/// Passes the bytes of each value in `iter` to `bounder`, unless it is already invalid.
+fn update_geo_stats_accumulator<'a, T, I>(bounder: &mut dyn GeoStatsAccumulator, iter: I)
+where
+    T: ParquetValueType + 'a,
+    I: Iterator<Item = &'a T>,
+{
+    if !bounder.is_valid() {
+        return;
+    }
+    iter.for_each(|value| bounder.update_wkb(value.as_bytes()));
+}
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 8aac5d74391f..417c0112759a 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -21,14 +21,17 @@ use bytes::Bytes;
 use half::f16;
 
 use crate::bloom_filter::Sbbf;
-use crate::format::{BoundaryOrder, ColumnIndex, OffsetIndex};
+use crate::file::page_index::column_index::ColumnIndexMetaData;
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
 use std::collections::{BTreeSet, VecDeque};
 use std::str;
 
-use crate::basic::{Compression, ConvertedType, Encoding, LogicalType, PageType, Type};
+use crate::basic::{
+    BoundaryOrder, Compression, ConvertedType, Encoding, EncodingMask, LogicalType, PageType, Type,
+};
 use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter};
 use crate::column::writer::encoder::{ColumnValueEncoder, ColumnValueEncoderImpl, ColumnValues};
-use crate::compression::{create_codec, Codec, CodecOptionsBuilder};
+use crate::compression::{Codec, CodecOptionsBuilder, create_codec};
 use crate::data_type::private::ParquetValueType;
 use crate::data_type::*;
 use crate::encodings::levels::LevelEncoder;
@@ -37,9 +40,8 @@ use crate::encryption::encrypt::get_column_crypto_metadata;
 use crate::errors::{ParquetError, Result};
 use crate::file::metadata::{
     ColumnChunkMetaData, ColumnChunkMetaDataBuilder, ColumnIndexBuilder, LevelHistogram,
-    OffsetIndexBuilder,
+    OffsetIndexBuilder, PageEncodingStats,
 };
-use crate::file::page_encoding_stats::PageEncodingStats;
 use crate::file::properties::{
     EnabledStatistics, WriterProperties, WriterPropertiesPtr, WriterVersion,
 };
@@ -64,6 +66,8 @@ macro_rules! downcast_writer {
 }
 
 /// Column writer for a Parquet type.
+///
+/// See [`get_column_writer`] to create instances of this type
 pub enum ColumnWriter<'a> {
     /// Column writer for boolean type
     BoolColumnWriter(ColumnWriterImpl<'a, BoolType>),
@@ -96,23 +100,13 @@ impl ColumnWriter<'_> {
         downcast_writer!(self, typed, typed.get_estimated_total_bytes())
     }
 
-    /// Close this [`ColumnWriter`]
+    /// Close this [`ColumnWriter`], returning the metadata for the column chunk.
     pub fn close(self) -> Result<ColumnCloseResult> {
         downcast_writer!(self, typed, typed.close())
     }
 }
 
-#[deprecated(
-    since = "54.0.0",
-    note = "Seems like a stray and nobody knows what's it for. Will be removed in the next release."
-)]
-#[allow(missing_docs)]
-pub enum Level {
-    Page,
-    Column,
-}
-
-/// Gets a specific column writer corresponding to column descriptor `descr`.
+/// Create a specific column writer corresponding to column descriptor `descr`.
 pub fn get_column_writer<'a>(
     descr: ColumnDescPtr,
     props: WriterPropertiesPtr,
@@ -183,7 +177,9 @@ pub fn get_typed_column_writer_mut<'a, 'b: 'a, T: DataType>(
     })
 }
 
-/// Metadata returned by [`GenericColumnWriter::close`]
+/// Metadata for a column chunk of a Parquet file.
+///
+/// Note this structure is returned by [`ColumnWriter::close`].
 #[derive(Debug, Clone)]
 pub struct ColumnCloseResult {
     /// The total number of bytes written
@@ -195,9 +191,9 @@ pub struct ColumnCloseResult {
     /// Optional bloom filter for this column
     pub bloom_filter: Option<Sbbf>,
     /// Optional column index, for filtering
-    pub column_index: Option<ColumnIndex>,
+    pub column_index: Option<ColumnIndexMetaData>,
     /// Optional offset index, identifying page locations
-    pub offset_index: Option<OffsetIndex>,
+    pub offset_index: Option<OffsetIndexMetaData>,
 }
 
 // Metrics per page
@@ -326,7 +322,7 @@ impl<T: Default> ColumnMetrics<T> {
 /// Typed column writer for a primitive column.
 pub type ColumnWriterImpl<'a, T> = GenericColumnWriter<'a, ColumnValueEncoderImpl<T>>;
 
-/// Generic column writer for a primitive column.
+/// Generic column writer for a primitive Parquet column
 pub struct GenericColumnWriter<'a, E: ColumnValueEncoder> {
     // Column writer properties
     descr: ColumnDescPtr,
@@ -394,7 +390,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
         }
 
         // Disable column_index_builder if not collecting page statistics.
-        let mut column_index_builder = ColumnIndexBuilder::new();
+        let mut column_index_builder = ColumnIndexBuilder::new(descr.physical_type());
         if statistics_enabled != EnabledStatistics::Page {
             column_index_builder.to_invalid()
         }
@@ -625,12 +621,12 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
         };
         self.column_index_builder.set_boundary_order(boundary_order);
 
-        let column_index = self
-            .column_index_builder
-            .valid()
-            .then(|| self.column_index_builder.build_to_thrift());
+        let column_index = match self.column_index_builder.valid() {
+            true => Some(self.column_index_builder.build()?),
+            false => None,
+        };
 
-        let offset_index = self.offset_index_builder.map(|b| b.build_to_thrift());
+        let offset_index = self.offset_index_builder.map(|b| b.build());
 
         Ok(ColumnCloseResult {
             bytes_written: self.column_metrics.total_bytes_written,
@@ -663,15 +659,11 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
                 )
             })?;
 
-            let mut values_to_write = 0;
-            for &level in levels {
-                if level == self.descr.max_def_level() {
-                    values_to_write += 1;
-                } else {
-                    // We must always compute this as it is used to populate v2 pages
-                    self.page_metrics.num_page_nulls += 1
-                }
-            }
+            let values_to_write = levels
+                .iter()
+                .map(|level| (*level == self.descr.max_def_level()) as usize)
+                .sum();
+            self.page_metrics.num_page_nulls += (levels.len() - values_to_write) as u64;
 
             // Update histogram
             self.page_metrics.update_definition_level_histogram(levels);
@@ -876,8 +868,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
             // So truncation of those types could lead to inaccurate min/max statistics
             Type::FIXED_LEN_BYTE_ARRAY
                 if !matches!(
-                    self.descr.logical_type(),
-                    Some(LogicalType::Decimal { .. }) | Some(LogicalType::Float16)
+                    self.descr.logical_type_ref(),
+                    Some(&LogicalType::Decimal { .. }) | Some(&LogicalType::Float16)
                 ) =>
             {
                 true
@@ -890,7 +882,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
 
     /// Returns `true` if this column's logical type is a UTF-8 string.
     fn is_utf8(&self) -> bool {
-        self.get_descriptor().logical_type() == Some(LogicalType::String)
+        self.get_descriptor().logical_type_ref() == Some(&LogicalType::String)
             || self.get_descriptor().converted_type() == ConvertedType::UTF8
     }
 
@@ -1048,8 +1040,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
         self.column_metrics
             .update_variable_length_bytes(values_data.variable_length_bytes);
 
-        let page_statistics = page_statistics.map(Statistics::from);
-        let page_statistics = page_statistics.map(|stats| self.truncate_statistics(stats));
+        // From here on, we only need page statistics if they will be written to the page header.
+        let page_statistics = page_statistics
+            .filter(|_| self.props.write_page_header_statistics(self.descr.path()))
+            .map(|stats| self.truncate_statistics(Statistics::from(stats)));
 
         let compressed_page = match self.props.writer_version() {
             WriterVersion::PARQUET_1_0 => {
@@ -1081,6 +1075,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
                 if let Some(ref mut cmpr) = self.compressor {
                     let mut compressed_buf = Vec::with_capacity(uncompressed_size);
                     cmpr.compress(&buffer[..], &mut compressed_buf)?;
+                    compressed_buf.shrink_to_fit();
                     buffer = compressed_buf;
                 }
 
@@ -1116,12 +1111,23 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
                     rep_levels_byte_len + def_levels_byte_len + values_data.buf.len();
 
                 // Data Page v2 compresses values only.
-                match self.compressor {
+                let is_compressed = match self.compressor {
                     Some(ref mut cmpr) => {
+                        let buffer_len = buffer.len();
                         cmpr.compress(&values_data.buf, &mut buffer)?;
+                        if uncompressed_size <= buffer.len() - buffer_len {
+                            buffer.truncate(buffer_len);
+                            buffer.extend_from_slice(&values_data.buf);
+                            false
+                        } else {
+                            true
+                        }
                     }
-                    None => buffer.extend_from_slice(&values_data.buf),
-                }
+                    None => {
+                        buffer.extend_from_slice(&values_data.buf);
+                        false
+                    }
+                };
 
                 let data_page = Page::DataPageV2 {
                     buf: buffer.into(),
@@ -1131,7 +1137,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
                     num_rows: self.page_metrics.num_buffered_rows,
                     def_levels_byte_len: def_levels_byte_len as u32,
                     rep_levels_byte_len: rep_levels_byte_len as u32,
-                    is_compressed: self.compressor.is_some(),
+                    is_compressed,
                     statistics: page_statistics,
                 };
 
@@ -1184,7 +1190,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
 
         let mut builder = ColumnChunkMetaData::builder(self.descr.clone())
             .set_compression(self.codec)
-            .set_encodings(self.encodings.iter().cloned().collect())
+            .set_encodings_mask(EncodingMask::new_from_encodings(self.encodings.iter()))
             .set_page_encoding_stats(self.encoding_stats.clone())
             .set_total_compressed_size(total_compressed_size)
             .set_total_uncompressed_size(total_uncompressed_size)
@@ -1216,6 +1222,10 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
                 .set_definition_level_histogram(
                     self.column_metrics.definition_level_histogram.take(),
                 );
+
+            if let Some(geo_stats) = self.encoder.flush_geospatial_statistics() {
+                builder = builder.set_geo_statistics(geo_stats);
+            }
         }
 
         builder = self.set_column_chunk_encryption_properties(builder);
@@ -1375,7 +1385,7 @@ fn update_max<T: ParquetValueType>(descr: &ColumnDescriptor, val: &T, max: &mut
 fn is_nan<T: ParquetValueType>(descr: &ColumnDescriptor, val: &T) -> bool {
     match T::PHYSICAL_TYPE {
         Type::FLOAT | Type::DOUBLE => val != val,
-        Type::FIXED_LEN_BYTE_ARRAY if descr.logical_type() == Some(LogicalType::Float16) => {
+        Type::FIXED_LEN_BYTE_ARRAY if descr.logical_type_ref() == Some(&LogicalType::Float16) => {
             let val = val.as_bytes();
             let val = f16::from_le_bytes([val[0], val[1]]);
             val.is_nan()
@@ -1400,56 +1410,49 @@ fn update_stat<T: ParquetValueType, F>(
         return;
     }
 
-    if cur.as_ref().map_or(true, should_update) {
+    if cur.as_ref().is_none_or(should_update) {
         *cur = Some(val.clone());
     }
 }
 
 /// Evaluate `a > b` according to underlying logical type.
 fn compare_greater<T: ParquetValueType>(descr: &ColumnDescriptor, a: &T, b: &T) -> bool {
-    if let Some(LogicalType::Integer { is_signed, .. }) = descr.logical_type() {
-        if !is_signed {
-            // need to compare unsigned
-            return a.as_u64().unwrap() > b.as_u64().unwrap();
-        }
-    }
+    match T::PHYSICAL_TYPE {
+        Type::INT32 | Type::INT64 => {
+            if let Some(LogicalType::Integer {
+                is_signed: false, ..
+            }) = descr.logical_type_ref()
+            {
+                // need to compare unsigned
+                return compare_greater_unsigned_int(a, b);
+            }
 
-    match descr.converted_type() {
-        ConvertedType::UINT_8
-        | ConvertedType::UINT_16
-        | ConvertedType::UINT_32
-        | ConvertedType::UINT_64 => {
-            return a.as_u64().unwrap() > b.as_u64().unwrap();
+            match descr.converted_type() {
+                ConvertedType::UINT_8
+                | ConvertedType::UINT_16
+                | ConvertedType::UINT_32
+                | ConvertedType::UINT_64 => {
+                    return compare_greater_unsigned_int(a, b);
+                }
+                _ => {}
+            };
         }
-        _ => {}
-    };
-
-    if let Some(LogicalType::Decimal { .. }) = descr.logical_type() {
-        match T::PHYSICAL_TYPE {
-            Type::FIXED_LEN_BYTE_ARRAY | Type::BYTE_ARRAY => {
+        Type::FIXED_LEN_BYTE_ARRAY | Type::BYTE_ARRAY => {
+            if let Some(LogicalType::Decimal { .. }) = descr.logical_type_ref() {
                 return compare_greater_byte_array_decimals(a.as_bytes(), b.as_bytes());
             }
-            _ => {}
-        };
-    }
-
-    if descr.converted_type() == ConvertedType::DECIMAL {
-        match T::PHYSICAL_TYPE {
-            Type::FIXED_LEN_BYTE_ARRAY | Type::BYTE_ARRAY => {
+            if let ConvertedType::DECIMAL = descr.converted_type() {
                 return compare_greater_byte_array_decimals(a.as_bytes(), b.as_bytes());
             }
-            _ => {}
-        };
-    };
+            if let Some(LogicalType::Float16) = descr.logical_type_ref() {
+                return compare_greater_f16(a.as_bytes(), b.as_bytes());
+            }
+        }
 
-    if let Some(LogicalType::Float16) = descr.logical_type() {
-        let a = a.as_bytes();
-        let a = f16::from_le_bytes([a[0], a[1]]);
-        let b = b.as_bytes();
-        let b = f16::from_le_bytes([b[0], b[1]]);
-        return a > b;
+        _ => {}
     }
 
+    // compare independent of logical / converted type
     a > b
 }
 
@@ -1483,6 +1486,18 @@ fn has_dictionary_support(kind: Type, props: &WriterProperties) -> bool {
     }
 }
 
+#[inline]
+fn compare_greater_unsigned_int<T: ParquetValueType>(a: &T, b: &T) -> bool {
+    a.as_u64().unwrap() > b.as_u64().unwrap()
+}
+
+#[inline]
+fn compare_greater_f16(a: &[u8], b: &[u8]) -> bool {
+    let a = f16::from_le_bytes(a.try_into().unwrap());
+    let b = f16::from_le_bytes(b.try_into().unwrap());
+    a > b
+}
+
 /// Signed comparison of bytes arrays
 fn compare_greater_byte_array_decimals(a: &[u8], b: &[u8]) -> bool {
     let a_length = a.len();
@@ -1601,7 +1616,7 @@ mod tests {
 
     use crate::column::{
         page::PageReader,
-        reader::{get_column_reader, get_typed_column_reader, ColumnReaderImpl},
+        reader::{ColumnReaderImpl, get_column_reader, get_typed_column_reader},
     };
     use crate::file::writer::TrackedWrite;
     use crate::file::{
@@ -1719,7 +1734,10 @@ mod tests {
         assert_eq!(r.rows_written, 4);
 
         let metadata = r.metadata;
-        assert_eq!(metadata.encodings(), &vec![Encoding::PLAIN, Encoding::RLE]);
+        assert_eq!(
+            metadata.encodings().collect::<Vec<_>>(),
+            vec![Encoding::PLAIN, Encoding::RLE]
+        );
         assert_eq!(metadata.num_values(), 4); // just values
         assert_eq!(metadata.dictionary_page_offset(), None);
     }
@@ -2080,8 +2098,8 @@ mod tests {
 
         let metadata = r.metadata;
         assert_eq!(
-            metadata.encodings(),
-            &vec![Encoding::PLAIN, Encoding::RLE, Encoding::RLE_DICTIONARY]
+            metadata.encodings().collect::<Vec<_>>(),
+            vec![Encoding::PLAIN, Encoding::RLE, Encoding::RLE_DICTIONARY]
         );
         assert_eq!(metadata.num_values(), 4);
         assert_eq!(metadata.compressed_size(), 20);
@@ -2206,8 +2224,8 @@ mod tests {
 
         let metadata = r.metadata;
         assert_eq!(
-            metadata.encodings(),
-            &vec![Encoding::PLAIN, Encoding::RLE, Encoding::RLE_DICTIONARY]
+            metadata.encodings().collect::<Vec<_>>(),
+            vec![Encoding::PLAIN, Encoding::RLE, Encoding::RLE_DICTIONARY]
         );
         assert_eq!(metadata.num_values(), 4);
         assert_eq!(metadata.compressed_size(), 20);
@@ -2233,7 +2251,11 @@ mod tests {
         let mut buf = Vec::with_capacity(100);
         let mut write = TrackedWrite::new(&mut buf);
         let page_writer = Box::new(SerializedPageWriter::new(&mut write));
-        let props = Default::default();
+        let props = Arc::new(
+            WriterProperties::builder()
+                .set_write_page_header_statistics(true)
+                .build(),
+        );
         let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
 
         writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
@@ -2253,6 +2275,7 @@ mod tests {
 
         let props = ReaderProperties::builder()
             .set_backward_compatible_lz4(false)
+            .set_read_page_statistics(true)
             .build();
         let reader = SerializedPageReader::new_with_properties(
             Arc::new(Bytes::from(buf)),
@@ -2531,8 +2554,8 @@ mod tests {
         let stats = statistics_roundtrip::<Int96Type>(&input);
         assert!(!stats.is_min_max_backwards_compatible());
         if let Statistics::Int96(stats) = stats {
-            assert_eq!(stats.min_opt().unwrap(), &Int96::from(vec![0, 20, 30]));
-            assert_eq!(stats.max_opt().unwrap(), &Int96::from(vec![3, 20, 10]));
+            assert_eq!(stats.min_opt().unwrap(), &Int96::from(vec![3, 20, 10]));
+            assert_eq!(stats.max_opt().unwrap(), &Int96::from(vec![2, 20, 30]));
         } else {
             panic!("expecting Statistics::Int96, got {stats:?}");
         }
@@ -2942,19 +2965,23 @@ mod tests {
         let r = writer.close().unwrap();
         assert!(r.column_index.is_some());
         let col_idx = r.column_index.unwrap();
+        let col_idx = match col_idx {
+            ColumnIndexMetaData::INT32(col_idx) => col_idx,
+            _ => panic!("wrong stats type"),
+        };
         // null_pages should be true for page 0
-        assert!(col_idx.null_pages[0]);
+        assert!(col_idx.is_null_page(0));
         // min and max should be empty byte arrays
-        assert_eq!(col_idx.min_values[0].len(), 0);
-        assert_eq!(col_idx.max_values[0].len(), 0);
+        assert!(col_idx.min_value(0).is_none());
+        assert!(col_idx.max_value(0).is_none());
         // null_counts should be defined and be 4 for page 0
-        assert!(col_idx.null_counts.is_some());
-        assert_eq!(col_idx.null_counts.as_ref().unwrap()[0], 4);
+        assert!(col_idx.null_count(0).is_some());
+        assert_eq!(col_idx.null_count(0), Some(4));
         // there is no repetition so rep histogram should be absent
-        assert!(col_idx.repetition_level_histograms.is_none());
+        assert!(col_idx.repetition_level_histogram(0).is_none());
         // definition_level_histogram should be present and should be 0:4, 1:0
-        assert!(col_idx.definition_level_histograms.is_some());
-        assert_eq!(col_idx.definition_level_histograms.unwrap(), &[4, 0]);
+        assert!(col_idx.definition_level_histogram(0).is_some());
+        assert_eq!(col_idx.definition_level_histogram(0).unwrap(), &[4, 0]);
     }
 
     #[test]
@@ -2977,12 +3004,16 @@ mod tests {
         assert_eq!(8, r.rows_written);
 
         // column index
-        assert_eq!(2, column_index.null_pages.len());
+        let column_index = match column_index {
+            ColumnIndexMetaData::INT32(column_index) => column_index,
+            _ => panic!("wrong stats type"),
+        };
+        assert_eq!(2, column_index.num_pages());
         assert_eq!(2, offset_index.page_locations.len());
         assert_eq!(BoundaryOrder::UNORDERED, column_index.boundary_order);
         for idx in 0..2 {
-            assert!(!column_index.null_pages[idx]);
-            assert_eq!(0, column_index.null_counts.as_ref().unwrap()[idx]);
+            assert!(!column_index.is_null_page(idx));
+            assert_eq!(0, column_index.null_count(idx).unwrap());
         }
 
         if let Some(stats) = r.metadata.statistics() {
@@ -2992,14 +3023,8 @@ mod tests {
                 // first page is [1,2,3,4]
                 // second page is [-5,2,4,8]
                 // note that we don't increment here, as this is a non BinaryArray type.
-                assert_eq!(
-                    stats.min_bytes_opt(),
-                    Some(column_index.min_values[1].as_slice())
-                );
-                assert_eq!(
-                    stats.max_bytes_opt(),
-                    column_index.max_values.get(1).map(Vec::as_slice)
-                );
+                assert_eq!(stats.min_opt(), column_index.min_value(1));
+                assert_eq!(stats.max_opt(), column_index.max_value(1));
             } else {
                 panic!("expecting Statistics::Int32");
             }
@@ -3018,7 +3043,10 @@ mod tests {
         // write data
         // and check the offset index and column index
         let page_writer = get_test_page_writer();
-        let props = Default::default();
+        let props = WriterProperties::builder()
+            .set_statistics_truncate_length(None) // disable column chunk statistics truncation
+            .build()
+            .into();
         let mut writer = get_test_column_writer::<FixedLenByteArrayType>(page_writer, 0, 0, props);
 
         let mut data = vec![FixedLenByteArray::default(); 3];
@@ -3036,37 +3064,36 @@ mod tests {
         let column_index = r.column_index.unwrap();
         let offset_index = r.offset_index.unwrap();
 
+        let column_index = match column_index {
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index,
+            _ => panic!("wrong stats type"),
+        };
+
         assert_eq!(3, r.rows_written);
 
         // column index
-        assert_eq!(1, column_index.null_pages.len());
+        assert_eq!(1, column_index.num_pages());
         assert_eq!(1, offset_index.page_locations.len());
         assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order);
-        assert!(!column_index.null_pages[0]);
-        assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]);
+        assert!(!column_index.is_null_page(0));
+        assert_eq!(Some(0), column_index.null_count(0));
 
         if let Some(stats) = r.metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
             assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::FixedLenByteArray(stats) = stats {
-                let column_index_min_value = &column_index.min_values[0];
-                let column_index_max_value = &column_index.max_values[0];
+                let column_index_min_value = column_index.min_value(0).unwrap();
+                let column_index_max_value = column_index.max_value(0).unwrap();
 
                 // Column index stats are truncated, while the column chunk's aren't.
-                assert_ne!(
-                    stats.min_bytes_opt(),
-                    Some(column_index_min_value.as_slice())
-                );
-                assert_ne!(
-                    stats.max_bytes_opt(),
-                    Some(column_index_max_value.as_slice())
-                );
+                assert_ne!(stats.min_bytes_opt().unwrap(), column_index_min_value);
+                assert_ne!(stats.max_bytes_opt().unwrap(), column_index_max_value);
 
                 assert_eq!(
                     column_index_min_value.len(),
                     DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH.unwrap()
                 );
-                assert_eq!(column_index_min_value.as_slice(), &[97_u8; 64]);
+                assert_eq!(column_index_min_value, &[97_u8; 64]);
                 assert_eq!(
                     column_index_max_value.len(),
                     DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH.unwrap()
@@ -3108,27 +3135,32 @@ mod tests {
         let column_index = r.column_index.unwrap();
         let offset_index = r.offset_index.unwrap();
 
+        let column_index = match column_index {
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index,
+            _ => panic!("wrong stats type"),
+        };
+
         assert_eq!(1, r.rows_written);
 
         // column index
-        assert_eq!(1, column_index.null_pages.len());
+        assert_eq!(1, column_index.num_pages());
         assert_eq!(1, offset_index.page_locations.len());
         assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order);
-        assert!(!column_index.null_pages[0]);
-        assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]);
+        assert!(!column_index.is_null_page(0));
+        assert_eq!(Some(0), column_index.null_count(0));
 
         if let Some(stats) = r.metadata.statistics() {
             assert_eq!(stats.null_count_opt(), Some(0));
             assert_eq!(stats.distinct_count_opt(), None);
             if let Statistics::FixedLenByteArray(_stats) = stats {
-                let column_index_min_value = &column_index.min_values[0];
-                let column_index_max_value = &column_index.max_values[0];
+                let column_index_min_value = column_index.min_value(0).unwrap();
+                let column_index_max_value = column_index.max_value(0).unwrap();
 
                 assert_eq!(column_index_min_value.len(), 1);
                 assert_eq!(column_index_max_value.len(), 1);
 
-                assert_eq!("B".as_bytes(), column_index_min_value.as_slice());
-                assert_eq!("C".as_bytes(), column_index_max_value.as_slice());
+                assert_eq!("B".as_bytes(), column_index_min_value);
+                assert_eq!("C".as_bytes(), column_index_max_value);
 
                 assert_ne!(column_index_min_value, stats.min_bytes_opt().unwrap());
                 assert_ne!(column_index_max_value, stats.max_bytes_opt().unwrap());
@@ -3158,8 +3190,12 @@ mod tests {
         // stats should still be written
         // ensure bytes weren't truncated for column index
         let column_index = r.column_index.unwrap();
-        let column_index_min_bytes = column_index.min_values[0].as_slice();
-        let column_index_max_bytes = column_index.max_values[0].as_slice();
+        let column_index = match column_index {
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index,
+            _ => panic!("wrong stats type"),
+        };
+        let column_index_min_bytes = column_index.min_value(0).unwrap();
+        let column_index_max_bytes = column_index.max_value(0).unwrap();
         assert_eq!(expected_value, column_index_min_bytes);
         assert_eq!(expected_value, column_index_max_bytes);
 
@@ -3197,8 +3233,12 @@ mod tests {
         // stats should still be written
         // ensure bytes weren't truncated for column index
         let column_index = r.column_index.unwrap();
-        let column_index_min_bytes = column_index.min_values[0].as_slice();
-        let column_index_max_bytes = column_index.max_values[0].as_slice();
+        let column_index = match column_index {
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index,
+            _ => panic!("wrong stats type"),
+        };
+        let column_index_min_bytes = column_index.min_value(0).unwrap();
+        let column_index_max_bytes = column_index.max_value(0).unwrap();
         assert_eq!(expected_value, column_index_min_bytes);
         assert_eq!(expected_value, column_index_max_bytes);
 
@@ -3214,6 +3254,49 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_statistics_truncating_byte_array_default() {
+        let page_writer = get_test_page_writer();
+
+        // Default writer properties: chunk statistics are truncated to 64 bytes
+        let props = WriterProperties::builder().build().into();
+        let mut writer = get_test_column_writer::<ByteArrayType>(page_writer, 0, 0, props);
+
+        let mut data = vec![ByteArray::default(); 1];
+        data[0].set_data(Bytes::from(String::from(
+            "This string is longer than 64 bytes, so it will almost certainly be truncated.",
+        )));
+        writer.write_batch(&data, None, None).unwrap();
+        writer.flush_data_pages().unwrap();
+
+        let r = writer.close().unwrap();
+
+        assert_eq!(1, r.rows_written);
+
+        let stats = r.metadata.statistics().expect("statistics");
+        if let Statistics::ByteArray(byte_stats) = stats {
+            let min_value = byte_stats.min_opt().unwrap();
+            let max_value = byte_stats.max_opt().unwrap();
+
+            assert!(!byte_stats.min_is_exact());
+            assert!(!byte_stats.max_is_exact());
+
+            let expected_len = 64;
+            assert_eq!(min_value.len(), expected_len);
+            assert_eq!(max_value.len(), expected_len);
+
+            let expected_min =
+                "This string is longer than 64 bytes, so it will almost certainly".as_bytes();
+            assert_eq!(expected_min, min_value.as_bytes());
+            // the truncated max differs from the min: its last byte is incremented ('y' -> 'z')
+            let expected_max =
+                "This string is longer than 64 bytes, so it will almost certainlz".as_bytes();
+            assert_eq!(expected_max, max_value.as_bytes());
+        } else {
+            panic!("expecting Statistics::ByteArray");
+        }
+    }
+
     #[test]
     fn test_statistics_truncating_byte_array() {
         let page_writer = get_test_page_writer();
@@ -3526,19 +3609,12 @@ mod tests {
         col_writer.close().unwrap();
         row_group_writer.close().unwrap();
         let file_metadata = writer.close().unwrap();
-        assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some());
-        let stats = file_metadata.row_groups[0].columns[0]
-            .meta_data
-            .as_ref()
-            .unwrap()
-            .statistics
-            .as_ref()
-            .unwrap();
-        assert!(!stats.is_max_value_exact.unwrap());
+        let stats = file_metadata.row_group(0).column(0).statistics().unwrap();
+        assert!(!stats.max_is_exact());
         // Truncation of invalid UTF-8 should fall back to binary truncation, so last byte should
         // be incremented by 1.
         assert_eq!(
-            stats.max_value,
+            stats.max_bytes_opt().map(|v| v.to_vec()),
             Some([128, 128, 128, 128, 128, 128, 128, 129].to_vec())
         );
     }
@@ -3635,8 +3711,11 @@ mod tests {
                 &[Some(-5), Some(11)],
             ],
         )?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::ASCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING));
 
         // min max both descending
         let column_close_result = write_multiple_pages::<Int32Type>(
@@ -3648,34 +3727,49 @@ mod tests {
                 &[Some(-5), Some(0)],
             ],
         )?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::DESCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::DESCENDING));
 
         // min max both equal
         let column_close_result = write_multiple_pages::<Int32Type>(
             &descr,
             &[&[Some(10), Some(11)], &[None], &[Some(10), Some(11)]],
         )?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::ASCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING));
 
         // only nulls
         let column_close_result =
             write_multiple_pages::<Int32Type>(&descr, &[&[None], &[None], &[None]])?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::ASCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING));
 
         // one page
         let column_close_result =
             write_multiple_pages::<Int32Type>(&descr, &[&[Some(-10), Some(10)]])?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::ASCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING));
 
         // one non-null page
         let column_close_result =
             write_multiple_pages::<Int32Type>(&descr, &[&[Some(-10), Some(10)], &[None]])?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::ASCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING));
 
         // min max both unordered
         let column_close_result = write_multiple_pages::<Int32Type>(
@@ -3687,8 +3781,11 @@ mod tests {
                 &[Some(-5), Some(0)],
             ],
         )?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::UNORDERED);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED));
 
         // min max both ordered in different orders
         let column_close_result = write_multiple_pages::<Int32Type>(
@@ -3700,8 +3797,11 @@ mod tests {
                 &[Some(3), Some(7)],
             ],
         )?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::UNORDERED);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED));
 
         Ok(())
     }
@@ -3738,14 +3838,20 @@ mod tests {
         // f16 descending
         let column_close_result =
             write_multiple_pages::<FixedLenByteArrayType>(&f16_descr, values)?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::DESCENDING);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::DESCENDING));
 
         // same bytes, but fba unordered
         let column_close_result =
             write_multiple_pages::<FixedLenByteArrayType>(&fba_descr, values)?;
-        let boundary_order = column_close_result.column_index.unwrap().boundary_order;
-        assert_eq!(boundary_order, BoundaryOrder::UNORDERED);
+        let boundary_order = column_close_result
+            .column_index
+            .unwrap()
+            .get_boundary_order();
+        assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED));
 
         Ok(())
     }
@@ -3997,7 +4103,7 @@ mod tests {
             .build();
         let meta = column_write_and_get_metadata::<T>(props, data);
         assert_eq!(meta.dictionary_page_offset(), dictionary_page_offset);
-        assert_eq!(meta.encodings(), encodings);
+        assert_eq!(meta.encodings().collect::<Vec<_>>(), encodings);
         assert_eq!(meta.page_encoding_stats().unwrap(), page_encoding_stats);
     }
 
@@ -4193,4 +4299,33 @@ mod tests {
             .unwrap();
         ColumnDescriptor::new(Arc::new(tpe), max_def_level, max_rep_level, path)
     }
+
+    #[test]
+    fn test_page_v2_snappy_compression_fallback() {
+        // Test that PageV2 sets is_compressed to false when Snappy compression increases data size
+        let page_writer = TestPageWriter {};
+
+        // Create WriterProperties with PageV2 and Snappy compression
+        let props = WriterProperties::builder()
+            .set_writer_version(WriterVersion::PARQUET_2_0)
+            // Disable dictionary to ensure data is written directly
+            .set_dictionary_enabled(false)
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let mut column_writer =
+            get_test_column_writer::<ByteArrayType>(Box::new(page_writer), 0, 0, Arc::new(props));
+
+        // Create small, simple data that Snappy compression will likely increase in size
+        // due to compression overhead for very small data
+        let values = vec![ByteArray::from("a")];
+
+        column_writer.write_batch(&values, None, None).unwrap();
+
+        let result = column_writer.close().unwrap();
+        assert_eq!(
+            result.metadata.uncompressed_size(),
+            result.metadata.compressed_size()
+        );
+    }
 }
diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index 23c4bce50fa2..530838955c0f 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -198,7 +198,7 @@ pub fn create_codec(codec: CodecType, _options: &CodecOptions) -> Result<Option<
 
 #[cfg(any(feature = "snap", test))]
 mod snappy_codec {
-    use snap::raw::{decompress_len, max_compress_len, Decoder, Encoder};
+    use snap::raw::{Decoder, Encoder, decompress_len, max_compress_len};
 
     use crate::compression::Codec;
     use crate::errors::Result;
@@ -257,7 +257,7 @@ mod gzip_codec {
 
     use std::io::{Read, Write};
 
-    use flate2::{read, write, Compression};
+    use flate2::{Compression, read, write};
 
     use crate::compression::Codec;
     use crate::errors::Result;
@@ -607,7 +607,7 @@ mod lz4_raw_codec {
                 None => {
                     return Err(ParquetError::General(
                         "LZ4RawCodec unsupported without uncompress_size".into(),
-                    ))
+                    ));
                 }
             };
             output_buf.resize(offset + required_len, 0);
@@ -643,9 +643,9 @@ pub use lz4_raw_codec::*;
 
 #[cfg(any(feature = "lz4", test))]
 mod lz4_hadoop_codec {
+    use crate::compression::Codec;
     use crate::compression::lz4_codec::LZ4Codec;
     use crate::compression::lz4_raw_codec::LZ4RawCodec;
-    use crate::compression::Codec;
     use crate::errors::{ParquetError, Result};
     use std::io;
 
@@ -746,7 +746,7 @@ mod lz4_hadoop_codec {
                 None => {
                     return Err(ParquetError::General(
                         "LZ4HadoopCodec unsupported without uncompress_size".into(),
-                    ))
+                    ));
                 }
             };
             output_buf.resize(output_len + required_len, 0);
diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs
index 79ecbea45ebe..df5702d1bb00 100644
--- a/parquet/src/data_type.rs
+++ b/parquet/src/data_type.rs
@@ -33,7 +33,7 @@ use crate::util::bit_util::FromBytes;
 
 /// Rust representation for logical type INT96, value is backed by an array of `u32`.
 /// The type only takes 12 bytes, without extra padding.
-#[derive(Clone, Copy, Debug, PartialOrd, Default, PartialEq, Eq)]
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
 pub struct Int96 {
     value: [u32; 3],
 }
@@ -74,12 +74,6 @@ impl Int96 {
         self.value = [elem0, elem1, elem2];
     }
 
-    /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch
-    #[deprecated(since = "54.0.0", note = "Use `to_millis` instead")]
-    pub fn to_i64(&self) -> i64 {
-        self.to_millis()
-    }
-
     /// Converts this INT96 into an i64 representing the number of SECONDS since EPOCH
     ///
     /// Will wrap around on overflow
@@ -124,14 +118,44 @@ impl Int96 {
             .wrapping_add(nanos)
     }
 
+    #[inline]
+    fn get_days(&self) -> i32 {
+        self.data()[2] as i32
+    }
+
+    #[inline]
+    fn get_nanos(&self) -> i64 {
+        ((self.data()[1] as i64) << 32) + self.data()[0] as i64
+    }
+
     #[inline]
     fn data_as_days_and_nanos(&self) -> (i32, i64) {
-        let day = self.data()[2] as i32;
-        let nanos = ((self.data()[1] as i64) << 32) + self.data()[0] as i64;
-        (day, nanos)
+        (self.get_days(), self.get_nanos())
+    }
+}
+
+impl PartialOrd for Int96 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
     }
 }
 
+impl Ord for Int96 {
+    /// Order `Int96` correctly for (deprecated) timestamp types.
+    ///
+    /// Note: this is done even though the Int96 type is deprecated and the
+    /// [spec does not define the sort order]
+    /// because some engines, notably Spark and Databricks Photon still write
+    /// Int96 timestamps and rely on their order for optimization.
+    ///
+    /// [spec does not define the sort order]: https://github.com/apache/parquet-format/blob/cf943c197f4fad826b14ba0c40eb0ffdab585285/src/main/thrift/parquet.thrift#L1079
+    fn cmp(&self, other: &Self) -> Ordering {
+        match self.get_days().cmp(&other.get_days()) {
+            Ordering::Equal => self.get_nanos().cmp(&other.get_nanos()),
+            ord => ord,
+        }
+    }
+}
 impl From<Vec<u32>> for Int96 {
     fn from(buf: Vec<u32>) -> Self {
         assert_eq!(buf.len(), 3);
@@ -654,7 +678,7 @@ pub(crate) mod private {
     use bytes::Bytes;
 
     use crate::encodings::decoding::PlainDecoderDetails;
-    use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};
+    use crate::util::bit_util::{BitReader, BitWriter, read_num_bytes};
 
     use super::{ParquetError, Result, SliceAsBytes};
     use crate::basic::Type;
@@ -1214,26 +1238,6 @@ pub trait DataType: 'static + Send {
         Self: Sized;
 }
 
-// Workaround bug in specialization
-#[deprecated(
-    since = "54.0.0",
-    note = "Seems like a stray and nobody knows what's it for. Will be removed in 55.0.0"
-)]
-#[allow(missing_docs)]
-pub trait SliceAsBytesDataType: DataType
-where
-    Self::T: SliceAsBytes,
-{
-}
-
-#[allow(deprecated)]
-impl<T> SliceAsBytesDataType for T
-where
-    T: DataType,
-    <T as DataType>::T: SliceAsBytes,
-{
-}
-
 macro_rules! make_type {
     ($name:ident, $reader_ident: ident, $writer_ident: ident, $native_ty:ty, $size:expr) => {
         #[doc = concat!("Parquet physical type: ", stringify!($name))]
diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs
index 03bed70cd67c..1f81c67dab3b 100644
--- a/parquet/src/encodings/decoding.rs
+++ b/parquet/src/encodings/decoding.rs
@@ -18,8 +18,7 @@
 //! Contains all supported decoders for Parquet.
 
 use bytes::Bytes;
-use num::traits::WrappingAdd;
-use num::FromPrimitive;
+use num_traits::{FromPrimitive, WrappingAdd};
 use std::{cmp, marker::PhantomData, mem};
 
 use super::rle::RleDecoder;
@@ -382,9 +381,19 @@ impl<T: DataType> DictDecoder<T> {
 impl<T: DataType> Decoder<T> for DictDecoder<T> {
     fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> {
         // First byte in `data` is bit width
+        if data.is_empty() {
+            return Err(eof_err!("Not enough bytes to decode bit_width"));
+        }
+
         let bit_width = data.as_ref()[0];
+        if bit_width > 32 {
+            return Err(general_err!(
+                "Invalid or corrupted RLE bit width {}. Max allowed is 32",
+                bit_width
+            ));
+        }
         let mut rle_decoder = RleDecoder::new(bit_width);
-        rle_decoder.set_data(data.slice(1..));
+        rle_decoder.set_data(data.slice(1..))?;
         self.num_values = num_values;
         self.rle_decoder = Some(rle_decoder);
         Ok(())
@@ -464,7 +473,7 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {
 
         self.decoder = RleDecoder::new(1);
         self.decoder
-            .set_data(data.slice(I32_SIZE..I32_SIZE + data_size));
+            .set_data(data.slice(I32_SIZE..I32_SIZE + data_size))?;
         self.values_left = num_values;
         Ok(())
     }
@@ -632,6 +641,19 @@ where
             self.next_block()
         }
     }
+
+    /// Verify the bit width is no larger than the width of the integer type being decoded.
+    #[inline]
+    fn check_bit_width(&self, bit_width: usize) -> Result<()> {
+        if bit_width > std::mem::size_of::<T::T>() * 8 {
+            return Err(general_err!(
+                "Invalid delta bit width {} which is larger than expected {} ",
+                bit_width,
+                std::mem::size_of::<T::T>() * 8
+            ));
+        }
+        Ok(())
+    }
 }
 
 impl<T: DataType> Decoder<T> for DeltaBitPackDecoder<T>
@@ -659,6 +681,10 @@ where
             .try_into()
             .map_err(|_| general_err!("invalid 'mini_blocks_per_block'"))?;
 
+        if self.mini_blocks_per_block == 0 {
+            return Err(general_err!("cannot have zero miniblocks per block"));
+        }
+
         self.values_left = self
             .bit_reader
             .get_vlq_int()
@@ -727,6 +753,7 @@ where
             }
 
             let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
+            self.check_bit_width(bit_width)?;
             let batch_to_read = self.mini_block_remaining.min(to_read - read);
 
             let batch_read = self
@@ -797,6 +824,7 @@ where
             }
 
             let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as usize;
+            self.check_bit_width(bit_width)?;
             let mini_block_to_skip = self.mini_block_remaining.min(to_skip - skip);
             let mini_block_should_skip = mini_block_to_skip;
 
@@ -1381,6 +1409,13 @@ mod tests {
         test_plain_skip::<FixedLenByteArrayType>(Bytes::from(data_bytes), 3, 6, 4, &[]);
     }
 
+    #[test]
+    fn test_dict_decoder_empty_data() {
+        let mut decoder = DictDecoder::<Int32Type>::new();
+        let err = decoder.set_data(Bytes::new(), 10).unwrap_err();
+        assert_eq!(err.to_string(), "EOF: Not enough bytes to decode bit_width");
+    }
+
     fn test_plain_decode<T: DataType>(
         data: Bytes,
         num_values: usize,
@@ -1657,6 +1692,21 @@ mod tests {
         test_delta_bit_packed_decode::<Int64Type>(data);
     }
 
+    #[test]
+    fn test_delta_bit_packed_zero_miniblocks() {
+        // It is invalid for mini_blocks_per_block to be 0
+        let data = vec![
+            128, 1, // block_size = 128
+            0, // mini_blocks_per_block = 0
+        ];
+        let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
+        let err = decoder.set_data(data.into(), 0).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: cannot have zero miniblocks per block"
+        );
+    }
+
     #[test]
     fn test_delta_bit_packed_decoder_sample() {
         let data_bytes = vec![
@@ -1831,10 +1881,10 @@ mod tests {
     fn test_byte_stream_split_flba(type_width: usize) {
         let data = vec![
             vec![
-                FixedLenByteArrayType::gen(type_width as i32),
-                FixedLenByteArrayType::gen(type_width as i32),
+                FixedLenByteArrayType::r#gen(type_width as i32),
+                FixedLenByteArrayType::r#gen(type_width as i32),
             ],
-            vec![FixedLenByteArrayType::gen(type_width as i32)],
+            vec![FixedLenByteArrayType::r#gen(type_width as i32)],
         ];
         test_byte_stream_split_decode::<FixedLenByteArrayType>(data, type_width as i32);
     }
@@ -2092,4 +2142,51 @@ mod tests {
             v
         }
     }
+
+    #[test]
+    // Allow initializing a vector and pushing to it for clarity in this test
+    #[allow(clippy::vec_init_then_push)]
+    fn test_delta_bit_packed_invalid_bit_width() {
+        // Manually craft a buffer with an invalid bit width
+        let mut buffer = vec![];
+        // block_size = 128
+        buffer.push(128);
+        buffer.push(1);
+        // mini_blocks_per_block = 4
+        buffer.push(4);
+        // num_values = 32
+        buffer.push(32);
+        // first_value = 0
+        buffer.push(0);
+        // min_delta = 0
+        buffer.push(0);
+        // bit_widths, one for each of the 4 mini blocks
+        buffer.push(33); // Invalid bit width
+        buffer.push(0);
+        buffer.push(0);
+        buffer.push(0);
+
+        let corrupted_buffer = Bytes::from(buffer);
+
+        let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
+        decoder.set_data(corrupted_buffer.clone(), 32).unwrap();
+        let mut read_buffer = vec![0; 32];
+        let err = decoder.get(&mut read_buffer).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Invalid delta bit width 33 which is larger than expected 32"),
+            "{}",
+            err
+        );
+
+        let mut decoder = DeltaBitPackDecoder::<Int32Type>::new();
+        decoder.set_data(corrupted_buffer, 32).unwrap();
+        let err = decoder.skip(32).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Invalid delta bit width 33 which is larger than expected 32"),
+            "{}",
+            err
+        );
+    }
 }
diff --git a/parquet/src/encodings/encoding/dict_encoder.rs b/parquet/src/encodings/encoding/dict_encoder.rs
index 8efb845219d3..79a1f247670c 100644
--- a/parquet/src/encodings/encoding/dict_encoder.rs
+++ b/parquet/src/encodings/encoding/dict_encoder.rs
@@ -21,8 +21,8 @@
 use bytes::Bytes;
 
 use crate::basic::{Encoding, Type};
-use crate::data_type::private::ParquetValueType;
 use crate::data_type::DataType;
+use crate::data_type::private::ParquetValueType;
 use crate::encodings::encoding::{Encoder, PlainEncoder};
 use crate::encodings::rle::RleEncoder;
 use crate::errors::Result;
diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs
index f6d94e03317d..e5e74ac53fa7 100644
--- a/parquet/src/encodings/encoding/mod.rs
+++ b/parquet/src/encodings/encoding/mod.rs
@@ -25,7 +25,7 @@ use crate::data_type::*;
 use crate::encodings::rle::RleEncoder;
 use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
-use crate::util::bit_util::{num_required_bits, BitWriter};
+use crate::util::bit_util::{BitWriter, num_required_bits};
 
 use byte_stream_split_encoder::{ByteStreamSplitEncoder, VariableWidthByteStreamSplitEncoder};
 use bytes::Bytes;
@@ -768,10 +768,10 @@ mod tests {
 
     use std::sync::Arc;
 
-    use crate::encodings::decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder};
+    use crate::encodings::decoding::{Decoder, DictDecoder, PlainDecoder, get_decoder};
     use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType};
     use crate::util::bit_util;
-    use crate::util::test_common::rand_gen::{random_bytes, RandGen};
+    use crate::util::test_common::rand_gen::{RandGen, random_bytes};
 
     const TEST_SET_SIZE: usize = 1024;
 
diff --git a/parquet/src/encodings/levels.rs b/parquet/src/encodings/levels.rs
index 6f662b614fca..8c95e5ca51aa 100644
--- a/parquet/src/encodings/levels.rs
+++ b/parquet/src/encodings/levels.rs
@@ -21,7 +21,7 @@ use super::rle::RleEncoder;
 
 use crate::basic::Encoding;
 use crate::data_type::AsBytes;
-use crate::util::bit_util::{ceil, num_required_bits, BitWriter};
+use crate::util::bit_util::{BitWriter, ceil, num_required_bits};
 
 /// Computes max buffer size for level encoder/decoder based on encoding, max
 /// repetition/definition level and number of total buffered values (includes null
diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs
index d6e32600d321..c95a46c634d2 100644
--- a/parquet/src/encodings/rle.rs
+++ b/parquet/src/encodings/rle.rs
@@ -84,6 +84,7 @@ impl RleEncoder {
 
     /// Initialize the encoder from existing `buffer`
     pub fn new_from_buf(bit_width: u8, buffer: Vec<u8>) -> Self {
+        assert!(bit_width <= 64);
         let bit_writer = BitWriter::new_from_buf(buffer);
         RleEncoder {
             bit_width,
@@ -135,7 +136,7 @@ impl RleEncoder {
         } else {
             if self.repeat_count >= 8 {
                 // The current RLE run has ended and we've gathered enough. Flush first.
-                assert_eq!(self.bit_packed_count, 0);
+                debug_assert_eq!(self.bit_packed_count, 0);
                 self.flush_rle_run();
             }
             self.repeat_count = 1;
@@ -146,7 +147,7 @@ impl RleEncoder {
         self.num_buffered_values += 1;
         if self.num_buffered_values == 8 {
             // Buffered values are full. Flush them.
-            assert_eq!(self.bit_packed_count % 8, 0);
+            debug_assert_eq!(self.bit_packed_count % 8, 0);
             self.flush_buffered_values();
         }
     }
@@ -220,7 +221,7 @@ impl RleEncoder {
     }
 
     fn flush_rle_run(&mut self) {
-        assert!(self.repeat_count > 0);
+        debug_assert!(self.repeat_count > 0);
         let indicator_value = self.repeat_count << 1;
         self.bit_writer.put_vlq_int(indicator_value as u64);
         self.bit_writer.put_aligned(
@@ -237,9 +238,8 @@ impl RleEncoder {
         }
 
         // Write all buffered values as bit-packed literals
-        for i in 0..self.num_buffered_values {
-            self.bit_writer
-                .put_value(self.buffered_values[i], self.bit_width as usize);
+        for v in &self.buffered_values[..self.num_buffered_values] {
+            self.bit_writer.put_value(*v, self.bit_width as usize);
         }
         self.num_buffered_values = 0;
         if update_indicator_byte {
@@ -253,14 +253,13 @@ impl RleEncoder {
         }
     }
 
-    #[inline(never)]
     fn flush_buffered_values(&mut self) {
         if self.repeat_count >= 8 {
             self.num_buffered_values = 0;
             if self.bit_packed_count > 0 {
                 // In this case we choose RLE encoding. Flush the current buffered values
                 // as bit-packed encoding.
-                assert_eq!(self.bit_packed_count % 8, 0);
+                debug_assert_eq!(self.bit_packed_count % 8, 0);
                 self.flush_bit_packed_run(true)
             }
             return;
@@ -271,7 +270,7 @@ impl RleEncoder {
         if num_groups + 1 >= MAX_GROUPS_PER_BIT_PACKED_RUN {
             // We've reached the maximum value that can be hold in a single bit-packed
             // run.
-            assert!(self.indicator_byte_pos >= 0);
+            debug_assert!(self.indicator_byte_pos >= 0);
             self.flush_bit_packed_run(true);
         } else {
             self.flush_bit_packed_run(false);
@@ -322,14 +321,18 @@ impl RleDecoder {
     }
 
     #[inline]
-    pub fn set_data(&mut self, data: Bytes) {
+    pub fn set_data(&mut self, data: Bytes) -> Result<()> {
         if let Some(ref mut bit_reader) = self.bit_reader {
             bit_reader.reset(data);
         } else {
             self.bit_reader = Some(BitReader::new(data));
         }
 
-        let _ = self.reload();
+        // Initialize decoder state. The boolean only reports whether the first run contained data,
+        // and `get`/`get_batch` already interpret that result to drive iteration. We only need
+        // errors propagated here, so the flag returned is intentionally ignored.
+        let _ = self.reload()?;
+        Ok(())
     }
 
     // These functions inline badly, they tend to inline and then create very large loop unrolls
@@ -340,7 +343,7 @@ impl RleDecoder {
         assert!(size_of::<T>() <= 8);
 
         while self.rle_left == 0 && self.bit_packed_left == 0 {
-            if !self.reload() {
+            if !self.reload()? {
                 return Ok(None);
             }
         }
@@ -350,14 +353,17 @@ impl RleDecoder {
                 &self
                     .current_value
                     .as_mut()
-                    .expect("current_value should be Some")
+                    .ok_or_else(|| general_err!("current_value should be Some"))?
                     .to_ne_bytes(),
             )?;
             self.rle_left -= 1;
             rle_value
         } else {
             // self.bit_packed_left > 0
-            let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be Some");
+            let bit_reader = self
+                .bit_reader
+                .as_mut()
+                .ok_or_else(|| general_err!("bit_reader should be Some"))?;
             let bit_packed_value = bit_reader
                 .get_value(self.bit_width as usize)
                 .ok_or_else(|| eof_err!("Not enough data for 'bit_packed_value'"))?;
@@ -384,7 +390,10 @@ impl RleDecoder {
             } else if self.bit_packed_left > 0 {
                 let mut num_values =
                     cmp::min(buffer.len() - values_read, self.bit_packed_left as usize);
-                let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be set");
+                let bit_reader = self
+                    .bit_reader
+                    .as_mut()
+                    .ok_or_else(|| ParquetError::General("bit_reader should be set".into()))?;
 
                 num_values = bit_reader.get_batch::<T>(
                     &mut buffer[values_read..values_read + num_values],
@@ -397,7 +406,7 @@ impl RleDecoder {
                 }
                 self.bit_packed_left -= num_values as u32;
                 values_read += num_values;
-            } else if !self.reload() {
+            } else if !self.reload()? {
                 break;
             }
         }
@@ -416,7 +425,10 @@ impl RleDecoder {
             } else if self.bit_packed_left > 0 {
                 let mut num_values =
                     cmp::min(num_values - values_skipped, self.bit_packed_left as usize);
-                let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be set");
+                let bit_reader = self
+                    .bit_reader
+                    .as_mut()
+                    .ok_or_else(|| general_err!("bit_reader should be set"))?;
 
                 num_values = bit_reader.skip(num_values, self.bit_width as usize);
                 if num_values == 0 {
@@ -426,7 +438,7 @@ impl RleDecoder {
                 }
                 self.bit_packed_left -= num_values as u32;
                 values_skipped += num_values;
-            } else if !self.reload() {
+            } else if !self.reload()? {
                 break;
             }
         }
@@ -460,7 +472,10 @@ impl RleDecoder {
                 self.rle_left -= num_values as u32;
                 values_read += num_values;
             } else if self.bit_packed_left > 0 {
-                let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be set");
+                let bit_reader = self
+                    .bit_reader
+                    .as_mut()
+                    .ok_or_else(|| general_err!("bit_reader should be set"))?;
 
                 loop {
                     let to_read = index_buf
@@ -489,7 +504,7 @@ impl RleDecoder {
                         break;
                     }
                 }
-            } else if !self.reload() {
+            } else if !self.reload()? {
                 break;
             }
         }
@@ -498,15 +513,18 @@ impl RleDecoder {
     }
 
     #[inline]
-    fn reload(&mut self) -> bool {
-        let bit_reader = self.bit_reader.as_mut().expect("bit_reader should be set");
+    fn reload(&mut self) -> Result<bool> {
+        let bit_reader = self
+            .bit_reader
+            .as_mut()
+            .ok_or_else(|| general_err!("bit_reader should be set"))?;
 
         if let Some(indicator_value) = bit_reader.get_vlq_int() {
             // fastparquet adds padding to the end of pages. This is not spec-compliant
             // but is handled by the C++ implementation
             // <https://github.com/apache/arrow/blob/8074496cb41bc8ec8fe9fc814ca5576d89a6eb94/cpp/src/arrow/util/rle_encoding.h#L653>
             if indicator_value == 0 {
-                return false;
+                return Ok(false);
             }
             if indicator_value & 1 == 1 {
                 self.bit_packed_left = ((indicator_value >> 1) * 8) as u32;
@@ -514,11 +532,13 @@ impl RleDecoder {
                 self.rle_left = (indicator_value >> 1) as u32;
                 let value_width = bit_util::ceil(self.bit_width as usize, 8);
                 self.current_value = bit_reader.get_aligned::<u64>(value_width);
-                assert!(self.current_value.is_some());
+                self.current_value.ok_or_else(|| {
+                    general_err!("parquet_data_error: not enough data for RLE decoding")
+                })?;
             }
-            true
+            Ok(true)
         } else {
-            false
+            Ok(false)
         }
     }
 }
@@ -528,7 +548,7 @@ mod tests {
     use super::*;
 
     use crate::util::bit_util::ceil;
-    use rand::{self, distr::StandardUniform, rng, Rng, SeedableRng};
+    use rand::{self, Rng, SeedableRng, distr::StandardUniform, rng};
 
     const MAX_WIDTH: usize = 32;
 
@@ -538,7 +558,7 @@ mod tests {
         // 00000011 10001000 11000110 11111010
         let data = vec![0x03, 0x88, 0xC6, 0xFA];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let mut buffer = vec![0; 8];
         let expected = vec![0, 1, 2, 3, 4, 5, 6, 7];
         let result = decoder.get_batch::<i32>(&mut buffer);
@@ -552,7 +572,7 @@ mod tests {
         // 00000011 10001000 11000110 11111010
         let data = vec![0x03, 0x88, 0xC6, 0xFA];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let expected = vec![2, 3, 4, 5, 6, 7];
         let skipped = decoder.skip(2).expect("skipping values");
         assert_eq!(skipped, 2);
@@ -593,7 +613,7 @@ mod tests {
         ];
 
         let mut decoder: RleDecoder = RleDecoder::new(1);
-        decoder.set_data(data1.into());
+        decoder.set_data(data1.into()).unwrap();
         let mut buffer = vec![false; 100];
         let mut expected = vec![];
         for i in 0..100 {
@@ -607,7 +627,7 @@ mod tests {
         assert!(result.is_ok());
         assert_eq!(buffer, expected);
 
-        decoder.set_data(data2.into());
+        decoder.set_data(data2.into()).unwrap();
         let mut buffer = vec![false; 100];
         let mut expected = vec![];
         for i in 0..100 {
@@ -636,7 +656,7 @@ mod tests {
         ];
 
         let mut decoder: RleDecoder = RleDecoder::new(1);
-        decoder.set_data(data1.into());
+        decoder.set_data(data1.into()).unwrap();
         let mut buffer = vec![true; 50];
         let expected = vec![false; 50];
 
@@ -648,7 +668,7 @@ mod tests {
         assert_eq!(remainder, 50);
         assert_eq!(buffer, expected);
 
-        decoder.set_data(data2.into());
+        decoder.set_data(data2.into()).unwrap();
         let mut buffer = vec![false; 50];
         let mut expected = vec![];
         for i in 0..50 {
@@ -674,7 +694,7 @@ mod tests {
         let dict = vec![10, 20, 30];
         let data = vec![0x06, 0x00, 0x08, 0x01, 0x0A, 0x02];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let mut buffer = vec![0; 12];
         let expected = vec![10, 10, 10, 20, 20, 20, 20, 30, 30, 30, 30, 30];
         let result = decoder.get_batch_with_dict::<i32>(&dict, &mut buffer, 12);
@@ -687,7 +707,7 @@ mod tests {
         let dict = vec!["aaa", "bbb", "ccc", "ddd", "eee", "fff"];
         let data = vec![0x03, 0x63, 0xC7, 0x8E, 0x03, 0x65, 0x0B];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let mut buffer = vec![""; 12];
         let expected = vec![
             "ddd", "eee", "fff", "ddd", "eee", "fff", "ddd", "eee", "fff", "eee", "fff", "fff",
@@ -705,7 +725,7 @@ mod tests {
         let dict = vec![10, 20, 30];
         let data = vec![0x06, 0x00, 0x08, 0x01, 0x0A, 0x02];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let mut buffer = vec![0; 10];
         let expected = vec![10, 20, 20, 20, 20, 30, 30, 30, 30, 30];
         let skipped = decoder.skip(2).expect("skipping two values");
@@ -722,7 +742,7 @@ mod tests {
         let dict = vec!["aaa", "bbb", "ccc", "ddd", "eee", "fff"];
         let data = vec![0x03, 0x63, 0xC7, 0x8E, 0x03, 0x65, 0x0B];
         let mut decoder: RleDecoder = RleDecoder::new(3);
-        decoder.set_data(data.into());
+        decoder.set_data(data.into()).unwrap();
         let mut buffer = vec![""; 8];
         let expected = vec!["eee", "fff", "ddd", "eee", "fff", "eee", "fff", "fff"];
         let skipped = decoder.skip(4).expect("skipping four values");
@@ -755,7 +775,7 @@ mod tests {
 
         // Verify read
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer.clone());
+        decoder.set_data(buffer.clone()).unwrap();
         for v in values {
             let val: i64 = decoder
                 .get()
@@ -765,7 +785,7 @@ mod tests {
         }
 
         // Verify batch read
-        decoder.set_data(buffer);
+        decoder.set_data(buffer).unwrap();
         let mut values_read: Vec<i64> = vec![0; values.len()];
         decoder
             .get_batch(&mut values_read[..])
@@ -866,11 +886,11 @@ mod tests {
         let mut data: Vec<u8> = vec![
             (3 << 1) | 1, // bit-packed run of 3 * 8
         ];
-        data.extend(std::iter::repeat(0xFF).take(20));
+        data.extend(std::iter::repeat_n(0xFF, 20));
         let data: Bytes = data.into();
 
         let mut decoder = RleDecoder::new(8);
-        decoder.set_data(data.clone());
+        decoder.set_data(data.clone()).unwrap();
 
         let mut output = vec![0_u16; 100];
         let read = decoder.get_batch(&mut output).unwrap();
@@ -879,7 +899,7 @@ mod tests {
         assert!(output.iter().take(20).all(|x| *x == 255));
 
         // Reset decoder
-        decoder.set_data(data);
+        decoder.set_data(data).unwrap();
 
         let dict: Vec<u16> = (0..256).collect();
         let mut output = vec![0_u16; 100];
@@ -905,7 +925,7 @@ mod tests {
         buffer.push(0);
 
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer.into());
+        decoder.set_data(buffer.into()).unwrap();
 
         // We don't always reliably know how many non-null values are contained in a page
         // and so the decoder must work correctly without a precise value count
@@ -945,14 +965,14 @@ mod tests {
         let buffer: Bytes = writer.consume().into();
 
         let mut decoder = RleDecoder::new(1);
-        decoder.set_data(buffer.clone());
+        decoder.set_data(buffer.clone()).unwrap();
 
         let mut decoded: Vec<i16> = vec![0; num_values];
         let r = decoder.get_batch(&mut decoded).unwrap();
         assert_eq!(r, num_values);
         assert_eq!(vec![1; num_values], decoded);
 
-        decoder.set_data(buffer);
+        decoder.set_data(buffer).unwrap();
         let r = decoder
             .get_batch_with_dict(&[0, 23], &mut decoded, num_values)
             .unwrap();
@@ -971,7 +991,7 @@ mod tests {
         }
         let buffer = encoder.consume();
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(Bytes::from(buffer));
+        decoder.set_data(Bytes::from(buffer)).unwrap();
         let mut actual_values: Vec<i16> = vec![0; values.len()];
         decoder
             .get_batch(&mut actual_values)
@@ -990,7 +1010,7 @@ mod tests {
 
         // Verify read
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer.clone());
+        decoder.set_data(buffer.clone()).unwrap();
         for v in values {
             let val = decoder
                 .get::<i32>()
@@ -1001,7 +1021,7 @@ mod tests {
 
         // Verify batch read
         let mut decoder = RleDecoder::new(bit_width);
-        decoder.set_data(buffer);
+        decoder.set_data(buffer).unwrap();
         let mut values_read: Vec<i32> = vec![0; values.len()];
         decoder
             .get_batch(&mut values_read[..])
@@ -1026,11 +1046,11 @@ mod tests {
                 .collect();
             let mut seed = [0u8; 32];
             seed.copy_from_slice(&seed_vec[0..seed_len]);
-            let mut gen = rand::rngs::StdRng::from_seed(seed);
+            let mut r#gen = rand::rngs::StdRng::from_seed(seed);
 
             let mut parity = false;
             for _ in 0..ngroups {
-                let mut group_size = gen.random_range(1..20);
+                let mut group_size = r#gen.random_range(1..20);
                 if group_size > max_group_size {
                     group_size = 1;
                 }
diff --git a/parquet/src/encryption/ciphers.rs b/parquet/src/encryption/ciphers.rs
index 5764694675ff..a94c72dcd5ec 100644
--- a/parquet/src/encryption/ciphers.rs
+++ b/parquet/src/encryption/ciphers.rs
@@ -18,7 +18,8 @@
 use crate::errors::ParquetError;
 use crate::errors::ParquetError::General;
 use crate::errors::Result;
-use ring::aead::{Aad, LessSafeKey, NonceSequence, UnboundKey, AES_128_GCM};
+use crate::file::metadata::HeapSize;
+use ring::aead::{AES_128_GCM, Aad, LessSafeKey, NonceSequence, UnboundKey};
 use ring::rand::{SecureRandom, SystemRandom};
 use std::fmt::Debug;
 
@@ -27,7 +28,7 @@ pub(crate) const NONCE_LEN: usize = 12;
 pub(crate) const TAG_LEN: usize = 16;
 pub(crate) const SIZE_LEN: usize = 4;
 
-pub(crate) trait BlockDecryptor: Debug + Send + Sync {
+pub(crate) trait BlockDecryptor: Debug + Send + Sync + HeapSize {
     fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result<Vec<u8>>;
 
     fn compute_plaintext_tag(&self, aad: &[u8], plaintext: &[u8]) -> Result<Vec<u8>>;
@@ -50,6 +51,13 @@ impl RingGcmBlockDecryptor {
     }
 }
 
+impl HeapSize for RingGcmBlockDecryptor {
+    fn heap_size(&self) -> usize {
+        // Ring's LessSafeKey doesn't allocate on the heap
+        0
+    }
+}
+
 impl BlockDecryptor for RingGcmBlockDecryptor {
     fn decrypt(&self, length_and_ciphertext: &[u8], aad: &[u8]) -> Result<Vec<u8>> {
         let mut result = Vec::with_capacity(length_and_ciphertext.len() - SIZE_LEN - NONCE_LEN);
@@ -155,7 +163,7 @@ impl BlockEncryptor for RingGcmBlockEncryptor {
         // Format is: [ciphertext size, nonce, ciphertext, authentication tag]
         let ciphertext_length: u32 = (NONCE_LEN + plaintext.len() + TAG_LEN)
             .try_into()
-            .map_err(|err| General(format!("Plaintext data too long. {:?}", err)))?;
+            .map_err(|err| General(format!("Plaintext data too long. {err:?}")))?;
         // Not checking for overflow here because we've already checked for it with ciphertext_length
         let mut ciphertext = Vec::with_capacity(SIZE_LEN + ciphertext_length as usize);
         ciphertext.extend((ciphertext_length).to_le_bytes());
diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs
index 43b2bb493a1d..0066523419de 100644
--- a/parquet/src/encryption/decrypt.rs
+++ b/parquet/src/encryption/decrypt.rs
@@ -18,9 +18,10 @@
 //! Configuration and utilities for decryption of files using Parquet Modular Encryption
 
 use crate::encryption::ciphers::{BlockDecryptor, RingGcmBlockDecryptor, TAG_LEN};
-use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType};
+use crate::encryption::modules::{ModuleType, create_footer_aad, create_module_aad};
 use crate::errors::{ParquetError, Result};
 use crate::file::column_crypto_metadata::ColumnCryptoMetaData;
+use crate::file::metadata::HeapSize;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::Formatter;
@@ -142,13 +143,13 @@ impl CryptoContext {
         column_ordinal: usize,
     ) -> Result<Self> {
         let (data_decryptor, metadata_decryptor) = match column_crypto_metadata {
-            ColumnCryptoMetaData::EncryptionWithFooterKey => {
+            ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY => {
                 // TODO: In GCM-CTR mode will this need to be a non-GCM decryptor?
                 let data_decryptor = file_decryptor.get_footer_decryptor()?;
                 let metadata_decryptor = file_decryptor.get_footer_decryptor()?;
                 (data_decryptor, metadata_decryptor)
             }
-            ColumnCryptoMetaData::EncryptionWithColumnKey(column_key_encryption) => {
+            ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(column_key_encryption) => {
                 let key_metadata = &column_key_encryption.key_metadata;
                 let full_column_name;
                 let column_name = if column_key_encryption.path_in_schema.len() == 1 {
@@ -271,6 +272,12 @@ struct ExplicitDecryptionKeys {
     column_keys: HashMap<String, Vec<u8>>,
 }
 
+impl HeapSize for ExplicitDecryptionKeys {
+    fn heap_size(&self) -> usize {
+        self.footer_key.heap_size() + self.column_keys.heap_size()
+    }
+}
+
 #[derive(Clone)]
 enum DecryptionKeys {
     Explicit(ExplicitDecryptionKeys),
@@ -290,6 +297,19 @@ impl PartialEq for DecryptionKeys {
     }
 }
 
+impl HeapSize for DecryptionKeys {
+    fn heap_size(&self) -> usize {
+        match self {
+            Self::Explicit(keys) => keys.heap_size(),
+            Self::ViaRetriever(_) => {
+                // The retriever is a user-defined type we don't control,
+                // so we can't determine the heap size.
+                0
+            }
+        }
+    }
+}
+
 /// `FileDecryptionProperties` hold keys and AAD data required to decrypt a Parquet file.
 ///
 /// When reading Arrow data, the `FileDecryptionProperties` should be included in the
@@ -334,6 +354,11 @@ pub struct FileDecryptionProperties {
     footer_signature_verification: bool,
 }
 
+impl HeapSize for FileDecryptionProperties {
+    fn heap_size(&self) -> usize {
+        self.keys.heap_size() + self.aad_prefix.heap_size()
+    }
+}
 impl FileDecryptionProperties {
     /// Returns a new [`FileDecryptionProperties`] builder that will use the provided key to
     /// decrypt footer metadata.
@@ -361,7 +386,7 @@ impl FileDecryptionProperties {
 
     /// Get the encryption key for decrypting a file's footer,
     /// and also column data if uniform encryption is used.
-    pub fn footer_key(&self, key_metadata: Option<&[u8]>) -> Result<Cow<Vec<u8>>> {
+    pub fn footer_key(&self, key_metadata: Option<&[u8]>) -> Result<Cow<'_, Vec<u8>>> {
         match &self.keys {
             DecryptionKeys::Explicit(keys) => Ok(Cow::Borrowed(&keys.footer_key)),
             DecryptionKeys::ViaRetriever(retriever) => {
@@ -376,7 +401,7 @@ impl FileDecryptionProperties {
         &self,
         column_name: &str,
         key_metadata: Option<&[u8]>,
-    ) -> Result<Cow<Vec<u8>>> {
+    ) -> Result<Cow<'_, Vec<u8>>> {
         match &self.keys {
             DecryptionKeys::Explicit(keys) => match keys.column_keys.get(column_name) {
                 None => Err(general_err!(
@@ -438,16 +463,16 @@ impl DecryptionPropertiesBuilder {
     }
 
     /// Finalize the builder and return created [`FileDecryptionProperties`]
-    pub fn build(self) -> Result<FileDecryptionProperties> {
+    pub fn build(self) -> Result<Arc<FileDecryptionProperties>> {
         let keys = DecryptionKeys::Explicit(ExplicitDecryptionKeys {
             footer_key: self.footer_key,
             column_keys: self.column_keys,
         });
-        Ok(FileDecryptionProperties {
+        Ok(Arc::new(FileDecryptionProperties {
             keys,
             aad_prefix: self.aad_prefix,
             footer_signature_verification: self.footer_signature_verification,
-        })
+        }))
     }
 
     /// Specify the expected AAD prefix to be used for decryption.
@@ -509,13 +534,13 @@ impl DecryptionPropertiesBuilderWithRetriever {
     }
 
     /// Finalize the builder and return created [`FileDecryptionProperties`]
-    pub fn build(self) -> Result<FileDecryptionProperties> {
+    pub fn build(self) -> Result<Arc<FileDecryptionProperties>> {
         let keys = DecryptionKeys::ViaRetriever(self.key_retriever);
-        Ok(FileDecryptionProperties {
+        Ok(Arc::new(FileDecryptionProperties {
             keys,
             aad_prefix: self.aad_prefix,
             footer_signature_verification: self.footer_signature_verification,
-        })
+        }))
     }
 
     /// Specify the expected AAD prefix to be used for decryption.
@@ -536,7 +561,7 @@ impl DecryptionPropertiesBuilderWithRetriever {
 
 #[derive(Clone, Debug)]
 pub(crate) struct FileDecryptor {
-    decryption_properties: FileDecryptionProperties,
+    decryption_properties: Arc<FileDecryptionProperties>,
     footer_decryptor: Arc<dyn BlockDecryptor>,
     file_aad: Vec<u8>,
 }
@@ -547,9 +572,24 @@ impl PartialEq for FileDecryptor {
     }
 }
 
+/// Estimate the size in bytes required for the file decryptor.
+/// This is important to track the memory usage of cached Parquet metadata,
+/// and is used via [`crate::file::metadata::ParquetMetaData::memory_size`].
+/// Note that when a [`KeyRetriever`] is used, its heap size won't be included
+/// and the result will be an underestimate.
+/// If the [`FileDecryptionProperties`] are shared between multiple files then the
+/// heap size may also be an overestimate.
+impl HeapSize for FileDecryptor {
+    fn heap_size(&self) -> usize {
+        self.decryption_properties.heap_size()
+            + (Arc::clone(&self.footer_decryptor) as Arc<dyn HeapSize>).heap_size()
+            + self.file_aad.heap_size()
+    }
+}
+
 impl FileDecryptor {
     pub(crate) fn new(
-        decryption_properties: &FileDecryptionProperties,
+        decryption_properties: &Arc<FileDecryptionProperties>,
         footer_key_metadata: Option<&[u8]>,
         aad_file_unique: Vec<u8>,
         aad_prefix: Vec<u8>,
@@ -565,7 +605,7 @@ impl FileDecryptor {
 
         Ok(Self {
             footer_decryptor: Arc::new(footer_decryptor),
-            decryption_properties: decryption_properties.clone(),
+            decryption_properties: Arc::clone(decryption_properties),
             file_aad,
         })
     }
diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs
index c8d3ffc0eef4..d69e3c02500a 100644
--- a/parquet/src/encryption/encrypt.rs
+++ b/parquet/src/encryption/encrypt.rs
@@ -18,16 +18,16 @@
 //! Configuration and utilities for Parquet Modular Encryption
 
 use crate::encryption::ciphers::{
-    BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN,
+    BlockEncryptor, NONCE_LEN, RingGcmBlockEncryptor, SIZE_LEN, TAG_LEN,
 };
 use crate::errors::{ParquetError, Result};
 use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey};
+use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift};
 use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
-use crate::thrift::TSerializable;
 use ring::rand::{SecureRandom, SystemRandom};
 use std::collections::{HashMap, HashSet};
 use std::io::Write;
-use thrift::protocol::TCompactOutputProtocol;
+use std::sync::Arc;
 
 #[derive(Debug, Clone, PartialEq)]
 struct EncryptionKey {
@@ -275,27 +275,27 @@ impl EncryptionPropertiesBuilder {
     }
 
     /// Build the encryption properties
-    pub fn build(self) -> Result<FileEncryptionProperties> {
-        Ok(FileEncryptionProperties {
+    pub fn build(self) -> Result<Arc<FileEncryptionProperties>> {
+        Ok(Arc::new(FileEncryptionProperties {
             encrypt_footer: self.encrypt_footer,
             footer_key: self.footer_key,
             column_keys: self.column_keys,
             aad_prefix: self.aad_prefix,
             store_aad_prefix: self.store_aad_prefix,
-        })
+        }))
     }
 }
 
 #[derive(Debug)]
 /// The encryption configuration for a single Parquet file
 pub(crate) struct FileEncryptor {
-    properties: FileEncryptionProperties,
+    properties: Arc<FileEncryptionProperties>,
     aad_file_unique: Vec<u8>,
     file_aad: Vec<u8>,
 }
 
 impl FileEncryptor {
-    pub(crate) fn new(properties: FileEncryptionProperties) -> Result<Self> {
+    pub(crate) fn new(properties: Arc<FileEncryptionProperties>) -> Result<Self> {
         // Generate unique AAD for file
         let rng = SystemRandom::new();
         let mut aad_file_unique = vec![0u8; 8];
@@ -314,7 +314,7 @@ impl FileEncryptor {
     }
 
     /// Get the encryptor's file encryption properties
-    pub fn properties(&self) -> &FileEncryptionProperties {
+    pub fn properties(&self) -> &Arc<FileEncryptionProperties> {
         &self.properties
     }
 
@@ -365,18 +365,18 @@ impl FileEncryptor {
 }
 
 /// Write an encrypted Thrift serializable object
-pub(crate) fn encrypt_object<T: TSerializable, W: Write>(
+pub(crate) fn encrypt_thrift_object<T: WriteThrift, W: Write>(
     object: &T,
     encryptor: &mut Box<dyn BlockEncryptor>,
     sink: &mut W,
     module_aad: &[u8],
 ) -> Result<()> {
-    let encrypted_buffer = encrypt_object_to_vec(object, encryptor, module_aad)?;
+    let encrypted_buffer = encrypt_thrift_object_to_vec(object, encryptor, module_aad)?;
     sink.write_all(&encrypted_buffer)?;
     Ok(())
 }
 
-pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
+pub(crate) fn write_signed_plaintext_thrift_object<T: WriteThrift, W: Write>(
     object: &T,
     encryptor: &mut Box<dyn BlockEncryptor>,
     sink: &mut W,
@@ -384,8 +384,8 @@ pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
 ) -> Result<()> {
     let mut buffer: Vec<u8> = vec![];
     {
-        let mut protocol = TCompactOutputProtocol::new(&mut buffer);
-        object.write_to_out_protocol(&mut protocol)?;
+        let mut protocol = ThriftCompactOutputProtocol::new(&mut buffer);
+        object.write_thrift(&mut protocol)?;
     }
     sink.write_all(&buffer)?;
     buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?;
@@ -400,15 +400,15 @@ pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
 }
 
 /// Encrypt a Thrift serializable object to a byte vector
-pub(crate) fn encrypt_object_to_vec<T: TSerializable>(
+pub(crate) fn encrypt_thrift_object_to_vec<T: WriteThrift>(
     object: &T,
     encryptor: &mut Box<dyn BlockEncryptor>,
     module_aad: &[u8],
 ) -> Result<Vec<u8>> {
     let mut buffer: Vec<u8> = vec![];
     {
-        let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut buffer);
-        object.write_to_out_protocol(&mut unencrypted_protocol)?;
+        let mut unencrypted_protocol = ThriftCompactOutputProtocol::new(&mut buffer);
+        object.write_thrift(&mut unencrypted_protocol)?;
     }
 
     encryptor.encrypt(buffer.as_ref(), module_aad)
@@ -416,19 +416,19 @@ pub(crate) fn encrypt_object_to_vec<T: TSerializable>(
 
 /// Get the crypto metadata for a column from the file encryption properties
 pub(crate) fn get_column_crypto_metadata(
-    properties: &FileEncryptionProperties,
+    properties: &Arc<FileEncryptionProperties>,
     column: &ColumnDescPtr,
 ) -> Option<ColumnCryptoMetaData> {
     if properties.column_keys.is_empty() {
         // Uniform encryption
-        Some(ColumnCryptoMetaData::EncryptionWithFooterKey)
+        Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY)
     } else {
         properties
             .column_keys
             .get(&column.path().string())
             .map(|encryption_key| {
                 // Column is encrypted with a column specific key
-                ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey {
+                ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey {
                     path_in_schema: column.path().parts().to_vec(),
                     key_metadata: encryption_key.key_metadata.clone(),
                 })
diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs
index 93b2c1b7e028..0533d7662c5f 100644
--- a/parquet/src/errors.rs
+++ b/parquet/src/errors.rs
@@ -19,6 +19,7 @@
 
 use core::num::TryFromIntError;
 use std::error::Error;
+use std::string::FromUtf8Error;
 use std::{cell, io, result, str};
 
 #[cfg(feature = "arrow")]
@@ -52,6 +53,9 @@ pub enum ParquetError {
     /// Returned when a function needs more data to complete properly. The `usize` field indicates
     /// the total number of bytes required, not the number of additional bytes.
     NeedMoreData(usize),
+    /// Returned when a function needs more data to complete properly.
+    /// The `Range<u64>` indicates the range of bytes that are needed.
+    NeedMoreDataRange(std::ops::Range<u64>),
 }
 
 impl std::fmt::Display for ParquetError {
@@ -64,11 +68,14 @@ impl std::fmt::Display for ParquetError {
             ParquetError::EOF(message) => write!(fmt, "EOF: {message}"),
             #[cfg(feature = "arrow")]
             ParquetError::ArrowError(message) => write!(fmt, "Arrow: {message}"),
-            ParquetError::IndexOutOfBound(index, ref bound) => {
+            ParquetError::IndexOutOfBound(index, bound) => {
                 write!(fmt, "Index {index} out of bound: {bound}")
             }
             ParquetError::External(e) => write!(fmt, "External: {e}"),
             ParquetError::NeedMoreData(needed) => write!(fmt, "NeedMoreData: {needed}"),
+            ParquetError::NeedMoreDataRange(range) => {
+                write!(fmt, "NeedMoreDataRange: {}..{}", range.start, range.end)
+            }
         }
     }
 }
@@ -118,6 +125,13 @@ impl From<str::Utf8Error> for ParquetError {
         ParquetError::External(Box::new(e))
     }
 }
+
+impl From<FromUtf8Error> for ParquetError {
+    fn from(e: FromUtf8Error) -> ParquetError {
+        ParquetError::External(Box::new(e))
+    }
+}
+
 #[cfg(feature = "arrow")]
 impl From<ArrowError> for ParquetError {
     fn from(e: ArrowError) -> ParquetError {
diff --git a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs
index af670e675fcd..53d41e58ab6c 100644
--- a/parquet/src/file/column_crypto_metadata.rs
+++ b/parquet/src/file/column_crypto_metadata.rs
@@ -17,60 +17,49 @@
 
 //! Column chunk encryption metadata
 
-use crate::errors::Result;
-use crate::format::{
-    ColumnCryptoMetaData as TColumnCryptoMetaData,
-    EncryptionWithColumnKey as TEncryptionWithColumnKey,
-    EncryptionWithFooterKey as TEncryptionWithFooterKey,
+use std::io::Write;
+
+use crate::errors::{ParquetError, Result};
+use crate::file::metadata::HeapSize;
+use crate::parquet_thrift::{
+    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
+    WriteThrift, WriteThriftField, read_thrift_vec,
 };
+use crate::{thrift_struct, thrift_union};
 
-/// ColumnCryptoMetadata for a column chunk
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum ColumnCryptoMetaData {
-    /// The column is encrypted with the footer key
-    EncryptionWithFooterKey,
-    /// The column is encrypted with a column-specific key
-    EncryptionWithColumnKey(EncryptionWithColumnKey),
-}
+// define this and ColumnCryptoMetadata here so they're only defined when
+// the encryption feature is enabled
 
+thrift_struct!(
 /// Encryption metadata for a column chunk encrypted with a column-specific key
-#[derive(Clone, Debug, PartialEq, Eq)]
 pub struct EncryptionWithColumnKey {
-    /// Path to the column in the Parquet schema
-    pub path_in_schema: Vec<String>,
-    /// Metadata required to retrieve the column encryption key
-    pub key_metadata: Option<Vec<u8>>,
+  /// Path to the column in the Parquet schema
+  1: required list<string> path_in_schema
+
+  /// Metadata required to retrieve the column encryption key
+  2: optional binary key_metadata
 }
+);
 
-/// Converts Thrift definition into `ColumnCryptoMetadata`.
-pub fn try_from_thrift(
-    thrift_column_crypto_metadata: &TColumnCryptoMetaData,
-) -> Result<ColumnCryptoMetaData> {
-    let crypto_metadata = match thrift_column_crypto_metadata {
-        TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_) => {
-            ColumnCryptoMetaData::EncryptionWithFooterKey
-        }
-        TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(encryption_with_column_key) => {
-            ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey {
-                path_in_schema: encryption_with_column_key.path_in_schema.clone(),
-                key_metadata: encryption_with_column_key.key_metadata.clone(),
-            })
-        }
-    };
-    Ok(crypto_metadata)
+impl HeapSize for EncryptionWithColumnKey {
+    fn heap_size(&self) -> usize {
+        self.path_in_schema.heap_size() + self.key_metadata.heap_size()
+    }
 }
 
-/// Converts `ColumnCryptoMetadata` into Thrift definition.
-pub fn to_thrift(column_crypto_metadata: &ColumnCryptoMetaData) -> TColumnCryptoMetaData {
-    match column_crypto_metadata {
-        ColumnCryptoMetaData::EncryptionWithFooterKey => {
-            TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(TEncryptionWithFooterKey {})
-        }
-        ColumnCryptoMetaData::EncryptionWithColumnKey(encryption_with_column_key) => {
-            TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(TEncryptionWithColumnKey {
-                path_in_schema: encryption_with_column_key.path_in_schema.clone(),
-                key_metadata: encryption_with_column_key.key_metadata.clone(),
-            })
+thrift_union!(
+/// ColumnCryptoMetadata for a column chunk
+union ColumnCryptoMetaData {
+  1: ENCRYPTION_WITH_FOOTER_KEY
+  2: (EncryptionWithColumnKey) ENCRYPTION_WITH_COLUMN_KEY
+}
+);
+
+impl HeapSize for ColumnCryptoMetaData {
+    fn heap_size(&self) -> usize {
+        match self {
+            Self::ENCRYPTION_WITH_FOOTER_KEY => 0,
+            Self::ENCRYPTION_WITH_COLUMN_KEY(path) => path.heap_size(),
         }
     }
 }
@@ -78,21 +67,25 @@ pub fn to_thrift(column_crypto_metadata: &ColumnCryptoMetaData) -> TColumnCrypto
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::parquet_thrift::tests::test_roundtrip;
 
     #[test]
-    fn test_encryption_with_footer_key_from_thrift() {
-        let metadata = ColumnCryptoMetaData::EncryptionWithFooterKey;
-
-        assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata);
-    }
-
-    #[test]
-    fn test_encryption_with_column_key_from_thrift() {
-        let metadata = ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey {
-            path_in_schema: vec!["abc".to_owned(), "def".to_owned()],
-            key_metadata: Some(vec![0, 1, 2, 3, 4, 5]),
-        });
+    fn test_column_crypto_roundtrip() {
+        test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY);
 
-        assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata);
+        let path_in_schema = vec!["foo".to_owned(), "bar".to_owned(), "really".to_owned()];
+        let key_metadata = vec![1u8; 32];
+        test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(
+            EncryptionWithColumnKey {
+                path_in_schema: path_in_schema.clone(),
+                key_metadata: None,
+            },
+        ));
+        test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(
+            EncryptionWithColumnKey {
+                path_in_schema,
+                key_metadata: Some(key_metadata),
+            },
+        ));
     }
 }
diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs
deleted file mode 100644
index 85ef30cd0ecc..000000000000
--- a/parquet/src/file/footer.rs
+++ /dev/null
@@ -1,81 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Module for working with Parquet file footers.
-
-use crate::errors::Result;
-use crate::file::{metadata::*, reader::ChunkReader, FOOTER_SIZE};
-
-/// Reads the [ParquetMetaData] from the footer of the parquet file.
-///
-/// # Layout of Parquet file
-/// ```text
-/// +---------------------------+-----+---+
-/// |      Rest of file         |  B  | A |
-/// +---------------------------+-----+---+
-/// ```
-/// where
-/// * `A`: parquet footer which stores the length of the metadata.
-/// * `B`: parquet metadata.
-///
-/// # I/O
-///
-/// This method first reads the last 8 bytes of the file via
-/// [`ChunkReader::get_read`] to get the the parquet footer which contains the
-/// metadata length.
-///
-/// It then issues a second `get_read` to read the encoded metadata
-/// metadata.
-///
-/// # See Also
-/// [`decode_metadata`] for decoding the metadata from the bytes.
-/// [`decode_footer`] for decoding the metadata length from the footer.
-#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader")]
-pub fn parse_metadata<R: ChunkReader>(chunk_reader: &R) -> Result<ParquetMetaData> {
-    ParquetMetaDataReader::new().parse_and_finish(chunk_reader)
-}
-
-/// Decodes [`ParquetMetaData`] from the provided bytes.
-///
-/// Typically this is used to decode the metadata from the end of a parquet
-/// file. The format of `buf` is the Thrift compact binary protocol, as specified
-/// by the [Parquet Spec].
-///
-/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
-#[deprecated(since = "53.1.0", note = "Use ParquetMetaDataReader::decode_metadata")]
-pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> {
-    ParquetMetaDataReader::decode_metadata(buf)
-}
-
-/// Decodes the Parquet footer returning the metadata length in bytes
-///
-/// A parquet footer is 8 bytes long and has the following layout:
-/// * 4 bytes for the metadata length
-/// * 4 bytes for the magic bytes 'PAR1'
-///
-/// ```text
-/// +-----+--------+
-/// | len | 'PAR1' |
-/// +-----+--------+
-/// ```
-#[deprecated(
-    since = "53.1.0",
-    note = "Use ParquetMetaDataReader::decode_footer_tail"
-)]
-pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
-    ParquetMetaDataReader::decode_footer_tail(slice).map(|f| f.metadata_length())
-}
diff --git a/parquet/src/file/metadata/footer_tail.rs b/parquet/src/file/metadata/footer_tail.rs
new file mode 100644
index 000000000000..c33bc7a25c5a
--- /dev/null
+++ b/parquet/src/file/metadata/footer_tail.rs
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::errors::{ParquetError, Result};
+use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER};
+
+/// Parsed Parquet footer tail (last 8 bytes of a Parquet file)
+///
+/// There are 8 bytes at the end of the Parquet footer with the following layout:
+/// * 4 bytes for the metadata length
+/// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
+///
+/// ```text
+/// +-----+------------------+
+/// | len | 'PAR1' or 'PARE' |
+/// +-----+------------------+
+/// ```
+///
+/// # Examples
+/// ```
+/// # use parquet::file::metadata::FooterTail;
+/// // a non encrypted footer with 28 bytes of metadata
+/// let last_8_bytes: [u8; 8] = [0x1C, 0x00, 0x00, 0x00, b'P', b'A', b'R', b'1'];
+/// let footer_tail = FooterTail::try_from(last_8_bytes).unwrap();
+/// assert_eq!(footer_tail.metadata_length(), 28);
+/// assert_eq!(footer_tail.is_encrypted_footer(), false);
+/// ```
+///
+/// ```
+/// # use parquet::file::metadata::FooterTail;
+/// // an encrypted footer with 512 bytes of metadata
+/// let last_8_bytes = vec![0x00, 0x02, 0x00, 0x00, b'P', b'A', b'R', b'E'];
+/// let footer_tail = FooterTail::try_from(&last_8_bytes[..]).unwrap();
+/// assert_eq!(footer_tail.metadata_length(), 512);
+/// assert_eq!(footer_tail.is_encrypted_footer(), true);
+/// ```
+///
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct FooterTail {
+    metadata_length: usize,
+    encrypted_footer: bool,
+}
+
+impl FooterTail {
+    /// Try to decode the footer tail from the given 8 bytes
+    pub fn try_new(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> {
+        let magic = &slice[4..];
+        let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
+            true
+        } else if magic == PARQUET_MAGIC {
+            false
+        } else {
+            return Err(general_err!("Invalid Parquet file. Corrupt footer"));
+        };
+        // get the metadata length from the footer
+        let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
+
+        Ok(FooterTail {
+            // u32 -> usize only fails on targets where usize is narrower than 32 bits
+            metadata_length: metadata_len.try_into()?,
+            encrypted_footer,
+        })
+    }
+
+    /// The length of the footer metadata in bytes
+    pub fn metadata_length(&self) -> usize {
+        self.metadata_length
+    }
+
+    /// Whether the footer metadata is encrypted
+    pub fn is_encrypted_footer(&self) -> bool {
+        self.encrypted_footer
+    }
+}
+
+impl TryFrom<[u8; FOOTER_SIZE]> for FooterTail {
+    type Error = ParquetError;
+
+    fn try_from(value: [u8; FOOTER_SIZE]) -> Result<Self> {
+        Self::try_new(&value)
+    }
+}
+
+impl TryFrom<&[u8]> for FooterTail {
+    type Error = ParquetError;
+
+    fn try_from(value: &[u8]) -> Result<Self> {
+        // The array conversion succeeds only for exactly FOOTER_SIZE bytes.
+        let Ok(tail) = <&[u8; FOOTER_SIZE]>::try_from(value) else {
+            return Err(general_err!(
+                "Invalid footer length {}, expected {FOOTER_SIZE}",
+                value.len()
+            ));
+        };
+        Self::try_new(tail)
+    }
+}
diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs
index ad452267901a..30c10e7f2293 100644
--- a/parquet/src/file/metadata/memory.rs
+++ b/parquet/src/file/metadata/memory.rs
@@ -18,14 +18,18 @@
 //! Memory calculations for [`ParquetMetadata::memory_size`]
 //!
 //! [`ParquetMetadata::memory_size`]: crate::file::metadata::ParquetMetaData::memory_size
-use crate::basic::{ColumnOrder, Compression, Encoding, PageType};
+use crate::basic::{BoundaryOrder, ColumnOrder, Compression, Encoding, PageType};
 use crate::data_type::private::ParquetValueType;
-use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData};
-use crate::file::page_encoding_stats::PageEncodingStats;
-use crate::file::page_index::index::{Index, NativeIndex, PageIndex};
-use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::file::metadata::{
+    ColumnChunkMetaData, FileMetaData, KeyValue, PageEncodingStats, ParquetPageEncodingStats,
+    RowGroupMetaData, SortingColumn,
+};
+use crate::file::page_index::column_index::{
+    ByteArrayColumnIndex, ColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
+};
+use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation};
 use crate::file::statistics::{Statistics, ValueStatistics};
-use crate::format::{BoundaryOrder, PageLocation, SortingColumn};
+use std::collections::HashMap;
 use std::sync::Arc;
 
 /// Trait for calculating the size of various containers
@@ -48,9 +52,66 @@ impl<T: HeapSize> HeapSize for Vec<T> {
     }
 }
 
+impl<K: HeapSize, V: HeapSize> HeapSize for HashMap<K, V> {
+    fn heap_size(&self) -> usize {
+        let capacity = self.capacity();
+        if capacity == 0 {
+            return 0;
+        }
+
+        // HashMap doesn't provide a way to get its heap size, so this is an approximation based on
+        // the behavior of hashbrown::HashMap as at version 0.16.0, and may become inaccurate
+        // if the implementation changes.
+        let key_val_size = std::mem::size_of::<(K, V)>();
+        // Overhead for the control tags group, which may be smaller depending on architecture
+        let group_size = 16;
+        // 1 byte of metadata stored per bucket.
+        let metadata_size = 1;
+
+        // Compute the number of buckets for the capacity. Based on hashbrown's capacity_to_buckets
+        let buckets = if capacity < 15 {
+            let min_cap = match key_val_size {
+                0..=1 => 14,
+                2..=3 => 7,
+                _ => 3,
+            };
+            let cap = min_cap.max(capacity);
+            if cap < 4 {
+                4
+            } else if cap < 8 {
+                8
+            } else {
+                16
+            }
+        } else {
+            (capacity.saturating_mul(8) / 7).next_power_of_two()
+        };
+
+        group_size
+            + (buckets * (key_val_size + metadata_size))
+            + self.keys().map(|k| k.heap_size()).sum::<usize>()
+            + self.values().map(|v| v.heap_size()).sum::<usize>()
+    }
+}
+
 impl<T: HeapSize> HeapSize for Arc<T> {
     fn heap_size(&self) -> usize {
-        self.as_ref().heap_size()
+        // Arc stores weak and strong counts on the heap alongside an instance of T
+        2 * std::mem::size_of::<usize>() + std::mem::size_of::<T>() + self.as_ref().heap_size()
+    }
+}
+
+impl HeapSize for Arc<dyn HeapSize> {
+    fn heap_size(&self) -> usize {
+        2 * std::mem::size_of::<usize>()
+            + std::mem::size_of_val(self.as_ref())
+            + self.as_ref().heap_size()
+    }
+}
+
+impl<T: HeapSize> HeapSize for Box<T> {
+    fn heap_size(&self) -> usize {
+        std::mem::size_of::<T>() + self.as_ref().heap_size()
     }
 }
 
@@ -68,10 +129,17 @@ impl HeapSize for String {
 
 impl HeapSize for FileMetaData {
     fn heap_size(&self) -> usize {
+        #[cfg(feature = "encryption")]
+        let encryption_heap_size =
+            self.encryption_algorithm.heap_size() + self.footer_signing_key_metadata.heap_size();
+        #[cfg(not(feature = "encryption"))]
+        let encryption_heap_size = 0;
+
         self.created_by.heap_size()
             + self.key_value_metadata.heap_size()
             + self.schema_descr.heap_size()
             + self.column_orders.heap_size()
+            + encryption_heap_size
     }
 }
 
@@ -91,6 +159,12 @@ impl HeapSize for RowGroupMetaData {
 
 impl HeapSize for ColumnChunkMetaData {
     fn heap_size(&self) -> usize {
+        #[cfg(feature = "encryption")]
+        let encryption_heap_size =
+            self.column_crypto_metadata.heap_size() + self.encrypted_column_metadata.heap_size();
+        #[cfg(not(feature = "encryption"))]
+        let encryption_heap_size = 0;
+
         // don't count column_descr here because it is already counted in
         // FileMetaData
         self.encodings.heap_size()
@@ -101,6 +175,8 @@ impl HeapSize for ColumnChunkMetaData {
             + self.unencoded_byte_array_data_bytes.heap_size()
             + self.repetition_level_histogram.heap_size()
             + self.definition_level_histogram.heap_size()
+            + self.geo_statistics.heap_size()
+            + encryption_heap_size
     }
 }
 
@@ -110,6 +186,15 @@ impl HeapSize for Encoding {
     }
 }
 
+impl HeapSize for ParquetPageEncodingStats {
+    fn heap_size(&self) -> usize {
+        match self {
+            Self::Full(v) => v.heap_size(),
+            Self::Mask(_) => 0,
+        }
+    }
+}
+
 impl HeapSize for PageEncodingStats {
     fn heap_size(&self) -> usize {
         self.page_type.heap_size() + self.encoding.heap_size()
@@ -132,6 +217,7 @@ impl HeapSize for PageType {
         0 // no heap allocations
     }
 }
+
 impl HeapSize for Statistics {
     fn heap_size(&self) -> usize {
         match self {
@@ -153,31 +239,45 @@ impl HeapSize for OffsetIndexMetaData {
     }
 }
 
-impl HeapSize for Index {
+impl HeapSize for ColumnIndexMetaData {
     fn heap_size(&self) -> usize {
         match self {
-            Index::NONE => 0,
-            Index::BOOLEAN(native_index) => native_index.heap_size(),
-            Index::INT32(native_index) => native_index.heap_size(),
-            Index::INT64(native_index) => native_index.heap_size(),
-            Index::INT96(native_index) => native_index.heap_size(),
-            Index::FLOAT(native_index) => native_index.heap_size(),
-            Index::DOUBLE(native_index) => native_index.heap_size(),
-            Index::BYTE_ARRAY(native_index) => native_index.heap_size(),
-            Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(),
+            Self::NONE => 0,
+            Self::BOOLEAN(native_index) => native_index.heap_size(),
+            Self::INT32(native_index) => native_index.heap_size(),
+            Self::INT64(native_index) => native_index.heap_size(),
+            Self::INT96(native_index) => native_index.heap_size(),
+            Self::FLOAT(native_index) => native_index.heap_size(),
+            Self::DOUBLE(native_index) => native_index.heap_size(),
+            Self::BYTE_ARRAY(native_index) => native_index.heap_size(),
+            Self::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(),
         }
     }
 }
 
-impl<T: ParquetValueType> HeapSize for NativeIndex<T> {
+impl HeapSize for ColumnIndex {
     fn heap_size(&self) -> usize {
-        self.indexes.heap_size() + self.boundary_order.heap_size()
+        self.null_pages.heap_size()
+            + self.boundary_order.heap_size()
+            + self.null_counts.heap_size()
+            + self.definition_level_histograms.heap_size()
+            + self.repetition_level_histograms.heap_size()
     }
 }
 
-impl<T: ParquetValueType> HeapSize for PageIndex<T> {
+impl<T: ParquetValueType> HeapSize for PrimitiveColumnIndex<T> {
     fn heap_size(&self) -> usize {
-        self.min.heap_size() + self.max.heap_size() + self.null_count.heap_size()
+        self.column_index.heap_size() + self.min_values.heap_size() + self.max_values.heap_size()
+    }
+}
+
+impl HeapSize for ByteArrayColumnIndex {
+    fn heap_size(&self) -> usize {
+        self.column_index.heap_size()
+            + self.min_bytes.heap_size()
+            + self.min_offsets.heap_size()
+            + self.max_bytes.heap_size()
+            + self.max_offsets.heap_size()
     }
 }
 
@@ -192,6 +292,11 @@ impl HeapSize for bool {
         0 // no heap allocations
     }
 }
+impl HeapSize for u8 {
+    fn heap_size(&self) -> usize {
+        0 // no heap allocations
+    }
+}
 impl HeapSize for i32 {
     fn heap_size(&self) -> usize {
         0 // no heap allocations
diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs
index d5877aa4566a..ca3a9e10978b 100644
--- a/parquet/src/file/metadata/mod.rs
+++ b/parquet/src/file/metadata/mod.rs
@@ -17,9 +17,7 @@
 
 //! Parquet metadata API
 //!
-//! Most users should use these structures to interact with Parquet metadata.
-//! The [crate::format] module contains lower level structures generated from the
-//! Parquet thrift definition.
+//! Users should use these structures to interact with Parquet metadata.
 //!
 //! * [`ParquetMetaData`]: Top level metadata container, read from the Parquet
 //!   file footer.
@@ -40,12 +38,10 @@
 //! metadata into parquet files. To work with metadata directly,
 //! the following APIs are available:
 //!
-//! * [`ParquetMetaDataReader`] for reading
+//! * [`ParquetMetaDataReader`] for reading metadata from an I/O source (sync and async)
+//! * [`ParquetMetaDataPushDecoder`] for decoding from bytes without I/O
 //! * [`ParquetMetaDataWriter`] for writing.
 //!
-//! [`ParquetMetaDataReader`]: https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html
-//! [`ParquetMetaDataWriter`]: https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataWriter.html
-//!
 //! # Examples
 //!
 //! Please see [`external_metadata.rs`]
@@ -67,7 +63,6 @@
 //!    with a more idiomatic API. Note that, confusingly, some but not all
 //!    of these structures have the same name as the [`format`] structures.
 //!
-//! [`format`]: crate::format
 //! [`file::metadata`]: crate::file::metadata
 //! [parquet.thrift]:  https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift
 //!
@@ -91,36 +86,52 @@
 //!
 //!                         * Same name, different struct
 //! ```
+mod footer_tail;
 mod memory;
+mod options;
+mod parser;
+mod push_decoder;
 pub(crate) mod reader;
+pub(crate) mod thrift;
 mod writer;
 
-use crate::basic::{ColumnOrder, Compression, Encoding, Type};
+use crate::basic::{EncodingMask, PageType};
 #[cfg(feature = "encryption")]
-use crate::encryption::{
-    decrypt::FileDecryptor,
-    modules::{create_module_aad, ModuleType},
-};
-use crate::errors::{ParquetError, Result};
+use crate::encryption::decrypt::FileDecryptor;
 #[cfg(feature = "encryption")]
-use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData};
+use crate::file::column_crypto_metadata::ColumnCryptoMetaData;
 pub(crate) use crate::file::metadata::memory::HeapSize;
-use crate::file::page_encoding_stats::{self, PageEncodingStats};
-use crate::file::page_index::index::Index;
-use crate::file::page_index::offset_index::OffsetIndexMetaData;
-use crate::file::statistics::{self, Statistics};
-use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData;
-use crate::format::{
-    BoundaryOrder, ColumnChunk, ColumnIndex, ColumnMetaData, OffsetIndex, PageLocation, RowGroup,
-    SizeStatistics, SortingColumn,
-};
+#[cfg(feature = "encryption")]
+use crate::file::metadata::thrift::encryption::EncryptionAlgorithm;
+use crate::file::page_index::column_index::{ByteArrayColumnIndex, PrimitiveColumnIndex};
+use crate::file::page_index::{column_index::ColumnIndexMetaData, offset_index::PageLocation};
+use crate::file::statistics::Statistics;
+use crate::geospatial::statistics as geo_statistics;
 use crate::schema::types::{
     ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor,
     Type as SchemaType,
 };
-#[cfg(feature = "encryption")]
-use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
-pub use reader::{FooterTail, ParquetMetaDataReader};
+use crate::thrift_struct;
+use crate::{
+    basic::BoundaryOrder,
+    errors::{ParquetError, Result},
+};
+use crate::{
+    basic::{ColumnOrder, Compression, Encoding, Type},
+    parquet_thrift::{
+        ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol,
+        ThriftCompactOutputProtocol, WriteThrift, WriteThriftField,
+    },
+};
+use crate::{
+    data_type::private::ParquetValueType, file::page_index::offset_index::OffsetIndexMetaData,
+};
+
+pub use footer_tail::FooterTail;
+pub use options::{ParquetMetaDataOptions, ParquetStatisticsPolicy};
+pub use push_decoder::ParquetMetaDataPushDecoder;
+pub use reader::{PageIndexPolicy, ParquetMetaDataReader};
+use std::io::Write;
 use std::ops::Range;
 use std::sync::Arc;
 pub use writer::ParquetMetaDataWriter;
@@ -130,18 +141,19 @@ pub(crate) use writer::ThriftMetadataWriter;
 ///
 /// This structure is an in-memory representation of multiple [`ColumnIndex`]
 /// structures in a parquet file footer, as described in the Parquet [PageIndex
-/// documentation]. Each [`Index`] holds statistics about all the pages in a
+/// documentation]. Each [`ColumnIndex`] holds statistics about all the pages in a
 /// particular column chunk.
 ///
 /// `column_index[row_group_number][column_number]` holds the
-/// [`Index`] corresponding to column `column_number` of row group
+/// [`ColumnIndex`] corresponding to column `column_number` of row group
 /// `row_group_number`.
 ///
-/// For example `column_index[2][3]` holds the [`Index`] for the fourth
+/// For example `column_index[2][3]` holds the [`ColumnIndex`] for the fourth
 /// column in the third row group of the parquet file.
 ///
 /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
-pub type ParquetColumnIndex = Vec<Vec<Index>>;
+/// [`ColumnIndex`]: crate::file::page_index::column_index::ColumnIndexMetaData
+pub type ParquetColumnIndex = Vec<Vec<ColumnIndexMetaData>>;
 
 /// [`OffsetIndexMetaData`] for each data page of each row group of each column
 ///
@@ -153,6 +165,7 @@ pub type ParquetColumnIndex = Vec<Vec<Index>>;
 /// `column_number`of row group `row_group_number`.
 ///
 /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 pub type ParquetOffsetIndex = Vec<Vec<OffsetIndexMetaData>>;
 
 /// Parsed metadata for a single Parquet file
@@ -184,7 +197,7 @@ pub struct ParquetMetaData {
     offset_index: Option<ParquetOffsetIndex>,
     /// Optional file decryptor
     #[cfg(feature = "encryption")]
-    file_decryptor: Option<FileDecryptor>,
+    file_decryptor: Option<Box<FileDecryptor>>,
 }
 
 impl ParquetMetaData {
@@ -194,10 +207,10 @@ impl ParquetMetaData {
         ParquetMetaData {
             file_metadata,
             row_groups,
-            #[cfg(feature = "encryption")]
-            file_decryptor: None,
             column_index: None,
             offset_index: None,
+            #[cfg(feature = "encryption")]
+            file_decryptor: None,
         }
     }
 
@@ -205,23 +218,7 @@ impl ParquetMetaData {
     /// encrypted data.
     #[cfg(feature = "encryption")]
     pub(crate) fn with_file_decryptor(&mut self, file_decryptor: Option<FileDecryptor>) {
-        self.file_decryptor = file_decryptor;
-    }
-
-    /// Creates Parquet metadata from file metadata, a list of row
-    /// group metadata, and the column index structures.
-    #[deprecated(since = "53.1.0", note = "Use ParquetMetaDataBuilder")]
-    pub fn new_with_page_index(
-        file_metadata: FileMetaData,
-        row_groups: Vec<RowGroupMetaData>,
-        column_index: Option<ParquetColumnIndex>,
-        offset_index: Option<ParquetOffsetIndex>,
-    ) -> Self {
-        ParquetMetaDataBuilder::new(file_metadata)
-            .set_row_groups(row_groups)
-            .set_column_index(column_index)
-            .set_offset_index(offset_index)
-            .build()
+        self.file_decryptor = file_decryptor.map(Box::new);
     }
 
     /// Convert this ParquetMetaData into a [`ParquetMetaDataBuilder`]
@@ -237,7 +234,7 @@ impl ParquetMetaData {
     /// Returns file decryptor as reference.
     #[cfg(feature = "encryption")]
     pub(crate) fn file_decryptor(&self) -> Option<&FileDecryptor> {
-        self.file_decryptor.as_ref()
+        self.file_decryptor.as_deref()
     }
 
     /// Returns number of row groups in this file.
@@ -291,11 +288,17 @@ impl ParquetMetaData {
     ///
     /// 4. Does not include any allocator overheads
     pub fn memory_size(&self) -> usize {
+        #[cfg(feature = "encryption")]
+        let encryption_size = self.file_decryptor.heap_size();
+        #[cfg(not(feature = "encryption"))]
+        let encryption_size = 0usize;
+
         std::mem::size_of::<Self>()
             + self.file_metadata.heap_size()
             + self.row_groups.heap_size()
             + self.column_index.heap_size()
             + self.offset_index.heap_size()
+            + encryption_size
     }
 
     /// Override the column index
@@ -417,6 +420,13 @@ impl ParquetMetaDataBuilder {
         self.0.offset_index.as_ref()
     }
 
+    /// Sets the file decryptor needed to decrypt this metadata.
+    #[cfg(feature = "encryption")]
+    pub(crate) fn set_file_decryptor(mut self, file_decryptor: Option<FileDecryptor>) -> Self {
+        self.0.with_file_decryptor(file_decryptor);
+        self
+    }
+
     /// Creates a new ParquetMetaData from the builder
     pub fn build(self) -> ParquetMetaData {
         let Self(metadata) = self;
@@ -430,8 +440,45 @@ impl From<ParquetMetaData> for ParquetMetaDataBuilder {
     }
 }
 
+thrift_struct!(
 /// A key-value pair for [`FileMetaData`].
-pub type KeyValue = crate::format::KeyValue;
+pub struct KeyValue {
+  1: required string key
+  2: optional string value
+}
+);
+
+impl KeyValue {
+    /// Create a new key value pair
+    pub fn new<F2>(key: String, value: F2) -> KeyValue
+    where
+        F2: Into<Option<String>>,
+    {
+        KeyValue {
+            key,
+            value: value.into(),
+        }
+    }
+}
+
+thrift_struct!(
+/// PageEncodingStats for a column chunk and data page.
+pub struct PageEncodingStats {
+  1: required PageType page_type;
+  2: required Encoding encoding;
+  3: required i32 count;
+}
+);
+
+/// Internal representation of the page encoding stats in the [`ColumnChunkMetaData`].
+/// This enum is not publicly exposed; a separate getter is defined for each variant.
+#[derive(Debug, Clone, PartialEq)]
+enum ParquetPageEncodingStats {
+    /// The full array of stats as defined in the Parquet spec.
+    Full(Vec<PageEncodingStats>),
+    /// A condensed version of only page encodings seen.
+    Mask(EncodingMask),
+}
 
 /// Reference counted pointer for [`FileMetaData`].
 pub type FileMetaDataPtr = Arc<FileMetaData>;
@@ -447,6 +494,10 @@ pub struct FileMetaData {
     key_value_metadata: Option<Vec<KeyValue>>,
     schema_descr: SchemaDescPtr,
     column_orders: Option<Vec<ColumnOrder>>,
+    #[cfg(feature = "encryption")]
+    encryption_algorithm: Option<Box<EncryptionAlgorithm>>,
+    #[cfg(feature = "encryption")]
+    footer_signing_key_metadata: Option<Vec<u8>>,
 }
 
 impl FileMetaData {
@@ -466,9 +517,31 @@ impl FileMetaData {
             key_value_metadata,
             schema_descr,
             column_orders,
+            #[cfg(feature = "encryption")]
+            encryption_algorithm: None,
+            #[cfg(feature = "encryption")]
+            footer_signing_key_metadata: None,
         }
     }
 
+    #[cfg(feature = "encryption")]
+    pub(crate) fn with_encryption_algorithm(
+        mut self,
+        encryption_algorithm: Option<EncryptionAlgorithm>,
+    ) -> Self {
+        self.encryption_algorithm = encryption_algorithm.map(Box::new);
+        self
+    }
+
+    #[cfg(feature = "encryption")]
+    pub(crate) fn with_footer_signing_key_metadata(
+        mut self,
+        footer_signing_key_metadata: Option<Vec<u8>>,
+    ) -> Self {
+        self.footer_signing_key_metadata = footer_signing_key_metadata;
+        self
+    }
+
     /// Returns version of this file.
     pub fn version(&self) -> i32 {
         self.version
@@ -534,6 +607,21 @@ impl FileMetaData {
     }
 }
 
+thrift_struct!(
+/// Sort order within a RowGroup of a leaf column
+pub struct SortingColumn {
+  /// The ordinal position of the column (in this row group)
+  1: required i32 column_idx
+
+  /// If true, indicates this column is sorted in descending order.
+  2: required bool descending
+
+  /// If true, nulls will come before non-null values, otherwise,
+  /// nulls go at the end.
+  3: required bool nulls_first
+}
+);
+
 /// Reference counted pointer for [`RowGroupMetaData`].
 pub type RowGroupMetaDataPtr = Arc<RowGroupMetaData>;
 
@@ -625,129 +713,6 @@ impl RowGroupMetaData {
         self.file_offset
     }
 
-    /// Method to convert from encrypted Thrift.
-    #[cfg(feature = "encryption")]
-    fn from_encrypted_thrift(
-        schema_descr: SchemaDescPtr,
-        mut rg: RowGroup,
-        decryptor: Option<&FileDecryptor>,
-    ) -> Result<RowGroupMetaData> {
-        if schema_descr.num_columns() != rg.columns.len() {
-            return Err(general_err!(
-                "Column count mismatch. Schema has {} columns while Row Group has {}",
-                schema_descr.num_columns(),
-                rg.columns.len()
-            ));
-        }
-        let total_byte_size = rg.total_byte_size;
-        let num_rows = rg.num_rows;
-        let mut columns = vec![];
-
-        for (i, (mut c, d)) in rg
-            .columns
-            .drain(0..)
-            .zip(schema_descr.columns())
-            .enumerate()
-        {
-            // Read encrypted metadata if it's present and we have a decryptor.
-            if let (true, Some(decryptor)) = (c.encrypted_column_metadata.is_some(), decryptor) {
-                let column_decryptor = match c.crypto_metadata.as_ref() {
-                    None => {
-                        return Err(general_err!(
-                            "No crypto_metadata is set for column '{}', which has encrypted metadata",
-                            d.path().string()
-                        ));
-                    }
-                    Some(TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => {
-                        let column_name = crypto_metadata.path_in_schema.join(".");
-                        decryptor.get_column_metadata_decryptor(
-                            column_name.as_str(),
-                            crypto_metadata.key_metadata.as_deref(),
-                        )?
-                    }
-                    Some(TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
-                        decryptor.get_footer_decryptor()?
-                    }
-                };
-
-                let column_aad = create_module_aad(
-                    decryptor.file_aad(),
-                    ModuleType::ColumnMetaData,
-                    rg.ordinal.unwrap() as usize,
-                    i,
-                    None,
-                )?;
-
-                let buf = c.encrypted_column_metadata.clone().unwrap();
-                let decrypted_cc_buf = column_decryptor
-                    .decrypt(buf.as_slice(), column_aad.as_ref())
-                    .map_err(|_| {
-                        general_err!(
-                            "Unable to decrypt column '{}', perhaps the column key is wrong?",
-                            d.path().string()
-                        )
-                    })?;
-
-                let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice());
-                c.meta_data = Some(ColumnMetaData::read_from_in_protocol(&mut prot)?);
-            }
-            columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?);
-        }
-
-        let sorting_columns = rg.sorting_columns;
-        Ok(RowGroupMetaData {
-            columns,
-            num_rows,
-            sorting_columns,
-            total_byte_size,
-            schema_descr,
-            file_offset: rg.file_offset,
-            ordinal: rg.ordinal,
-        })
-    }
-
-    /// Method to convert from Thrift.
-    pub fn from_thrift(schema_descr: SchemaDescPtr, mut rg: RowGroup) -> Result<RowGroupMetaData> {
-        if schema_descr.num_columns() != rg.columns.len() {
-            return Err(general_err!(
-                "Column count mismatch. Schema has {} columns while Row Group has {}",
-                schema_descr.num_columns(),
-                rg.columns.len()
-            ));
-        }
-        let total_byte_size = rg.total_byte_size;
-        let num_rows = rg.num_rows;
-        let mut columns = vec![];
-
-        for (c, d) in rg.columns.drain(0..).zip(schema_descr.columns()) {
-            columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?);
-        }
-
-        let sorting_columns = rg.sorting_columns;
-        Ok(RowGroupMetaData {
-            columns,
-            num_rows,
-            sorting_columns,
-            total_byte_size,
-            schema_descr,
-            file_offset: rg.file_offset,
-            ordinal: rg.ordinal,
-        })
-    }
-
-    /// Method to convert to Thrift.
-    pub fn to_thrift(&self) -> RowGroup {
-        RowGroup {
-            columns: self.columns().iter().map(|v| v.to_thrift()).collect(),
-            total_byte_size: self.total_byte_size,
-            num_rows: self.num_rows,
-            sorting_columns: self.sorting_columns().cloned(),
-            file_offset: self.file_offset(),
-            total_compressed_size: Some(self.compressed_size()),
-            ordinal: self.ordinal,
-        }
-    }
-
     /// Converts this [`RowGroupMetaData`] into a [`RowGroupMetaDataBuilder`]
     pub fn into_builder(self) -> RowGroupMetaDataBuilder {
         RowGroupMetaDataBuilder(self)
@@ -834,13 +799,18 @@ impl RowGroupMetaDataBuilder {
 
         Ok(self.0)
     }
+
+    /// Build row group metadata without validation.
+    pub(super) fn build_unchecked(self) -> RowGroupMetaData {
+        self.0
+    }
 }
 
 /// Metadata for a column chunk.
 #[derive(Debug, Clone, PartialEq)]
 pub struct ColumnChunkMetaData {
     column_descr: ColumnDescPtr,
-    encodings: Vec<Encoding>,
+    encodings: EncodingMask,
     file_path: Option<String>,
     file_offset: i64,
     num_values: i64,
@@ -851,7 +821,8 @@ pub struct ColumnChunkMetaData {
     index_page_offset: Option<i64>,
     dictionary_page_offset: Option<i64>,
     statistics: Option<Statistics>,
-    encoding_stats: Option<Vec<PageEncodingStats>>,
+    geo_statistics: Option<Box<geo_statistics::GeospatialStatistics>>,
+    encoding_stats: Option<ParquetPageEncodingStats>,
     bloom_filter_offset: Option<i64>,
     bloom_filter_length: Option<i32>,
     offset_index_offset: Option<i64>,
@@ -862,7 +833,9 @@ pub struct ColumnChunkMetaData {
     repetition_level_histogram: Option<LevelHistogram>,
     definition_level_histogram: Option<LevelHistogram>,
     #[cfg(feature = "encryption")]
-    column_crypto_metadata: Option<ColumnCryptoMetaData>,
+    column_crypto_metadata: Option<Box<ColumnCryptoMetaData>>,
+    #[cfg(feature = "encryption")]
+    encrypted_column_metadata: Option<Vec<u8>>,
 }
 
 /// Histograms for repetition and definition levels.
@@ -1017,7 +990,12 @@ impl ColumnChunkMetaData {
     }
 
     /// All encodings used for this column.
-    pub fn encodings(&self) -> &Vec<Encoding> {
+    pub fn encodings(&self) -> impl Iterator<Item = Encoding> {
+        self.encodings.encodings()
+    }
+
+    /// All encodings used for this column, returned as a bitmask.
+    pub fn encodings_mask(&self) -> &EncodingMask {
         &self.encodings
     }
 
@@ -1076,10 +1054,59 @@ impl ColumnChunkMetaData {
         self.statistics.as_ref()
     }
 
-    /// Returns the offset for the page encoding stats,
-    /// or `None` if no page encoding stats are available.
+    /// Returns geospatial statistics that are set for this column chunk,
+    /// or `None` if no geospatial statistics are available.
+    pub fn geo_statistics(&self) -> Option<&geo_statistics::GeospatialStatistics> {
+        self.geo_statistics.as_deref()
+    }
+
+    /// Returns the page encoding statistics, or `None` if no page encoding statistics
+    /// are available (or they were converted to a mask).
+    ///
+    /// Note: By default, this crate converts page encoding statistics to a mask for performance
+    /// reasons. To get the full statistics, you must set [`ParquetMetaDataOptions::with_encoding_stats_as_mask`]
+    /// to `false`.
     pub fn page_encoding_stats(&self) -> Option<&Vec<PageEncodingStats>> {
-        self.encoding_stats.as_ref()
+        match self.encoding_stats.as_ref() {
+            Some(ParquetPageEncodingStats::Full(stats)) => Some(stats),
+            _ => None,
+        }
+    }
+
+    /// Returns the page encoding statistics reduced to a bitmask, or `None` if statistics are
+    /// not available (or they were left in their original form).
+    ///
+    /// Note: This is the default behavior for this crate.
+    ///
+    /// The [`PageEncodingStats`] struct was added to the Parquet specification specifically to
+    /// enable fast determination of whether all pages in a column chunk are dictionary encoded
+    /// (see <https://github.com/apache/parquet-format/pull/16>).
+    /// Decoding the full page encoding statistics, however, can be very costly, and is not
+    /// necessary to support the aforementioned use case. As an alternative, this crate can
+    /// instead distill the list of `PageEncodingStats` down to a bitmask of just the encodings
+    /// used for data pages
+    /// (see [`ParquetMetaDataOptions::set_encoding_stats_as_mask`]).
+    /// To test for an all-dictionary-encoded chunk one could use this bitmask in the following way:
+    ///
+    /// ```rust
+    /// use parquet::basic::Encoding;
+    /// use parquet::file::metadata::ColumnChunkMetaData;
+    /// // test if all data pages in the column chunk are dictionary encoded
+    /// fn is_all_dictionary_encoded(col_meta: &ColumnChunkMetaData) -> bool {
+    ///     // check that dictionary encoding was used
+    ///     col_meta.dictionary_page_offset().is_some()
+    ///         && col_meta.page_encoding_stats_mask().is_some_and(|mask| {
+    ///             // mask should only have one bit set, either for PLAIN_DICTIONARY or
+    ///             // RLE_DICTIONARY
+    ///             mask.is_only(Encoding::PLAIN_DICTIONARY) || mask.is_only(Encoding::RLE_DICTIONARY)
+    ///         })
+    /// }
+    /// ```
+    pub fn page_encoding_stats_mask(&self) -> Option<&EncodingMask> {
+        match self.encoding_stats.as_ref() {
+            Some(ParquetPageEncodingStats::Mask(stats)) => Some(stats),
+            _ => None,
+        }
     }
 
     /// Returns the offset for the bloom filter.
@@ -1155,180 +1182,13 @@ impl ColumnChunkMetaData {
     /// Returns the encryption metadata for this column chunk.
     #[cfg(feature = "encryption")]
     pub fn crypto_metadata(&self) -> Option<&ColumnCryptoMetaData> {
-        self.column_crypto_metadata.as_ref()
-    }
-
-    /// Method to convert from Thrift.
-    pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result<Self> {
-        if cc.meta_data.is_none() {
-            return Err(general_err!("Expected to have column metadata"));
-        }
-        let mut col_metadata: ColumnMetaData = cc.meta_data.unwrap();
-        let column_type = Type::try_from(col_metadata.type_)?;
-        let encodings = col_metadata
-            .encodings
-            .drain(0..)
-            .map(Encoding::try_from)
-            .collect::<Result<_>>()?;
-        let compression = Compression::try_from(col_metadata.codec)?;
-        let file_path = cc.file_path;
-        let file_offset = cc.file_offset;
-        let num_values = col_metadata.num_values;
-        let total_compressed_size = col_metadata.total_compressed_size;
-        let total_uncompressed_size = col_metadata.total_uncompressed_size;
-        let data_page_offset = col_metadata.data_page_offset;
-        let index_page_offset = col_metadata.index_page_offset;
-        let dictionary_page_offset = col_metadata.dictionary_page_offset;
-        let statistics = statistics::from_thrift(column_type, col_metadata.statistics)?;
-        let encoding_stats = col_metadata
-            .encoding_stats
-            .as_ref()
-            .map(|vec| {
-                vec.iter()
-                    .map(page_encoding_stats::try_from_thrift)
-                    .collect::<Result<_>>()
-            })
-            .transpose()?;
-        let bloom_filter_offset = col_metadata.bloom_filter_offset;
-        let bloom_filter_length = col_metadata.bloom_filter_length;
-        let offset_index_offset = cc.offset_index_offset;
-        let offset_index_length = cc.offset_index_length;
-        let column_index_offset = cc.column_index_offset;
-        let column_index_length = cc.column_index_length;
-        let (
-            unencoded_byte_array_data_bytes,
-            repetition_level_histogram,
-            definition_level_histogram,
-        ) = if let Some(size_stats) = col_metadata.size_statistics {
-            (
-                size_stats.unencoded_byte_array_data_bytes,
-                size_stats.repetition_level_histogram,
-                size_stats.definition_level_histogram,
-            )
-        } else {
-            (None, None, None)
-        };
-
-        let repetition_level_histogram = repetition_level_histogram.map(LevelHistogram::from);
-        let definition_level_histogram = definition_level_histogram.map(LevelHistogram::from);
-
-        #[cfg(feature = "encryption")]
-        let column_crypto_metadata = if let Some(crypto_metadata) = cc.crypto_metadata {
-            Some(column_crypto_metadata::try_from_thrift(&crypto_metadata)?)
-        } else {
-            None
-        };
-
-        let result = ColumnChunkMetaData {
-            column_descr,
-            encodings,
-            file_path,
-            file_offset,
-            num_values,
-            compression,
-            total_compressed_size,
-            total_uncompressed_size,
-            data_page_offset,
-            index_page_offset,
-            dictionary_page_offset,
-            statistics,
-            encoding_stats,
-            bloom_filter_offset,
-            bloom_filter_length,
-            offset_index_offset,
-            offset_index_length,
-            column_index_offset,
-            column_index_length,
-            unencoded_byte_array_data_bytes,
-            repetition_level_histogram,
-            definition_level_histogram,
-            #[cfg(feature = "encryption")]
-            column_crypto_metadata,
-        };
-        Ok(result)
-    }
-
-    /// Method to convert to Thrift.
-    pub fn to_thrift(&self) -> ColumnChunk {
-        let column_metadata = self.to_column_metadata_thrift();
-
-        ColumnChunk {
-            file_path: self.file_path().map(|s| s.to_owned()),
-            file_offset: self.file_offset,
-            meta_data: Some(column_metadata),
-            offset_index_offset: self.offset_index_offset,
-            offset_index_length: self.offset_index_length,
-            column_index_offset: self.column_index_offset,
-            column_index_length: self.column_index_length,
-            crypto_metadata: self.column_crypto_metadata_thrift(),
-            encrypted_column_metadata: None,
-        }
-    }
-
-    /// Method to convert to Thrift `ColumnMetaData`
-    pub fn to_column_metadata_thrift(&self) -> ColumnMetaData {
-        let size_statistics = if self.unencoded_byte_array_data_bytes.is_some()
-            || self.repetition_level_histogram.is_some()
-            || self.definition_level_histogram.is_some()
-        {
-            let repetition_level_histogram = self
-                .repetition_level_histogram
-                .as_ref()
-                .map(|hist| hist.clone().into_inner());
-
-            let definition_level_histogram = self
-                .definition_level_histogram
-                .as_ref()
-                .map(|hist| hist.clone().into_inner());
-
-            Some(SizeStatistics {
-                unencoded_byte_array_data_bytes: self.unencoded_byte_array_data_bytes,
-                repetition_level_histogram,
-                definition_level_histogram,
-            })
-        } else {
-            None
-        };
-
-        ColumnMetaData {
-            type_: self.column_type().into(),
-            encodings: self.encodings().iter().map(|&v| v.into()).collect(),
-            path_in_schema: self.column_path().as_ref().to_vec(),
-            codec: self.compression.into(),
-            num_values: self.num_values,
-            total_uncompressed_size: self.total_uncompressed_size,
-            total_compressed_size: self.total_compressed_size,
-            key_value_metadata: None,
-            data_page_offset: self.data_page_offset,
-            index_page_offset: self.index_page_offset,
-            dictionary_page_offset: self.dictionary_page_offset,
-            statistics: statistics::to_thrift(self.statistics.as_ref()),
-            encoding_stats: self
-                .encoding_stats
-                .as_ref()
-                .map(|vec| vec.iter().map(page_encoding_stats::to_thrift).collect()),
-            bloom_filter_offset: self.bloom_filter_offset,
-            bloom_filter_length: self.bloom_filter_length,
-            size_statistics,
-        }
+        self.column_crypto_metadata.as_deref()
     }
 
     /// Converts this [`ColumnChunkMetaData`] into a [`ColumnChunkMetaDataBuilder`]
     pub fn into_builder(self) -> ColumnChunkMetaDataBuilder {
         ColumnChunkMetaDataBuilder::from(self)
     }
-
-    #[cfg(feature = "encryption")]
-    fn column_crypto_metadata_thrift(&self) -> Option<TColumnCryptoMetaData> {
-        self.column_crypto_metadata
-            .as_ref()
-            .map(column_crypto_metadata::to_thrift)
-    }
-
-    #[cfg(not(feature = "encryption"))]
-    fn column_crypto_metadata_thrift(&self) -> Option<TColumnCryptoMetaData> {
-        None
-    }
 }
 
 /// Builder for [`ColumnChunkMetaData`]
@@ -1358,7 +1218,7 @@ impl ColumnChunkMetaDataBuilder {
     fn new(column_descr: ColumnDescPtr) -> Self {
         Self(ColumnChunkMetaData {
             column_descr,
-            encodings: Vec::new(),
+            encodings: Default::default(),
             file_path: None,
             file_offset: 0,
             num_values: 0,
@@ -1369,6 +1229,7 @@ impl ColumnChunkMetaDataBuilder {
             index_page_offset: None,
             dictionary_page_offset: None,
             statistics: None,
+            geo_statistics: None,
             encoding_stats: None,
             bloom_filter_offset: None,
             bloom_filter_length: None,
@@ -1381,11 +1242,19 @@ impl ColumnChunkMetaDataBuilder {
             definition_level_histogram: None,
             #[cfg(feature = "encryption")]
             column_crypto_metadata: None,
+            #[cfg(feature = "encryption")]
+            encrypted_column_metadata: None,
         })
     }
 
     /// Sets list of encodings for this column chunk.
     pub fn set_encodings(mut self, encodings: Vec<Encoding>) -> Self {
+        self.0.encodings = EncodingMask::new_from_encodings(encodings.iter());
+        self
+    }
+
+    /// Sets the encodings mask for this column chunk.
+    pub fn set_encodings_mask(mut self, encodings: EncodingMask) -> Self {
         self.0.encodings = encodings;
         self
     }
@@ -1396,20 +1265,6 @@ impl ColumnChunkMetaDataBuilder {
         self
     }
 
-    /// Sets file offset in bytes.
-    ///
-    /// This field was meant to provide an alternate to storing `ColumnMetadata` directly in
-    /// the `ColumnChunkMetadata`. However, most Parquet readers assume the `ColumnMetadata`
-    /// is stored inline and ignore this field.
-    #[deprecated(
-        since = "53.0.0",
-        note = "The Parquet specification requires this field to be 0"
-    )]
-    pub fn set_file_offset(mut self, value: i64) -> Self {
-        self.0.file_offset = value;
-        self
-    }
-
     /// Sets number of values.
     pub fn set_num_values(mut self, value: i64) -> Self {
         self.0.num_values = value;
@@ -1458,6 +1313,12 @@ impl ColumnChunkMetaDataBuilder {
         self
     }
 
+    /// Sets geospatial statistics for this column chunk.
+    pub fn set_geo_statistics(mut self, value: Box<geo_statistics::GeospatialStatistics>) -> Self {
+        self.0.geo_statistics = Some(value);
+        self
+    }
+
     /// Clears the statistics for this column chunk.
     pub fn clear_statistics(mut self) -> Self {
         self.0.statistics = None;
@@ -1465,8 +1326,18 @@ impl ColumnChunkMetaDataBuilder {
     }
 
     /// Sets page encoding stats for this column chunk.
+    ///
+    /// This will overwrite any existing stats, either `Vec` based or bitmask.
     pub fn set_page_encoding_stats(mut self, value: Vec<PageEncodingStats>) -> Self {
-        self.0.encoding_stats = Some(value);
+        self.0.encoding_stats = Some(ParquetPageEncodingStats::Full(value));
+        self
+    }
+
+    /// Sets page encoding stats mask for this column chunk.
+    ///
+    /// This will overwrite any existing stats, either `Vec` based or bitmask.
+    pub fn set_page_encoding_stats_mask(mut self, value: EncodingMask) -> Self {
+        self.0.encoding_stats = Some(ParquetPageEncodingStats::Mask(value));
         self
     }
 
@@ -1533,7 +1404,14 @@ impl ColumnChunkMetaDataBuilder {
     #[cfg(feature = "encryption")]
     /// Set the encryption metadata for an encrypted column
     pub fn set_column_crypto_metadata(mut self, value: Option<ColumnCryptoMetaData>) -> Self {
-        self.0.column_crypto_metadata = value;
+        self.0.column_crypto_metadata = value.map(Box::new);
+        self
+    }
+
+    #[cfg(feature = "encryption")]
+    /// Set the encrypted column metadata (raw bytes) for an encrypted column
+    pub fn set_encrypted_column_metadata(mut self, value: Option<Vec<u8>>) -> Self {
+        self.0.encrypted_column_metadata = value;
         self
     }
 
@@ -1546,7 +1424,9 @@ impl ColumnChunkMetaDataBuilder {
 /// Builder for Parquet [`ColumnIndex`], part of the Parquet [PageIndex]
 ///
 /// [PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 pub struct ColumnIndexBuilder {
+    column_type: Type,
     null_pages: Vec<bool>,
     min_values: Vec<Vec<u8>>,
     max_values: Vec<Vec<u8>>,
@@ -1566,16 +1446,11 @@ pub struct ColumnIndexBuilder {
     valid: bool,
 }
 
-impl Default for ColumnIndexBuilder {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl ColumnIndexBuilder {
     /// Creates a new column index builder.
-    pub fn new() -> Self {
+    pub fn new(column_type: Type) -> Self {
         ColumnIndexBuilder {
+            column_type,
             null_pages: Vec::new(),
             min_values: Vec::new(),
             max_values: Vec::new(),
@@ -1603,6 +1478,8 @@ impl ColumnIndexBuilder {
 
     /// Append the given page-level histograms to the [`ColumnIndex`] histograms.
     /// Does nothing if the `ColumnIndexBuilder` is not in the `valid` state.
+    ///
+    /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
     pub fn append_histograms(
         &mut self,
         repetition_level_histogram: &Option<LevelHistogram>,
@@ -1611,12 +1488,12 @@ impl ColumnIndexBuilder {
         if !self.valid {
             return;
         }
-        if let Some(ref rep_lvl_hist) = repetition_level_histogram {
+        if let Some(rep_lvl_hist) = repetition_level_histogram {
             let hist = self.repetition_level_histograms.get_or_insert(Vec::new());
             hist.reserve(rep_lvl_hist.len());
             hist.extend(rep_lvl_hist.values());
         }
-        if let Some(ref def_lvl_hist) = definition_level_histogram {
+        if let Some(def_lvl_hist) = definition_level_histogram {
             let hist = self.definition_level_histograms.get_or_insert(Vec::new());
             hist.reserve(def_lvl_hist.len());
             hist.extend(def_lvl_hist.values());
@@ -1638,18 +1515,76 @@ impl ColumnIndexBuilder {
         self.valid
     }
 
-    /// Build and get the thrift metadata of column index
+    /// Build and get the column index
     ///
     /// Note: callers should check [`Self::valid`] before calling this method
-    pub fn build_to_thrift(self) -> ColumnIndex {
-        ColumnIndex::new(
+    pub fn build(self) -> Result<ColumnIndexMetaData> {
+        Ok(match self.column_type {
+            Type::BOOLEAN => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::BOOLEAN(index)
+            }
+            Type::INT32 => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::INT32(index)
+            }
+            Type::INT64 => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::INT64(index)
+            }
+            Type::INT96 => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::INT96(index)
+            }
+            Type::FLOAT => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::FLOAT(index)
+            }
+            Type::DOUBLE => {
+                let index = self.build_page_index()?;
+                ColumnIndexMetaData::DOUBLE(index)
+            }
+            Type::BYTE_ARRAY => {
+                let index = self.build_byte_array_index()?;
+                ColumnIndexMetaData::BYTE_ARRAY(index)
+            }
+            Type::FIXED_LEN_BYTE_ARRAY => {
+                let index = self.build_byte_array_index()?;
+                ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(index)
+            }
+        })
+    }
+
+    fn build_page_index<T>(self) -> Result<PrimitiveColumnIndex<T>>
+    where
+        T: ParquetValueType,
+    {
+        let min_values: Vec<&[u8]> = self.min_values.iter().map(|v| v.as_slice()).collect();
+        let max_values: Vec<&[u8]> = self.max_values.iter().map(|v| v.as_slice()).collect();
+
+        PrimitiveColumnIndex::try_new(
             self.null_pages,
-            self.min_values,
-            self.max_values,
             self.boundary_order,
-            self.null_counts,
+            Some(self.null_counts),
             self.repetition_level_histograms,
             self.definition_level_histograms,
+            min_values,
+            max_values,
+        )
+    }
+
+    fn build_byte_array_index(self) -> Result<ByteArrayColumnIndex> {
+        let min_values: Vec<&[u8]> = self.min_values.iter().map(|v| v.as_slice()).collect();
+        let max_values: Vec<&[u8]> = self.max_values.iter().map(|v| v.as_slice()).collect();
+
+        ByteArrayColumnIndex::try_new(
+            self.null_pages,
+            self.boundary_order,
+            Some(self.null_counts),
+            self.repetition_level_histograms,
+            self.definition_level_histograms,
+            min_values,
+            max_values,
         )
     }
 }
@@ -1715,15 +1650,22 @@ impl OffsetIndexBuilder {
     }
 
     /// Build and get the thrift metadata of offset index
-    pub fn build_to_thrift(self) -> OffsetIndex {
+    pub fn build(self) -> OffsetIndexMetaData {
         let locations = self
             .offset_array
             .iter()
             .zip(self.compressed_page_size_array.iter())
             .zip(self.first_row_index_array.iter())
-            .map(|((offset, size), row_index)| PageLocation::new(*offset, *size, *row_index))
+            .map(|((offset, size), row_index)| PageLocation {
+                offset: *offset,
+                compressed_page_size: *size,
+                first_row_index: *row_index,
+            })
             .collect::<Vec<_>>();
-        OffsetIndex::new(locations, self.unencoded_byte_array_data_bytes_array)
+        OffsetIndexMetaData {
+            page_locations: locations,
+            unencoded_byte_array_data_bytes: self.unencoded_byte_array_data_bytes_array,
+        }
     }
 }
 
@@ -1731,7 +1673,9 @@ impl OffsetIndexBuilder {
 mod tests {
     use super::*;
     use crate::basic::{PageType, SortOrder};
-    use crate::file::page_index::index::NativeIndex;
+    use crate::file::metadata::thrift::tests::{
+        read_column_chunk, read_column_chunk_with_options, read_row_group,
+    };
 
     #[test]
     fn test_row_group_metadata_thrift_conversion() {
@@ -1750,12 +1694,13 @@ mod tests {
             .build()
             .unwrap();
 
-        let row_group_exp = row_group_meta.to_thrift();
-        let row_group_res = RowGroupMetaData::from_thrift(schema_descr, row_group_exp.clone())
-            .unwrap()
-            .to_thrift();
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+        row_group_meta.write_thrift(&mut writer).unwrap();
 
-        assert_eq!(row_group_res, row_group_exp);
+        let row_group_res = read_row_group(&mut buf, schema_descr).unwrap();
+
+        assert_eq!(row_group_res, row_group_meta);
     }
 
     #[test]
@@ -1831,11 +1776,13 @@ mod tests {
             .set_ordinal(1)
             .build()
             .unwrap();
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+        row_group_meta_2cols.write_thrift(&mut writer).unwrap();
 
-        let err =
-            RowGroupMetaData::from_thrift(schema_descr_3cols, row_group_meta_2cols.to_thrift())
-                .unwrap_err()
-                .to_string();
+        let err = read_row_group(&mut buf, schema_descr_3cols)
+            .unwrap_err()
+            .to_string();
         assert_eq!(
             err,
             "Parquet error: Column count mismatch. Schema has 3 columns while Row Group has 2"
@@ -1845,9 +1792,10 @@ mod tests {
     #[test]
     fn test_column_chunk_metadata_thrift_conversion() {
         let column_descr = get_test_schema_descr().column(0);
-
         let col_metadata = ColumnChunkMetaData::builder(column_descr.clone())
-            .set_encodings(vec![Encoding::PLAIN, Encoding::RLE])
+            .set_encodings_mask(EncodingMask::new_from_encodings(
+                [Encoding::PLAIN, Encoding::RLE].iter(),
+            ))
             .set_file_path("file_path".to_owned())
             .set_num_values(1000)
             .set_compression(Compression::SNAPPY)
@@ -1879,8 +1827,75 @@ mod tests {
             .build()
             .unwrap();
 
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+        col_metadata.write_thrift(&mut writer).unwrap();
+        let col_chunk_res = read_column_chunk(&mut buf, column_descr.clone()).unwrap();
+
+        let expected_metadata = ColumnChunkMetaData::builder(column_descr)
+            .set_encodings_mask(EncodingMask::new_from_encodings(
+                [Encoding::PLAIN, Encoding::RLE].iter(),
+            ))
+            .set_file_path("file_path".to_owned())
+            .set_num_values(1000)
+            .set_compression(Compression::SNAPPY)
+            .set_total_compressed_size(2000)
+            .set_total_uncompressed_size(3000)
+            .set_data_page_offset(4000)
+            .set_dictionary_page_offset(Some(5000))
+            .set_page_encoding_stats_mask(EncodingMask::new_from_encodings(
+                [Encoding::PLAIN, Encoding::RLE].iter(),
+            ))
+            .set_bloom_filter_offset(Some(6000))
+            .set_bloom_filter_length(Some(25))
+            .set_offset_index_offset(Some(7000))
+            .set_offset_index_length(Some(25))
+            .set_column_index_offset(Some(8000))
+            .set_column_index_length(Some(25))
+            .set_unencoded_byte_array_data_bytes(Some(2000))
+            .set_repetition_level_histogram(Some(LevelHistogram::from(vec![100, 100])))
+            .set_definition_level_histogram(Some(LevelHistogram::from(vec![0, 200])))
+            .build()
+            .unwrap();
+
+        assert_eq!(col_chunk_res, expected_metadata);
+    }
+
+    #[test]
+    fn test_column_chunk_metadata_thrift_conversion_full_stats() {
+        let column_descr = get_test_schema_descr().column(0);
+        let stats = vec![
+            PageEncodingStats {
+                page_type: PageType::DATA_PAGE,
+                encoding: Encoding::PLAIN,
+                count: 3,
+            },
+            PageEncodingStats {
+                page_type: PageType::DATA_PAGE,
+                encoding: Encoding::RLE,
+                count: 5,
+            },
+        ];
+        let col_metadata = ColumnChunkMetaData::builder(column_descr.clone())
+            .set_encodings_mask(EncodingMask::new_from_encodings(
+                [Encoding::PLAIN, Encoding::RLE].iter(),
+            ))
+            .set_num_values(1000)
+            .set_compression(Compression::SNAPPY)
+            .set_total_compressed_size(2000)
+            .set_total_uncompressed_size(3000)
+            .set_data_page_offset(4000)
+            .set_page_encoding_stats(stats)
+            .build()
+            .unwrap();
+
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+        col_metadata.write_thrift(&mut writer).unwrap();
+
+        let options = ParquetMetaDataOptions::new().with_encoding_stats_as_mask(false);
         let col_chunk_res =
-            ColumnChunkMetaData::from_thrift(column_descr, col_metadata.to_thrift()).unwrap();
+            read_column_chunk_with_options(&mut buf, column_descr, Some(&options)).unwrap();
 
         assert_eq!(col_chunk_res, col_metadata);
     }
@@ -1893,12 +1908,12 @@ mod tests {
             .build()
             .unwrap();
 
-        let col_chunk_exp = col_metadata.to_thrift();
-        let col_chunk_res = ColumnChunkMetaData::from_thrift(column_descr, col_chunk_exp.clone())
-            .unwrap()
-            .to_thrift();
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+        col_metadata.write_thrift(&mut writer).unwrap();
+        let col_chunk_res = read_column_chunk(&mut buf, column_descr).unwrap();
 
-        assert_eq!(col_chunk_res, col_chunk_exp);
+        assert_eq!(col_chunk_res, col_metadata);
     }
 
     #[test]
@@ -1997,16 +2012,19 @@ mod tests {
             .build();
 
         #[cfg(not(feature = "encryption"))]
-        let base_expected_size = 2312;
+        let base_expected_size = 2766;
         #[cfg(feature = "encryption")]
-        let base_expected_size = 2648;
+        let base_expected_size = 2934;
 
         assert_eq!(parquet_meta.memory_size(), base_expected_size);
 
-        let mut column_index = ColumnIndexBuilder::new();
+        let mut column_index = ColumnIndexBuilder::new(Type::BOOLEAN);
         column_index.append(false, vec![1u8], vec![2u8, 3u8], 4);
-        let column_index = column_index.build_to_thrift();
-        let native_index = NativeIndex::<bool>::try_new(column_index).unwrap();
+        let column_index = column_index.build().unwrap();
+        let native_index = match column_index {
+            ColumnIndexMetaData::BOOLEAN(index) => index,
+            _ => panic!("wrong type of column index"),
+        };
 
         // Now, add in OffsetIndex
         let mut offset_index = OffsetIndexBuilder::new();
@@ -2016,26 +2034,99 @@ mod tests {
         offset_index.append_row_count(1);
         offset_index.append_offset_and_size(2, 3);
         offset_index.append_unencoded_byte_array_data_bytes(Some(10));
-        let offset_index = offset_index.build_to_thrift();
+        let offset_index = offset_index.build();
 
         let parquet_meta = ParquetMetaDataBuilder::new(file_metadata)
             .set_row_groups(row_group_meta)
-            .set_column_index(Some(vec![vec![Index::BOOLEAN(native_index)]]))
-            .set_offset_index(Some(vec![vec![
-                OffsetIndexMetaData::try_new(offset_index).unwrap()
-            ]]))
+            .set_column_index(Some(vec![vec![ColumnIndexMetaData::BOOLEAN(native_index)]]))
+            .set_offset_index(Some(vec![vec![offset_index]]))
             .build();
 
         #[cfg(not(feature = "encryption"))]
-        let bigger_expected_size = 2816;
+        let bigger_expected_size = 3192;
         #[cfg(feature = "encryption")]
-        let bigger_expected_size = 3152;
+        let bigger_expected_size = 3360;
 
         // more set fields means more memory usage
         assert!(bigger_expected_size > base_expected_size);
         assert_eq!(parquet_meta.memory_size(), bigger_expected_size);
     }
 
+    #[test]
+    #[cfg(feature = "encryption")]
+    fn test_memory_size_with_decryptor() {
+        use crate::encryption::decrypt::FileDecryptionProperties;
+        use crate::file::metadata::thrift::encryption::AesGcmV1;
+
+        let schema_descr = get_test_schema_descr();
+
+        let columns = schema_descr
+            .columns()
+            .iter()
+            .map(|column_descr| ColumnChunkMetaData::builder(column_descr.clone()).build())
+            .collect::<Result<Vec<_>>>()
+            .unwrap();
+        let row_group_meta = RowGroupMetaData::builder(schema_descr.clone())
+            .set_num_rows(1000)
+            .set_column_metadata(columns)
+            .build()
+            .unwrap();
+        let row_group_meta = vec![row_group_meta];
+
+        let version = 2;
+        let num_rows = 1000;
+        let aad_file_unique = vec![1u8; 8];
+        let aad_prefix = vec![2u8; 8];
+        let encryption_algorithm = EncryptionAlgorithm::AES_GCM_V1(AesGcmV1 {
+            aad_prefix: Some(aad_prefix.clone()),
+            aad_file_unique: Some(aad_file_unique.clone()),
+            supply_aad_prefix: Some(true),
+        });
+        let footer_key_metadata = Some(vec![3u8; 8]);
+        let file_metadata =
+            FileMetaData::new(version, num_rows, None, None, schema_descr.clone(), None)
+                .with_encryption_algorithm(Some(encryption_algorithm))
+                .with_footer_signing_key_metadata(footer_key_metadata.clone());
+
+        let parquet_meta_data = ParquetMetaDataBuilder::new(file_metadata.clone())
+            .set_row_groups(row_group_meta.clone())
+            .build();
+
+        let base_expected_size = 2058;
+        assert_eq!(parquet_meta_data.memory_size(), base_expected_size);
+
+        let footer_key = "0123456789012345".as_bytes();
+        let column_key = "1234567890123450".as_bytes();
+        let mut decryption_properties_builder =
+            FileDecryptionProperties::builder(footer_key.to_vec())
+                .with_aad_prefix(aad_prefix.clone());
+        for column in schema_descr.columns() {
+            decryption_properties_builder = decryption_properties_builder
+                .with_column_key(&column.path().string(), column_key.to_vec());
+        }
+        let decryption_properties = decryption_properties_builder.build().unwrap();
+        let decryptor = FileDecryptor::new(
+            &decryption_properties,
+            footer_key_metadata.as_deref(),
+            aad_file_unique,
+            aad_prefix,
+        )
+        .unwrap();
+
+        let parquet_meta_data = ParquetMetaDataBuilder::new(file_metadata.clone())
+            .set_row_groups(row_group_meta.clone())
+            .set_file_decryptor(Some(decryptor))
+            .build();
+
+        let expected_size_with_decryptor = 3072;
+        assert!(expected_size_with_decryptor > base_expected_size);
+
+        assert_eq!(
+            parquet_meta_data.memory_size(),
+            expected_size_with_decryptor
+        );
+    }
+
     /// Returns sample schema descriptor so we can create column metadata.
     fn get_test_schema_descr() -> SchemaDescPtr {
         let schema = SchemaType::group_type_builder("schema")
diff --git a/parquet/src/file/metadata/options.rs b/parquet/src/file/metadata/options.rs
new file mode 100644
index 000000000000..e91f5bdd1028
--- /dev/null
+++ b/parquet/src/file/metadata/options.rs
@@ -0,0 +1,305 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Options used to control metadata parsing
+
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use crate::schema::types::SchemaDescPtr;
+
+/// Enum to control decoding of some Parquet statistics fields.
+///
+/// # Example
+/// ```rust
+/// use parquet::file::metadata::ParquetStatisticsPolicy;
+/// use parquet::file::serialized_reader::ReadOptionsBuilder;
+/// use parquet::arrow::arrow_reader::ArrowReaderOptions;
+///
+/// // Set arrow options to skip encoding statistics for all columns.
+/// let options =
+///     ArrowReaderOptions::new().with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll);
+///
+/// // Set serialized reader options to decode encoding statistics for all columns.
+/// let options =
+///     ReadOptionsBuilder::new().with_encoding_stats_policy(ParquetStatisticsPolicy::KeepAll)
+///     .build();
+///
+/// // Set arrow options to skip encoding statistics for all columns, but to decode statistics
+/// // for columns 0 and 1.
+/// let options = ArrowReaderOptions::new()
+///     .with_encoding_stats_policy(ParquetStatisticsPolicy::skip_except(&[0, 1]));
+/// ```
+#[derive(Default, Debug, Clone)]
+pub enum ParquetStatisticsPolicy {
+    /// Decode the relevant statistics for all columns.
+    #[default]
+    KeepAll,
+    /// Skip decoding the relevant statistics for all columns.
+    SkipAll,
+    /// Skip decoding the relevant statistics for all columns not in the provided set
+    /// of column indices.
+    SkipExcept(Arc<HashSet<usize>>),
+}
+
+impl ParquetStatisticsPolicy {
+    /// Create a `ParquetStatisticsPolicy` to skip all columns except those in `keep`.
+    ///
+    /// If `keep` is empty, then this returns [`Self::SkipAll`]
+    pub fn skip_except(keep: &[usize]) -> Self {
+        if keep.is_empty() {
+            Self::SkipAll
+        } else {
+            let mut keep_set = HashSet::<usize>::with_capacity(keep.len());
+            keep_set.extend(keep.iter());
+            Self::SkipExcept(Arc::new(keep_set))
+        }
+    }
+
+    /// Returns whether the policy for the given column index is to skip the statistics.
+    pub(crate) fn is_skip(&self, col_index: usize) -> bool {
+        match self {
+            Self::KeepAll => false,
+            Self::SkipAll => true,
+            Self::SkipExcept(keep) => !keep.contains(&col_index),
+        }
+    }
+}
+
+/// Options that can be set to control what parts of the Parquet file footer
+/// metadata will be decoded and made present in the [`ParquetMetaData`] returned
+/// by [`ParquetMetaDataReader`] and [`ParquetMetaDataPushDecoder`].
+///
+/// [`ParquetMetaData`]: crate::file::metadata::ParquetMetaData
+/// [`ParquetMetaDataReader`]: crate::file::metadata::ParquetMetaDataReader
+/// [`ParquetMetaDataPushDecoder`]: crate::file::metadata::ParquetMetaDataPushDecoder
+#[derive(Debug, Clone)]
+pub struct ParquetMetaDataOptions {
+    schema_descr: Option<SchemaDescPtr>,
+    encoding_stats_as_mask: bool,
+    encoding_stats_policy: ParquetStatisticsPolicy,
+    column_stats_policy: ParquetStatisticsPolicy,
+    size_stats_policy: ParquetStatisticsPolicy,
+}
+
+impl Default for ParquetMetaDataOptions {
+    fn default() -> Self {
+        Self {
+            schema_descr: None,
+            encoding_stats_as_mask: true,
+            encoding_stats_policy: ParquetStatisticsPolicy::KeepAll,
+            column_stats_policy: ParquetStatisticsPolicy::KeepAll,
+            size_stats_policy: ParquetStatisticsPolicy::KeepAll,
+        }
+    }
+}
+
+impl ParquetMetaDataOptions {
+    /// Return a new default [`ParquetMetaDataOptions`].
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Returns an optional [`SchemaDescPtr`] to use when decoding. If this is not `None` then
+    /// the schema in the footer will be skipped.
+    pub fn schema(&self) -> Option<&SchemaDescPtr> {
+        self.schema_descr.as_ref()
+    }
+
+    /// Provide a schema to use when decoding the metadata.
+    pub fn set_schema(&mut self, val: SchemaDescPtr) {
+        self.schema_descr = Some(val);
+    }
+
+    /// Call [`Self::set_schema`] and return `Self` for chaining.
+    pub fn with_schema(mut self, val: SchemaDescPtr) -> Self {
+        self.set_schema(val);
+        self
+    }
+
+    /// Returns whether to present the [`encoding_stats`] field of the Parquet `ColumnMetaData`
+    /// as a bitmask (defaults to `true`).
+    ///
+    /// See [`ColumnChunkMetaData::page_encoding_stats_mask`] for an explanation of why this
+    /// might be desirable.
+    ///
+    /// [`ColumnChunkMetaData::page_encoding_stats_mask`]:
+    /// crate::file::metadata::ColumnChunkMetaData::page_encoding_stats_mask
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn encoding_stats_as_mask(&self) -> bool {
+        self.encoding_stats_as_mask
+    }
+
+    /// Sets whether to convert [`encoding_stats`] from a vector of [`PageEncodingStats`] to a
+    /// bitmask. This can speed up metadata decoding while still serving some full-stats use cases.
+    ///
+    /// Note that if for a given column both this option and `skip_encoding_stats` are `true`, the
+    /// stats will be skipped and not be returned as a mask.
+    ///
+    /// See [`ColumnChunkMetaData::page_encoding_stats_mask`] for more information.
+    ///
+    /// [`PageEncodingStats`]: crate::file::metadata::PageEncodingStats
+    /// [`ColumnChunkMetaData::page_encoding_stats_mask`]:
+    /// crate::file::metadata::ColumnChunkMetaData::page_encoding_stats_mask
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn set_encoding_stats_as_mask(&mut self, val: bool) {
+        self.encoding_stats_as_mask = val;
+    }
+
+    /// Call [`Self::set_encoding_stats_as_mask`] and return `Self` for chaining.
+    pub fn with_encoding_stats_as_mask(mut self, val: bool) -> Self {
+        self.set_encoding_stats_as_mask(val);
+        self
+    }
+
+    /// Returns whether to skip decoding the [`encoding_stats`] in the Parquet `ColumnMetaData`
+    /// for the column indexed by `col_index`.
+    ///
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn skip_encoding_stats(&self, col_index: usize) -> bool {
+        self.encoding_stats_policy.is_skip(col_index)
+    }
+
+    /// Sets the decoding policy for [`encoding_stats`] in the Parquet `ColumnMetaData`.
+    ///
+    /// The default policy is to decode all `encoding_stats`.
+    ///
+    /// This option takes precedence over [`Self::encoding_stats_as_mask`].
+    ///
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn set_encoding_stats_policy(&mut self, policy: ParquetStatisticsPolicy) {
+        self.encoding_stats_policy = policy;
+    }
+
+    /// Call [`Self::set_encoding_stats_policy`] and return `Self` for chaining.
+    pub fn with_encoding_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.set_encoding_stats_policy(policy);
+        self
+    }
+
+    /// Returns whether to skip decoding the [`statistics`] in the Parquet `ColumnMetaData`
+    /// for the column indexed by `col_index`.
+    ///
+    /// [`statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L912
+    pub fn skip_column_stats(&self, col_index: usize) -> bool {
+        self.column_stats_policy.is_skip(col_index)
+    }
+
+    /// Sets the decoding policy for [`statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// The default policy is to decode all `statistics`.
+    ///
+    /// [`statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L912
+    pub fn set_column_stats_policy(&mut self, policy: ParquetStatisticsPolicy) {
+        self.column_stats_policy = policy;
+    }
+
+    /// Call [`Self::set_column_stats_policy`] and return `Self` for chaining.
+    pub fn with_column_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.set_column_stats_policy(policy);
+        self
+    }
+
+    /// Returns whether to skip decoding the [`size_statistics`] in the Parquet `ColumnMetaData`
+    /// for the column indexed by `col_index`.
+    ///
+    /// [`size_statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L936
+    pub fn skip_size_stats(&self, col_index: usize) -> bool {
+        self.size_stats_policy.is_skip(col_index)
+    }
+
+    /// Sets the decoding policy for [`size_statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// The default policy is to decode all `size_statistics`.
+    ///
+    /// [`size_statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L936
+    pub fn set_size_stats_policy(&mut self, policy: ParquetStatisticsPolicy) {
+        self.size_stats_policy = policy;
+    }
+
+    /// Call [`Self::set_size_stats_policy`] and return `Self` for chaining.
+    pub fn with_size_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.set_size_stats_policy(policy);
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use bytes::Bytes;
+
+    use crate::{
+        DecodeResult,
+        file::metadata::{ParquetMetaDataOptions, ParquetMetaDataPushDecoder},
+        util::test_common::file_util::get_test_file,
+    };
+    use std::{io::Read, sync::Arc};
+
+    #[test]
+    fn test_options_default() {
+        let options = ParquetMetaDataOptions::default();
+        assert!(options.encoding_stats_as_mask());
+    }
+
+    #[test]
+    fn test_provide_schema() {
+        let mut buf: Vec<u8> = Vec::new();
+        get_test_file("alltypes_plain.parquet")
+            .read_to_end(&mut buf)
+            .unwrap();
+
+        let data = Bytes::from(buf);
+        let mut decoder = ParquetMetaDataPushDecoder::try_new(data.len() as u64).unwrap();
+        decoder
+            .push_range(0..data.len() as u64, data.clone())
+            .unwrap();
+
+        let expected = match decoder.try_decode().unwrap() {
+            DecodeResult::Data(m) => m,
+            _ => panic!("could not parse metadata"),
+        };
+        let expected_schema = expected.file_metadata().schema_descr_ptr();
+
+        let mut options = ParquetMetaDataOptions::new();
+        options.set_schema(expected_schema);
+        let options = Arc::new(options);
+
+        let mut decoder = ParquetMetaDataPushDecoder::try_new(data.len() as u64)
+            .unwrap()
+            .with_metadata_options(Some(options));
+        decoder.push_range(0..data.len() as u64, data).unwrap();
+        let metadata = match decoder.try_decode().unwrap() {
+            DecodeResult::Data(m) => m,
+            _ => panic!("could not parse metadata"),
+        };
+
+        assert_eq!(expected, metadata);
+        // the schema pointers should be the same
+        assert!(Arc::ptr_eq(
+            &expected.file_metadata().schema_descr_ptr(),
+            &metadata.file_metadata().schema_descr_ptr()
+        ));
+    }
+}
diff --git a/parquet/src/file/metadata/parser.rs b/parquet/src/file/metadata/parser.rs
new file mode 100644
index 000000000000..9df6bcdd7185
--- /dev/null
+++ b/parquet/src/file/metadata/parser.rs
@@ -0,0 +1,330 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Internal metadata parsing routines
+//!
+//! These functions parse thrift-encoded metadata from a byte slice
+//! into the corresponding Rust structures
+
+use crate::errors::ParquetError;
+use crate::file::metadata::thrift::parquet_metadata_from_bytes;
+use crate::file::metadata::{
+    ColumnChunkMetaData, PageIndexPolicy, ParquetMetaData, ParquetMetaDataOptions,
+};
+
+use crate::file::page_index::column_index::ColumnIndexMetaData;
+use crate::file::page_index::index_reader::{decode_column_index, decode_offset_index};
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use bytes::Bytes;
+
+/// Helper struct for metadata parsing
+///
+/// This structure parses thrift-encoded bytes into the correct Rust structs,
+/// such as [`ParquetMetaData`], handling decryption if necessary.
+//
+// Note this structure is used to minimize the number of
+// places to add `#[cfg(feature = "encryption")]` checks.
+pub(crate) use inner::MetadataParser;
+
+#[cfg(feature = "encryption")]
+mod inner {
+    use std::sync::Arc;
+
+    use super::*;
+    use crate::encryption::decrypt::FileDecryptionProperties;
+    use crate::errors::Result;
+
+    /// API for decoding metadata that may be encrypted
+    #[derive(Debug, Default)]
+    pub(crate) struct MetadataParser {
+        // the credentials and keys needed to decrypt metadata
+        file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
+        // metadata parsing options
+        metadata_options: Option<Arc<ParquetMetaDataOptions>>,
+    }
+
+    impl MetadataParser {
+        /// Creates a parser without decryption properties or metadata options
+        pub(crate) fn new() -> Self {
+            Self::default()
+        }
+
+        /// Sets the properties used to decrypt encrypted metadata
+        pub(crate) fn with_file_decryption_properties(
+            mut self,
+            file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
+        ) -> Self {
+            self.file_decryption_properties = file_decryption_properties;
+            self
+        }
+
+        /// Sets the options used when decoding the metadata
+        pub(crate) fn with_metadata_options(
+            mut self,
+            options: Option<Arc<ParquetMetaDataOptions>>,
+        ) -> Self {
+            self.metadata_options = options;
+            self
+        }
+
+        /// Decodes `buf` into [`ParquetMetaData`], using the decryption path when
+        /// the footer is encrypted or decryption properties were provided
+        pub(crate) fn decode_metadata(
+            &self,
+            buf: &[u8],
+            encrypted_footer: bool,
+        ) -> Result<ParquetMetaData> {
+            use crate::file::metadata::thrift::encryption::parquet_metadata_with_encryption;
+            let options = self.metadata_options.as_deref();
+            let decryption = self.file_decryption_properties.as_ref();
+            if !encrypted_footer && decryption.is_none() {
+                return decode_metadata(buf, options);
+            }
+            parquet_metadata_with_encryption(decryption, encrypted_footer, buf, options)
+        }
+    }
+
+    /// Decodes the column index of `column`, decrypting `bytes` first if needed
+    pub(super) fn parse_single_column_index(
+        bytes: &[u8],
+        metadata: &ParquetMetaData,
+        column: &ColumnChunkMetaData,
+        row_group_index: usize,
+        col_index: usize,
+    ) -> crate::errors::Result<ColumnIndexMetaData> {
+        use crate::encryption::decrypt::CryptoContext;
+        // columns without crypto metadata are decoded without decryption
+        let Some(crypto_metadata) = &column.column_crypto_metadata else {
+            return decode_column_index(bytes, column.column_type());
+        };
+        let Some(file_decryptor) = metadata.file_decryptor.as_ref() else {
+            return Err(general_err!("Cannot decrypt column index, no file decryptor set"));
+        };
+        let crypto_context = CryptoContext::for_column(
+            file_decryptor,
+            crypto_metadata,
+            row_group_index,
+            col_index,
+        )?;
+        let column_decryptor = crypto_context.metadata_decryptor();
+        let aad = crypto_context.create_column_index_aad()?;
+        let plaintext = column_decryptor.decrypt(bytes, &aad)?;
+        decode_column_index(&plaintext, column.column_type())
+    }
+
+    /// Decodes the offset index of `column`, decrypting `bytes` first if needed
+    pub(super) fn parse_single_offset_index(
+        bytes: &[u8],
+        metadata: &ParquetMetaData,
+        column: &ColumnChunkMetaData,
+        row_group_index: usize,
+        col_index: usize,
+    ) -> crate::errors::Result<OffsetIndexMetaData> {
+        use crate::encryption::decrypt::CryptoContext;
+        // columns without crypto metadata are decoded without decryption
+        let Some(crypto_metadata) = &column.column_crypto_metadata else {
+            return decode_offset_index(bytes);
+        };
+        let Some(file_decryptor) = metadata.file_decryptor.as_ref() else {
+            return Err(general_err!("Cannot decrypt offset index, no file decryptor set"));
+        };
+        let crypto_context = CryptoContext::for_column(
+            file_decryptor,
+            crypto_metadata,
+            row_group_index,
+            col_index,
+        )?;
+        let column_decryptor = crypto_context.metadata_decryptor();
+        let aad = crypto_context.create_offset_index_aad()?;
+        let plaintext = column_decryptor.decrypt(bytes, &aad)?;
+        decode_offset_index(&plaintext)
+    }
+}
+
+#[cfg(not(feature = "encryption"))]
+mod inner {
+    use super::*;
+    use crate::errors::Result;
+    use std::sync::Arc;
+    /// Parallel implementation used when the encryption feature is not enabled;
+    /// it offers the same API minus the decryption properties
+    #[derive(Debug, Default)]
+    pub(crate) struct MetadataParser {
+        // metadata parsing options
+        metadata_options: Option<Arc<ParquetMetaDataOptions>>,
+    }
+
+    impl MetadataParser {
+        pub(crate) fn new() -> Self {
+            Self::default()
+        }
+
+        pub(crate) fn with_metadata_options(
+            mut self,
+            options: Option<Arc<ParquetMetaDataOptions>>,
+        ) -> Self {
+            self.metadata_options = options;
+            self
+        }
+
+        /// Decodes `buf`, or returns an error if the footer is encrypted
+        pub(crate) fn decode_metadata(
+            &self,
+            buf: &[u8],
+            encrypted_footer: bool,
+        ) -> Result<ParquetMetaData> {
+            if encrypted_footer {
+                return Err(general_err!(
+                    "Parquet file has an encrypted footer but the encryption feature is disabled"
+                ));
+            }
+            decode_metadata(buf, self.metadata_options.as_deref())
+        }
+    }
+
+    /// Decodes the column index of `column` from `bytes` (no decryption support)
+    pub(super) fn parse_single_column_index(
+        bytes: &[u8],
+        _metadata: &ParquetMetaData,
+        column: &ColumnChunkMetaData,
+        _row_group_index: usize,
+        _col_index: usize,
+    ) -> crate::errors::Result<ColumnIndexMetaData> {
+        decode_column_index(bytes, column.column_type())
+    }
+
+    /// Decodes the offset index from `bytes` (no decryption support)
+    pub(super) fn parse_single_offset_index(
+        bytes: &[u8],
+        _metadata: &ParquetMetaData,
+        _column: &ColumnChunkMetaData,
+        _row_group_index: usize,
+        _col_index: usize,
+    ) -> crate::errors::Result<OffsetIndexMetaData> {
+        decode_offset_index(bytes)
+    }
+}
+
+/// Decodes [`ParquetMetaData`] from the provided bytes.
+///
+/// Typically this is used to decode the metadata from the end of a parquet
+/// file. `buf` holds the Thrift compact binary protocol encoding specified by
+/// the [Parquet Spec]; `options` is passed through to the thrift decoder.
+///
+/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
+pub(crate) fn decode_metadata(
+    buf: &[u8],
+    options: Option<&ParquetMetaDataOptions>,
+) -> crate::errors::Result<ParquetMetaData> {
+    parquet_metadata_from_bytes(buf, options)
+}
+
+/// Parses column index from the provided bytes and adds it to the metadata.
+///
+/// Does nothing when `column_index_policy` is [`PageIndexPolicy::Skip`]. For
+/// any other policy, a failure to decode an index is returned as an error.
+/// Columns without a column index range are recorded as
+/// [`ColumnIndexMetaData::NONE`].
+///
+/// Arguments
+/// * `metadata` - The ParquetMetaData to which the parsed column index will be added.
+/// * `column_index_policy` - The policy for handling column index parsing.
+/// * `bytes` - The byte slice containing the column index data.
+/// * `start_offset` - The offset where `bytes` begin in the file.
+pub(crate) fn parse_column_index(
+    metadata: &mut ParquetMetaData,
+    column_index_policy: PageIndexPolicy,
+    bytes: &Bytes,
+    start_offset: u64,
+) -> crate::errors::Result<()> {
+    if column_index_policy == PageIndexPolicy::Skip {
+        return Ok(());
+    }
+    let row_groups = metadata.row_groups();
+    let mut index = Vec::with_capacity(row_groups.len());
+    for (rg_idx, row_group) in row_groups.iter().enumerate() {
+        let mut row_group_index = Vec::with_capacity(row_group.columns().len());
+        for (col_idx, column) in row_group.columns().iter().enumerate() {
+            let Some(r) = column.column_index_range() else {
+                row_group_index.push(ColumnIndexMetaData::NONE);
+                continue;
+            };
+            let r_start = usize::try_from(r.start - start_offset)?;
+            let r_end = usize::try_from(r.end - start_offset)?;
+            row_group_index.push(inner::parse_single_column_index(
+                &bytes[r_start..r_end],
+                metadata,
+                column,
+                rg_idx,
+                col_idx,
+            )?);
+        }
+        index.push(row_group_index);
+    }
+
+    metadata.set_column_index(Some(index));
+    Ok(())
+}
+
+/// Parses offset index from the provided bytes and adds it to the metadata.
+///
+/// Does nothing when `offset_index_policy` is [`PageIndexPolicy::Skip`]. When a
+/// column has no offset index range, or decoding its index fails, the error is
+/// returned if the policy is [`PageIndexPolicy::Required`]; otherwise both the
+/// column index and the offset index of `metadata` are cleared and `Ok(())` is
+/// returned. `bytes` holds the index data and `start_offset` is the file offset
+/// at which `bytes` begins.
+pub(crate) fn parse_offset_index(
+    metadata: &mut ParquetMetaData,
+    offset_index_policy: PageIndexPolicy,
+    bytes: &Bytes,
+    start_offset: u64,
+) -> crate::errors::Result<()> {
+    if offset_index_policy == PageIndexPolicy::Skip {
+        return Ok(());
+    }
+    let row_groups = metadata.row_groups();
+    let mut all_indexes = Vec::with_capacity(row_groups.len());
+    for (rg_idx, row_group) in row_groups.iter().enumerate() {
+        let columns = row_group.columns();
+        let mut row_group_indexes = Vec::with_capacity(columns.len());
+        for (col_idx, column) in columns.iter().enumerate() {
+            let parsed = if let Some(r) = column.offset_index_range() {
+                let r_start = usize::try_from(r.start - start_offset)?;
+                let r_end = usize::try_from(r.end - start_offset)?;
+                let data = &bytes[r_start..r_end];
+                inner::parse_single_offset_index(data, metadata, column, rg_idx, col_idx)
+            } else {
+                Err(general_err!("missing offset index"))
+            };
+
+            match parsed {
+                Ok(index) => row_group_indexes.push(index),
+                Err(e) if offset_index_policy == PageIndexPolicy::Required => return Err(e),
+                Err(_) => {
+                    // Invalidate and return
+                    metadata.set_column_index(None);
+                    metadata.set_offset_index(None);
+                    return Ok(());
+                }
+            }
+        }
+        all_indexes.push(row_group_indexes);
+    }
+    metadata.set_offset_index(Some(all_indexes));
+    Ok(())
+}
diff --git a/parquet/src/file/metadata/push_decoder.rs b/parquet/src/file/metadata/push_decoder.rs
new file mode 100644
index 000000000000..34b7fec2c0c5
--- /dev/null
+++ b/parquet/src/file/metadata/push_decoder.rs
@@ -0,0 +1,726 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::DecodeResult;
+#[cfg(feature = "encryption")]
+use crate::encryption::decrypt::FileDecryptionProperties;
+use crate::errors::{ParquetError, Result};
+use crate::file::FOOTER_SIZE;
+use crate::file::metadata::parser::{MetadataParser, parse_column_index, parse_offset_index};
+use crate::file::metadata::{FooterTail, PageIndexPolicy, ParquetMetaData, ParquetMetaDataOptions};
+use crate::file::page_index::index_reader::acc_range;
+use crate::file::reader::ChunkReader;
+use bytes::Bytes;
+use std::ops::Range;
+use std::sync::Arc;
+
+/// A push decoder for [`ParquetMetaData`].
+///
+/// This structure implements a push API for decoding Parquet metadata, which
+/// decouples IO from the metadata decoding logic (sometimes referred to as
+/// [Sans-IO]).
+///
+/// See [`ParquetMetaDataReader`] for a pull-based API that incorporates IO and
+/// is simpler to use for basic use cases. This decoder is best for customizing
+/// your IO operations to minimize bytes read, prefetch data, or use async IO.
+///
+/// [Sans-IO]: https://sans-io.readthedocs.io
+/// [`ParquetMetaDataReader`]: crate::file::metadata::ParquetMetaDataReader
+///
+/// # Example
+///
+/// The most basic usage is to feed the decoder with the necessary byte ranges
+/// as requested as shown below. This minimizes the number of bytes read, but
+/// requires the most IO operations - one to read the footer and then one
+/// to read the metadata, and possibly more if page indexes are requested.
+///
+#[cfg_attr(
+    feature = "arrow",
+    doc = r##"
+```rust
+# use std::ops::Range;
+# use bytes::Bytes;
+# use arrow_array::record_batch;
+# use parquet::DecodeResult;
+# use parquet::arrow::ArrowWriter;
+# use parquet::errors::ParquetError;
+# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
+#
+# fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
+# let file_bytes = {
+#   let mut buffer = vec![0];
+#   let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+#   let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+#   writer.write(&batch).unwrap();
+#   writer.close().unwrap();
+#   Bytes::from(buffer)
+# };
+# // mimic IO by returning a function that returns the bytes for a given range
+# let get_range = |range: &Range<u64>| -> Bytes {
+#    let start = range.start as usize;
+#     let end = range.end as usize;
+#    file_bytes.slice(start..end)
+# };
+#
+# let file_len = file_bytes.len() as u64;
+// The `ParquetMetaDataPushDecoder` needs to know the file length.
+let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+// try to decode the metadata. If more data is needed, the decoder will tell you what ranges
+loop {
+    match decoder.try_decode() {
+       Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
+       Ok(DecodeResult::NeedsData(ranges)) => {
+          // The decoder needs more data
+          //
+          // In this example, we call a function that returns the bytes for each given range.
+          // In a real application, you would likely read the data from a file or network.
+          let data = ranges.iter().map(|range| get_range(range)).collect();
+          // Push the data into the decoder and try to decode again on the next iteration.
+          decoder.push_ranges(ranges, data).unwrap();
+       }
+       Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
+       Err(e) => return Err(e),
+    }
+}
+# }
+```
+"##
+)]
+///
+/// # Example with "prefetching"
+///
+/// By default, the [`ParquetMetaDataPushDecoder`] will request only the exact byte
+/// ranges it needs. This minimizes the number of bytes read, however it
+/// requires at least two IO operations to read the metadata - one to read the
+/// footer and then one to read the metadata.
+///
+/// If the file has a "Page Index" (see [Self::with_page_index_policy]), three
+/// IO operations are required to read the metadata, as the page index is
+/// not part of the normal metadata footer.
+///
+/// To reduce the number of IO operations in systems with high per operation
+/// overhead (e.g. cloud storage), you can "prefetch" the data and then push
+/// the data into the decoder before calling [`Self::try_decode`]. If you do
+/// not push enough bytes, the decoder will return the ranges that are still
+/// needed.
+///
+/// This approach can also be used when you have the entire file already in memory
+/// for other reasons.
+#[cfg_attr(
+    feature = "arrow",
+    doc = r##"
+```rust
+# use std::ops::Range;
+# use bytes::Bytes;
+# use arrow_array::record_batch;
+# use parquet::DecodeResult;
+# use parquet::arrow::ArrowWriter;
+# use parquet::errors::ParquetError;
+# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
+#
+# fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
+# let file_bytes = {
+#   let mut buffer = vec![0];
+#   let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
+#   let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+#   writer.write(&batch).unwrap();
+#   writer.close().unwrap();
+#   Bytes::from(buffer)
+# };
+#
+let file_len = file_bytes.len() as u64;
+// For this example, we "prefetch" all the bytes which we have in memory,
+// but in a real application, you would likely read a chunk from the end
+// for example 1MB.
+let prefetched_bytes = file_bytes.clone();
+let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+// push the prefetched bytes into the decoder
+decoder.push_ranges(vec![0..file_len], vec![prefetched_bytes]).unwrap();
+// The decoder will now be able to decode the metadata. Note in a real application,
+// unless you can guarantee that the pushed data is enough to decode the metadata,
+// you still need to call `try_decode` in a loop until it returns `DecodeResult::Data`
+// as shown in the previous example
+    match decoder.try_decode() {
+        Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
+        other => { panic!("expected DecodeResult::Data, got: {other:?}") }
+    }
+# }
+```
+"##
+)]
+///
+/// # Example using [`AsyncRead`]
+///
+/// [`ParquetMetaDataPushDecoder`] is designed to work with any data source that can
+/// provide byte ranges, including async IO sources. However, it does not
+/// implement async IO itself. To use async IO, you simply write an async
+/// wrapper around it that reads the required byte ranges and pushes them into the
+/// decoder.
+#[cfg_attr(
+    feature = "arrow",
+    doc = r##"
+```rust
+# use std::ops::Range;
+# use bytes::Bytes;
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
+# use arrow_array::record_batch;
+# use parquet::DecodeResult;
+# use parquet::arrow::ArrowWriter;
+# use parquet::errors::ParquetError;
+# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
+#
+// This function decodes Parquet Metadata from anything that implements
+// [`AsyncRead`] and [`AsyncSeek`] such as a tokio::fs::File
+async fn decode_metadata(
+  file_len: u64,
+  mut async_source: impl AsyncRead + AsyncSeek + Unpin
+) -> Result<ParquetMetaData, ParquetError> {
+  // We need a ParquetMetaDataPushDecoder to decode the metadata.
+  let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+  loop {
+    match decoder.try_decode() {
+       Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
+       Ok(DecodeResult::NeedsData(ranges)) => {
+          // The decoder needs more data
+          //
+          // In this example we use the AsyncRead and AsyncSeek traits to read the
+          // required ranges from the async source.
+          let mut data = Vec::with_capacity(ranges.len());
+          for range in &ranges {
+            let mut buffer = vec![0; (range.end - range.start) as usize];
+            async_source.seek(std::io::SeekFrom::Start(range.start)).await?;
+            async_source.read_exact(&mut buffer).await?;
+            data.push(Bytes::from(buffer));
+          }
+          // Push the data into the decoder and try to decode again on the next iteration.
+          decoder.push_ranges(ranges, data).unwrap();
+       }
+       Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
+       Err(e) => return Err(e),
+    }
+  }
+}
+```
+"##
+)]
+/// [`AsyncRead`]: tokio::io::AsyncRead
+#[derive(Debug)]
+pub struct ParquetMetaDataPushDecoder {
+    /// Decoding state
+    state: DecodeState,
+    /// policy for loading ColumnIndex (part of the PageIndex)
+    column_index_policy: PageIndexPolicy,
+    /// policy for loading OffsetIndex (part of the PageIndex)
+    offset_index_policy: PageIndexPolicy,
+    /// Buffers holding the data pushed into the decoder
+    buffers: crate::util::push_buffers::PushBuffers,
+    /// Metadata parser, configured with any metadata options / decryption properties
+    metadata_parser: MetadataParser,
+}
+
+impl ParquetMetaDataPushDecoder {
+    /// Create a new `ParquetMetaDataPushDecoder` with the given file length.
+    ///
+    /// By default, the column index and offset index policies are both
+    /// [`PageIndexPolicy::Optional`]; see [`Self::with_page_index_policy`].
+    ///
+    /// See examples on [`ParquetMetaDataPushDecoder`].
+    pub fn try_new(file_len: u64) -> Result<Self> {
+        if file_len < 8 {
+            return Err(ParquetError::General(format!(
+                "Parquet files are at least 8 bytes long, but file length is {file_len}"
+            )));
+        };
+
+        Ok(Self {
+            state: DecodeState::ReadingFooter,
+            column_index_policy: PageIndexPolicy::Optional,
+            offset_index_policy: PageIndexPolicy::Optional,
+            buffers: crate::util::push_buffers::PushBuffers::new(file_len),
+            metadata_parser: MetadataParser::new(),
+        })
+    }
+
+    /// Create a decoder that starts at the metadata, given an already decoded footer tail.
+    pub(crate) fn try_new_with_footer_tail(file_len: u64, footer_tail: FooterTail) -> Result<Self> {
+        let decoder = Self::try_new(file_len)?;
+        let state = DecodeState::ReadingMetadata(footer_tail);
+        Ok(Self { state, ..decoder })
+    }
+
+    /// Create a decoder that starts from already decoded `ParquetMetaData`.
+    ///
+    /// The decoder skips the footer and metadata and only reads the page index
+    /// structures, adding them to `metadata`.
+    pub fn try_new_with_metadata(file_len: u64, metadata: ParquetMetaData) -> Result<Self> {
+        let decoder = Self::try_new(file_len)?;
+        let state = DecodeState::ReadingPageIndex(Box::new(metadata));
+        Ok(Self { state, ..decoder })
+    }
+
+    /// Enable or disable reading the page index structures described in
+    /// "[Parquet page index] Layout to Support Page Skipping".
+    ///
+    /// Applies `page_index_policy` to both the ColumnIndex and the OffsetIndex.
+    /// Defaults to [`PageIndexPolicy::Optional`]
+    ///
+    /// This requires
+    /// 1. The Parquet file to have been written with page indexes
+    /// 2. Additional data to be pushed into the decoder (as the page indexes are not part of the thrift footer)
+    ///
+    /// [Parquet page index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+    pub fn with_page_index_policy(self, page_index_policy: PageIndexPolicy) -> Self {
+        self.with_column_index_policy(page_index_policy)
+            .with_offset_index_policy(page_index_policy)
+    }
+
+    /// Set the ColumnIndex (part of the PageIndex) policy; default [`PageIndexPolicy::Optional`]
+    pub fn with_column_index_policy(mut self, column_index_policy: PageIndexPolicy) -> Self {
+        self.column_index_policy = column_index_policy;
+        self
+    }
+
+    /// Set the OffsetIndex (part of the PageIndex) policy; default [`PageIndexPolicy::Optional`]
+    pub fn with_offset_index_policy(mut self, offset_index_policy: PageIndexPolicy) -> Self {
+        self.offset_index_policy = offset_index_policy;
+        self
+    }
+
+    /// Set the options used when decoding the Parquet metadata; `None` clears earlier options.
+    pub fn with_metadata_options(mut self, options: Option<Arc<ParquetMetaDataOptions>>) -> Self {
+        self.metadata_parser = self.metadata_parser.with_metadata_options(options);
+        self
+    }
+
+    #[cfg(feature = "encryption")]
+    /// Provide decryption properties for decoding encrypted Parquet files;
+    /// `None` removes any previously provided properties
+    pub(crate) fn with_file_decryption_properties(
+        mut self,
+        file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
+    ) -> Self {
+        let parser = self.metadata_parser;
+        self.metadata_parser = parser.with_file_decryption_properties(file_decryption_properties);
+        self
+    }
+
+    /// Push the data into the decoder's buffer.
+    ///
+    /// The decoder does not immediately attempt to decode the metadata
+    /// after pushing data. Instead, it accumulates the pushed data until you
+    /// call [`Self::try_decode`]. Pushing after decoding finished is an error.
+    ///
+    /// # Determining required data:
+    ///
+    /// To determine what ranges are required to decode the metadata, you can
+    /// either:
+    ///
+    /// 1. Call [`Self::try_decode`] first to get the exact ranges required (see
+    ///    example on [`Self`])
+    ///
+    /// 2. Speculatively push any data that you have available, which may
+    ///    include more than the footer data or requested bytes.
+    ///
+    /// Speculatively pushing data can be used when "prefetching" data. See
+    /// example on [`Self`]
+    pub fn push_ranges(&mut self, ranges: Vec<Range<u64>>, buffers: Vec<Bytes>) -> Result<()> {
+        if let DecodeState::Finished = self.state {
+            return Err(general_err!(
+                "ParquetMetaDataPushDecoder: cannot push data after decoding is finished"
+            ));
+        }
+        self.buffers.push_ranges(ranges, buffers);
+        Ok(())
+    }
+
+    /// Pushes a single range of data into the decoder's buffer (see [`Self::push_ranges`]).
+    pub fn push_range(&mut self, range: Range<u64>, buffer: Bytes) -> Result<()> {
+        if let DecodeState::Finished = self.state {
+            return Err(general_err!(
+                "ParquetMetaDataPushDecoder: cannot push data after decoding is finished"
+            ));
+        }
+        self.buffers.push_range(range, buffer);
+        Ok(())
+    }
+
+    /// Try to decode the metadata from the pushed data.
+    ///
+    /// Returns [`DecodeResult::Data`] when decoding completes, or
+    /// [`DecodeResult::NeedsData`] with the range to push before calling again.
+    /// After an `Err`, the decoder is unusable: further calls also return `Err`.
+    pub fn try_decode(&mut self) -> Result<DecodeResult<ParquetMetaData>> {
+        let file_len = self.buffers.file_len();
+        let footer_len = FOOTER_SIZE as u64;
+        loop {
+            match std::mem::replace(&mut self.state, DecodeState::Intermediate) {
+                DecodeState::ReadingFooter => {
+                    // need to have the last 8 bytes of the file to decode the metadata
+                    let footer_range = file_len.saturating_sub(footer_len)..file_len;
+
+                    if !self.buffers.has_range(&footer_range) {
+                        self.state = DecodeState::ReadingFooter;
+                        return Ok(needs_range(footer_range));
+                    }
+                    let footer_bytes = self.get_bytes(&footer_range)?;
+                    let footer_tail = FooterTail::try_from(footer_bytes.as_ref())?;
+                    self.state = DecodeState::ReadingMetadata(footer_tail);
+                }
+
+                DecodeState::ReadingMetadata(footer_tail) => {
+                    let metadata_len: u64 = footer_tail.metadata_length() as u64;
+                    // the length comes from the file itself, so guard against underflow
+                    let metadata_end = file_len.saturating_sub(footer_len);
+                    let metadata_start = metadata_end.checked_sub(metadata_len).ok_or_else(|| {
+                        ParquetError::General(format!(
+                            "Invalid Parquet file. Metadata of {metadata_len} bytes does not fit in file of {file_len} bytes"
+                        ))
+                    })?;
+                    let metadata_range = metadata_start..metadata_end;
+
+                    if !self.buffers.has_range(&metadata_range) {
+                        self.state = DecodeState::ReadingMetadata(footer_tail);
+                        return Ok(needs_range(metadata_range));
+                    }
+
+                    let metadata = self.metadata_parser.decode_metadata(
+                        &self.get_bytes(&metadata_range)?,
+                        footer_tail.is_encrypted_footer(),
+                    )?;
+                    // ReadingPageIndex is a no-op when no page indexes are needed
+                    self.state = DecodeState::ReadingPageIndex(Box::new(metadata));
+                }
+
+                DecodeState::ReadingPageIndex(mut metadata) => {
+                    // determine the range of page indexes required by the policies
+                    let Some(page_index_range) = range_for_page_index(
+                        &metadata,
+                        self.column_index_policy,
+                        self.offset_index_policy,
+                    ) else {
+                        self.state = DecodeState::Finished;
+                        return Ok(DecodeResult::Data(*metadata));
+                    };
+
+                    if !self.buffers.has_range(&page_index_range) {
+                        self.state = DecodeState::ReadingPageIndex(metadata);
+                        return Ok(needs_range(page_index_range));
+                    }
+
+                    let buffer = self.get_bytes(&page_index_range)?;
+                    let offset = page_index_range.start;
+                    parse_column_index(&mut metadata, self.column_index_policy, &buffer, offset)?;
+                    parse_offset_index(&mut metadata, self.offset_index_policy, &buffer, offset)?;
+                    self.state = DecodeState::Finished;
+                    return Ok(DecodeResult::Data(*metadata));
+                }
+
+                DecodeState::Finished => return Ok(DecodeResult::Finished),
+                DecodeState::Intermediate => {
+                    return Err(general_err!(
+                        "ParquetMetaDataPushDecoder: internal error, invalid state"
+                    ));
+                }
+            }
+        }
+    }
+
+    /// Returns the bytes for the given range from the internal buffer. Fails if
+    /// the range length does not fit in `usize` or the buffer lookup fails
+    fn get_bytes(&self, range: &Range<u64>) -> Result<Bytes> {
+        let raw_len = range.end - range.start;
+        let len = usize::try_from(raw_len).map_err(|_| {
+            ParquetError::General(format!(
+                "ParquetMetaDataPushDecoder: Range length too large to fit in usize: {raw_len}",
+            ))
+        })?;
+        self.buffers.get_bytes(range.start, len)
+    }
+}
+
+/// Returns a [`DecodeResult::NeedsData`] that requests the given single range
+fn needs_range(range: Range<u64>) -> DecodeResult<ParquetMetaData> {
+    DecodeResult::NeedsData(vec![range])
+}
+
+/// Decoding state machine
+#[derive(Debug)]
+enum DecodeState {
+    /// Reading the last 8 bytes of the file
+    ReadingFooter,
+    /// Reading the metadata thrift structure
+    ReadingMetadata(FooterTail),
+    /// Actively reading the page index
+    ReadingPageIndex(Box<ParquetMetaData>),
+    /// Decoding is complete
+    Finished,
+    /// State left during the `try_decode` method so something valid is present.
+    /// This state should never be observed.
+    Intermediate,
+}
+
+/// Returns the byte range needed to read the column/offset indexes, based on the
+/// specified policies (an index whose policy is `Skip` does not contribute)
+///
+/// Returns `None` if no page indexes are needed
+pub fn range_for_page_index(
+    metadata: &ParquetMetaData,
+    column_index_policy: PageIndexPolicy,
+    offset_index_policy: PageIndexPolicy,
+) -> Option<Range<u64>> {
+    let mut range = None;
+    for c in metadata.row_groups().iter().flat_map(|r| r.columns()) {
+        if column_index_policy != PageIndexPolicy::Skip {
+            range = acc_range(range, c.column_index_range());
+        }
+        if offset_index_policy != PageIndexPolicy::Skip {
+            range = acc_range(range, c.offset_index_range());
+        }
+    }
+    range
+}
+
+// These tests use the arrow writer to create a parquet file in memory
+// so they need the arrow feature and the test feature
+#[cfg(all(test, feature = "arrow"))]
+mod tests {
+    use super::*;
+    use crate::arrow::ArrowWriter;
+    use crate::file::properties::WriterProperties;
+    use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringViewArray};
+    use bytes::Bytes;
+    use std::fmt::Debug;
+    use std::ops::Range;
+    use std::sync::{Arc, LazyLock};
+
+    /// It is possible to decode the metadata from the entire file at once before being asked
+    #[test]
+    fn test_metadata_decoder_all_data() {
+        let file_len = test_file_len();
+        let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+        // Push the entire file data into the metadata decoder
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, vec![test_file_range()]);
+
+        // should be able to decode the metadata without needing more data
+        let metadata = expect_data(metadata_decoder.try_decode());
+
+        assert_eq!(metadata.num_row_groups(), 2);
+        assert_eq!(metadata.row_group(0).num_rows(), 200);
+        assert_eq!(metadata.row_group(1).num_rows(), 200);
+        assert!(metadata.column_index().is_some());
+        assert!(metadata.offset_index().is_some());
+    }
+
+    /// It is possible to feed some, but not all, of the footer into the metadata decoder
+    /// before asked. This avoids multiple IO requests
+    #[test]
+    fn test_metadata_decoder_prefetch_success() {
+        let file_len = test_file_len();
+        let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+        // simulate pre-fetching the last 2k bytes of the file without asking the decoder
+        let prefetch_range = (file_len - 2 * 1024)..file_len;
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, vec![prefetch_range]);
+
+        // expect the decoder has enough data to decode the metadata
+        let metadata = expect_data(metadata_decoder.try_decode());
+        expect_finished(metadata_decoder.try_decode());
+        assert_eq!(metadata.num_row_groups(), 2);
+        assert_eq!(metadata.row_group(0).num_rows(), 200);
+        assert_eq!(metadata.row_group(1).num_rows(), 200);
+        assert!(metadata.column_index().is_some());
+        assert!(metadata.offset_index().is_some());
+    }
+
+    /// It is possible to pre-fetch some, but not all, of the necessary data;
+    /// the decoder then requests the remaining ranges
+    #[test]
+    fn test_metadata_decoder_prefetch_retry() {
+        let file_len = test_file_len();
+        let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+        // simulate pre-fetching the last 1500 bytes of the file.
+        // this is enough to read the footer thrift metadata, but not the offset indexes
+        let prefetch_range = (file_len - 1500)..file_len;
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, vec![prefetch_range]);
+
+        // expect another request is needed to read the offset indexes (note
+        // try_decode only returns NeedsData once, whereas without any prefetching it would
+        // return NeedsData three times)
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // expect the decoder has enough data to decode the metadata
+        let metadata = expect_data(metadata_decoder.try_decode());
+        expect_finished(metadata_decoder.try_decode());
+
+        assert_eq!(metadata.num_row_groups(), 2);
+        assert_eq!(metadata.row_group(0).num_rows(), 200);
+        assert_eq!(metadata.row_group(1).num_rows(), 200);
+        assert!(metadata.column_index().is_some());
+        assert!(metadata.offset_index().is_some());
+    }
+
+    /// Decode the metadata incrementally, simulating a scenario where exactly the data needed
+    /// is read in each step
+    #[test]
+    fn test_metadata_decoder_incremental() {
+        let file_len = TEST_FILE_DATA.len() as u64;
+        let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        assert_eq!(ranges.len(), 1);
+        assert_eq!(ranges[0], test_file_len() - 8..test_file_len());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // expect the second request to read the footer metadata
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // expect the third request to read the page indexes
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // no further requests: the decoder now has all the data it needs
+        let metadata = expect_data(metadata_decoder.try_decode());
+        expect_finished(metadata_decoder.try_decode());
+
+        assert_eq!(metadata.num_row_groups(), 2);
+        assert_eq!(metadata.row_group(0).num_rows(), 200);
+        assert_eq!(metadata.row_group(1).num_rows(), 200);
+        assert!(metadata.column_index().is_some());
+        assert!(metadata.offset_index().is_some());
+    }
+
+    /// Decode the metadata incrementally, but without reading the page indexes
+    /// (so only two requests)
+    #[test]
+    fn test_metadata_decoder_incremental_no_page_index() {
+        let file_len = TEST_FILE_DATA.len() as u64;
+        let mut metadata_decoder = ParquetMetaDataPushDecoder::try_new(file_len)
+            .unwrap()
+            .with_page_index_policy(PageIndexPolicy::Skip);
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        assert_eq!(ranges.len(), 1);
+        assert_eq!(ranges[0], test_file_len() - 8..test_file_len());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // expect the second request to read the footer metadata
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        push_ranges_to_metadata_decoder(&mut metadata_decoder, ranges);
+
+        // expect NO third request to read the page indexes, should just cough up the metadata
+        let metadata = expect_data(metadata_decoder.try_decode());
+        expect_finished(metadata_decoder.try_decode());
+
+        assert_eq!(metadata.num_row_groups(), 2);
+        assert_eq!(metadata.row_group(0).num_rows(), 200);
+        assert_eq!(metadata.row_group(1).num_rows(), 200);
+        assert!(metadata.column_index().is_none()); // of course, we did not read the column index
+        assert!(metadata.offset_index().is_none()); // or the offset index
+    }
+
+    static TEST_BATCH: LazyLock<RecordBatch> = LazyLock::new(|| {
+        // Input batch has 400 rows, with 3 columns: "a", "b", "c"
+        // Note c is a different type (so the data page sizes will be different)
+        let a: ArrayRef = Arc::new(Int64Array::from_iter_values(0..400));
+        let b: ArrayRef = Arc::new(Int64Array::from_iter_values(400..800));
+        let c: ArrayRef = Arc::new(StringViewArray::from_iter_values((0..400).map(|i| {
+            if i % 2 == 0 {
+                format!("string_{i}")
+            } else {
+                format!("A string larger than 12 bytes and thus not inlined {i}")
+            }
+        })));
+
+        RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap()
+    });
+
+    /// Create a parquet file in memory for testing. See [`test_file_range`] for details.
+    static TEST_FILE_DATA: LazyLock<Bytes> = LazyLock::new(|| {
+        let input_batch = &TEST_BATCH;
+        let mut output = Vec::new();
+
+        let writer_options = WriterProperties::builder()
+            .set_max_row_group_size(200)
+            .set_data_page_row_count_limit(100)
+            .build();
+        let mut writer =
+            ArrowWriter::try_new(&mut output, input_batch.schema(), Some(writer_options)).unwrap();
+
+        // since the limits are only enforced on batch boundaries, write the input
+        // batch in chunks of 50
+        let mut row_remain = input_batch.num_rows();
+        while row_remain > 0 {
+            let chunk_size = row_remain.min(50);
+            let chunk = input_batch.slice(input_batch.num_rows() - row_remain, chunk_size);
+            writer.write(&chunk).unwrap();
+            row_remain -= chunk_size;
+        }
+        writer.close().unwrap();
+        Bytes::from(output)
+    });
+
+    /// Return the length of the test file in bytes
+    fn test_file_len() -> u64 {
+        TEST_FILE_DATA.len() as u64
+    }
+
+    /// Return the range of the entire test file
+    fn test_file_range() -> Range<u64> {
+        0..test_file_len()
+    }
+
+    /// Return a slice of the test file data from the given range
+    pub fn test_file_slice(range: Range<u64>) -> Bytes {
+        let start: usize = range.start.try_into().unwrap();
+        let end: usize = range.end.try_into().unwrap();
+        TEST_FILE_DATA.slice(start..end)
+    }
+
+    /// Push the given ranges to the metadata decoder, simulating reading from a file
+    fn push_ranges_to_metadata_decoder(
+        metadata_decoder: &mut ParquetMetaDataPushDecoder,
+        ranges: Vec<Range<u64>>,
+    ) {
+        let data = ranges
+            .iter()
+            .map(|range| test_file_slice(range.clone()))
+            .collect::<Vec<_>>();
+        metadata_decoder.push_ranges(ranges, data).unwrap();
+    }
+
+    /// Expect that the [`DecodeResult`] is a [`DecodeResult::Data`] and return the corresponding element
+    fn expect_data<T: Debug>(result: Result<DecodeResult<T>>) -> T {
+        match result.expect("Expected Ok(DecodeResult::Data(T))") {
+            DecodeResult::Data(data) => data,
+            result => panic!("Expected DecodeResult::Data, got {result:?}"),
+        }
+    }
+
+    /// Expect that the [`DecodeResult`] is a [`DecodeResult::NeedsData`] and return the corresponding ranges
+    fn expect_needs_data<T: Debug>(result: Result<DecodeResult<T>>) -> Vec<Range<u64>> {
+        match result.expect("Expected Ok(DecodeResult::NeedsData{ranges})") {
+            DecodeResult::NeedsData(ranges) => ranges,
+            result => panic!("Expected DecodeResult::NeedsData, got {result:?}"),
+        }
+    }
+
+    fn expect_finished<T: Debug>(result: Result<DecodeResult<T>>) {
+        match result.expect("Expected Ok(DecodeResult::Finished)") {
+            DecodeResult::Finished => {}
+            result => panic!("Expected DecodeResult::Finished, got {result:?}"),
+        }
+    }
+}
diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs
index 356713837530..a18a5e68a9b5 100644
--- a/parquet/src/file/metadata/reader.rs
+++ b/parquet/src/file/metadata/reader.rs
@@ -15,46 +15,44 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{io::Read, ops::Range, sync::Arc};
-
-use crate::basic::ColumnOrder;
 #[cfg(feature = "encryption")]
-use crate::encryption::{
-    decrypt::{FileDecryptionProperties, FileDecryptor},
-    modules::create_footer_aad,
-};
-use bytes::Bytes;
-
+use crate::encryption::decrypt::FileDecryptionProperties;
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData};
-use crate::file::page_index::index::Index;
-use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
+use crate::file::FOOTER_SIZE;
+use crate::file::metadata::parser::decode_metadata;
+use crate::file::metadata::thrift::parquet_schema_from_bytes;
+use crate::file::metadata::{
+    FooterTail, ParquetMetaData, ParquetMetaDataOptions, ParquetMetaDataPushDecoder,
+};
 use crate::file::reader::ChunkReader;
-use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER};
-use crate::format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData};
-#[cfg(feature = "encryption")]
-use crate::format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData};
-use crate::schema::types;
 use crate::schema::types::SchemaDescriptor;
-use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
+use bytes::Bytes;
+use std::sync::Arc;
+use std::{io::Read, ops::Range};
 
+use crate::DecodeResult;
 #[cfg(all(feature = "async", feature = "arrow"))]
 use crate::arrow::async_reader::{MetadataFetch, MetadataSuffixFetch};
-#[cfg(feature = "encryption")]
-use crate::encryption::decrypt::CryptoContext;
-use crate::file::page_index::offset_index::OffsetIndexMetaData;
 
-/// Reads the [`ParquetMetaData`] from a byte stream.
+/// Reads [`ParquetMetaData`] from a byte stream, with either synchronous or
+/// asynchronous I/O.
+///
+/// There are two flavors of APIs:
+/// * Synchronous: [`Self::try_parse()`], [`Self::try_parse_sized()`], [`Self::parse_and_finish()`], etc.
+/// * Asynchronous (requires `async` and `arrow` features): [`Self::try_load()`], etc
 ///
-/// See [`crate::file::metadata::ParquetMetaDataWriter#output-format`] for a description of
-/// the Parquet metadata.
+/// See the [`ParquetMetaDataPushDecoder`] for an API that does not require I/O.
 ///
-/// Parquet metadata is not necessarily contiguous in the files: part is stored
+/// # Format Notes
+///
+/// Parquet metadata is not necessarily contiguous in a Parquet file: a portion is stored
 /// in the footer (the last bytes of the file), but other portions (such as the
 /// PageIndex) can be stored elsewhere.
+/// See [`crate::file::metadata::ParquetMetaDataWriter#output-format`] for more details of
+/// Parquet metadata.
 ///
 /// This reader handles reading the footer as well as the non contiguous parts
-/// of the metadata such as the page indexes; excluding Bloom Filters.
+/// of the metadata (`ColumnIndex` and `OffsetIndex`). It does not handle reading Bloom Filters.
 ///
 /// # Example
 /// ```no_run
@@ -69,36 +67,38 @@ use crate::file::page_index::offset_index::OffsetIndexMetaData;
 /// assert!(metadata.column_index().is_some());
 /// assert!(metadata.offset_index().is_some());
 /// ```
-#[derive(Default)]
+#[derive(Default, Debug)]
 pub struct ParquetMetaDataReader {
     metadata: Option<ParquetMetaData>,
-    column_index: bool,
-    offset_index: bool,
+    column_index: PageIndexPolicy,
+    offset_index: PageIndexPolicy,
     prefetch_hint: Option<usize>,
+    metadata_options: Option<Arc<ParquetMetaDataOptions>>,
     // Size of the serialized thrift metadata plus the 8 byte footer. Only set if
     // `self.parse_metadata` is called.
     metadata_size: Option<usize>,
     #[cfg(feature = "encryption")]
-    file_decryption_properties: Option<FileDecryptionProperties>,
+    file_decryption_properties: Option<Arc<FileDecryptionProperties>>,
 }
 
-/// Describes how the footer metadata is stored
-///
-/// This is parsed from the last 8 bytes of the Parquet file
-pub struct FooterTail {
-    metadata_length: usize,
-    encrypted_footer: bool,
+/// Describes the policy for reading page indexes
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum PageIndexPolicy {
+    /// Do not read the page index.
+    #[default]
+    Skip,
+    /// Read the page index if it exists, otherwise do not error.
+    Optional,
+    /// Require the page index to exist, and error if it does not.
+    Required,
 }
 
-impl FooterTail {
-    /// The length of the footer metadata in bytes
-    pub fn metadata_length(&self) -> usize {
-        self.metadata_length
-    }
-
-    /// Whether the footer metadata is encrypted
-    pub fn is_encrypted_footer(&self) -> bool {
-        self.encrypted_footer
+impl From<bool> for PageIndexPolicy {
+    fn from(value: bool) -> Self {
+        match value {
+            true => Self::Required,
+            false => Self::Skip,
+        }
     }
 }
 
@@ -118,27 +118,55 @@ impl ParquetMetaDataReader {
     }
 
     /// Enable or disable reading the page index structures described in
-    /// "[Parquet page index]: Layout to Support Page Skipping". Equivalent to:
-    /// `self.with_column_indexes(val).with_offset_indexes(val)`
+    /// "[Parquet page index]: Layout to Support Page Skipping".
     ///
     /// [Parquet page index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+    #[deprecated(since = "56.1.0", note = "Use `with_page_index_policy` instead")]
     pub fn with_page_indexes(self, val: bool) -> Self {
-        self.with_column_indexes(val).with_offset_indexes(val)
+        let policy = PageIndexPolicy::from(val);
+        self.with_column_index_policy(policy)
+            .with_offset_index_policy(policy)
     }
 
     /// Enable or disable reading the Parquet [ColumnIndex] structure.
     ///
     /// [ColumnIndex]:  https://github.com/apache/parquet-format/blob/master/PageIndex.md
-    pub fn with_column_indexes(mut self, val: bool) -> Self {
-        self.column_index = val;
-        self
+    #[deprecated(since = "56.1.0", note = "Use `with_column_index_policy` instead")]
+    pub fn with_column_indexes(self, val: bool) -> Self {
+        let policy = PageIndexPolicy::from(val);
+        self.with_column_index_policy(policy)
     }
 
     /// Enable or disable reading the Parquet [OffsetIndex] structure.
     ///
     /// [OffsetIndex]:  https://github.com/apache/parquet-format/blob/master/PageIndex.md
-    pub fn with_offset_indexes(mut self, val: bool) -> Self {
-        self.offset_index = val;
+    #[deprecated(since = "56.1.0", note = "Use `with_offset_index_policy` instead")]
+    pub fn with_offset_indexes(self, val: bool) -> Self {
+        let policy = PageIndexPolicy::from(val);
+        self.with_offset_index_policy(policy)
+    }
+
+    /// Sets the [`PageIndexPolicy`] for the column and offset indexes
+    pub fn with_page_index_policy(self, policy: PageIndexPolicy) -> Self {
+        self.with_column_index_policy(policy)
+            .with_offset_index_policy(policy)
+    }
+
+    /// Sets the [`PageIndexPolicy`] for the column index
+    pub fn with_column_index_policy(mut self, policy: PageIndexPolicy) -> Self {
+        self.column_index = policy;
+        self
+    }
+
+    /// Sets the [`PageIndexPolicy`] for the offset index
+    pub fn with_offset_index_policy(mut self, policy: PageIndexPolicy) -> Self {
+        self.offset_index = policy;
+        self
+    }
+
+    /// Sets the [`ParquetMetaDataOptions`] to use when decoding
+    pub fn with_metadata_options(mut self, options: Option<ParquetMetaDataOptions>) -> Self {
+        self.metadata_options = options.map(Arc::new);
         self
     }
 
@@ -164,9 +192,9 @@ impl ParquetMetaDataReader {
     #[cfg(feature = "encryption")]
     pub fn with_decryption_properties(
         mut self,
-        properties: Option<&FileDecryptionProperties>,
+        properties: Option<std::sync::Arc<FileDecryptionProperties>>,
     ) -> Self {
-        self.file_decryption_properties = properties.cloned();
+        self.file_decryption_properties = properties;
         self
     }
 
@@ -277,7 +305,7 @@ impl ParquetMetaDataReader {
     ///             bytes = get_bytes(&file, len - needed as u64..len);
     ///             // If file metadata was read only read page indexes, otherwise continue loop
     ///             if reader.has_metadata() {
-    ///                 reader.read_page_indexes_sized(&bytes, len);
+    ///                 reader.read_page_indexes_sized(&bytes, len).unwrap();
     ///                 break;
     ///             }
     ///         }
@@ -307,7 +335,8 @@ impl ParquetMetaDataReader {
         };
 
         // we can return if page indexes aren't requested
-        if !self.column_index && !self.offset_index {
+        if self.column_index == PageIndexPolicy::Skip && self.offset_index == PageIndexPolicy::Skip
+        {
             return Ok(());
         }
 
@@ -330,15 +359,25 @@ impl ParquetMetaDataReader {
         reader: &R,
         file_size: u64,
     ) -> Result<()> {
-        if self.metadata.is_none() {
+        let Some(metadata) = self.metadata.take() else {
             return Err(general_err!(
                 "Tried to read page indexes without ParquetMetaData metadata"
             ));
-        }
+        };
+
+        let push_decoder = ParquetMetaDataPushDecoder::try_new_with_metadata(file_size, metadata)?
+            .with_offset_index_policy(self.offset_index)
+            .with_column_index_policy(self.column_index)
+            .with_metadata_options(self.metadata_options.clone());
+        let mut push_decoder = self.prepare_push_decoder(push_decoder);
 
         // Get bounds needed for page indexes (if any are present in the file).
-        let Some(range) = self.range_for_page_index() else {
-            return Ok(());
+        let range = match needs_index_data(&mut push_decoder)? {
+            NeedsIndexData::No(metadata) => {
+                self.metadata = Some(metadata);
+                return Ok(());
+            }
+            NeedsIndexData::Yes(range) => range,
         };
 
         // Check to see if needed range is within `file_range`. Checking `range.end` seems
@@ -348,8 +387,7 @@ impl ParquetMetaDataReader {
             // Requested range starts beyond EOF
             if range.end > file_size {
                 return Err(eof_err!(
-                    "Parquet file too small. Range {:?} is beyond file bounds {file_size}",
-                    range
+                    "Parquet file too small. Range {range:?} is beyond file bounds {file_size}",
                 ));
             } else {
                 // Ask for a larger buffer
@@ -365,19 +403,18 @@ impl ParquetMetaDataReader {
             let metadata_range = file_size.saturating_sub(metadata_size as u64)..file_size;
             if range.end > metadata_range.start {
                 return Err(eof_err!(
-                    "Parquet file too small. Page index range {:?} overlaps with file metadata {:?}",
-                    range,
-                    metadata_range
+                    "Parquet file too small. Page index range {range:?} overlaps with file metadata {metadata_range:?}",
                 ));
             }
         }
 
+        // add the needed ranges to the decoder
         let bytes_needed = usize::try_from(range.end - range.start)?;
         let bytes = reader.get_bytes(range.start - file_range.start, bytes_needed)?;
-        let offset = range.start;
 
-        self.parse_column_index(&bytes, offset)?;
-        self.parse_offset_index(&bytes, offset)?;
+        push_decoder.push_range(range, bytes)?;
+        let metadata = parse_index_data(&mut push_decoder)?;
+        self.metadata = Some(metadata);
 
         Ok(())
     }
@@ -424,7 +461,8 @@ impl ParquetMetaDataReader {
         self.metadata = Some(metadata);
 
         // we can return if page indexes aren't requested
-        if !self.column_index && !self.offset_index {
+        if self.column_index == PageIndexPolicy::Skip && self.offset_index == PageIndexPolicy::Skip
+        {
             return Ok(());
         }
 
@@ -446,7 +484,8 @@ impl ParquetMetaDataReader {
         self.metadata = Some(metadata);
 
         // we can return if page indexes aren't requested
-        if !self.column_index && !self.offset_index {
+        if self.column_index == PageIndexPolicy::Skip && self.offset_index == PageIndexPolicy::Skip
+        {
             return Ok(());
         }
 
@@ -466,15 +505,26 @@ impl ParquetMetaDataReader {
         mut fetch: F,
         remainder: Option<(usize, Bytes)>,
     ) -> Result<()> {
-        if self.metadata.is_none() {
+        let Some(metadata) = self.metadata.take() else {
             return Err(general_err!("Footer metadata is not present"));
-        }
+        };
+
+        // in this case we don't actually know what the file size is, so just use u64::MAX
+        // this is ok since the offsets in the metadata are always valid
+        let file_size = u64::MAX;
+        let push_decoder = ParquetMetaDataPushDecoder::try_new_with_metadata(file_size, metadata)?
+            .with_offset_index_policy(self.offset_index)
+            .with_column_index_policy(self.column_index)
+            .with_metadata_options(self.metadata_options.clone());
+        let mut push_decoder = self.prepare_push_decoder(push_decoder);
 
         // Get bounds needed for page indexes (if any are present in the file).
-        let range = self.range_for_page_index();
-        let range = match range {
-            Some(range) => range,
-            None => return Ok(()),
+        let range = match needs_index_data(&mut push_decoder)? {
+            NeedsIndexData::No(metadata) => {
+                self.metadata = Some(metadata);
+                return Ok(());
+            }
+            NeedsIndexData::Yes(range) => range,
         };
 
         let bytes = match &remainder {
@@ -491,176 +541,12 @@ impl ParquetMetaDataReader {
 
         // Sanity check
         assert_eq!(bytes.len() as u64, range.end - range.start);
-
-        self.parse_column_index(&bytes, range.start)?;
-        self.parse_offset_index(&bytes, range.start)?;
-
-        Ok(())
-    }
-
-    fn parse_column_index(&mut self, bytes: &Bytes, start_offset: u64) -> Result<()> {
-        let metadata = self.metadata.as_mut().unwrap();
-        if self.column_index {
-            let index = metadata
-                .row_groups()
-                .iter()
-                .enumerate()
-                .map(|(rg_idx, x)| {
-                    x.columns()
-                        .iter()
-                        .enumerate()
-                        .map(|(col_idx, c)| match c.column_index_range() {
-                            Some(r) => {
-                                let r_start = usize::try_from(r.start - start_offset)?;
-                                let r_end = usize::try_from(r.end - start_offset)?;
-                                Self::parse_single_column_index(
-                                    &bytes[r_start..r_end],
-                                    metadata,
-                                    c,
-                                    rg_idx,
-                                    col_idx,
-                                )
-                            }
-                            None => Ok(Index::NONE),
-                        })
-                        .collect::<Result<Vec<_>>>()
-                })
-                .collect::<Result<Vec<_>>>()?;
-            metadata.set_column_index(Some(index));
-        }
-        Ok(())
-    }
-
-    #[cfg(feature = "encryption")]
-    fn parse_single_column_index(
-        bytes: &[u8],
-        metadata: &ParquetMetaData,
-        column: &ColumnChunkMetaData,
-        row_group_index: usize,
-        col_index: usize,
-    ) -> Result<Index> {
-        match &column.column_crypto_metadata {
-            Some(crypto_metadata) => {
-                let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| {
-                    general_err!("Cannot decrypt column index, no file decryptor set")
-                })?;
-                let crypto_context = CryptoContext::for_column(
-                    file_decryptor,
-                    crypto_metadata,
-                    row_group_index,
-                    col_index,
-                )?;
-                let column_decryptor = crypto_context.metadata_decryptor();
-                let aad = crypto_context.create_column_index_aad()?;
-                let plaintext = column_decryptor.decrypt(bytes, &aad)?;
-                decode_column_index(&plaintext, column.column_type())
-            }
-            None => decode_column_index(bytes, column.column_type()),
-        }
-    }
-
-    #[cfg(not(feature = "encryption"))]
-    fn parse_single_column_index(
-        bytes: &[u8],
-        _metadata: &ParquetMetaData,
-        column: &ColumnChunkMetaData,
-        _row_group_index: usize,
-        _col_index: usize,
-    ) -> Result<Index> {
-        decode_column_index(bytes, column.column_type())
-    }
-
-    fn parse_offset_index(&mut self, bytes: &Bytes, start_offset: u64) -> Result<()> {
-        let metadata = self.metadata.as_mut().unwrap();
-        if self.offset_index {
-            let index = metadata
-                .row_groups()
-                .iter()
-                .enumerate()
-                .map(|(rg_idx, x)| {
-                    x.columns()
-                        .iter()
-                        .enumerate()
-                        .map(|(col_idx, c)| match c.offset_index_range() {
-                            Some(r) => {
-                                let r_start = usize::try_from(r.start - start_offset)?;
-                                let r_end = usize::try_from(r.end - start_offset)?;
-                                Self::parse_single_offset_index(
-                                    &bytes[r_start..r_end],
-                                    metadata,
-                                    c,
-                                    rg_idx,
-                                    col_idx,
-                                )
-                            }
-                            None => Err(general_err!("missing offset index")),
-                        })
-                        .collect::<Result<Vec<_>>>()
-                })
-                .collect::<Result<Vec<_>>>()?;
-
-            metadata.set_offset_index(Some(index));
-        }
+        push_decoder.push_range(range.clone(), bytes)?;
+        let metadata = parse_index_data(&mut push_decoder)?;
+        self.metadata = Some(metadata);
         Ok(())
     }
 
-    #[cfg(feature = "encryption")]
-    fn parse_single_offset_index(
-        bytes: &[u8],
-        metadata: &ParquetMetaData,
-        column: &ColumnChunkMetaData,
-        row_group_index: usize,
-        col_index: usize,
-    ) -> Result<OffsetIndexMetaData> {
-        match &column.column_crypto_metadata {
-            Some(crypto_metadata) => {
-                let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| {
-                    general_err!("Cannot decrypt offset index, no file decryptor set")
-                })?;
-                let crypto_context = CryptoContext::for_column(
-                    file_decryptor,
-                    crypto_metadata,
-                    row_group_index,
-                    col_index,
-                )?;
-                let column_decryptor = crypto_context.metadata_decryptor();
-                let aad = crypto_context.create_offset_index_aad()?;
-                let plaintext = column_decryptor.decrypt(bytes, &aad)?;
-                decode_offset_index(&plaintext)
-            }
-            None => decode_offset_index(bytes),
-        }
-    }
-
-    #[cfg(not(feature = "encryption"))]
-    fn parse_single_offset_index(
-        bytes: &[u8],
-        _metadata: &ParquetMetaData,
-        _column: &ColumnChunkMetaData,
-        _row_group_index: usize,
-        _col_index: usize,
-    ) -> Result<OffsetIndexMetaData> {
-        decode_offset_index(bytes)
-    }
-
-    fn range_for_page_index(&self) -> Option<Range<u64>> {
-        // sanity check
-        self.metadata.as_ref()?;
-
-        // Get bounds needed for page indexes (if any are present in the file).
-        let mut range = None;
-        let metadata = self.metadata.as_ref().unwrap();
-        for c in metadata.row_groups().iter().flat_map(|r| r.columns()) {
-            if self.column_index {
-                range = acc_range(range, c.column_index_range());
-            }
-            if self.offset_index {
-                range = acc_range(range, c.offset_index_range());
-            }
-        }
-        range
-    }
-
     // One-shot parse of footer.
     // Side effect: this will set `self.metadata_size`
     fn parse_metadata<R: ChunkReader>(&mut self, chunk_reader: &R) -> Result<ParquetMetaData> {
@@ -670,12 +556,12 @@ impl ParquetMetaDataReader {
             return Err(ParquetError::NeedMoreData(FOOTER_SIZE));
         }
 
-        let mut footer = [0_u8; 8];
+        let mut footer = [0_u8; FOOTER_SIZE];
         chunk_reader
-            .get_read(file_size - 8)?
+            .get_read(file_size - FOOTER_SIZE as u64)?
             .read_exact(&mut footer)?;
 
-        let footer = Self::decode_footer_tail(&footer)?;
+        let footer = FooterTail::try_new(&footer)?;
         let metadata_len = footer.metadata_length();
         let footer_metadata_len = FOOTER_SIZE + metadata_len;
         self.metadata_size = Some(footer_metadata_len);
@@ -685,10 +571,14 @@ impl ParquetMetaDataReader {
         }
 
         let start = file_size - footer_metadata_len as u64;
-        self.decode_footer_metadata(
-            chunk_reader.get_bytes(start, metadata_len)?.as_ref(),
-            &footer,
-        )
+        let bytes = chunk_reader.get_bytes(start, metadata_len)?;
+        self.decode_footer_metadata(bytes, file_size, footer)
+    }
+
+    /// Size of the serialized thrift metadata plus the 8 byte footer. Only set if
+    /// `self.parse_metadata` is called.
+    pub fn metadata_size(&self) -> Option<usize> {
+        self.metadata_size
     }
 
     /// Return the number of bytes to read in the initial pass. If `prefetch_size` has
@@ -737,7 +627,7 @@ impl ParquetMetaDataReader {
         let mut footer = [0; FOOTER_SIZE];
         footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
 
-        let footer = Self::decode_footer_tail(&footer)?;
+        let footer = FooterTail::try_new(&footer)?;
         let length = footer.metadata_length();
 
         if file_size < (length + FOOTER_SIZE) as u64 {
@@ -754,14 +644,14 @@ impl ParquetMetaDataReader {
             let meta = fetch
                 .fetch(metadata_start..(file_size - FOOTER_SIZE as u64))
                 .await?;
-            Ok((self.decode_footer_metadata(&meta, &footer)?, None))
+            Ok((self.decode_footer_metadata(meta, file_size, footer)?, None))
         } else {
             let metadata_start = (file_size - (length + FOOTER_SIZE) as u64 - footer_start)
                 .try_into()
                 .expect("metadata length should never be larger than u32");
-            let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
+            let slice = suffix.slice(metadata_start..suffix_len - FOOTER_SIZE);
             Ok((
-                self.decode_footer_metadata(slice, &footer)?,
+                self.decode_footer_metadata(slice, file_size, footer)?,
                 Some((footer_start as usize, suffix.slice(..metadata_start))),
             ))
         }
@@ -788,8 +678,11 @@ impl ParquetMetaDataReader {
         let mut footer = [0; FOOTER_SIZE];
         footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
 
-        let footer = Self::decode_footer_tail(&footer)?;
+        let footer = FooterTail::try_new(&footer)?;
         let length = footer.metadata_length();
+        // fake file size as we are only parsing the footer metadata here
+        // (page indexes can't be parsed without the full file size)
+        let file_size = (length + FOOTER_SIZE) as u64;
 
         // Did not fetch the entire file metadata in the initial read, need to make a second request
         let metadata_offset = length + FOOTER_SIZE;
@@ -804,54 +697,29 @@ impl ParquetMetaDataReader {
                 ));
             }
 
-            Ok((
-                // need to slice off the footer or decryption fails
-                self.decode_footer_metadata(&meta.slice(0..length), &footer)?,
-                None,
-            ))
+            // need to slice off the footer or decryption fails
+            let meta = meta.slice(0..length);
+            Ok((self.decode_footer_metadata(meta, file_size, footer)?, None))
         } else {
             let metadata_start = suffix_len - metadata_offset;
-            let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
+            let slice = suffix.slice(metadata_start..suffix_len - FOOTER_SIZE);
             Ok((
-                self.decode_footer_metadata(slice, &footer)?,
+                self.decode_footer_metadata(slice, file_size, footer)?,
                 Some((0, suffix.slice(..metadata_start))),
             ))
         }
     }
 
-    /// Decodes the end of the Parquet footer
-    ///
-    /// There are 8 bytes at the end of the Parquet footer with the following layout:
-    /// * 4 bytes for the metadata length
-    /// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer)
-    ///
-    /// ```text
-    /// +-----+------------------+
-    /// | len | 'PAR1' or 'PARE' |
-    /// +-----+------------------+
-    /// ```
+    /// Decodes a [`FooterTail`] from the provided 8-byte slice.
+    #[deprecated(since = "57.0.0", note = "Use FooterTail::try_from instead")]
     pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> {
-        let magic = &slice[4..];
-        let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER {
-            true
-        } else if magic == PARQUET_MAGIC {
-            false
-        } else {
-            return Err(general_err!("Invalid Parquet file. Corrupt footer"));
-        };
-        // get the metadata length from the footer
-        let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
-        Ok(FooterTail {
-            // u32 won't be larger than usize in most cases
-            metadata_length: metadata_len as usize,
-            encrypted_footer,
-        })
+        FooterTail::try_new(slice)
     }
 
     /// Decodes the Parquet footer, returning the metadata length in bytes
     #[deprecated(since = "54.3.0", note = "Use decode_footer_tail instead")]
     pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
-        Self::decode_footer_tail(slice).map(|f| f.metadata_length)
+        FooterTail::try_new(slice).map(|f| f.metadata_length())
     }
 
     /// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -860,6 +728,8 @@ impl ParquetMetaDataReader {
     /// file. The format of `buf` is the Thrift compact binary protocol, as specified
     /// by the [Parquet Spec].
     ///
+    /// It does **NOT** include the 8-byte footer.
+    ///
     /// This method handles using either `decode_metadata` or
     /// `decode_metadata_with_encryption` depending on whether the encryption
     /// feature is enabled.
@@ -867,132 +737,69 @@ impl ParquetMetaDataReader {
     /// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
     pub(crate) fn decode_footer_metadata(
         &self,
-        buf: &[u8],
-        footer_tail: &FooterTail,
-    ) -> Result<ParquetMetaData> {
-        #[cfg(feature = "encryption")]
-        let result = Self::decode_metadata_with_encryption(
-            buf,
-            footer_tail.is_encrypted_footer(),
-            self.file_decryption_properties.as_ref(),
-        );
-        #[cfg(not(feature = "encryption"))]
-        let result = {
-            if footer_tail.is_encrypted_footer() {
-                Err(general_err!(
-                    "Parquet file has an encrypted footer but the encryption feature is disabled"
-                ))
-            } else {
-                Self::decode_metadata(buf)
-            }
-        };
-        result
-    }
-
-    /// Decodes [`ParquetMetaData`] from the provided bytes, handling metadata that may be encrypted.
-    ///
-    /// Typically this is used to decode the metadata from the end of a parquet
-    /// file. The format of `buf` is the Thrift compact binary protocol, as specified
-    /// by the [Parquet Spec]. Buffer can be encrypted with AES GCM or AES CTR
-    /// ciphers as specfied in the [Parquet Encryption Spec].
-    ///
-    /// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
-    /// [Parquet Encryption Spec]: https://parquet.apache.org/docs/file-format/data-pages/encryption/
-    #[cfg(feature = "encryption")]
-    fn decode_metadata_with_encryption(
-        buf: &[u8],
-        encrypted_footer: bool,
-        file_decryption_properties: Option<&FileDecryptionProperties>,
+        buf: Bytes,
+        file_size: u64,
+        footer_tail: FooterTail,
     ) -> Result<ParquetMetaData> {
-        let mut prot = TCompactSliceInputProtocol::new(buf);
-        let mut file_decryptor = None;
-        let decrypted_fmd_buf;
-
-        if encrypted_footer {
-            if let Some(file_decryption_properties) = file_decryption_properties {
-                let t_file_crypto_metadata: TFileCryptoMetaData =
-                    TFileCryptoMetaData::read_from_in_protocol(&mut prot)
-                        .map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
-                let supply_aad_prefix = match &t_file_crypto_metadata.encryption_algorithm {
-                    EncryptionAlgorithm::AESGCMV1(algo) => algo.supply_aad_prefix,
-                    _ => Some(false),
-                }
-                .unwrap_or(false);
-                if supply_aad_prefix && file_decryption_properties.aad_prefix().is_none() {
-                    return Err(general_err!(
-                        "Parquet file was encrypted with an AAD prefix that is not stored in the file, \
-                        but no AAD prefix was provided in the file decryption properties"
-                    ));
-                }
-                let decryptor = get_file_decryptor(
-                    t_file_crypto_metadata.encryption_algorithm,
-                    t_file_crypto_metadata.key_metadata.as_deref(),
-                    file_decryption_properties,
-                )?;
-                let footer_decryptor = decryptor.get_footer_decryptor();
-                let aad_footer = create_footer_aad(decryptor.file_aad())?;
-
-                decrypted_fmd_buf = footer_decryptor?
-                    .decrypt(prot.as_slice().as_ref(), aad_footer.as_ref())
-                    .map_err(|_| {
-                        general_err!(
-                            "Provided footer key and AAD were unable to decrypt parquet footer"
-                        )
-                    })?;
-                prot = TCompactSliceInputProtocol::new(decrypted_fmd_buf.as_ref());
-
-                file_decryptor = Some(decryptor);
-            } else {
-                return Err(general_err!("Parquet file has an encrypted footer but decryption properties were not provided"));
-            }
-        }
-
-        let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot)
-            .map_err(|e| general_err!("Could not parse metadata: {}", e))?;
-        let schema = types::from_thrift(&t_file_metadata.schema)?;
-        let schema_descr = Arc::new(SchemaDescriptor::new(schema));
-
-        if let (Some(algo), Some(file_decryption_properties)) = (
-            t_file_metadata.encryption_algorithm,
-            file_decryption_properties,
-        ) {
-            // File has a plaintext footer but encryption algorithm is set
-            let file_decryptor_value = get_file_decryptor(
-                algo,
-                t_file_metadata.footer_signing_key_metadata.as_deref(),
-                file_decryption_properties,
-            )?;
-            if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer {
-                file_decryptor_value.verify_plaintext_footer_signature(buf)?;
-            }
-            file_decryptor = Some(file_decryptor_value);
-        }
+        // The push decoder expects the metadata to be at the end of the file
+        // (... data ...) + (metadata) + (footer)
+        // so we need to provide the starting offset of the metadata
+        // within the file.
+        let ending_offset = file_size.checked_sub(FOOTER_SIZE as u64).ok_or_else(|| {
+            general_err!(
+                "file size {file_size} is smaller than footer size {}",
+                FOOTER_SIZE
+            )
+        })?;
 
-        let mut row_groups = Vec::new();
-        for rg in t_file_metadata.row_groups {
-            let r = RowGroupMetaData::from_encrypted_thrift(
-                schema_descr.clone(),
-                rg,
-                file_decryptor.as_ref(),
-            )?;
-            row_groups.push(r);
+        let starting_offset = ending_offset.checked_sub(buf.len() as u64).ok_or_else(|| {
+            general_err!(
+                "file size {file_size} is smaller than buffer size {} + footer size {}",
+                buf.len(),
+                FOOTER_SIZE
+            )
+        })?;
+
+        let range = starting_offset..ending_offset;
+
+        let push_decoder =
+            ParquetMetaDataPushDecoder::try_new_with_footer_tail(file_size, footer_tail)?
+                // NOTE: DO NOT enable page indexes here, they are handled separately
+                .with_page_index_policy(PageIndexPolicy::Skip)
+                .with_metadata_options(self.metadata_options.clone());
+
+        let mut push_decoder = self.prepare_push_decoder(push_decoder);
+        push_decoder.push_range(range, buf)?;
+        match push_decoder.try_decode()? {
+            DecodeResult::Data(metadata) => Ok(metadata),
+            DecodeResult::Finished => Err(general_err!(
+                "could not parse parquet metadata -- previously finished"
+            )),
+            DecodeResult::NeedsData(ranges) => Err(general_err!(
+                "could not parse parquet metadata, needs ranges {:?}",
+                ranges
+            )),
         }
-        let column_orders =
-            Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?;
-
-        let file_metadata = FileMetaData::new(
-            t_file_metadata.version,
-            t_file_metadata.num_rows,
-            t_file_metadata.created_by,
-            t_file_metadata.key_value_metadata,
-            schema_descr,
-            column_orders,
-        );
-        let mut metadata = ParquetMetaData::new(file_metadata, row_groups);
-
-        metadata.with_file_decryptor(file_decryptor);
+    }
 
-        Ok(metadata)
+    /// Configures the push decoder with this reader's file decryption properties, if any.
+    #[cfg(feature = "encryption")]
+    fn prepare_push_decoder(
+        &self,
+        push_decoder: ParquetMetaDataPushDecoder,
+    ) -> ParquetMetaDataPushDecoder {
+        push_decoder.with_file_decryption_properties(
+            self.file_decryption_properties
+                .as_ref()
+                .map(std::sync::Arc::clone),
+        )
+    }
+    #[cfg(not(feature = "encryption"))]
+    fn prepare_push_decoder(
+        &self,
+        push_decoder: ParquetMetaDataPushDecoder,
+    ) -> ParquetMetaDataPushDecoder {
+        push_decoder
     }
 
     /// Decodes [`ParquetMetaData`] from the provided bytes.
@@ -1003,105 +810,71 @@ impl ParquetMetaDataReader {
     ///
     /// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
     pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> {
-        let mut prot = TCompactSliceInputProtocol::new(buf);
-
-        let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot)
-            .map_err(|e| general_err!("Could not parse metadata: {}", e))?;
-        let schema = types::from_thrift(&t_file_metadata.schema)?;
-        let schema_descr = Arc::new(SchemaDescriptor::new(schema));
+        decode_metadata(buf, None)
+    }
 
-        let mut row_groups = Vec::new();
-        for rg in t_file_metadata.row_groups {
-            row_groups.push(RowGroupMetaData::from_thrift(schema_descr.clone(), rg)?);
-        }
-        let column_orders =
-            Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?;
-
-        let file_metadata = FileMetaData::new(
-            t_file_metadata.version,
-            t_file_metadata.num_rows,
-            t_file_metadata.created_by,
-            t_file_metadata.key_value_metadata,
-            schema_descr,
-            column_orders,
-        );
+    /// Decodes [`ParquetMetaData`] from the provided bytes.
+    ///
+    /// Like [`Self::decode_metadata`] but this also accepts
+    /// metadata parsing options.
+    pub fn decode_metadata_with_options(
+        buf: &[u8],
+        options: Option<&ParquetMetaDataOptions>,
+    ) -> Result<ParquetMetaData> {
+        decode_metadata(buf, options)
+    }
 
-        Ok(ParquetMetaData::new(file_metadata, row_groups))
+    /// Decodes the schema from the Parquet footer in `buf`. Returned as
+    /// a [`SchemaDescriptor`].
+    pub fn decode_schema(buf: &[u8]) -> Result<Arc<SchemaDescriptor>> {
+        Ok(Arc::new(parquet_schema_from_bytes(buf)?))
     }
+}
 
-    /// Parses column orders from Thrift definition.
-    /// If no column orders are defined, returns `None`.
-    fn parse_column_orders(
-        t_column_orders: Option<Vec<TColumnOrder>>,
-        schema_descr: &SchemaDescriptor,
-    ) -> Result<Option<Vec<ColumnOrder>>> {
-        match t_column_orders {
-            Some(orders) => {
-                // Should always be the case
-                if orders.len() != schema_descr.num_columns() {
-                    return Err(general_err!("Column order length mismatch"));
-                };
-                let mut res = Vec::new();
-                for (i, column) in schema_descr.columns().iter().enumerate() {
-                    match orders[i] {
-                        TColumnOrder::TYPEORDER(_) => {
-                            let sort_order = ColumnOrder::get_sort_order(
-                                column.logical_type(),
-                                column.converted_type(),
-                                column.physical_type(),
-                            );
-                            res.push(ColumnOrder::TYPE_DEFINED_ORDER(sort_order));
-                        }
-                    }
-                }
-                Ok(Some(res))
-            }
-            None => Ok(None),
+/// The bounds needed to read page indexes
+// this is an internal enum, so it is ok to allow differences in enum size
+#[allow(clippy::large_enum_variant)]
+enum NeedsIndexData {
+    /// no additional data is needed (e.g. the indexes weren't requested)
+    No(ParquetMetaData),
+    /// Additional data is needed; holds the byte range that is required
+    Yes(Range<u64>),
+}
+
+/// Determines a single combined range of bytes needed to read the page indexes,
+/// or returns the metadata if no additional data is needed (e.g. if no page indexes are requested)
+fn needs_index_data(push_decoder: &mut ParquetMetaDataPushDecoder) -> Result<NeedsIndexData> {
+    match push_decoder.try_decode()? {
+        DecodeResult::NeedsData(ranges) => {
+            let range = ranges
+                .into_iter()
+                .reduce(|a, b| a.start.min(b.start)..a.end.max(b.end))
+                .ok_or_else(|| general_err!("Internal error: no ranges provided"))?;
+            Ok(NeedsIndexData::Yes(range))
         }
+        DecodeResult::Data(metadata) => Ok(NeedsIndexData::No(metadata)),
+        DecodeResult::Finished => Err(general_err!("Internal error: decoder was finished")),
     }
 }
 
-#[cfg(feature = "encryption")]
-fn get_file_decryptor(
-    encryption_algorithm: EncryptionAlgorithm,
-    footer_key_metadata: Option<&[u8]>,
-    file_decryption_properties: &FileDecryptionProperties,
-) -> Result<FileDecryptor> {
-    match encryption_algorithm {
-        EncryptionAlgorithm::AESGCMV1(algo) => {
-            let aad_file_unique = algo
-                .aad_file_unique
-                .ok_or_else(|| general_err!("AAD unique file identifier is not set"))?;
-            let aad_prefix = if let Some(aad_prefix) = file_decryption_properties.aad_prefix() {
-                aad_prefix.clone()
-            } else {
-                algo.aad_prefix.unwrap_or_default()
-            };
-
-            FileDecryptor::new(
-                file_decryption_properties,
-                footer_key_metadata,
-                aad_file_unique,
-                aad_prefix,
-            )
-        }
-        EncryptionAlgorithm::AESGCMCTRV1(_) => Err(nyi_err!(
-            "The AES_GCM_CTR_V1 encryption algorithm is not yet supported"
+/// Given a push decoder that has had the needed ranges pushed to it,
+/// attempt to decode indexes and return the updated metadata.
+fn parse_index_data(push_decoder: &mut ParquetMetaDataPushDecoder) -> Result<ParquetMetaData> {
+    match push_decoder.try_decode()? {
+        DecodeResult::NeedsData(_) => Err(general_err!(
+            "Internal error: decoder still needs data after reading required range"
         )),
+        DecodeResult::Data(metadata) => Ok(metadata),
+        DecodeResult::Finished => Err(general_err!("Internal error: decoder was finished")),
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use bytes::Bytes;
-
-    use crate::basic::SortOrder;
-    use crate::basic::Type;
     use crate::file::reader::Length;
-    use crate::format::TypeDefinedOrder;
-    use crate::schema::types::Type as SchemaType;
     use crate::util::test_common::file_util::get_test_file;
+    use std::ops::Range;
 
     #[test]
     fn test_parse_metadata_size_smaller_than_footer() {
@@ -1109,7 +882,7 @@ mod tests {
         let err = ParquetMetaDataReader::new()
             .parse_metadata(&test_file)
             .unwrap_err();
-        assert!(matches!(err, ParquetError::NeedMoreData(8)));
+        assert!(matches!(err, ParquetError::NeedMoreData(FOOTER_SIZE)));
     }
 
     #[test]
@@ -1132,59 +905,7 @@ mod tests {
     }
 
     #[test]
-    fn test_metadata_column_orders_parse() {
-        // Define simple schema, we do not need to provide logical types.
-        let fields = vec![
-            Arc::new(
-                SchemaType::primitive_type_builder("col1", Type::INT32)
-                    .build()
-                    .unwrap(),
-            ),
-            Arc::new(
-                SchemaType::primitive_type_builder("col2", Type::FLOAT)
-                    .build()
-                    .unwrap(),
-            ),
-        ];
-        let schema = SchemaType::group_type_builder("schema")
-            .with_fields(fields)
-            .build()
-            .unwrap();
-        let schema_descr = SchemaDescriptor::new(Arc::new(schema));
-
-        let t_column_orders = Some(vec![
-            TColumnOrder::TYPEORDER(TypeDefinedOrder::new()),
-            TColumnOrder::TYPEORDER(TypeDefinedOrder::new()),
-        ]);
-
-        assert_eq!(
-            ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr).unwrap(),
-            Some(vec![
-                ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED),
-                ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED)
-            ])
-        );
-
-        // Test when no column orders are defined.
-        assert_eq!(
-            ParquetMetaDataReader::parse_column_orders(None, &schema_descr).unwrap(),
-            None
-        );
-    }
-
-    #[test]
-    fn test_metadata_column_orders_len_mismatch() {
-        let schema = SchemaType::group_type_builder("schema").build().unwrap();
-        let schema_descr = SchemaDescriptor::new(Arc::new(schema));
-
-        let t_column_orders = Some(vec![TColumnOrder::TYPEORDER(TypeDefinedOrder::new())]);
-
-        let res = ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr);
-        assert!(res.is_err());
-        assert!(format!("{:?}", res.unwrap_err()).contains("Column order length mismatch"));
-    }
-
-    #[test]
+    #[allow(deprecated)]
     fn test_try_parse() {
         let file = get_test_file("alltypes_tiny_pages.parquet");
         let len = file.len();
@@ -1302,15 +1023,23 @@ mod tests {
 #[cfg(all(feature = "async", feature = "arrow", test))]
 mod async_tests {
     use super::*;
+
+    use arrow::{array::Int32Array, datatypes::DataType};
+    use arrow_array::RecordBatch;
+    use arrow_schema::{Field, Schema};
     use bytes::Bytes;
-    use futures::future::BoxFuture;
     use futures::FutureExt;
+    use futures::future::BoxFuture;
     use std::fs::File;
     use std::future::Future;
     use std::io::{Read, Seek, SeekFrom};
     use std::ops::Range;
+    use std::sync::Arc;
     use std::sync::atomic::{AtomicUsize, Ordering};
+    use tempfile::NamedTempFile;
 
+    use crate::arrow::ArrowWriter;
+    use crate::file::properties::WriterProperties;
     use crate::file::reader::Length;
     use crate::util::test_common::file_util::get_test_file;
 
@@ -1554,7 +1283,7 @@ mod async_tests {
 
         // just make sure the metadata is properly decrypted and read
         let expected = ParquetMetaDataReader::new()
-            .with_decryption_properties(Some(&decryption_properties))
+            .with_decryption_properties(Some(decryption_properties))
             .load_via_suffix_and_finish(input)
             .await
             .unwrap();
@@ -1562,6 +1291,7 @@ mod async_tests {
     }
 
     #[tokio::test]
+    #[allow(deprecated)]
     async fn test_page_index() {
         let mut file = get_test_file("alltypes_tiny_pages.parquet");
         let len = file.len();
@@ -1648,4 +1378,50 @@ mod async_tests {
         assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
         assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
     }
+
+    fn write_parquet_file(offset_index_disabled: bool) -> Result<NamedTempFile> {
+        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
+        let batch = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )?;
+
+        let file = NamedTempFile::new().unwrap();
+
+        // Writer properties with the offset index disabled only when requested
+        let props = WriterProperties::builder()
+            .set_offset_index_disabled(offset_index_disabled)
+            .build();
+
+        let mut writer = ArrowWriter::try_new(file.reopen()?, schema, Some(props))?;
+        writer.write(&batch)?;
+        writer.close()?;
+
+        Ok(file)
+    }
+
+    fn read_and_check(file: &File, policy: PageIndexPolicy) -> Result<ParquetMetaData> {
+        let mut reader = ParquetMetaDataReader::new().with_page_index_policy(policy);
+        reader.try_parse(file)?;
+        reader.finish()
+    }
+
+    #[test]
+    fn test_page_index_policy() {
+        // With page index
+        let f = write_parquet_file(false).unwrap();
+        read_and_check(f.as_file(), PageIndexPolicy::Required).unwrap();
+        read_and_check(f.as_file(), PageIndexPolicy::Optional).unwrap();
+        read_and_check(f.as_file(), PageIndexPolicy::Skip).unwrap();
+
+        // Without page index
+        let f = write_parquet_file(true).unwrap();
+        let res = read_and_check(f.as_file(), PageIndexPolicy::Required);
+        assert!(matches!(
+            res,
+            Err(ParquetError::General(e)) if e == "missing offset index"
+        ));
+        read_and_check(f.as_file(), PageIndexPolicy::Optional).unwrap();
+        read_and_check(f.as_file(), PageIndexPolicy::Skip).unwrap();
+    }
 }
diff --git a/parquet/src/file/metadata/thrift/encryption.rs b/parquet/src/file/metadata/thrift/encryption.rs
new file mode 100644
index 000000000000..9713cf936dd2
--- /dev/null
+++ b/parquet/src/file/metadata/thrift/encryption.rs
@@ -0,0 +1,340 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Encryption support for Thrift serialization
+
+use crate::{
+    encryption::decrypt::{FileDecryptionProperties, FileDecryptor},
+    errors::{ParquetError, Result},
+    file::{
+        column_crypto_metadata::ColumnCryptoMetaData,
+        metadata::{
+            HeapSize, ParquetMetaData, ParquetMetaDataOptions, RowGroupMetaData,
+            thrift::{parquet_metadata_from_bytes, read_column_metadata, validate_column_metadata},
+        },
+    },
+    parquet_thrift::{
+        ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol,
+        ThriftCompactOutputProtocol, ThriftSliceInputProtocol, WriteThrift, WriteThriftField,
+    },
+    thrift_struct, thrift_union,
+};
+use std::io::Write;
+use std::sync::Arc;
+
+thrift_struct!(
+/// Parameters carried by the `AES_GCM_V1` variant of [`EncryptionAlgorithm`].
+///
+/// All fields are optional on the wire; `get_file_decryptor` rejects a value whose
+/// `aad_file_unique` is absent.
+pub(crate) struct AesGcmV1 {
+  /// AAD prefix
+  1: optional binary aad_prefix
+
+  /// Unique file identifier part of AAD suffix
+  2: optional binary aad_file_unique
+
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix
+  3: optional bool supply_aad_prefix
+}
+);
+
+impl HeapSize for AesGcmV1 {
+    /// Sums the heap usage reported by each of the three fields.
+    fn heap_size(&self) -> usize {
+        let prefix_bytes = self.aad_prefix.heap_size();
+        let file_unique_bytes = self.aad_file_unique.heap_size();
+        let flag_bytes = self.supply_aad_prefix.heap_size();
+        prefix_bytes + file_unique_bytes + flag_bytes
+    }
+}
+
+thrift_struct!(
+/// Parameters carried by the `AES_GCM_CTR_V1` variant of [`EncryptionAlgorithm`].
+///
+/// The struct is parsed, but `get_file_decryptor` currently rejects this algorithm
+/// as not yet supported.
+pub(crate) struct AesGcmCtrV1 {
+  /// AAD prefix
+  1: optional binary aad_prefix
+
+  /// Unique file identifier part of AAD suffix
+  2: optional binary aad_file_unique
+
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix
+  3: optional bool supply_aad_prefix
+}
+);
+
+impl HeapSize for AesGcmCtrV1 {
+    /// Sums the heap usage reported by each of the three fields.
+    fn heap_size(&self) -> usize {
+        let prefix_bytes = self.aad_prefix.heap_size();
+        let file_unique_bytes = self.aad_file_unique.heap_size();
+        let flag_bytes = self.supply_aad_prefix.heap_size();
+        prefix_bytes + file_unique_bytes + flag_bytes
+    }
+}
+
+// Thrift union selecting the file's encryption algorithm together with its parameters.
+// Variant 1 carries an `AesGcmV1`, variant 2 an `AesGcmCtrV1`; only `AES_GCM_V1` is
+// accepted by `get_file_decryptor` in this module.
+thrift_union!(
+union EncryptionAlgorithm {
+  1: (AesGcmV1) AES_GCM_V1
+  2: (AesGcmCtrV1) AES_GCM_CTR_V1
+}
+);
+
+impl HeapSize for EncryptionAlgorithm {
+    /// Heap usage is whatever the active variant's parameter struct reports.
+    fn heap_size(&self) -> usize {
+        match self {
+            Self::AES_GCM_CTR_V1(params) => params.heap_size(),
+            Self::AES_GCM_V1(params) => params.heap_size(),
+        }
+    }
+}
+
+thrift_struct!(
+/// Crypto metadata for files with encrypted footer
+///
+/// `parquet_metadata_with_encryption` reads this struct from the start of the footer
+/// buffer when the footer is encrypted, then decrypts the bytes that follow it.
+pub(crate) struct FileCryptoMetaData<'a> {
+  /// Encryption algorithm. This field is only used for files
+  /// with encrypted footer. Files with plaintext footer store algorithm id
+  /// inside footer (FileMetaData structure).
+  1: required EncryptionAlgorithm encryption_algorithm
+
+  /// Retrieval metadata of key used for encryption of footer,
+  /// and (possibly) columns.
+  2: optional binary<'a> key_metadata
+}
+);
+
+/// Rebuilds a [`RowGroupMetaData`], decrypting per-column metadata where required.
+///
+/// For every column chunk that carries `encrypted_column_metadata`, and only when a
+/// `decryptor` is supplied, the encrypted bytes are decrypted with either the column
+/// key or the footer key (as selected by the column's crypto metadata), parsed with
+/// `read_column_metadata`, validated, and merged into the column chunk. Every other
+/// column chunk is passed through untouched.
+///
+/// # Errors
+///
+/// Returns an error when:
+/// * the schema and the row group disagree on the number of columns,
+/// * a column has encrypted metadata but no crypto metadata,
+/// * a column needs decrypting but the row group has no ordinal, or a negative one,
+/// * a decryptor cannot be obtained or decryption fails, or
+/// * the decrypted column metadata cannot be parsed or lacks required fields.
+fn row_group_from_encrypted_thrift(
+    mut rg: RowGroupMetaData,
+    decryptor: Option<&FileDecryptor>,
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<RowGroupMetaData> {
+    let schema_descr = rg.schema_descr;
+
+    if schema_descr.num_columns() != rg.columns.len() {
+        return Err(general_err!(
+            "Column count mismatch. Schema has {} columns while Row Group has {}",
+            schema_descr.num_columns(),
+            rg.columns.len()
+        ));
+    }
+    let total_byte_size = rg.total_byte_size;
+    let num_rows = rg.num_rows;
+    // Exactly one output column is produced per input column.
+    let mut columns = Vec::with_capacity(rg.columns.len());
+
+    for (i, (mut c, d)) in rg
+        .columns
+        .drain(0..)
+        .zip(schema_descr.columns())
+        .enumerate()
+    {
+        // Read encrypted metadata if it's present and we have a decryptor.
+        if let (true, Some(decryptor)) = (c.encrypted_column_metadata.is_some(), decryptor) {
+            let column_decryptor = match c.crypto_metadata() {
+                None => {
+                    return Err(general_err!(
+                        "No crypto_metadata is set for column '{}', which has encrypted metadata",
+                        d.path().string()
+                    ));
+                }
+                Some(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(crypto_metadata)) => {
+                    let column_name = crypto_metadata.path_in_schema.join(".");
+                    decryptor.get_column_metadata_decryptor(
+                        column_name.as_str(),
+                        crypto_metadata.key_metadata.as_deref(),
+                    )?
+                }
+                Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) => {
+                    decryptor.get_footer_decryptor()?
+                }
+            };
+
+            // The row group ordinal feeds the module AAD. It is optional in the
+            // metadata, so report a missing or negative value as an error instead of
+            // panicking or silently wrapping to a huge index.
+            let row_group_ordinal = rg
+                .ordinal
+                .and_then(|ordinal| usize::try_from(ordinal).ok())
+                .ok_or_else(|| {
+                    general_err!(
+                        "Row group has no valid ordinal, which is required to decrypt column '{}'",
+                        d.path().string()
+                    )
+                })?;
+
+            let column_aad = crate::encryption::modules::create_module_aad(
+                decryptor.file_aad(),
+                crate::encryption::modules::ModuleType::ColumnMetaData,
+                row_group_ordinal,
+                i,
+                None,
+            )?;
+
+            // Take the encrypted column metadata as it is no longer needed.
+            // Its presence was established by the `is_some()` check above.
+            let encrypted_column_metadata = c.encrypted_column_metadata.take();
+            let buf = encrypted_column_metadata.unwrap();
+            let decrypted_cc_buf = column_decryptor
+                .decrypt(&buf, column_aad.as_ref())
+                .map_err(|_| {
+                    general_err!(
+                        "Unable to decrypt column '{}', perhaps the column key is wrong?",
+                        d.path().string()
+                    )
+                })?;
+
+            // parse decrypted buffer and then replace fields in 'c'
+            let mut prot = ThriftSliceInputProtocol::new(&decrypted_cc_buf);
+            let mask = read_column_metadata(&mut prot, &mut c, i, options)?;
+            validate_column_metadata(mask)?;
+        }
+
+        // Decrypted or not, the column keeps its position in the row group.
+        columns.push(c);
+    }
+
+    let sorting_columns = rg.sorting_columns;
+    let file_offset = rg.file_offset;
+    let ordinal = rg.ordinal;
+
+    Ok(RowGroupMetaData {
+        columns,
+        num_rows,
+        sorting_columns,
+        total_byte_size,
+        schema_descr,
+        file_offset,
+        ordinal,
+    })
+}
+
+/// Decodes [`ParquetMetaData`] from the provided bytes, handling metadata that may be encrypted.
+///
+/// Typically this is used to decode the metadata from the end of a parquet
+/// file. The format of `buf` is the Thrift compact binary protocol, as specified
+/// by the [Parquet Spec]. Buffer can be encrypted with AES GCM or AES CTR
+/// ciphers as specified in the [Parquet Encryption Spec].
+///
+/// When `encrypted_footer` is `true`, a [`FileCryptoMetaData`] struct is read from the
+/// start of `buf` and the bytes after it are decrypted with the footer key; in that
+/// case `file_decryption_properties` must be provided or an error is returned.
+/// Afterwards the (possibly decrypted) footer is parsed and the column metadata of
+/// every row group is decrypted where needed.
+///
+/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata
+/// [Parquet Encryption Spec]: https://parquet.apache.org/docs/file-format/data-pages/encryption/
+pub(crate) fn parquet_metadata_with_encryption(
+    file_decryption_properties: Option<&Arc<FileDecryptionProperties>>,
+    encrypted_footer: bool,
+    buf: &[u8],
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<ParquetMetaData> {
+    use crate::file::metadata::ParquetMetaDataBuilder;
+
+    let mut buf = buf;
+    let mut file_decryptor = None;
+    // Declared here (uninitialised) so that the decrypted footer outlives the `if`
+    // block below and `buf` can be re-pointed at it.
+    let decrypted_fmd_buf;
+
+    if encrypted_footer {
+        let mut prot = ThriftSliceInputProtocol::new(buf);
+        if let Some(file_decryption_properties) = file_decryption_properties {
+            let t_file_crypto_metadata: FileCryptoMetaData =
+                FileCryptoMetaData::read_thrift(&mut prot)
+                    .map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?;
+            // Only AES_GCM_V1 is consulted for the "reader must supply the prefix" flag;
+            // any other algorithm, or an unset flag, is treated as `false`.
+            let supply_aad_prefix = match &t_file_crypto_metadata.encryption_algorithm {
+                EncryptionAlgorithm::AES_GCM_V1(algo) => algo.supply_aad_prefix,
+                _ => Some(false),
+            }
+            .unwrap_or(false);
+            if supply_aad_prefix && file_decryption_properties.aad_prefix().is_none() {
+                return Err(general_err!(
+                    "Parquet file was encrypted with an AAD prefix that is not stored in the file, \
+                        but no AAD prefix was provided in the file decryption properties"
+                ));
+            }
+            let decryptor = get_file_decryptor(
+                t_file_crypto_metadata.encryption_algorithm,
+                t_file_crypto_metadata.key_metadata,
+                file_decryption_properties,
+            )?;
+            // The `Result` is kept as-is here; it is unwrapped with `?` only after the
+            // footer AAD has been created.
+            let footer_decryptor = decryptor.get_footer_decryptor();
+            let aad_footer = crate::encryption::modules::create_footer_aad(decryptor.file_aad())?;
+
+            // NOTE(review): `prot.as_slice()` is presumably the unread remainder of the
+            // buffer, i.e. the encrypted footer following the crypto metadata - confirm.
+            decrypted_fmd_buf = footer_decryptor?
+                .decrypt(prot.as_slice().as_ref(), aad_footer.as_ref())
+                .map_err(|_| {
+                    general_err!(
+                        "Provided footer key and AAD were unable to decrypt parquet footer"
+                    )
+                })?;
+
+            // From here on, parse the decrypted footer instead of the raw input.
+            buf = &decrypted_fmd_buf;
+            file_decryptor = Some(decryptor);
+        } else {
+            return Err(general_err!(
+                "Parquet file has an encrypted footer but decryption properties were not provided"
+            ));
+        }
+    }
+
+    let parquet_meta = parquet_metadata_from_bytes(buf, options)
+        .map_err(|e| general_err!("Could not parse metadata: {}", e))?;
+
+    // Only the file metadata and row groups are carried over; the remaining fields
+    // are discarded and the result is rebuilt through the builder below.
+    let ParquetMetaData {
+        mut file_metadata,
+        row_groups,
+        column_index: _,
+        offset_index: _,
+        file_decryptor: _,
+    } = parquet_meta;
+
+    // Take the encryption algorithm and footer signing key metadata as they are no longer
+    // needed after this.
+    if let (Some(algo), Some(file_decryption_properties)) = (
+        file_metadata.encryption_algorithm.take(),
+        file_decryption_properties,
+    ) {
+        let footer_signing_key_metadata = file_metadata.footer_signing_key_metadata.take();
+
+        // File has a plaintext footer but encryption algorithm is set
+        let file_decryptor_value = get_file_decryptor(
+            *algo,
+            footer_signing_key_metadata.as_deref(),
+            file_decryption_properties,
+        )?;
+        // The signature check applies to plaintext footers only, and only when the
+        // decryption properties request it.
+        if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer {
+            file_decryptor_value.verify_plaintext_footer_signature(buf)?;
+        }
+        file_decryptor = Some(file_decryptor_value);
+    }
+
+    // decrypt column chunk info
+    let row_groups = row_groups
+        .into_iter()
+        .map(|rg| row_group_from_encrypted_thrift(rg, file_decryptor.as_ref(), options))
+        .collect::<Result<Vec<_>>>()?;
+
+    let metadata = ParquetMetaDataBuilder::new(file_metadata)
+        .set_row_groups(row_groups)
+        .set_file_decryptor(file_decryptor)
+        .build();
+
+    Ok(metadata)
+}
+
+/// Builds a [`FileDecryptor`] for the given algorithm and decryption properties.
+///
+/// Only `AES_GCM_V1` is handled; `AES_GCM_CTR_V1` yields a "not yet supported" error.
+/// The AAD prefix from `file_decryption_properties` takes precedence over the one
+/// stored with the algorithm; if neither is present an empty prefix is used. An error
+/// is returned when the algorithm carries no unique file identifier.
+fn get_file_decryptor(
+    encryption_algorithm: EncryptionAlgorithm,
+    footer_key_metadata: Option<&[u8]>,
+    file_decryption_properties: &Arc<FileDecryptionProperties>,
+) -> Result<FileDecryptor> {
+    let gcm = match encryption_algorithm {
+        EncryptionAlgorithm::AES_GCM_CTR_V1(_) => {
+            return Err(nyi_err!(
+                "The AES_GCM_CTR_V1 encryption algorithm is not yet supported"
+            ));
+        }
+        EncryptionAlgorithm::AES_GCM_V1(gcm) => gcm,
+    };
+
+    // The unique file identifier is mandatory; fail before looking at the prefix.
+    let file_unique = gcm
+        .aad_file_unique
+        .ok_or_else(|| general_err!("AAD unique file identifier is not set"))?;
+
+    // A caller-supplied prefix wins over the one stored in the file.
+    let prefix = match file_decryption_properties.aad_prefix() {
+        Some(supplied) => supplied.clone(),
+        None => gcm.aad_prefix.map(|v| v.to_vec()).unwrap_or_default(),
+    };
+
+    FileDecryptor::new(
+        file_decryption_properties,
+        footer_key_metadata,
+        file_unique.to_vec(),
+        prefix,
+    )
+}
diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs
new file mode 100644
index 000000000000..b7e8aab7cc48
--- /dev/null
+++ b/parquet/src/file/metadata/thrift/mod.rs
@@ -0,0 +1,1815 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module is the bridge between a Parquet file's thrift encoded metadata
+//! and this crate's [Parquet metadata API]. It contains objects and functions used
+//! to serialize/deserialize metadata objects into/from the Thrift compact protocol
+//! format as defined by the [Parquet specification].
+//!
+//! [Parquet metadata API]: crate::file::metadata
+//! [Parquet specification]: https://github.com/apache/parquet-format/tree/master
+
+use std::io::Write;
+use std::sync::Arc;
+
+#[cfg(feature = "encryption")]
+pub(crate) mod encryption;
+
+#[cfg(feature = "encryption")]
+use crate::file::{
+    column_crypto_metadata::ColumnCryptoMetaData, metadata::thrift::encryption::EncryptionAlgorithm,
+};
+use crate::{
+    basic::{
+        ColumnOrder, Compression, ConvertedType, Encoding, EncodingMask, LogicalType, PageType,
+        Repetition, Type,
+    },
+    data_type::{ByteArray, FixedLenByteArray, Int96},
+    errors::{ParquetError, Result},
+    file::{
+        metadata::{
+            ColumnChunkMetaData, ColumnChunkMetaDataBuilder, KeyValue, LevelHistogram,
+            PageEncodingStats, ParquetMetaData, ParquetMetaDataOptions, ParquetPageEncodingStats,
+            RowGroupMetaData, RowGroupMetaDataBuilder, SortingColumn,
+        },
+        statistics::ValueStatistics,
+    },
+    parquet_thrift::{
+        ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol,
+        ThriftCompactOutputProtocol, ThriftSliceInputProtocol, WriteThrift, WriteThriftField,
+        read_thrift_vec,
+    },
+    schema::types::{
+        ColumnDescriptor, SchemaDescriptor, TypePtr, num_nodes, parquet_schema_from_array,
+    },
+    thrift_struct,
+    util::bit_util::FromBytes,
+    write_thrift_field,
+};
+
+// this needs to be visible to the schema conversion code
+thrift_struct!(
+/// One node of the flattened Parquet schema as it appears in the Thrift metadata.
+///
+/// Only `name` is required; every other field is optional.
+pub(crate) struct SchemaElement<'a> {
+  /// Data type for this field. Not set if the current element is a non-leaf node
+  1: optional Type r#type;
+  /// If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.
+  /// Otherwise, if specified, this is the maximum bit length to store any of the values.
+  /// (e.g. a low cardinality INT col could have this set to 3).  Note that this is
+  /// in the schema, and therefore fixed for the entire file.
+  2: optional i32 type_length;
+  /// Repetition of the field. The root of the schema does not have a repetition_type.
+  /// All other nodes must have one.
+  3: optional Repetition repetition_type;
+  /// Name of the field in the schema
+  4: required string<'a> name;
+  /// Nested fields. Since thrift does not support nested fields,
+  /// the nesting is flattened to a single list by a depth-first traversal.
+  /// The children count is used to construct the nested relationship.
+  /// This field is not set when the element is a primitive type.
+  5: optional i32 num_children;
+  /// DEPRECATED: When the schema is the result of a conversion from another model.
+  /// Used to record the original type to help with cross conversion.
+  ///
+  /// This is superseded by logical_type.
+  6: optional ConvertedType converted_type;
+  /// DEPRECATED: Used when this column contains decimal data.
+  /// See the DECIMAL converted type for more details.
+  ///
+  /// This is superseded by using the DecimalType annotation in logical_type.
+  7: optional i32 scale
+  /// DEPRECATED: Decimal precision, the companion of `scale` above.
+  8: optional i32 precision
+  /// When the original schema supports field ids, this will save the
+  /// original field id in the parquet schema
+  9: optional i32 field_id;
+  /// The logical type of this SchemaElement
+  ///
+  /// LogicalType replaces ConvertedType, but ConvertedType is still required
+  /// for some logical types to ensure forward-compatibility in format v1.
+  10: optional LogicalType logical_type
+}
+);
+
+thrift_struct!(
+/// Thrift representation of column chunk statistics; converted into
+/// [`crate::file::statistics::Statistics`] by `convert_stats`.
+struct Statistics<'a> {
+   /// Deprecated max field; used only when neither `min_value` nor `max_value` is set.
+   1: optional binary<'a> max;
+   /// Deprecated min field; used only when neither `min_value` nor `max_value` is set.
+   2: optional binary<'a> min;
+   /// Number of nulls; `convert_stats` treats an absent value as 0 and rejects negatives.
+   3: optional i64 null_count;
+   /// Count of distinct values occurring.
+   4: optional i64 distinct_count;
+   /// Max value as raw bytes.
+   5: optional binary<'a> max_value;
+   /// Min value as raw bytes.
+   6: optional binary<'a> min_value;
+   /// Whether `max_value` is exact; read only for byte array types, absent means `false`.
+   7: optional bool is_max_value_exact;
+   /// Whether `min_value` is exact; read only for byte array types, absent means `false`.
+   8: optional bool is_min_value_exact;
+}
+);
+
+thrift_struct!(
+/// Thrift representation of a geospatial bounding box; converted by
+/// `convert_bounding_box`.
+///
+/// The x and y ranges are required. The z and m ranges are optional, and a range is
+/// applied during conversion only when both of its bounds are present.
+struct BoundingBox {
+  1: required double xmin;
+  2: required double xmax;
+  3: required double ymin;
+  4: required double ymax;
+  5: optional double zmin;
+  6: optional double zmax;
+  7: optional double mmin;
+  8: optional double mmax;
+}
+);
+
+thrift_struct!(
+/// Thrift representation of geospatial statistics; converted by `convert_geo_stats`.
+struct GeospatialStatistics {
+  /// Optional bounding box of the values.
+  1: optional BoundingBox bbox;
+  /// Optional list of geospatial type codes; an empty list is converted to `None`.
+  2: optional list<i32> geospatial_types;
+}
+);
+
+thrift_struct!(
+/// Thrift representation of size statistics. `read_column_metadata` copies the three
+/// fields onto the column chunk metadata, wrapping the histograms in `LevelHistogram`.
+struct SizeStatistics {
+   1: optional i64 unencoded_byte_array_data_bytes;
+   2: optional list<i64> repetition_level_histogram;
+   3: optional list<i64> definition_level_histogram;
+}
+);
+
+/// Converts Thrift [`GeospatialStatistics`] into the crate's boxed geospatial
+/// statistics, returning `None` when no statistics were provided.
+///
+/// An empty `geospatial_types` list is normalised to `None`.
+fn convert_geo_stats(
+    stats: Option<GeospatialStatistics>,
+) -> Option<Box<crate::geospatial::statistics::GeospatialStatistics>> {
+    let thrift_stats = stats?;
+
+    let bounding_box = convert_bounding_box(thrift_stats.bbox);
+    let type_codes: Option<Vec<i32>> = thrift_stats
+        .geospatial_types
+        .filter(|codes| !codes.is_empty());
+
+    let converted =
+        crate::geospatial::statistics::GeospatialStatistics::new(bounding_box, type_codes);
+    Some(Box::new(converted))
+}
+
+/// Converts a Thrift [`BoundingBox`] into the crate's geospatial bounding box,
+/// returning `None` when no box was provided.
+///
+/// The z and m ranges are applied only when both of their bounds are present; a
+/// missing or half-specified range is left unset without raising an error.
+fn convert_bounding_box(
+    bbox: Option<BoundingBox>,
+) -> Option<crate::geospatial::bounding_box::BoundingBox> {
+    let thrift_box = bbox?;
+
+    let xy_only = crate::geospatial::bounding_box::BoundingBox::new(
+        thrift_box.xmin.into(),
+        thrift_box.xmax.into(),
+        thrift_box.ymin.into(),
+        thrift_box.ymax.into(),
+    );
+
+    let with_z = if let (Some(lo), Some(hi)) = (thrift_box.zmin, thrift_box.zmax) {
+        xy_only.with_zrange(lo.into(), hi.into())
+    } else {
+        xy_only
+    };
+
+    let with_m = if let (Some(lo), Some(hi)) = (thrift_box.mmin, thrift_box.mmax) {
+        with_z.with_mrange(lo.into(), hi.into())
+    } else {
+        with_z
+    };
+
+    Some(with_m)
+}
+
+/// Create a [`crate::file::statistics::Statistics`] from a thrift [`Statistics`] object.
+///
+/// Returns `Ok(None)` when `thrift_stats` is `None`. The physical type of
+/// `column_descr` decides how the raw min/max bytes are interpreted. The deprecated
+/// `min`/`max` fields are used only when neither `min_value` nor `max_value` is set.
+///
+/// # Errors
+///
+/// Returns an error (never panics) when the null count is negative, when a min/max
+/// value is too short for a fixed-width physical type, or when an INT96 min/max value
+/// is not exactly 12 bytes long.
+fn convert_stats(
+    column_descr: &Arc<ColumnDescriptor>,
+    thrift_stats: Option<Statistics>,
+) -> Result<Option<crate::file::statistics::Statistics>> {
+    use crate::file::statistics::Statistics as FStatistics;
+    Ok(match thrift_stats {
+        Some(stats) => {
+            // Number of nulls recorded, when it is not available, we just mark it as 0.
+            // TODO this should be `None` if there is no information about NULLS.
+            // see https://github.com/apache/arrow-rs/pull/6216/files
+            let null_count = stats.null_count.unwrap_or(0);
+
+            if null_count < 0 {
+                return Err(general_err!(
+                    "Statistics null count is negative {}",
+                    null_count
+                ));
+            }
+
+            // Generic null count.
+            let null_count = Some(null_count as u64);
+            // Generic distinct count (count of distinct values occurring)
+            let distinct_count = stats.distinct_count.map(|value| value as u64);
+            // Whether or not statistics use deprecated min/max fields.
+            let old_format = stats.min_value.is_none() && stats.max_value.is_none();
+            // Generic min value as bytes.
+            let min = if old_format {
+                stats.min
+            } else {
+                stats.min_value
+            };
+            // Generic max value as bytes.
+            let max = if old_format {
+                stats.max
+            } else {
+                stats.max_value
+            };
+
+            // Rejects min/max values that are shorter than `len` bytes.
+            fn check_len(min: &Option<&[u8]>, max: &Option<&[u8]>, len: usize) -> Result<()> {
+                if let Some(min) = min {
+                    if min.len() < len {
+                        return Err(general_err!("Insufficient bytes to parse min statistic",));
+                    }
+                }
+                if let Some(max) = max {
+                    if max.len() < len {
+                        return Err(general_err!("Insufficient bytes to parse max statistic",));
+                    }
+                }
+                Ok(())
+            }
+
+            // Parses an optional INT96 value, which must be exactly 12 bytes long.
+            // Metadata comes from the file, so a bad length is an error, not a panic.
+            fn parse_int96(data: Option<&[u8]>, which: &str) -> Result<Option<Int96>> {
+                data.map(|bytes| {
+                    if bytes.len() != 12 {
+                        return Err(general_err!(
+                            "Expected 12 bytes for INT96 {} statistic, found {}",
+                            which,
+                            bytes.len()
+                        ));
+                    }
+                    Int96::try_from_le_slice(bytes)
+                })
+                .transpose()
+            }
+
+            let physical_type = column_descr.physical_type();
+            match physical_type {
+                Type::BOOLEAN => check_len(&min, &max, 1),
+                Type::INT32 | Type::FLOAT => check_len(&min, &max, 4),
+                Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8),
+                Type::INT96 => check_len(&min, &max, 12),
+                _ => Ok(()),
+            }?;
+
+            // Values are encoded using PLAIN encoding definition, except that
+            // variable-length byte arrays do not include a length prefix.
+            //
+            // Instead of using actual decoder, we manually convert values.
+            // The slicing and `unwrap`s below cannot fail: `check_len` has already
+            // guaranteed the minimum length for each fixed-width type.
+            let res = match physical_type {
+                Type::BOOLEAN => FStatistics::boolean(
+                    min.map(|data| data[0] != 0),
+                    max.map(|data| data[0] != 0),
+                    distinct_count,
+                    null_count,
+                    old_format,
+                ),
+                Type::INT32 => FStatistics::int32(
+                    min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
+                    max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
+                    distinct_count,
+                    null_count,
+                    old_format,
+                ),
+                Type::INT64 => FStatistics::int64(
+                    min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
+                    max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
+                    distinct_count,
+                    null_count,
+                    old_format,
+                ),
+                Type::INT96 => {
+                    // INT96 statistics may not be correct, because comparison is signed
+                    let min = parse_int96(min, "min")?;
+                    let max = parse_int96(max, "max")?;
+                    FStatistics::int96(min, max, distinct_count, null_count, old_format)
+                }
+                Type::FLOAT => FStatistics::float(
+                    min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
+                    max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
+                    distinct_count,
+                    null_count,
+                    old_format,
+                ),
+                Type::DOUBLE => FStatistics::double(
+                    min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
+                    max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
+                    distinct_count,
+                    null_count,
+                    old_format,
+                ),
+                Type::BYTE_ARRAY => FStatistics::ByteArray(
+                    ValueStatistics::new(
+                        min.map(ByteArray::from),
+                        max.map(ByteArray::from),
+                        distinct_count,
+                        null_count,
+                        old_format,
+                    )
+                    .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false))
+                    .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)),
+                ),
+                Type::FIXED_LEN_BYTE_ARRAY => FStatistics::FixedLenByteArray(
+                    ValueStatistics::new(
+                        min.map(ByteArray::from).map(FixedLenByteArray::from),
+                        max.map(ByteArray::from).map(FixedLenByteArray::from),
+                        distinct_count,
+                        null_count,
+                        old_format,
+                    )
+                    .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false))
+                    .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)),
+                ),
+            };
+
+            Some(res)
+        }
+        None => None,
+    })
+}
+
+// bit positions for required fields in the Thrift ColumnMetaData struct
+//
+// Each bit index equals the Thrift field id of the field it tracks (e.g. `type` is
+// field 1, hence `1 << 1`; `data_page_offset` is field 9, hence `1 << 9`). Field 3
+// (`path_in_schema`) has no bit here, as it is not read by `read_column_metadata`.
+const COL_META_TYPE: u16 = 1 << 1;
+const COL_META_ENCODINGS: u16 = 1 << 2;
+const COL_META_CODEC: u16 = 1 << 4;
+const COL_META_NUM_VALUES: u16 = 1 << 5;
+const COL_META_TOTAL_UNCOMP_SZ: u16 = 1 << 6;
+const COL_META_TOTAL_COMP_SZ: u16 = 1 << 7;
+const COL_META_DATA_PAGE_OFFSET: u16 = 1 << 9;
+
+// a mask where all required fields' bits are set
+const COL_META_ALL_REQUIRED: u16 = COL_META_TYPE
+    | COL_META_ENCODINGS
+    | COL_META_CODEC
+    | COL_META_NUM_VALUES
+    | COL_META_TOTAL_UNCOMP_SZ
+    | COL_META_TOTAL_COMP_SZ
+    | COL_META_DATA_PAGE_OFFSET;
+
+// Check `mask` (as returned by `read_column_metadata`) to see if all required fields
+// are set. Returns an error naming the first missing field, checked in field-id order.
+//
+// Every bit of `COL_META_ALL_REQUIRED` is checked, including `type`, so a mask that
+// lacks only the `type` bit is rejected rather than silently accepted.
+fn validate_column_metadata(mask: u16) -> Result<()> {
+    // (bit, Thrift field name) for every required field, in field-id order.
+    const REQUIRED_FIELDS: [(u16, &str); 7] = [
+        (COL_META_TYPE, "type"),
+        (COL_META_ENCODINGS, "encodings"),
+        (COL_META_CODEC, "codec"),
+        (COL_META_NUM_VALUES, "num_values"),
+        (COL_META_TOTAL_UNCOMP_SZ, "total_uncompressed_size"),
+        (COL_META_TOTAL_COMP_SZ, "total_compressed_size"),
+        (COL_META_DATA_PAGE_OFFSET, "data_page_offset"),
+    ];
+
+    // Fast path: everything required was seen.
+    if mask & COL_META_ALL_REQUIRED == COL_META_ALL_REQUIRED {
+        return Ok(());
+    }
+
+    for (bit, name) in REQUIRED_FIELDS {
+        if mask & bit == 0 {
+            return Err(general_err!("Required field {} is missing", name));
+        }
+    }
+
+    Ok(())
+}
+
+/// Reads a Thrift list of [`PageEncodingStats`] and folds it into an [`EncodingMask`].
+///
+/// Only entries for `DATA_PAGE` and `DATA_PAGE_V2` contribute: each sets the bit
+/// numbered by its encoding. Entries for other page types are read and ignored.
+fn read_encoding_stats_as_mask<'a>(
+    prot: &mut ThriftSliceInputProtocol<'a>,
+) -> Result<EncodingMask> {
+    let header = prot.read_list_begin()?;
+
+    let mut bits = 0i32;
+    for _ in 0..header.size {
+        let entry = PageEncodingStats::read_thrift(prot)?;
+        if matches!(
+            entry.page_type,
+            PageType::DATA_PAGE | PageType::DATA_PAGE_V2
+        ) {
+            bits |= 1 << entry.encoding as i32;
+        }
+    }
+
+    EncodingMask::try_new(bits)
+}
+
+// Decode `ColumnMetaData`. Returns a mask of all required fields that were observed.
+// This mask can be passed to `validate_column_metadata`.
+fn read_column_metadata<'a>(
+    prot: &mut ThriftSliceInputProtocol<'a>,
+    column: &mut ColumnChunkMetaData,
+    col_index: usize,
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<u16> {
+    // mask for seen required fields in ColumnMetaData
+    let mut seen_mask = 0u16;
+
+    let mut skip_pes = false;
+    let mut pes_mask = true;
+    let mut skip_col_stats = false;
+    let mut skip_size_stats = false;
+
+    if let Some(opts) = options {
+        skip_pes = opts.skip_encoding_stats(col_index);
+        pes_mask = opts.encoding_stats_as_mask();
+        skip_col_stats = opts.skip_column_stats(col_index);
+        skip_size_stats = opts.skip_size_stats(col_index);
+    }
+
+    // struct ColumnMetaData {
+    //   1: required Type type
+    //   2: required list<Encoding> encodings
+    //   3: required list<string> path_in_schema
+    //   4: required CompressionCodec codec
+    //   5: required i64 num_values
+    //   6: required i64 total_uncompressed_size
+    //   7: required i64 total_compressed_size
+    //   8: optional list<KeyValue> key_value_metadata
+    //   9: required i64 data_page_offset
+    //   10: optional i64 index_page_offset
+    //   11: optional i64 dictionary_page_offset
+    //   12: optional Statistics statistics;
+    //   13: optional list<PageEncodingStats> encoding_stats;
+    //   14: optional i64 bloom_filter_offset;
+    //   15: optional i32 bloom_filter_length;
+    //   16: optional SizeStatistics size_statistics;
+    //   17: optional GeospatialStatistics geospatial_statistics;
+    // }
+    let column_descr = &column.column_descr;
+
+    let mut last_field_id = 0i16;
+    loop {
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        match field_ident.id {
+            // 1: type is never used, we can use the column descriptor
+            1 => {
+                // read for error handling
+                Type::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_TYPE;
+            }
+            2 => {
+                column.encodings = EncodingMask::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_ENCODINGS;
+            }
+            // 3: path_in_schema is redundant
+            4 => {
+                column.compression = Compression::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_CODEC;
+            }
+            5 => {
+                column.num_values = i64::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_NUM_VALUES;
+            }
+            6 => {
+                column.total_uncompressed_size = i64::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_TOTAL_UNCOMP_SZ;
+            }
+            7 => {
+                column.total_compressed_size = i64::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_TOTAL_COMP_SZ;
+            }
+            // 8: we don't expose this key value
+            9 => {
+                column.data_page_offset = i64::read_thrift(&mut *prot)?;
+                seen_mask |= COL_META_DATA_PAGE_OFFSET;
+            }
+            10 => {
+                column.index_page_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            11 => {
+                column.dictionary_page_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            12 if !skip_col_stats => {
+                column.statistics =
+                    convert_stats(column_descr, Some(Statistics::read_thrift(&mut *prot)?))?;
+            }
+            13 if !skip_pes => {
+                if pes_mask {
+                    let val = read_encoding_stats_as_mask(&mut *prot)?;
+                    column.encoding_stats = Some(ParquetPageEncodingStats::Mask(val));
+                } else {
+                    let val =
+                        read_thrift_vec::<PageEncodingStats, ThriftSliceInputProtocol>(&mut *prot)?;
+                    column.encoding_stats = Some(ParquetPageEncodingStats::Full(val));
+                }
+            }
+            14 => {
+                column.bloom_filter_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            15 => {
+                column.bloom_filter_length = Some(i32::read_thrift(&mut *prot)?);
+            }
+            16 if !skip_size_stats => {
+                let val = SizeStatistics::read_thrift(&mut *prot)?;
+                column.unencoded_byte_array_data_bytes = val.unencoded_byte_array_data_bytes;
+                column.repetition_level_histogram =
+                    val.repetition_level_histogram.map(LevelHistogram::from);
+                column.definition_level_histogram =
+                    val.definition_level_histogram.map(LevelHistogram::from);
+            }
+            17 => {
+                let val = GeospatialStatistics::read_thrift(&mut *prot)?;
+                column.geo_statistics = convert_geo_stats(Some(val));
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        last_field_id = field_ident.id;
+    }
+
+    Ok(seen_mask)
+}
+
+// Takes the concrete ThriftSliceInputProtocol rather than the ThriftCompactInputProtocol trait
+// because these readers are all internal and operate on slices.
+fn read_column_chunk<'a>(
+    prot: &mut ThriftSliceInputProtocol<'a>,
+    column_descr: &Arc<ColumnDescriptor>,
+    col_index: usize,
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<ColumnChunkMetaData> {
+    // start from a default-initialized ColumnChunkMetaData and fill it in as fields are read
+    let mut col = ColumnChunkMetaDataBuilder::new(column_descr.clone()).build()?;
+
+    // set once the required file_offset field has been read
+    let mut has_file_offset = false;
+
+    // seen-field mask returned by read_column_metadata; stays 0 if meta_data is never read
+    let mut col_meta_mask = 0u16;
+
+    // struct ColumnChunk {
+    //   1: optional string file_path
+    //   2: required i64 file_offset = 0
+    //   3: optional ColumnMetaData meta_data
+    //   4: optional i64 offset_index_offset
+    //   5: optional i32 offset_index_length
+    //   6: optional i64 column_index_offset
+    //   7: optional i32 column_index_length
+    //   8: optional ColumnCryptoMetaData crypto_metadata
+    //   9: optional binary encrypted_column_metadata
+    // }
+    let mut last_field_id = 0i16;
+    loop {
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        match field_ident.id {
+            1 => {
+                col.file_path = Some(String::read_thrift(&mut *prot)?);
+            }
+            2 => {
+                col.file_offset = i64::read_thrift(&mut *prot)?;
+                has_file_offset = true;
+            }
+            3 => {
+                col_meta_mask = read_column_metadata(&mut *prot, &mut col, col_index, options)?;
+            }
+            4 => {
+                col.offset_index_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            5 => {
+                col.offset_index_length = Some(i32::read_thrift(&mut *prot)?);
+            }
+            6 => {
+                col.column_index_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            7 => {
+                col.column_index_length = Some(i32::read_thrift(&mut *prot)?);
+            }
+            #[cfg(feature = "encryption")]
+            8 => {
+                let val = ColumnCryptoMetaData::read_thrift(&mut *prot)?;
+                col.column_crypto_metadata = Some(Box::new(val));
+            }
+            #[cfg(feature = "encryption")]
+            9 => {
+                col.encrypted_column_metadata = Some(<&[u8]>::read_thrift(&mut *prot)?.to_vec());
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        last_field_id = field_ident.id;
+    }
+
+    // the only required field from ColumnChunk
+    if !has_file_offset {
+        return Err(general_err!("Required field file_offset is missing"));
+    };
+
+    // if encrypted just return. we'll decrypt after finishing the footer and populate the rest.
+    #[cfg(feature = "encryption")]
+    if col.encrypted_column_metadata.is_some() {
+        return Ok(col);
+    }
+
+    // not encrypted, so make sure all required ColumnMetaData fields were read
+    validate_column_metadata(col_meta_mask)?;
+
+    Ok(col)
+}
+
+fn read_row_group(
+    prot: &mut ThriftSliceInputProtocol,
+    schema_descr: &Arc<SchemaDescriptor>,
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<RowGroupMetaData> {
+    // start from a default-initialized RowGroupMetaData and fill it in as fields are read
+    let mut row_group = RowGroupMetaDataBuilder::new(schema_descr.clone()).build_unchecked();
+
+    // bit flags recording which of the required fields have been read
+    const RG_COLUMNS: u8 = 1 << 1;
+    const RG_TOT_BYTE_SIZE: u8 = 1 << 2;
+    const RG_NUM_ROWS: u8 = 1 << 3;
+    const RG_ALL_REQUIRED: u8 = RG_COLUMNS | RG_TOT_BYTE_SIZE | RG_NUM_ROWS;
+
+    let mut mask = 0u8;
+
+    // struct RowGroup {
+    //   1: required list<ColumnChunk> columns
+    //   2: required i64 total_byte_size
+    //   3: required i64 num_rows
+    //   4: optional list<SortingColumn> sorting_columns
+    //   5: optional i64 file_offset
+    //   6: optional i64 total_compressed_size
+    //   7: optional i16 ordinal
+    // }
+    let mut last_field_id = 0i16;
+    loop {
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        match field_ident.id {
+            1 => {
+                let list_ident = prot.read_list_begin()?;
+                if schema_descr.num_columns() != list_ident.size as usize {
+                    return Err(general_err!(
+                        "Column count mismatch. Schema has {} columns while Row Group has {}",
+                        schema_descr.num_columns(),
+                        list_ident.size
+                    ));
+                }
+                for i in 0..list_ident.size as usize {
+                    let col = read_column_chunk(prot, &schema_descr.columns()[i], i, options)?;
+                    row_group.columns.push(col);
+                }
+                mask |= RG_COLUMNS;
+            }
+            2 => {
+                row_group.total_byte_size = i64::read_thrift(&mut *prot)?;
+                mask |= RG_TOT_BYTE_SIZE;
+            }
+            3 => {
+                row_group.num_rows = i64::read_thrift(&mut *prot)?;
+                mask |= RG_NUM_ROWS;
+            }
+            4 => {
+                let val = read_thrift_vec::<SortingColumn, ThriftSliceInputProtocol>(&mut *prot)?;
+                row_group.sorting_columns = Some(val);
+            }
+            5 => {
+                row_group.file_offset = Some(i64::read_thrift(&mut *prot)?);
+            }
+            // 6: total_compressed_size is not exposed, so it is skipped by the catch-all arm
+            7 => {
+                row_group.ordinal = Some(i16::read_thrift(&mut *prot)?);
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        last_field_id = field_ident.id;
+    }
+
+    if mask != RG_ALL_REQUIRED {
+        if mask & RG_COLUMNS == 0 {
+            return Err(general_err!("Required field columns is missing"));
+        }
+        if mask & RG_TOT_BYTE_SIZE == 0 {
+            return Err(general_err!("Required field total_byte_size is missing"));
+        }
+        if mask & RG_NUM_ROWS == 0 {
+            return Err(general_err!("Required field num_rows is missing"));
+        }
+    }
+
+    Ok(row_group)
+}
+
+/// Create a [`SchemaDescriptor`] from thrift input, which must hold a complete Parquet footer.
+/// Only the schema (field 2) is decoded: every field before it is skipped and nothing after
+/// it is read. Returns an error if the footer ends without a schema.
+pub(crate) fn parquet_schema_from_bytes(buf: &[u8]) -> Result<SchemaDescriptor> {
+    let mut input = ThriftSliceInputProtocol::new(buf);
+    let mut prev_id = 0i16;
+    loop {
+        let ident = input.read_field_begin(prev_id)?;
+        if ident.field_type == FieldType::Stop {
+            // end of the struct, and no schema was encountered on the way
+            return Err(general_err!("Input does not contain a schema"));
+        }
+        if ident.id != 2 {
+            // not the schema: step over the value and carry on with the next field
+            input.skip(ident.field_type)?;
+            prev_id = ident.id;
+            continue;
+        }
+        // the schema: decode its elements and hand back the descriptor built from them
+        let elements = read_thrift_vec::<SchemaElement, ThriftSliceInputProtocol>(&mut input)?;
+        let root = parquet_schema_from_array(elements)?;
+        return Ok(SchemaDescriptor::new(root));
+    }
+}
+
+/// Create [`ParquetMetaData`] from thrift input. Note that this only decodes the file metadata in
+/// the Parquet footer. Page indexes will need to be added later.
+///
+/// If `options` supplies a schema it is used as-is and the schema stored in `buf` is skipped.
+/// Returns an error if `buf` cannot be decoded, if a required field is missing, or if the
+/// decoded metadata is inconsistent (e.g. the column order count does not match the schema).
+pub(crate) fn parquet_metadata_from_bytes(
+    buf: &[u8],
+    options: Option<&ParquetMetaDataOptions>,
+) -> Result<ParquetMetaData> {
+    let mut prot = ThriftSliceInputProtocol::new(buf);
+
+    // begin reading the file metadata
+    let mut version: Option<i32> = None;
+    let mut num_rows: Option<i64> = None;
+    let mut row_groups: Option<Vec<RowGroupMetaData>> = None;
+    let mut key_value_metadata: Option<Vec<KeyValue>> = None;
+    let mut created_by: Option<&str> = None;
+    let mut column_orders: Option<Vec<ColumnOrder>> = None;
+    #[cfg(feature = "encryption")]
+    let mut encryption_algorithm: Option<EncryptionAlgorithm> = None;
+    #[cfg(feature = "encryption")]
+    let mut footer_signing_key_metadata: Option<&[u8]> = None;
+
+    // this will need to be set before parsing row groups
+    let mut schema_descr: Option<Arc<SchemaDescriptor>> = None;
+
+    // see if we already have a schema.
+    if let Some(options) = options {
+        schema_descr = options.schema().cloned();
+    }
+
+    // struct FileMetaData {
+    //   1: required i32 version
+    //   2: required list<SchemaElement> schema;
+    //   3: required i64 num_rows
+    //   4: required list<RowGroup> row_groups
+    //   5: optional list<KeyValue> key_value_metadata
+    //   6: optional string created_by
+    //   7: optional list<ColumnOrder> column_orders;
+    //   8: optional EncryptionAlgorithm encryption_algorithm
+    //   9: optional binary footer_signing_key_metadata
+    // }
+    let mut last_field_id = 0i16;
+    loop {
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        match field_ident.id {
+            1 => version = Some(i32::read_thrift(&mut prot)?),
+            2 => {
+                // If schema was passed in, skip parsing it
+                if schema_descr.is_some() {
+                    prot.skip(field_ident.field_type)?;
+                } else {
+                    // read schema and convert to SchemaDescriptor for use when reading row groups
+                    let val =
+                        read_thrift_vec::<SchemaElement, ThriftSliceInputProtocol>(&mut prot)?;
+                    let val = parquet_schema_from_array(val)?;
+                    schema_descr = Some(Arc::new(SchemaDescriptor::new(val)));
+                }
+            }
+            3 => num_rows = Some(i64::read_thrift(&mut prot)?),
+            4 => {
+                let Some(schema_descr) = schema_descr.as_ref() else {
+                    return Err(general_err!("Required field schema is missing"));
+                };
+                let list_ident = prot.read_list_begin()?;
+                // the count is untrusted: reserve no more than the number of valid ordinals
+                let max_row_groups = i16::MAX as usize + 1;
+                let size_hint: usize = list_ident.size.try_into().unwrap_or(0);
+                let mut rg_vec = Vec::with_capacity(size_hint.min(max_row_groups));
+
+                // Read row groups and handle ordinal assignment
+                let mut assigner = OrdinalAssigner::new();
+                for ordinal in 0..list_ident.size {
+                    let ordinal: i16 = ordinal.try_into().map_err(|_| {
+                        ParquetError::General(format!(
+                            "Row group ordinal {ordinal} exceeds i16 max value",
+                        ))
+                    })?;
+                    let rg = read_row_group(&mut prot, schema_descr, options)?;
+                    rg_vec.push(assigner.ensure(ordinal, rg)?);
+                }
+                row_groups = Some(rg_vec);
+            }
+            5 => {
+                let val = read_thrift_vec::<KeyValue, ThriftSliceInputProtocol>(&mut prot)?;
+                key_value_metadata = Some(val);
+            }
+            6 => created_by = Some(<&str>::read_thrift(&mut prot)?),
+            7 => {
+                let val = read_thrift_vec::<ColumnOrder, ThriftSliceInputProtocol>(&mut prot)?;
+                column_orders = Some(val);
+            }
+            #[cfg(feature = "encryption")]
+            8 => {
+                let val = EncryptionAlgorithm::read_thrift(&mut prot)?;
+                encryption_algorithm = Some(val);
+            }
+            #[cfg(feature = "encryption")]
+            9 => {
+                footer_signing_key_metadata = Some(<&[u8]>::read_thrift(&mut prot)?);
+            }
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        last_field_id = field_ident.id;
+    }
+    let Some(version) = version else {
+        return Err(general_err!("Required field version is missing"));
+    };
+    let Some(num_rows) = num_rows else {
+        return Err(general_err!("Required field num_rows is missing"));
+    };
+    let Some(row_groups) = row_groups else {
+        return Err(general_err!("Required field row_groups is missing"));
+    };
+
+    let created_by = created_by.map(|c| c.to_owned());
+
+    // we've tested for `None` by now so this is safe
+    let schema_descr = schema_descr.unwrap();
+
+    // need to map read column orders to actual values based on the schema
+    if column_orders
+        .as_ref()
+        .is_some_and(|cos| cos.len() != schema_descr.num_columns())
+    {
+        return Err(general_err!("Column order length mismatch"));
+    }
+    // replace default type defined column orders with ones having the correct sort order
+    // TODO(ets): this could instead be done above when decoding
+    let column_orders = column_orders.map(|mut cos| {
+        for (i, column) in schema_descr.columns().iter().enumerate() {
+            if let ColumnOrder::TYPE_DEFINED_ORDER(_) = cos[i] {
+                let sort_order = ColumnOrder::sort_order_for_type(
+                    column.logical_type_ref(),
+                    column.converted_type(),
+                    column.physical_type(),
+                );
+                cos[i] = ColumnOrder::TYPE_DEFINED_ORDER(sort_order);
+            }
+        }
+        cos
+    });
+
+    #[cfg(not(feature = "encryption"))]
+    let fmd = crate::file::metadata::FileMetaData::new(
+        version,
+        num_rows,
+        created_by,
+        key_value_metadata,
+        schema_descr,
+        column_orders,
+    );
+    #[cfg(feature = "encryption")]
+    let fmd = crate::file::metadata::FileMetaData::new(
+        version,
+        num_rows,
+        created_by,
+        key_value_metadata,
+        schema_descr,
+        column_orders,
+    )
+    .with_encryption_algorithm(encryption_algorithm)
+    .with_footer_signing_key_metadata(footer_signing_key_metadata.map(|v| v.to_vec()));
+
+    Ok(ParquetMetaData::new(fmd, row_groups))
+}
+
+/// Fills in [`RowGroupMetaData::ordinal`] for row groups that were read without one.
+#[derive(Debug, Default)]
+pub(crate) struct OrdinalAssigner {
+    // whether the first row group seen had an ordinal; `None` until `ensure` is first called
+    first_has_ordinal: Option<bool>,
+}
+
+impl OrdinalAssigner {
+    /// Creates an assigner that has not seen any row group yet.
+    fn new() -> Self {
+        Self::default()
+    }
+
+    /// Checks `rg` against the first row group seen and assigns an ordinal if needed.
+    ///
+    /// # Arguments
+    /// - actual_ordinal: index of `rg` within the file metadata; it becomes the ordinal of
+    ///   `rg` when neither `rg` nor the first row group seen had one.
+    /// - rg: the [`RowGroupMetaData`] to check and possibly modify.
+    ///
+    /// # Errors
+    /// Fails when `rg` disagrees with the first row group seen about having an ordinal:
+    /// either every row group passed to this assigner has one, or none of them does.
+    fn ensure(
+        &mut self,
+        actual_ordinal: i16,
+        mut rg: RowGroupMetaData,
+    ) -> Result<RowGroupMetaData> {
+        let rg_has_ordinal = rg.ordinal.is_some();
+
+        // the first row group to arrive decides what every later one must look like
+        let first_has_ordinal = *self.first_has_ordinal.get_or_insert(rg_has_ordinal);
+
+        match (first_has_ordinal, rg_has_ordinal) {
+            // no ordinals anywhere: use the position within the file metadata
+            (false, false) => rg.ordinal = Some(actual_ordinal),
+            // ordinals everywhere: keep the one that was read
+            (true, true) => {}
+            _ => {
+                return Err(general_err!(
+                    "Inconsistent ordinal assignment: first_has_ordinal is set to \
+                    {} but row-group with actual ordinal {} has rg_has_ordinal set to {}",
+                    first_has_ordinal,
+                    actual_ordinal,
+                    rg_has_ordinal
+                ));
+            }
+        }
+        Ok(rg)
+    }
+}
+
+thrift_struct!(
+    pub(crate) struct IndexPageHeader {} // declares no fields
+);
+
+thrift_struct!(
+pub(crate) struct DictionaryPageHeader {
+  /// Number of values in the dictionary
+  1: required i32 num_values;
+
+  /// Encoding used by this dictionary page
+  2: required Encoding encoding
+
+  /// If true, the entries in the dictionary are sorted in ascending order
+  3: optional bool is_sorted;
+}
+);
+
+thrift_struct!(
+/// Statistics stored in the data page headers (`DataPageHeader` and `DataPageHeaderV2`).
+///
+/// This is a duplicate of the [`Statistics`] struct above. Because the page reader uses
+/// the [`Read`] API, we cannot read the min/max values as slices. This should not be
+/// a huge problem since this crate no longer reads the page header statistics by default.
+///
+/// [`Read`]: crate::parquet_thrift::ThriftReadInputProtocol
+pub(crate) struct PageStatistics {
+   1: optional binary max;
+   2: optional binary min;
+   3: optional i64 null_count;
+   4: optional i64 distinct_count;
+   5: optional binary max_value;
+   6: optional binary min_value;
+   7: optional bool is_max_value_exact;
+   8: optional bool is_min_value_exact;
+}
+);
+
+thrift_struct!(
+pub(crate) struct DataPageHeader {
+  1: required i32 num_values
+  2: required Encoding encoding
+  3: required Encoding definition_level_encoding;
+  4: required Encoding repetition_level_encoding;
+  5: optional PageStatistics statistics; // left unset by read_thrift_without_stats
+}
+);
+
+impl DataPageHeader {
+    // reader that skips decoding page statistics: field 5 is skipped like an unknown field
+    fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result<Self>
+    where
+        R: ThriftCompactInputProtocol<'a>,
+    {
+        let mut num_values: Option<i32> = None;
+        let mut encoding: Option<Encoding> = None;
+        let mut definition_level_encoding: Option<Encoding> = None;
+        let mut repetition_level_encoding: Option<Encoding> = None;
+        let statistics: Option<PageStatistics> = None; // never assigned, so always None
+        let mut last_field_id = 0i16;
+        loop {
+            let field_ident = prot.read_field_begin(last_field_id)?;
+            if field_ident.field_type == FieldType::Stop {
+                break;
+            }
+            match field_ident.id {
+                1 => {
+                    let val = i32::read_thrift(&mut *prot)?;
+                    num_values = Some(val);
+                }
+                2 => {
+                    let val = Encoding::read_thrift(&mut *prot)?;
+                    encoding = Some(val);
+                }
+                3 => {
+                    let val = Encoding::read_thrift(&mut *prot)?;
+                    definition_level_encoding = Some(val);
+                }
+                4 => {
+                    let val = Encoding::read_thrift(&mut *prot)?;
+                    repetition_level_encoding = Some(val);
+                }
+                _ => {
+                    prot.skip(field_ident.field_type)?;
+                }
+            };
+            last_field_id = field_ident.id;
+        }
+        let Some(num_values) = num_values else {
+            return Err(general_err!("Required field num_values is missing"));
+        };
+        let Some(encoding) = encoding else {
+            return Err(general_err!("Required field encoding is missing"));
+        };
+        let Some(definition_level_encoding) = definition_level_encoding else {
+            return Err(general_err!(
+                "Required field definition_level_encoding is missing"
+            ));
+        };
+        let Some(repetition_level_encoding) = repetition_level_encoding else {
+            return Err(general_err!(
+                "Required field repetition_level_encoding is missing"
+            ));
+        };
+        Ok(Self {
+            num_values,
+            encoding,
+            definition_level_encoding,
+            repetition_level_encoding,
+            statistics,
+        })
+    }
+}
+
+thrift_struct!(
+pub(crate) struct DataPageHeaderV2 {
+  1: required i32 num_values
+  2: required i32 num_nulls
+  3: required i32 num_rows
+  4: required Encoding encoding
+  5: required i32 definition_levels_byte_length;
+  6: required i32 repetition_levels_byte_length;
+  7: optional bool is_compressed = true;
+  8: optional PageStatistics statistics; // left unset by read_thrift_without_stats
+}
+);
+
+impl DataPageHeaderV2 {
+    // reader that skips decoding page statistics: field 8 is skipped like an unknown field, so
+    // `statistics` is always `None` in the result. returns an error if a required field is
+    // missing or if no boolean value accompanies `is_compressed`
+    fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result<Self>
+    where
+        R: ThriftCompactInputProtocol<'a>,
+    {
+        let mut num_values: Option<i32> = None;
+        let mut num_nulls: Option<i32> = None;
+        let mut num_rows: Option<i32> = None;
+        let mut encoding: Option<Encoding> = None;
+        let mut definition_levels_byte_length: Option<i32> = None;
+        let mut repetition_levels_byte_length: Option<i32> = None;
+        let mut is_compressed: Option<bool> = None;
+        let statistics: Option<PageStatistics> = None;
+        let mut last_field_id = 0i16;
+        loop {
+            let field_ident = prot.read_field_begin(last_field_id)?;
+            if field_ident.field_type == FieldType::Stop {
+                break;
+            }
+            match field_ident.id {
+                1 => {
+                    num_values = Some(i32::read_thrift(&mut *prot)?);
+                }
+                2 => {
+                    num_nulls = Some(i32::read_thrift(&mut *prot)?);
+                }
+                3 => {
+                    num_rows = Some(i32::read_thrift(&mut *prot)?);
+                }
+                4 => {
+                    encoding = Some(Encoding::read_thrift(&mut *prot)?);
+                }
+                5 => {
+                    definition_levels_byte_length = Some(i32::read_thrift(&mut *prot)?);
+                }
+                6 => {
+                    repetition_levels_byte_length = Some(i32::read_thrift(&mut *prot)?);
+                }
+                7 => {
+                    // the boolean arrives with the field identifier rather than as a separate
+                    // value; treat its absence as malformed input instead of panicking
+                    let Some(val) = field_ident.bool_val else {
+                        return Err(general_err!("Field is_compressed is not a boolean"));
+                    };
+                    is_compressed = Some(val);
+                }
+                _ => {
+                    prot.skip(field_ident.field_type)?;
+                }
+            };
+            last_field_id = field_ident.id;
+        }
+        let Some(num_values) = num_values else {
+            return Err(general_err!("Required field num_values is missing"));
+        };
+        let Some(num_nulls) = num_nulls else {
+            return Err(general_err!("Required field num_nulls is missing"));
+        };
+        let Some(num_rows) = num_rows else {
+            return Err(general_err!("Required field num_rows is missing"));
+        };
+        let Some(encoding) = encoding else {
+            return Err(general_err!("Required field encoding is missing"));
+        };
+        let Some(definition_levels_byte_length) = definition_levels_byte_length else {
+            return Err(general_err!(
+                "Required field definition_levels_byte_length is missing"
+            ));
+        };
+        let Some(repetition_levels_byte_length) = repetition_levels_byte_length else {
+            return Err(general_err!(
+                "Required field repetition_levels_byte_length is missing"
+            ));
+        };
+        Ok(Self {
+            num_values,
+            num_nulls,
+            num_rows,
+            encoding,
+            definition_levels_byte_length,
+            repetition_levels_byte_length,
+            is_compressed,
+            statistics,
+        })
+    }
+}
+
+thrift_struct!(
+pub(crate) struct PageHeader {
+  /// the type of the page: indicates which of the *_header fields is set
+  1: required PageType r#type
+
+  /// Uncompressed page size in bytes (not including this header)
+  2: required i32 uncompressed_page_size
+
+  /// Compressed (and potentially encrypted) page size in bytes, not including this header
+  3: required i32 compressed_page_size
+
+  /// The 32-bit CRC checksum for the page, calculated as described in the Parquet format spec
+  4: optional i32 crc
+
+  // Headers for page specific data. Only one will be set.
+  5: optional DataPageHeader data_page_header;
+  6: optional IndexPageHeader index_page_header;
+  7: optional DictionaryPageHeader dictionary_page_header;
+  8: optional DataPageHeaderV2 data_page_header_v2;
+}
+);
+
+impl PageHeader {
+    // reader that skips decoding page statistics in the data page headers. obtained by running
+    // `cargo expand -p parquet --all-features --lib file::metadata::thrift`
+    // and modifying the impl of `read_thrift` to use `read_thrift_without_stats` for 5 and 8
+    pub(crate) fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result<Self>
+    where
+        R: ThriftCompactInputProtocol<'a>,
+    {
+        let mut type_: Option<PageType> = None;
+        let mut uncompressed_page_size: Option<i32> = None;
+        let mut compressed_page_size: Option<i32> = None;
+        let mut crc: Option<i32> = None;
+        let mut data_page_header: Option<DataPageHeader> = None;
+        let mut index_page_header: Option<IndexPageHeader> = None;
+        let mut dictionary_page_header: Option<DictionaryPageHeader> = None;
+        let mut data_page_header_v2: Option<DataPageHeaderV2> = None;
+        let mut last_field_id = 0i16;
+        loop {
+            let field_ident = prot.read_field_begin(last_field_id)?;
+            if field_ident.field_type == FieldType::Stop {
+                break;
+            }
+            match field_ident.id {
+                1 => {
+                    let val = PageType::read_thrift(&mut *prot)?;
+                    type_ = Some(val);
+                }
+                2 => {
+                    let val = i32::read_thrift(&mut *prot)?;
+                    uncompressed_page_size = Some(val);
+                }
+                3 => {
+                    let val = i32::read_thrift(&mut *prot)?;
+                    compressed_page_size = Some(val);
+                }
+                4 => {
+                    let val = i32::read_thrift(&mut *prot)?;
+                    crc = Some(val);
+                }
+                5 => {
+                    let val = DataPageHeader::read_thrift_without_stats(&mut *prot)?;
+                    data_page_header = Some(val);
+                }
+                6 => {
+                    let val = IndexPageHeader::read_thrift(&mut *prot)?;
+                    index_page_header = Some(val);
+                }
+                7 => {
+                    let val = DictionaryPageHeader::read_thrift(&mut *prot)?;
+                    dictionary_page_header = Some(val);
+                }
+                8 => {
+                    let val = DataPageHeaderV2::read_thrift_without_stats(&mut *prot)?;
+                    data_page_header_v2 = Some(val);
+                }
+                _ => {
+                    prot.skip(field_ident.field_type)?;
+                }
+            };
+            last_field_id = field_ident.id;
+        }
+        let Some(type_) = type_ else {
+            return Err(general_err!("Required field type_ is missing"));
+        };
+        let Some(uncompressed_page_size) = uncompressed_page_size else {
+            return Err(general_err!(
+                "Required field uncompressed_page_size is missing"
+            ));
+        };
+        let Some(compressed_page_size) = compressed_page_size else {
+            return Err(general_err!(
+                "Required field compressed_page_size is missing"
+            ));
+        };
+        Ok(Self {
+            r#type: type_,
+            uncompressed_page_size,
+            compressed_page_size,
+            crc,
+            data_page_header,
+            index_page_header,
+            dictionary_page_header,
+            data_page_header_v2,
+        })
+    }
+}
+
+/////////////////////////////////////////////////
+// helper functions for writing file meta data
+
+// Serialize the parts of `column_chunk` that make up a thrift ColumnMetaData struct to `w`.
+// struct ColumnMetaData {
+//   1: required Type type
+//   2: required list<Encoding> encodings
+//   3: required list<string> path_in_schema
+//   4: required CompressionCodec codec
+//   5: required i64 num_values
+//   6: required i64 total_uncompressed_size
+//   7: required i64 total_compressed_size
+//   8: optional list<KeyValue> key_value_metadata
+//   9: required i64 data_page_offset
+//   10: optional i64 index_page_offset
+//   11: optional i64 dictionary_page_offset
+//   12: optional Statistics statistics;
+//   13: optional list<PageEncodingStats> encoding_stats;
+//   14: optional i64 bloom_filter_offset;
+//   15: optional i32 bloom_filter_length;
+//   16: optional SizeStatistics size_statistics;
+//   17: optional GeospatialStatistics geospatial_statistics;
+// }
+pub(super) fn serialize_column_meta_data<W: Write>(
+    column_chunk: &ColumnChunkMetaData,
+    w: &mut ThriftCompactOutputProtocol<W>,
+) -> Result<()> {
+    use crate::file::statistics::page_stats_to_thrift;
+    // write_thrift_field takes (writer, field id, id of the field written just before it)
+    column_chunk.column_type().write_thrift_field(w, 1, 0)?;
+    column_chunk
+        .encodings()
+        .collect::<Vec<_>>()
+        .write_thrift_field(w, 2, 1)?;
+    let path = column_chunk.column_descr.path().parts();
+    let path: Vec<&str> = path.iter().map(|v| v.as_str()).collect();
+    path.write_thrift_field(w, 3, 2)?;
+    column_chunk.compression.write_thrift_field(w, 4, 3)?;
+    column_chunk.num_values.write_thrift_field(w, 5, 4)?;
+    column_chunk
+        .total_uncompressed_size
+        .write_thrift_field(w, 6, 5)?;
+    column_chunk
+        .total_compressed_size
+        .write_thrift_field(w, 7, 6)?;
+    // no key_value_metadata here: field 8 is skipped, so field 9 directly follows field 7
+    let mut last_field_id = column_chunk.data_page_offset.write_thrift_field(w, 9, 7)?;
+    if let Some(index_page_offset) = column_chunk.index_page_offset {
+        last_field_id = index_page_offset.write_thrift_field(w, 10, last_field_id)?;
+    }
+    if let Some(dictionary_page_offset) = column_chunk.dictionary_page_offset {
+        last_field_id = dictionary_page_offset.write_thrift_field(w, 11, last_field_id)?;
+    }
+    // PageStatistics is the same as thrift Statistics, but writable
+    let stats = page_stats_to_thrift(column_chunk.statistics());
+    if let Some(stats) = stats {
+        last_field_id = stats.write_thrift_field(w, 12, last_field_id)?;
+    }
+    if let Some(page_encoding_stats) = column_chunk.page_encoding_stats() {
+        last_field_id = page_encoding_stats.write_thrift_field(w, 13, last_field_id)?;
+    }
+    if let Some(bloom_filter_offset) = column_chunk.bloom_filter_offset {
+        last_field_id = bloom_filter_offset.write_thrift_field(w, 14, last_field_id)?;
+    }
+    if let Some(bloom_filter_length) = column_chunk.bloom_filter_length {
+        last_field_id = bloom_filter_length.write_thrift_field(w, 15, last_field_id)?;
+    }
+
+    // SizeStatistics (field 16): only built when at least one of its three members is set
+    let size_stats = if column_chunk.unencoded_byte_array_data_bytes.is_some()
+        || column_chunk.repetition_level_histogram.is_some()
+        || column_chunk.definition_level_histogram.is_some()
+    {
+        let repetition_level_histogram = column_chunk
+            .repetition_level_histogram()
+            .map(|hist| hist.clone().into_inner());
+
+        let definition_level_histogram = column_chunk
+            .definition_level_histogram()
+            .map(|hist| hist.clone().into_inner());
+
+        Some(SizeStatistics {
+            unencoded_byte_array_data_bytes: column_chunk.unencoded_byte_array_data_bytes,
+            repetition_level_histogram,
+            definition_level_histogram,
+        })
+    } else {
+        None
+    };
+    if let Some(size_stats) = size_stats {
+        last_field_id = size_stats.write_thrift_field(w, 16, last_field_id)?;
+    }
+
+    if let Some(geo_stats) = column_chunk.geo_statistics() {
+        geo_stats.write_thrift_field(w, 17, last_field_id)?;
+    }
+
+    w.write_struct_end()
+}
+
+/// Temporary view used only for writing: borrows the file metadata and the row groups to serialize
+pub(super) struct FileMeta<'a> {
+    pub(super) file_metadata: &'a crate::file::metadata::FileMetaData,
+    pub(super) row_groups: &'a Vec<RowGroupMetaData>,
+}
+
+// struct FileMetaData {
+//   1: required i32 version
+//   2: required list<SchemaElement> schema;
+//   3: required i64 num_rows
+//   4: required list<RowGroup> row_groups
+//   5: optional list<KeyValue> key_value_metadata
+//   6: optional string created_by
+//   7: optional list<ColumnOrder> column_orders;
+//   8: optional EncryptionAlgorithm encryption_algorithm
+//   9: optional binary footer_signing_key_metadata
+// }
+impl<'a> WriteThrift for FileMeta<'a> {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    /// Writes the thrift FileMetaData struct (layout above) in ascending field id order.
+    // without the `encryption` feature the final assignment to last_field_id is never read
+    #[allow(unused_assignments)]
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        self.file_metadata
+            .version
+            .write_thrift_field(writer, 1, 0)?;
+
+        // field 2 is schema. do depth-first traversal of tree, converting to SchemaElement and
+        // writing along the way.
+        let root = self.file_metadata.schema_descr().root_schema_ptr();
+        let schema_len = num_nodes(&root)?;
+        writer.write_field_begin(FieldType::List, 2, 1)?;
+        writer.write_list_begin(ElementType::Struct, schema_len)?;
+        // recursively write Type nodes as SchemaElements
+        write_schema(&root, writer)?;
+
+        self.file_metadata
+            .num_rows
+            .write_thrift_field(writer, 3, 2)?;
+
+        // this will call RowGroupMetaData::write_thrift
+        let mut last_field_id = self.row_groups.write_thrift_field(writer, 4, 3)?;
+        // optional fields: each is written only when present, chaining last_field_id forward
+        if let Some(kv_metadata) = self.file_metadata.key_value_metadata() {
+            last_field_id = kv_metadata.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        if let Some(created_by) = self.file_metadata.created_by() {
+            last_field_id = created_by.write_thrift_field(writer, 6, last_field_id)?;
+        }
+        if let Some(column_orders) = self.file_metadata.column_orders() {
+            last_field_id = column_orders.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        #[cfg(feature = "encryption")]
+        if let Some(algo) = self.file_metadata.encryption_algorithm.as_ref() {
+            last_field_id = algo.write_thrift_field(writer, 8, last_field_id)?;
+        }
+        #[cfg(feature = "encryption")]
+        if let Some(key) = self.file_metadata.footer_signing_key_metadata.as_ref() {
+            key.as_slice()
+                .write_thrift_field(writer, 9, last_field_id)?;
+        }
+
+        writer.write_struct_end()
+    }
+}
+
+fn write_schema<W: Write>(
+    schema: &TypePtr,
+    writer: &mut ThriftCompactOutputProtocol<W>,
+) -> Result<()> {
+    if schema.is_group() {
+        return write_schema_helper(schema, writer); // root is a group: write the whole tree
+    }
+    Err(general_err!("Root schema must be Group type"))
+}
+
+fn write_schema_helper<W: Write>(
+    node: &TypePtr,
+    writer: &mut ThriftCompactOutputProtocol<W>,
+) -> Result<()> {
+    match node.as_ref() { // converts `node` to a SchemaElement and writes it; groups recurse
+        crate::schema::types::Type::PrimitiveType {
+            basic_info,
+            physical_type,
+            type_length,
+            scale,
+            precision,
+        } => {
+            let element = SchemaElement {
+                r#type: Some(*physical_type),
+                type_length: if *type_length >= 0 { // negative values are treated as unset
+                    Some(*type_length)
+                } else {
+                    None
+                },
+                repetition_type: Some(basic_info.repetition()),
+                name: basic_info.name(),
+                num_children: None, // primitive nodes have no children
+                converted_type: match basic_info.converted_type() {
+                    ConvertedType::NONE => None,
+                    other => Some(other),
+                },
+                scale: if *scale >= 0 { Some(*scale) } else { None },
+                precision: if *precision >= 0 {
+                    Some(*precision)
+                } else {
+                    None
+                },
+                field_id: if basic_info.has_id() {
+                    Some(basic_info.id())
+                } else {
+                    None
+                },
+                logical_type: basic_info.logical_type_ref().cloned(),
+            };
+            element.write_thrift(writer)
+        }
+        crate::schema::types::Type::GroupType { basic_info, fields } => {
+            let repetition = if basic_info.has_repetition() {
+                Some(basic_info.repetition())
+            } else {
+                None
+            };
+            // a group element carries no physical type, type length, scale or precision
+            let element = SchemaElement {
+                r#type: None,
+                type_length: None,
+                repetition_type: repetition,
+                name: basic_info.name(),
+                num_children: Some(fields.len().try_into()?),
+                converted_type: match basic_info.converted_type() {
+                    ConvertedType::NONE => None,
+                    other => Some(other),
+                },
+                scale: None,
+                precision: None,
+                field_id: if basic_info.has_id() {
+                    Some(basic_info.id())
+                } else {
+                    None
+                },
+                logical_type: basic_info.logical_type_ref().cloned(),
+            };
+            // the group's own element is written before any of its children
+            element.write_thrift(writer)?;
+
+            // Add child elements for a group
+            for field in fields {
+                write_schema_helper(field, writer)?;
+            }
+            Ok(())
+        }
+    }
+}
+
+// struct RowGroup {
+//   1: required list<ColumnChunk> columns
+//   2: required i64 total_byte_size
+//   3: required i64 num_rows
+//   4: optional list<SortingColumn> sorting_columns
+//   5: optional i64 file_offset
+//   6: optional i64 total_compressed_size
+//   7: optional i16 ordinal
+// }
+impl WriteThrift for RowGroupMetaData {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    /// Writes this row group as a thrift RowGroup struct (layout above).
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        // this will call ColumnChunkMetaData::write_thrift
+        self.columns.write_thrift_field(writer, 1, 0)?;
+        self.total_byte_size.write_thrift_field(writer, 2, 1)?;
+        let mut last_field_id = self.num_rows.write_thrift_field(writer, 3, 2)?;
+        if let Some(sorting_columns) = self.sorting_columns() {
+            last_field_id = sorting_columns.write_thrift_field(writer, 4, last_field_id)?;
+        }
+        if let Some(file_offset) = self.file_offset() {
+            last_field_id = file_offset.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        // total_compressed_size (field 6) is optional, but we'll always write it
+        last_field_id = self
+            .compressed_size()
+            .write_thrift_field(writer, 6, last_field_id)?;
+        if let Some(ordinal) = self.ordinal() {
+            ordinal.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        writer.write_struct_end()
+    }
+}
+
+// struct ColumnChunk {
+//   1: optional string file_path
+//   2: required i64 file_offset = 0
+//   3: optional ColumnMetaData meta_data
+//   4: optional i64 offset_index_offset
+//   5: optional i32 offset_index_length
+//   6: optional i64 column_index_offset
+//   7: optional i32 column_index_length
+//   8: optional ColumnCryptoMetaData crypto_metadata
+//   9: optional binary encrypted_column_metadata
+// }
+impl WriteThrift for ColumnChunkMetaData {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    /// Writes this column chunk as a thrift ColumnChunk struct (layout above).
+    #[allow(unused_assignments)] // the last last_field_id assignment can go unread
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        let mut last_field_id = 0i16;
+        if let Some(file_path) = self.file_path() {
+            last_field_id = file_path.write_thrift_field(writer, 1, last_field_id)?;
+        }
+        last_field_id = self
+            .file_offset()
+            .write_thrift_field(writer, 2, last_field_id)?;
+        // field 3 (meta_data): field header written here, body by serialize_column_meta_data
+        #[cfg(feature = "encryption")]
+        {
+            // only write the ColumnMetaData if we haven't already encrypted it
+            if self.encrypted_column_metadata.is_none() {
+                writer.write_field_begin(FieldType::Struct, 3, last_field_id)?;
+                serialize_column_meta_data(self, writer)?;
+                last_field_id = 3;
+            }
+        }
+        #[cfg(not(feature = "encryption"))]
+        {
+            // always write the ColumnMetaData
+            writer.write_field_begin(FieldType::Struct, 3, last_field_id)?;
+            serialize_column_meta_data(self, writer)?;
+            last_field_id = 3;
+        }
+        // fields 4-7: offset and length of the offset index and column index, when set
+        if let Some(offset_idx_off) = self.offset_index_offset() {
+            last_field_id = offset_idx_off.write_thrift_field(writer, 4, last_field_id)?;
+        }
+        if let Some(offset_idx_len) = self.offset_index_length() {
+            last_field_id = offset_idx_len.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        if let Some(column_idx_off) = self.column_index_offset() {
+            last_field_id = column_idx_off.write_thrift_field(writer, 6, last_field_id)?;
+        }
+        if let Some(column_idx_len) = self.column_index_length() {
+            last_field_id = column_idx_len.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        #[cfg(feature = "encryption")]
+        {
+            if let Some(crypto_metadata) = self.crypto_metadata() {
+                last_field_id = crypto_metadata.write_thrift_field(writer, 8, last_field_id)?;
+            }
+            if let Some(encrypted_meta) = self.encrypted_column_metadata.as_ref() {
+                encrypted_meta
+                    .as_slice()
+                    .write_thrift_field(writer, 9, last_field_id)?;
+            }
+        }
+
+        writer.write_struct_end()
+    }
+}
+
+// struct GeospatialStatistics {
+//   1: optional BoundingBox bbox;
+//   2: optional list<i32> geospatial_types;
+// }
+impl WriteThrift for crate::geospatial::statistics::GeospatialStatistics {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    /// Writes whichever of the bounding box and geospatial types are present, then ends the struct.
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        let mut last_field_id = 0i16;
+        if let Some(bbox) = self.bounding_box() {
+            last_field_id = bbox.write_thrift_field(writer, 1, last_field_id)?;
+        }
+        if let Some(geo_types) = self.geospatial_types() {
+            geo_types.write_thrift_field(writer, 2, last_field_id)?;
+        }
+
+        writer.write_struct_end()
+    }
+}
+
+// macro cannot handle qualified names
+use crate::geospatial::statistics::GeospatialStatistics as RustGeospatialStatistics;
+write_thrift_field!(RustGeospatialStatistics, FieldType::Struct);
+
+// struct BoundingBox {
+//   1: required double xmin;
+//   2: required double xmax;
+//   3: required double ymin;
+//   4: required double ymax;
+//   5: optional double zmin;
+//   6: optional double zmax;
+//   7: optional double mmin;
+//   8: optional double mmax;
+// }
+impl WriteThrift for crate::geospatial::bounding_box::BoundingBox {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    /// Writes this bounding box as a thrift BoundingBox struct (layout above).
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        self.get_xmin().write_thrift_field(writer, 1, 0)?;
+        self.get_xmax().write_thrift_field(writer, 2, 1)?;
+        self.get_ymin().write_thrift_field(writer, 3, 2)?;
+        let mut last_field_id = self.get_ymax().write_thrift_field(writer, 4, 3)?;
+        // the x and y bounds are always written; z and m bounds only when present
+        if let Some(zmin) = self.get_zmin() {
+            last_field_id = zmin.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        if let Some(zmax) = self.get_zmax() {
+            last_field_id = zmax.write_thrift_field(writer, 6, last_field_id)?;
+        }
+        if let Some(mmin) = self.get_mmin() {
+            last_field_id = mmin.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        if let Some(mmax) = self.get_mmax() {
+            mmax.write_thrift_field(writer, 8, last_field_id)?;
+        }
+
+        writer.write_struct_end()
+    }
+}
+
+// macro cannot handle qualified names
+use crate::geospatial::bounding_box::BoundingBox as RustBoundingBox;
+write_thrift_field!(RustBoundingBox, FieldType::Struct);
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use crate::errors::Result;
+    use crate::file::metadata::thrift::{BoundingBox, SchemaElement, write_schema};
+    use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaDataOptions, RowGroupMetaData};
+    use crate::parquet_thrift::tests::test_roundtrip;
+    use crate::parquet_thrift::{
+        ElementType, ThriftCompactOutputProtocol, ThriftSliceInputProtocol, read_thrift_vec,
+    };
+    use crate::schema::types::{
+        ColumnDescriptor, SchemaDescriptor, TypePtr, num_nodes, parquet_schema_from_array,
+    };
+    use std::sync::Arc;
+
+    // for testing. decode thrift encoded RowGroup
+    pub(crate) fn read_row_group(
+        buf: &mut [u8],
+        schema_descr: Arc<SchemaDescriptor>,
+    ) -> Result<RowGroupMetaData> {
+        let mut reader = ThriftSliceInputProtocol::new(buf);
+        crate::file::metadata::thrift::read_row_group(&mut reader, &schema_descr, None)
+    }
+    // for testing. decode thrift encoded ColumnChunk without any ParquetMetaDataOptions
+    pub(crate) fn read_column_chunk(
+        buf: &mut [u8],
+        column_descr: Arc<ColumnDescriptor>,
+    ) -> Result<ColumnChunkMetaData> {
+        read_column_chunk_with_options(buf, column_descr, None)
+    }
+    // for testing. decode thrift encoded ColumnChunk, passing `options` through to the reader
+    pub(crate) fn read_column_chunk_with_options(
+        buf: &mut [u8],
+        column_descr: Arc<ColumnDescriptor>,
+        options: Option<&ParquetMetaDataOptions>,
+    ) -> Result<ColumnChunkMetaData> {
+        let mut reader = ThriftSliceInputProtocol::new(buf);
+        crate::file::metadata::thrift::read_column_chunk(&mut reader, &column_descr, 0, options)
+    }
+    // for testing. write schema as a thrift list of SchemaElement, then rebuild a schema from it
+    pub(crate) fn roundtrip_schema(schema: TypePtr) -> Result<TypePtr> {
+        let num_nodes = num_nodes(&schema)?;
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+
+        // kick off writing list
+        writer.write_list_begin(ElementType::Struct, num_nodes)?;
+
+        // write SchemaElements
+        write_schema(&schema, &mut writer)?;
+
+        let mut prot = ThriftSliceInputProtocol::new(&buf);
+        let se: Vec<SchemaElement> = read_thrift_vec(&mut prot)?;
+        parquet_schema_from_array(se)
+    }
+    // for testing. encode schema as a thrift list of SchemaElement and return the bytes
+    pub(crate) fn schema_to_buf(schema: &TypePtr) -> Result<Vec<u8>> {
+        let num_nodes = num_nodes(schema)?;
+        let mut buf = Vec::new();
+        let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+
+        // kick off writing list
+        writer.write_list_begin(ElementType::Struct, num_nodes)?;
+
+        // write SchemaElements
+        write_schema(schema, &mut writer)?;
+        Ok(buf)
+    }
+    // for testing. decode a thrift list of SchemaElement from buf
+    pub(crate) fn buf_to_schema_list<'a>(buf: &'a mut Vec<u8>) -> Result<Vec<SchemaElement<'a>>> {
+        let mut prot = ThriftSliceInputProtocol::new(buf.as_mut_slice());
+        read_thrift_vec(&mut prot)
+    }
+
+    #[test]
+    fn test_bounding_box_roundtrip() {
+        test_roundtrip(BoundingBox {
+            xmin: 0.1.into(),
+            xmax: 10.3.into(),
+            ymin: 0.001.into(),
+            ymax: 128.5.into(),
+            zmin: None,
+            zmax: None,
+            mmin: None,
+            mmax: None,
+        });
+        // z range present, m range absent
+        test_roundtrip(BoundingBox {
+            xmin: 0.1.into(),
+            xmax: 10.3.into(),
+            ymin: 0.001.into(),
+            ymax: 128.5.into(),
+            zmin: Some(11.0.into()),
+            zmax: Some(1300.0.into()),
+            mmin: None,
+            mmax: None,
+        });
+        // both z and m ranges present
+        test_roundtrip(BoundingBox {
+            xmin: 0.1.into(),
+            xmax: 10.3.into(),
+            ymin: 0.001.into(),
+            ymax: 128.5.into(),
+            zmin: Some(11.0.into()),
+            zmax: Some(1300.0.into()),
+            mmin: Some(3.7.into()),
+            mmax: Some(42.0.into()),
+        });
+    }
+}
diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs
index 0320d1e474fd..38215f5ecdcb 100644
--- a/parquet/src/file/metadata/writer.rs
+++ b/parquet/src/file/metadata/writer.rs
@@ -15,40 +15,49 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::file::metadata::thrift::FileMeta;
+use crate::file::metadata::{
+    ColumnChunkMetaData, ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData,
+};
+use crate::schema::types::{SchemaDescPtr, SchemaDescriptor};
+use crate::{
+    basic::ColumnOrder,
+    file::metadata::{FileMetaData, ParquetMetaDataBuilder},
+};
 #[cfg(feature = "encryption")]
-use crate::encryption::{
-    encrypt::{
-        encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor,
+use crate::{
+    encryption::{
+        encrypt::{FileEncryptor, encrypt_thrift_object, write_signed_plaintext_thrift_object},
+        modules::{ModuleType, create_footer_aad, create_module_aad},
     },
-    modules::{create_footer_aad, create_module_aad, ModuleType},
+    file::column_crypto_metadata::ColumnCryptoMetaData,
+    file::metadata::thrift::encryption::{AesGcmV1, EncryptionAlgorithm, FileCryptoMetaData},
+};
+use crate::{errors::Result, file::page_index::column_index::ColumnIndexMetaData};
+
+use crate::{
+    file::writer::{TrackedWrite, get_file_magic},
+    parquet_thrift::WriteThrift,
+};
+use crate::{
+    file::{
+        metadata::{KeyValue, ParquetMetaData},
+        page_index::offset_index::OffsetIndexMetaData,
+    },
+    parquet_thrift::ThriftCompactOutputProtocol,
 };
-#[cfg(feature = "encryption")]
-use crate::errors::ParquetError;
-use crate::errors::Result;
-use crate::file::metadata::{KeyValue, ParquetMetaData};
-use crate::file::page_index::index::Index;
-use crate::file::writer::{get_file_magic, TrackedWrite};
-use crate::format::EncryptionAlgorithm;
-#[cfg(feature = "encryption")]
-use crate::format::{AesGcmV1, ColumnCryptoMetaData};
-use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, RowGroup};
-use crate::schema::types;
-use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr};
-use crate::thrift::TSerializable;
 use std::io::Write;
 use std::sync::Arc;
-use thrift::protocol::TCompactOutputProtocol;
 
 /// Writes `crate::file::metadata` structures to a thrift encoded byte stream
 ///
 /// See [`ParquetMetaDataWriter`] for background and example.
 pub(crate) struct ThriftMetadataWriter<'a, W: Write> {
     buf: &'a mut TrackedWrite<W>,
-    schema: &'a TypePtr,
     schema_descr: &'a SchemaDescPtr,
-    row_groups: Vec<RowGroup>,
-    column_indexes: Option<&'a [Vec<Option<ColumnIndex>>]>,
-    offset_indexes: Option<&'a [Vec<Option<OffsetIndex>>]>,
+    row_groups: Vec<RowGroupMetaData>,
+    column_indexes: Option<Vec<Vec<Option<ColumnIndexMetaData>>>>,
+    offset_indexes: Option<Vec<Vec<Option<OffsetIndexMetaData>>>>,
     key_value_metadata: Option<Vec<KeyValue>>,
     created_by: Option<String>,
     object_writer: MetadataObjectWriter,
@@ -61,7 +70,10 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
     /// Note: also updates the `ColumnChunk::offset_index_offset` and
     /// `ColumnChunk::offset_index_length` to reflect the position and length
     /// of the serialized offset indexes.
-    fn write_offset_indexes(&mut self, offset_indexes: &[Vec<Option<OffsetIndex>>]) -> Result<()> {
+    fn write_offset_indexes(
+        &mut self,
+        offset_indexes: &[Vec<Option<OffsetIndexMetaData>>],
+    ) -> Result<()> {
         // iter row group
         // iter each column
         // write offset index to the file
@@ -91,7 +103,10 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
     /// Note: also updates the `ColumnChunk::column_index_offset` and
     /// `ColumnChunk::column_index_length` to reflect the position and length
     /// of the serialized column indexes.
-    fn write_column_indexes(&mut self, column_indexes: &[Vec<Option<ColumnIndex>>]) -> Result<()> {
+    fn write_column_indexes(
+        &mut self,
+        column_indexes: &[Vec<Option<ColumnIndexMetaData>>],
+    ) -> Result<()> {
         // iter row group
         // iter each column
         // write column index to the file
@@ -99,69 +114,157 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
             for (column_idx, column_metadata) in row_group.columns.iter_mut().enumerate() {
                 if let Some(column_index) = &column_indexes[row_group_idx][column_idx] {
                     let start_offset = self.buf.bytes_written();
-                    self.object_writer.write_column_index(
+                    // only update column_metadata if the write succeeds
+                    if self.object_writer.write_column_index(
                         column_index,
                         column_metadata,
                         row_group_idx,
                         column_idx,
                         &mut self.buf,
-                    )?;
-                    let end_offset = self.buf.bytes_written();
-                    // set offset and index for offset index
-                    column_metadata.column_index_offset = Some(start_offset as i64);
-                    column_metadata.column_index_length = Some((end_offset - start_offset) as i32);
+                    )? {
+                        let end_offset = self.buf.bytes_written();
+                        // set offset and index for offset index
+                        column_metadata.column_index_offset = Some(start_offset as i64);
+                        column_metadata.column_index_length =
+                            Some((end_offset - start_offset) as i32);
+                    }
                 }
             }
         }
         Ok(())
     }
 
-    /// Assembles and writes the final metadata to self.buf
-    pub fn finish(mut self) -> Result<crate::format::FileMetaData> {
-        let num_rows = self.row_groups.iter().map(|x| x.num_rows).sum();
+    /// Serialize the column indexes and transform to `Option<ParquetColumnIndex>`
+    fn finalize_column_indexes(&mut self) -> Result<Option<ParquetColumnIndex>> {
+        let column_indexes = std::mem::take(&mut self.column_indexes); // leaves `None` behind
 
-        // Write column indexes and offset indexes
-        if let Some(column_indexes) = self.column_indexes {
+        // Write column indexes to file
+        if let Some(column_indexes) = column_indexes.as_ref() {
             self.write_column_indexes(column_indexes)?;
         }
-        if let Some(offset_indexes) = self.offset_indexes {
+
+        // check to see if the index is `None` for every row group and column chunk
+        let all_none = column_indexes
+            .as_ref()
+            .is_some_and(|ci| ci.iter().all(|cii| cii.iter().all(|idx| idx.is_none())));
+        // if every index is missing return `None`; otherwise a missing index becomes `NONE`
+        // transform from Option<Vec<Vec<Option<ColumnIndexMetaData>>>> to
+        // Option<Vec<Vec<ColumnIndexMetaData>>>
+        let column_indexes: Option<ParquetColumnIndex> = if all_none {
+            None
+        } else {
+            column_indexes.map(|ovvi| {
+                ovvi.into_iter()
+                    .map(|vi| {
+                        vi.into_iter()
+                            .map(|ci| ci.unwrap_or(ColumnIndexMetaData::NONE))
+                            .collect()
+                    })
+                    .collect()
+            })
+        };
+
+        Ok(column_indexes)
+    }
+
+    /// Serialize the offset indexes and transform to `Option<ParquetOffsetIndex>`.
+    /// Returns an error (rather than panicking) if only some of the offset indexes are missing.
+    fn finalize_offset_indexes(&mut self) -> Result<Option<ParquetOffsetIndex>> {
+        let offset_indexes = std::mem::take(&mut self.offset_indexes);
+        // Write offset indexes to file
+        if let Some(offset_indexes) = offset_indexes.as_ref() {
             self.write_offset_indexes(offset_indexes)?;
         }
 
+        // nothing to return if the index is `None` for every row group and column chunk
+        let all_none = offset_indexes
+            .as_ref()
+            .is_some_and(|oi| oi.iter().all(|oii| oii.iter().all(|idx| idx.is_none())));
+        if all_none {
+            return Ok(None);
+        }
+
+        // transform from Option<Vec<Vec<Option<OffsetIndexMetaData>>>> to
+        // Option<Vec<Vec<OffsetIndexMetaData>>>; any `None` entry left at this point is an error
+        let missing = || crate::errors::ParquetError::General("missing offset index".to_string());
+        offset_indexes
+            .map(|ovvi| {
+                let per_rg = ovvi.into_iter().map(|vi| vi.into_iter().collect::<Option<Vec<_>>>());
+                per_rg.collect::<Option<Vec<Vec<_>>>>().ok_or_else(missing)
+            })
+            .transpose()
+    }
+
+    /// Assembles and writes the final metadata to self.buf
+    pub fn finish(mut self) -> Result<ParquetMetaData> {
+        let num_rows = self.row_groups.iter().map(|x| x.num_rows).sum();
+
+        // serialize page indexes and transform to the proper form for use in ParquetMetaData
+        let column_indexes = self.finalize_column_indexes()?;
+        let offset_indexes = self.finalize_offset_indexes()?;
+
         // We only include ColumnOrder for leaf nodes.
         // Currently only supported ColumnOrder is TypeDefinedOrder so we set this
         // for all leaf nodes.
         // Even if the column has an undefined sort order, such as INTERVAL, this
         // is still technically the defined TYPEORDER so it should still be set.
-        let column_orders = (0..self.schema_descr.num_columns())
-            .map(|_| crate::format::ColumnOrder::TYPEORDER(crate::format::TypeDefinedOrder {}))
+        let column_orders = self
+            .schema_descr
+            .columns()
+            .iter()
+            .map(|col| {
+                let sort_order = ColumnOrder::sort_order_for_type(
+                    col.logical_type_ref(),
+                    col.converted_type(),
+                    col.physical_type(),
+                );
+                ColumnOrder::TYPE_DEFINED_ORDER(sort_order)
+            })
             .collect();
+
         // This field is optional, perhaps in cases where no min/max fields are set
         // in any Statistics or ColumnIndex object in the whole file.
         // But for simplicity we always set this field.
         let column_orders = Some(column_orders);
+
         let (row_groups, unencrypted_row_groups) = self
             .object_writer
             .apply_row_group_encryption(self.row_groups)?;
 
+        #[cfg(feature = "encryption")]
         let (encryption_algorithm, footer_signing_key_metadata) =
             self.object_writer.get_plaintext_footer_crypto_metadata();
-        let mut file_metadata = FileMetaData {
+        #[cfg(feature = "encryption")]
+        let file_metadata = FileMetaData::new(
+            self.writer_version,
             num_rows,
-            row_groups,
-            key_value_metadata: self.key_value_metadata.clone(),
-            version: self.writer_version,
-            schema: types::to_thrift(self.schema.as_ref())?,
-            created_by: self.created_by.clone(),
+            self.created_by,
+            self.key_value_metadata,
+            self.schema_descr.clone(),
             column_orders,
-            encryption_algorithm,
-            footer_signing_key_metadata,
+        )
+        .with_encryption_algorithm(encryption_algorithm)
+        .with_footer_signing_key_metadata(footer_signing_key_metadata);
+
+        #[cfg(not(feature = "encryption"))]
+        let file_metadata = FileMetaData::new(
+            self.writer_version,
+            num_rows,
+            self.created_by,
+            self.key_value_metadata,
+            self.schema_descr.clone(),
+            column_orders,
+        );
+
+        let file_meta = FileMeta {
+            file_metadata: &file_metadata,
+            row_groups: &row_groups,
         };
 
         // Write file metadata
         let start_pos = self.buf.bytes_written();
         self.object_writer
-            .write_file_metadata(&file_metadata, &mut self.buf)?;
+            .write_file_metadata(&file_meta, &mut self.buf)?;
         let end_pos = self.buf.bytes_written();
 
         // Write footer
@@ -170,28 +273,29 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         self.buf.write_all(&metadata_len.to_le_bytes())?;
         self.buf.write_all(self.object_writer.get_file_magic())?;
 
-        if let Some(row_groups) = unencrypted_row_groups {
-            // If row group metadata was encrypted, we replace the encrypted row groups with
-            // unencrypted metadata before it is returned to users. This allows the metadata
-            // to be usable for retrieving the row group statistics for example, without users
-            // needing to decrypt the metadata.
-            file_metadata.row_groups = row_groups;
-        }
-
-        Ok(file_metadata)
+        // If row group metadata was encrypted, we replace the encrypted row groups with
+        // unencrypted metadata before it is returned to users. This allows the metadata
+        // to be usable for retrieving the row group statistics for example, without users
+        // needing to decrypt the metadata.
+        let builder = ParquetMetaDataBuilder::new(file_metadata)
+            .set_column_index(column_indexes)
+            .set_offset_index(offset_indexes);
+
+        Ok(match unencrypted_row_groups {
+            Some(rg) => builder.set_row_groups(rg).build(),
+            None => builder.set_row_groups(row_groups).build(),
+        })
     }
 
     pub fn new(
         buf: &'a mut TrackedWrite<W>,
-        schema: &'a TypePtr,
         schema_descr: &'a SchemaDescPtr,
-        row_groups: Vec<RowGroup>,
+        row_groups: Vec<RowGroupMetaData>,
         created_by: Option<String>,
         writer_version: i32,
     ) -> Self {
         Self {
             buf,
-            schema,
             schema_descr,
             row_groups,
             column_indexes: None,
@@ -203,12 +307,18 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         }
     }
 
-    pub fn with_column_indexes(mut self, column_indexes: &'a [Vec<Option<ColumnIndex>>]) -> Self {
+    pub fn with_column_indexes(
+        mut self,
+        column_indexes: Vec<Vec<Option<ColumnIndexMetaData>>>,
+    ) -> Self {
         self.column_indexes = Some(column_indexes);
         self
     }
 
-    pub fn with_offset_indexes(mut self, offset_indexes: &'a [Vec<Option<OffsetIndex>>]) -> Self {
+    pub fn with_offset_indexes(
+        mut self,
+        offset_indexes: Vec<Vec<Option<OffsetIndexMetaData>>>,
+    ) -> Self {
         self.offset_indexes = Some(offset_indexes);
         self
     }
@@ -255,8 +365,10 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
 /// 4. Length of encoded `FileMetaData` (4 bytes, little endian)
 /// 5. Parquet Magic Bytes (4 bytes)
 ///
-/// [`FileMetaData`]: crate::format::FileMetaData
+/// [`FileMetaData`]: https://github.com/apache/parquet-format/tree/master?tab=readme-ov-file#metadata
 /// [`ColumnChunkMetaData`]: crate::file::metadata::ColumnChunkMetaData
+/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 ///
 /// ```text
 /// ┌──────────────────────┐
@@ -335,12 +447,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
         let schema_descr = Arc::new(SchemaDescriptor::new(schema.clone()));
         let created_by = file_metadata.created_by().map(str::to_string);
 
-        let row_groups = self
-            .metadata
-            .row_groups()
-            .iter()
-            .map(|rg| rg.to_thrift())
-            .collect::<Vec<_>>();
+        let row_groups = self.metadata.row_groups.clone();
 
         let key_value_metadata = file_metadata.key_value_metadata().cloned();
 
@@ -349,14 +456,20 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
 
         let mut encoder = ThriftMetadataWriter::new(
             &mut self.buf,
-            &schema,
             &schema_descr,
             row_groups,
             created_by,
             file_metadata.version(),
         );
-        encoder = encoder.with_column_indexes(&column_indexes);
-        encoder = encoder.with_offset_indexes(&offset_indexes);
+
+        if let Some(column_indexes) = column_indexes {
+            encoder = encoder.with_column_indexes(column_indexes);
+        }
+
+        if let Some(offset_indexes) = offset_indexes {
+            encoder = encoder.with_offset_indexes(offset_indexes);
+        }
+
         if let Some(key_value_metadata) = key_value_metadata {
             encoder = encoder.with_key_value_metadata(key_value_metadata);
         }
@@ -365,58 +478,38 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
         Ok(())
     }
 
-    fn convert_column_indexes(&self) -> Vec<Vec<Option<ColumnIndex>>> {
-        if let Some(row_group_column_indexes) = self.metadata.column_index() {
-            (0..self.metadata.row_groups().len())
-                .map(|rg_idx| {
-                    let column_indexes = &row_group_column_indexes[rg_idx];
-                    column_indexes
-                        .iter()
-                        .map(|column_index| match column_index {
-                            Index::NONE => None,
-                            Index::BOOLEAN(column_index) => Some(column_index.to_thrift()),
-                            Index::BYTE_ARRAY(column_index) => Some(column_index.to_thrift()),
-                            Index::DOUBLE(column_index) => Some(column_index.to_thrift()),
-                            Index::FIXED_LEN_BYTE_ARRAY(column_index) => {
-                                Some(column_index.to_thrift())
-                            }
-                            Index::FLOAT(column_index) => Some(column_index.to_thrift()),
-                            Index::INT32(column_index) => Some(column_index.to_thrift()),
-                            Index::INT64(column_index) => Some(column_index.to_thrift()),
-                            Index::INT96(column_index) => Some(column_index.to_thrift()),
-                        })
-                        .collect()
-                })
-                .collect()
-        } else {
-            // make a None for each row group, for each column
-            self.metadata
-                .row_groups()
-                .iter()
-                .map(|rg| std::iter::repeat(None).take(rg.columns().len()).collect())
-                .collect()
-        }
+    fn convert_column_indexes(&self) -> Option<Vec<Vec<Option<ColumnIndexMetaData>>>> {
+        // TODO(ets): we're converting from ParquetColumnIndex to vec<vec<option>>,
+        // but then converting back to ParquetColumnIndex in the end. need to unify this.
+        self.metadata
+            .column_index()
+            .map(|row_group_column_indexes| {
+                (0..self.metadata.row_groups().len())
+                    .map(|rg_idx| {
+                        let column_indexes = &row_group_column_indexes[rg_idx];
+                        column_indexes
+                            .iter()
+                            .map(|column_index| Some(column_index.clone()))
+                            .collect()
+                    })
+                    .collect()
+            })
     }
 
-    fn convert_offset_index(&self) -> Vec<Vec<Option<OffsetIndex>>> {
-        if let Some(row_group_offset_indexes) = self.metadata.offset_index() {
-            (0..self.metadata.row_groups().len())
-                .map(|rg_idx| {
-                    let offset_indexes = &row_group_offset_indexes[rg_idx];
-                    offset_indexes
-                        .iter()
-                        .map(|offset_index| Some(offset_index.to_thrift()))
-                        .collect()
-                })
-                .collect()
-        } else {
-            // make a None for each row group, for each column
-            self.metadata
-                .row_groups()
-                .iter()
-                .map(|rg| std::iter::repeat(None).take(rg.columns().len()).collect())
-                .collect()
-        }
+    fn convert_offset_index(&self) -> Option<Vec<Vec<Option<OffsetIndexMetaData>>>> {
+        self.metadata
+            .offset_index()
+            .map(|row_group_offset_indexes| {
+                (0..self.metadata.row_groups().len())
+                    .map(|rg_idx| {
+                        let offset_indexes = &row_group_offset_indexes[rg_idx];
+                        offset_indexes
+                            .iter()
+                            .map(|offset_index| Some(offset_index.clone()))
+                            .collect()
+                    })
+                    .collect()
+            })
     }
 }
 
@@ -428,9 +521,9 @@ struct MetadataObjectWriter {
 
 impl MetadataObjectWriter {
     #[inline]
-    fn write_object(object: &impl TSerializable, sink: impl Write) -> Result<()> {
-        let mut protocol = TCompactOutputProtocol::new(sink);
-        object.write_to_out_protocol(&mut protocol)?;
+    fn write_thrift_object(object: &impl WriteThrift, sink: impl Write) -> Result<()> {
+        let mut protocol = ThriftCompactOutputProtocol::new(sink);
+        object.write_thrift(&mut protocol)?;
         Ok(())
     }
 }
@@ -439,39 +532,55 @@ impl MetadataObjectWriter {
 #[cfg(not(feature = "encryption"))]
 impl MetadataObjectWriter {
     /// Write [`FileMetaData`] in Thrift format
-    fn write_file_metadata(&self, file_metadata: &FileMetaData, sink: impl Write) -> Result<()> {
-        Self::write_object(file_metadata, sink)
+    ///
+    /// [`FileMetaData`]: https://github.com/apache/parquet-format/tree/master?tab=readme-ov-file#metadata
+    fn write_file_metadata(&self, file_metadata: &FileMeta, sink: impl Write) -> Result<()> {
+        Self::write_thrift_object(file_metadata, sink)
     }
 
     /// Write a column [`OffsetIndex`] in Thrift format
+    ///
+    /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
     fn write_offset_index(
         &self,
-        offset_index: &OffsetIndex,
-        _column_chunk: &ColumnChunk,
+        offset_index: &OffsetIndexMetaData,
+        _column_chunk: &ColumnChunkMetaData,
         _row_group_idx: usize,
         _column_idx: usize,
         sink: impl Write,
     ) -> Result<()> {
-        Self::write_object(offset_index, sink)
+        Self::write_thrift_object(offset_index, sink)
     }
 
     /// Write a column [`ColumnIndex`] in Thrift format
+    ///
+    /// If `column_index` is [`ColumnIndexMetaData::NONE`] the index will not be written and
+    /// this will return `false`. Returns `true` otherwise.
+    ///
+    /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
     fn write_column_index(
         &self,
-        column_index: &ColumnIndex,
-        _column_chunk: &ColumnChunk,
+        column_index: &ColumnIndexMetaData,
+        _column_chunk: &ColumnChunkMetaData,
         _row_group_idx: usize,
         _column_idx: usize,
         sink: impl Write,
-    ) -> Result<()> {
-        Self::write_object(column_index, sink)
+    ) -> Result<bool> {
+        match column_index {
+            // Missing indexes may also have the placeholder ColumnIndexMetaData::NONE
+            ColumnIndexMetaData::NONE => Ok(false),
+            _ => {
+                Self::write_thrift_object(column_index, sink)?;
+                Ok(true)
+            }
+        }
     }
 
     /// No-op implementation of row-group metadata encryption
     fn apply_row_group_encryption(
         &self,
-        row_groups: Vec<RowGroup>,
-    ) -> Result<(Vec<RowGroup>, Option<Vec<RowGroup>>)> {
+        row_groups: Vec<RowGroupMetaData>,
+    ) -> Result<(Vec<RowGroupMetaData>, Option<Vec<RowGroupMetaData>>)> {
         Ok((row_groups, None))
     }
 
@@ -479,12 +588,6 @@ impl MetadataObjectWriter {
     pub fn get_file_magic(&self) -> &[u8; 4] {
         get_file_magic()
     }
-
-    fn get_plaintext_footer_crypto_metadata(
-        &self,
-    ) -> (Option<EncryptionAlgorithm>, Option<Vec<u8>>) {
-        (None, None)
-    }
 }
 
 /// Implementations of [`MetadataObjectWriter`] methods that rely on encryption being enabled
@@ -497,43 +600,43 @@ impl MetadataObjectWriter {
     }
 
     /// Write [`FileMetaData`] in Thrift format, possibly encrypting it if required
-    fn write_file_metadata(
-        &self,
-        file_metadata: &FileMetaData,
-        mut sink: impl Write,
-    ) -> Result<()> {
+    ///
+    /// [`FileMetaData`]: https://github.com/apache/parquet-format/tree/master?tab=readme-ov-file#metadata
+    fn write_file_metadata(&self, file_metadata: &FileMeta, mut sink: impl Write) -> Result<()> {
         match self.file_encryptor.as_ref() {
             Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => {
                 // First write FileCryptoMetadata
                 let crypto_metadata = Self::file_crypto_metadata(file_encryptor)?;
-                let mut protocol = TCompactOutputProtocol::new(&mut sink);
-                crypto_metadata.write_to_out_protocol(&mut protocol)?;
+                let mut protocol = ThriftCompactOutputProtocol::new(&mut sink);
+                crypto_metadata.write_thrift(&mut protocol)?;
 
                 // Then write encrypted footer
                 let aad = create_footer_aad(file_encryptor.file_aad())?;
                 let mut encryptor = file_encryptor.get_footer_encryptor()?;
-                encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad)
+                encrypt_thrift_object(file_metadata, &mut encryptor, &mut sink, &aad)
             }
-            Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => {
+            Some(file_encryptor) if file_metadata.file_metadata.encryption_algorithm.is_some() => {
                 let aad = create_footer_aad(file_encryptor.file_aad())?;
                 let mut encryptor = file_encryptor.get_footer_encryptor()?;
-                write_signed_plaintext_object(file_metadata, &mut encryptor, &mut sink, &aad)
+                write_signed_plaintext_thrift_object(file_metadata, &mut encryptor, &mut sink, &aad)
             }
-            _ => Self::write_object(file_metadata, &mut sink),
+            _ => Self::write_thrift_object(file_metadata, &mut sink),
         }
     }
 
     /// Write a column [`OffsetIndex`] in Thrift format, possibly encrypting it if required
+    ///
+    /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
     fn write_offset_index(
         &self,
-        offset_index: &OffsetIndex,
-        column_chunk: &ColumnChunk,
+        offset_index: &OffsetIndexMetaData,
+        column_chunk: &ColumnChunkMetaData,
         row_group_idx: usize,
         column_idx: usize,
         sink: impl Write,
     ) -> Result<()> {
         match &self.file_encryptor {
-            Some(file_encryptor) => Self::write_object_with_encryption(
+            Some(file_encryptor) => Self::write_thrift_object_with_encryption(
                 offset_index,
                 sink,
                 file_encryptor,
@@ -542,30 +645,42 @@ impl MetadataObjectWriter {
                 row_group_idx,
                 column_idx,
             ),
-            None => Self::write_object(offset_index, sink),
+            None => Self::write_thrift_object(offset_index, sink),
         }
     }
 
     /// Write a column [`ColumnIndex`] in Thrift format, possibly encrypting it if required
+    ///
+    /// If `column_index` is [`ColumnIndexMetaData::NONE`] the index will not be written and
+    /// this will return `false`. Returns `true` otherwise.
+    ///
+    /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
     fn write_column_index(
         &self,
-        column_index: &ColumnIndex,
-        column_chunk: &ColumnChunk,
+        column_index: &ColumnIndexMetaData,
+        column_chunk: &ColumnChunkMetaData,
         row_group_idx: usize,
         column_idx: usize,
         sink: impl Write,
-    ) -> Result<()> {
-        match &self.file_encryptor {
-            Some(file_encryptor) => Self::write_object_with_encryption(
-                column_index,
-                sink,
-                file_encryptor,
-                column_chunk,
-                ModuleType::ColumnIndex,
-                row_group_idx,
-                column_idx,
-            ),
-            None => Self::write_object(column_index, sink),
+    ) -> Result<bool> {
+        match column_index {
+            // Missing indexes may also have the placeholder ColumnIndexMetaData::NONE
+            ColumnIndexMetaData::NONE => Ok(false),
+            _ => {
+                match &self.file_encryptor {
+                    Some(file_encryptor) => Self::write_thrift_object_with_encryption(
+                        column_index,
+                        sink,
+                        file_encryptor,
+                        column_chunk,
+                        ModuleType::ColumnIndex,
+                        row_group_idx,
+                        column_idx,
+                    )?,
+                    None => Self::write_thrift_object(column_index, sink)?,
+                }
+                Ok(true)
+            }
         }
     }
 
@@ -574,8 +689,8 @@ impl MetadataObjectWriter {
     /// and possibly unencrypted metadata to be returned to clients if data was encrypted.
     fn apply_row_group_encryption(
         &self,
-        row_groups: Vec<RowGroup>,
-    ) -> Result<(Vec<RowGroup>, Option<Vec<RowGroup>>)> {
+        row_groups: Vec<RowGroupMetaData>,
+    ) -> Result<(Vec<RowGroupMetaData>, Option<Vec<RowGroupMetaData>>)> {
         match &self.file_encryptor {
             Some(file_encryptor) => {
                 let unencrypted_row_groups = row_groups.clone();
@@ -595,25 +710,16 @@ impl MetadataObjectWriter {
         )
     }
 
-    fn write_object_with_encryption(
-        object: &impl TSerializable,
+    fn write_thrift_object_with_encryption(
+        object: &impl WriteThrift,
         mut sink: impl Write,
         file_encryptor: &FileEncryptor,
-        column_metadata: &ColumnChunk,
+        column_metadata: &ColumnChunkMetaData,
         module_type: ModuleType,
         row_group_index: usize,
         column_index: usize,
     ) -> Result<()> {
-        let column_path_vec = &column_metadata
-            .meta_data
-            .as_ref()
-            .ok_or_else(|| {
-                general_err!(
-                    "Column metadata not set for column {} when encrypting object",
-                    column_index
-                )
-            })?
-            .path_in_schema;
+        let column_path_vec = column_metadata.column_path().as_ref();
 
         let joined_column_path;
         let column_path = if column_path_vec.len() == 1 {
@@ -624,6 +730,8 @@ impl MetadataObjectWriter {
         };
 
         if file_encryptor.is_column_encrypted(column_path) {
+            use crate::encryption::encrypt::encrypt_thrift_object;
+
             let aad = create_module_aad(
                 file_encryptor.file_aad(),
                 module_type,
@@ -632,9 +740,9 @@ impl MetadataObjectWriter {
                 None,
             )?;
             let mut encryptor = file_encryptor.get_column_encryptor(column_path)?;
-            encrypt_object(object, &mut encryptor, &mut sink, &aad)
+            encrypt_thrift_object(object, &mut encryptor, &mut sink, &aad)
         } else {
-            Self::write_object(object, sink)
+            Self::write_thrift_object(object, sink)
         }
     }
 
@@ -660,36 +768,34 @@ impl MetadataObjectWriter {
             .aad_prefix()
             .map(|_| !file_encryptor.properties().store_aad_prefix());
         let aad_prefix = if file_encryptor.properties().store_aad_prefix() {
-            file_encryptor.properties().aad_prefix().cloned()
+            file_encryptor.properties().aad_prefix()
         } else {
             None
         };
-        EncryptionAlgorithm::AESGCMV1(AesGcmV1 {
-            aad_prefix,
+        EncryptionAlgorithm::AES_GCM_V1(AesGcmV1 {
+            aad_prefix: aad_prefix.cloned(),
             aad_file_unique: Some(file_encryptor.aad_file_unique().clone()),
             supply_aad_prefix,
         })
     }
 
-    fn file_crypto_metadata(
-        file_encryptor: &FileEncryptor,
-    ) -> Result<crate::format::FileCryptoMetaData> {
+    fn file_crypto_metadata(file_encryptor: &'_ FileEncryptor) -> Result<FileCryptoMetaData<'_>> {
         let properties = file_encryptor.properties();
-        Ok(crate::format::FileCryptoMetaData {
+        Ok(FileCryptoMetaData {
             encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor),
-            key_metadata: properties.footer_key_metadata().cloned(),
+            key_metadata: properties.footer_key_metadata().map(|v| v.as_slice()),
         })
     }
 
     fn encrypt_row_groups(
-        row_groups: Vec<RowGroup>,
+        row_groups: Vec<RowGroupMetaData>,
         file_encryptor: &Arc<FileEncryptor>,
-    ) -> Result<Vec<RowGroup>> {
+    ) -> Result<Vec<RowGroupMetaData>> {
         row_groups
             .into_iter()
             .enumerate()
             .map(|(rg_idx, mut rg)| {
-                let cols: Result<Vec<ColumnChunk>> = rg
+                let cols: Result<Vec<ColumnChunkMetaData>> = rg
                     .columns
                     .into_iter()
                     .enumerate()
@@ -705,26 +811,24 @@ impl MetadataObjectWriter {
 
     /// Apply column encryption to column chunk metadata
     fn encrypt_column_chunk(
-        mut column_chunk: ColumnChunk,
+        mut column_chunk: ColumnChunkMetaData,
         file_encryptor: &Arc<FileEncryptor>,
         row_group_index: usize,
         column_index: usize,
-    ) -> Result<ColumnChunk> {
+    ) -> Result<ColumnChunkMetaData> {
         // Column crypto metadata should have already been set when the column was created.
         // Here we apply the encryption by encrypting the column metadata if required.
-        match column_chunk.crypto_metadata.as_ref() {
+        match column_chunk.column_crypto_metadata.as_deref() {
             None => {}
-            Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => {
+            Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) => {
                 // When uniform encryption is used the footer is already encrypted,
                 // so the column chunk does not need additional encryption.
             }
-            Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(col_key)) => {
+            Some(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(col_key)) => {
+                use crate::file::metadata::thrift::serialize_column_meta_data;
+
                 let column_path = col_key.path_in_schema.join(".");
                 let mut column_encryptor = file_encryptor.get_column_encryptor(&column_path)?;
-                let meta_data = column_chunk
-                    .meta_data
-                    .take()
-                    .ok_or_else(|| general_err!("Column metadata not set for encryption"))?;
                 let aad = create_module_aad(
                     file_encryptor.file_aad(),
                     ModuleType::ColumnMetaData,
@@ -732,10 +836,15 @@ impl MetadataObjectWriter {
                     column_index,
                     None,
                 )?;
-                let ciphertext = encrypt_object_to_vec(&meta_data, &mut column_encryptor, &aad)?;
+                // create temp ColumnMetaData that we can encrypt
+                let mut buffer: Vec<u8> = vec![];
+                {
+                    let mut prot = ThriftCompactOutputProtocol::new(&mut buffer);
+                    serialize_column_meta_data(&column_chunk, &mut prot)?;
+                }
+                let ciphertext = column_encryptor.encrypt(&buffer, &aad)?;
 
                 column_chunk.encrypted_column_metadata = Some(ciphertext);
-                debug_assert!(column_chunk.meta_data.is_none());
             }
         }
 
diff --git a/parquet/src/file/mod.rs b/parquet/src/file/mod.rs
index 94eeb2b22edb..09036cd7d7b9 100644
--- a/parquet/src/file/mod.rs
+++ b/parquet/src/file/mod.rs
@@ -99,9 +99,7 @@
 //! ```
 #[cfg(feature = "encryption")]
 pub mod column_crypto_metadata;
-pub mod footer;
 pub mod metadata;
-pub mod page_encoding_stats;
 pub mod page_index;
 pub mod properties;
 pub mod reader;
diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs
deleted file mode 100644
index edb6a8fa9d4c..000000000000
--- a/parquet/src/file/page_encoding_stats.rs
+++ /dev/null
@@ -1,77 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! Per-page encoding information.
-
-use crate::basic::{Encoding, PageType};
-use crate::errors::Result;
-use crate::format::{
-    Encoding as TEncoding, PageEncodingStats as TPageEncodingStats, PageType as TPageType,
-};
-
-/// PageEncodingStats for a column chunk and data page.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct PageEncodingStats {
-    /// the page type (data/dic/...)
-    pub page_type: PageType,
-    /// encoding of the page
-    pub encoding: Encoding,
-    /// number of pages of this type with this encoding
-    pub count: i32,
-}
-
-/// Converts Thrift definition into `PageEncodingStats`.
-pub fn try_from_thrift(thrift_encoding_stats: &TPageEncodingStats) -> Result<PageEncodingStats> {
-    let page_type = PageType::try_from(thrift_encoding_stats.page_type)?;
-    let encoding = Encoding::try_from(thrift_encoding_stats.encoding)?;
-    let count = thrift_encoding_stats.count;
-
-    Ok(PageEncodingStats {
-        page_type,
-        encoding,
-        count,
-    })
-}
-
-/// Converts `PageEncodingStats` into Thrift definition.
-pub fn to_thrift(encoding_stats: &PageEncodingStats) -> TPageEncodingStats {
-    let page_type = TPageType::from(encoding_stats.page_type);
-    let encoding = TEncoding::from(encoding_stats.encoding);
-    let count = encoding_stats.count;
-
-    TPageEncodingStats {
-        page_type,
-        encoding,
-        count,
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_page_encoding_stats_from_thrift() {
-        let stats = PageEncodingStats {
-            page_type: PageType::DATA_PAGE,
-            encoding: Encoding::PLAIN,
-            count: 1,
-        };
-
-        assert_eq!(try_from_thrift(&to_thrift(&stats)).unwrap(), stats);
-    }
-}
diff --git a/parquet/src/file/page_index/column_index.rs b/parquet/src/file/page_index/column_index.rs
new file mode 100644
index 000000000000..a41fefef2600
--- /dev/null
+++ b/parquet/src/file/page_index/column_index.rs
@@ -0,0 +1,750 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`ColumnIndexMetaData`] structures holding decoded [`ColumnIndex`] information
+//!
+//! [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+//!
+
+use crate::{
+    data_type::{ByteArray, FixedLenByteArray},
+    errors::{ParquetError, Result},
+    parquet_thrift::{
+        ElementType, FieldType, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField,
+    },
+};
+use std::ops::Deref;
+
+use crate::{
+    basic::BoundaryOrder,
+    data_type::{Int96, private::ParquetValueType},
+    file::page_index::index_reader::ThriftColumnIndex,
+};
+
+/// Column index information that is independent of the column's physical type
+#[derive(Debug, Clone, PartialEq)]
+pub struct ColumnIndex {
+    pub(crate) null_pages: Vec<bool>, // per page: `true` if the page is all nulls
+    pub(crate) boundary_order: BoundaryOrder,
+    pub(crate) null_counts: Option<Vec<i64>>, // per-page null counts, if recorded
+    pub(crate) repetition_level_histograms: Option<Vec<i64>>, // flattened, one run per page
+    pub(crate) definition_level_histograms: Option<Vec<i64>>, // flattened, one run per page
+}
+
+impl ColumnIndex {
+    /// Returns how many pages this index covers
+    pub fn num_pages(&self) -> u64 {
+        self.null_pages.len() as u64
+    }
+
+    /// Returns the null count recorded for the page at position `idx`
+    ///
+    /// Returns `None` if this index carries no null counts. Panics if null counts
+    /// are present and `idx` is out of bounds.
+    pub fn null_count(&self, idx: usize) -> Option<i64> {
+        let counts = self.null_counts.as_ref()?;
+        Some(counts[idx])
+    }
+
+    /// Returns the repetition level histogram for the page at position `idx`
+    ///
+    /// All pages share one flat buffer; each page owns an equally sized run of it.
+    pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
+        let hist = self.repetition_level_histograms.as_ref()?;
+        let width = hist.len() / self.num_pages() as usize;
+        let start = width * idx;
+        Some(&hist[start..start + width])
+    }
+
+    /// Returns the definition level histogram for the page at position `idx`
+    ///
+    /// All pages share one flat buffer; each page owns an equally sized run of it.
+    pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
+        let hist = self.definition_level_histograms.as_ref()?;
+        let width = hist.len() / self.num_pages() as usize;
+        let start = width * idx;
+        Some(&hist[start..start + width])
+    }
+
+    /// Returns `true` when every value in the page at position `idx` is null
+    pub fn is_null_page(&self, idx: usize) -> bool {
+        self.null_pages[idx]
+    }
+}
+
+/// Column index for a primitive type `T`: the common index plus typed min/max values
+#[derive(Debug, Clone, PartialEq)]
+pub struct PrimitiveColumnIndex<T> {
+    pub(crate) column_index: ColumnIndex,
+    pub(crate) min_values: Vec<T>, // one entry per page
+    pub(crate) max_values: Vec<T>, // one entry per page
+}
+
+impl<T: ParquetValueType> PrimitiveColumnIndex<T> {
+    /// Builds the index, decoding each non-null page's min/max bytes with
+    /// `T::try_from_le_slice`; null pages get default-valued placeholders.
+    ///
+    /// Errors (instead of panicking) when a non-null page lacks a min or max entry.
+    pub(crate) fn try_new(
+        null_pages: Vec<bool>,
+        boundary_order: BoundaryOrder,
+        null_counts: Option<Vec<i64>>,
+        repetition_level_histograms: Option<Vec<i64>>,
+        definition_level_histograms: Option<Vec<i64>>,
+        min_bytes: Vec<&[u8]>,
+        max_bytes: Vec<&[u8]>,
+    ) -> Result<Self> {
+        let mut min_values = Vec::with_capacity(null_pages.len());
+        let mut max_values = Vec::with_capacity(null_pages.len());
+        for (i, is_null) in null_pages.iter().enumerate() {
+            let (min, max) = match (*is_null, min_bytes.get(i), max_bytes.get(i)) {
+                // null pages have no usable bounds, store placeholders
+                (true, _, _) => (Default::default(), Default::default()),
+                (false, Some(lo), Some(hi)) => {
+                    (T::try_from_le_slice(lo)?, T::try_from_le_slice(hi)?)
+                }
+                _ => return Err(general_err!("Column index missing min/max for page {}", i)),
+            };
+            min_values.push(min);
+            max_values.push(max);
+        }
+        Ok(Self {
+            column_index: ColumnIndex {
+                null_pages,
+                boundary_order,
+                null_counts,
+                repetition_level_histograms,
+                definition_level_histograms,
+            },
+            min_values,
+            max_values,
+        })
+    }
+
+    /// Builds the index from an already parsed [`ThriftColumnIndex`]
+    pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result<Self> {
+        Self::try_new(
+            index.null_pages,
+            index.boundary_order,
+            index.null_counts,
+            index.repetition_level_histograms,
+            index.definition_level_histograms,
+            index.min_values,
+            index.max_values,
+        )
+    }
+}
+
+impl<T> PrimitiveColumnIndex<T> {
+    /// Returns a slice holding the min value of every page.
+    ///
+    /// An entry is only meaningful when [`ColumnIndex::is_null_page()`]
+    /// returns `false` for the same position.
+    pub fn min_values(&self) -> &[T] {
+        self.min_values.as_slice()
+    }
+
+    /// Returns a slice holding the max value of every page.
+    ///
+    /// An entry is only meaningful when [`ColumnIndex::is_null_page()`]
+    /// returns `false` for the same position.
+    pub fn max_values(&self) -> &[T] {
+        self.max_values.as_slice()
+    }
+
+    /// Returns an iterator over the min value of every page.
+    ///
+    /// The item for a page is `None` when [`ColumnIndex::is_null_page()`] is
+    /// `true` for that page; otherwise it is `Some` with a reference to the
+    /// stored min value.
+    pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&T>> {
+        self.min_values
+            .iter()
+            .enumerate()
+            // `bool::then_some` maps a non-null page to `Some(value)`
+            .map(|(page, min)| (!self.is_null_page(page)).then_some(min))
+    }
+
+    /// Returns an iterator over the max value of every page.
+    ///
+    /// The item for a page is `None` when [`ColumnIndex::is_null_page()`] is
+    /// `true` for that page; otherwise it is `Some` with a reference to the
+    /// stored max value.
+    pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&T>> {
+        self.max_values
+            .iter()
+            .enumerate()
+            // `bool::then_some` maps a non-null page to `Some(value)`
+            .map(|(page, max)| (!self.is_null_page(page)).then_some(max))
+    }
+
+    /// Returns the min value of the page at position `idx`
+    ///
+    /// Returns `None` when that page is flagged as a null page.
+    /// Panics if `idx` is out of bounds.
+    pub fn min_value(&self, idx: usize) -> Option<&T> {
+        if self.null_pages[idx] {
+            return None;
+        }
+        Some(&self.min_values[idx])
+    }
+
+    /// Returns the max value of the page at position `idx`
+    ///
+    /// Returns `None` when that page is flagged as a null page.
+    /// Panics if `idx` is out of bounds.
+    pub fn max_value(&self, idx: usize) -> Option<&T> {
+        if self.null_pages[idx] {
+            return None;
+        }
+        Some(&self.max_values[idx])
+    }
+}
+
+impl<T> Deref for PrimitiveColumnIndex<T> {
+    type Target = ColumnIndex;
+
+    fn deref(&self) -> &Self::Target {
+        &self.column_index
+    }
+}
+
+/// Thrift serialization for a typed column index.
+///
+/// Writes a struct holding `null_pages`, the per-page min and max values,
+/// `boundary_order`, then whichever of `null_counts` and the
+/// repetition/definition level histograms are present.
+impl<T: ParquetValueType> WriteThrift for PrimitiveColumnIndex<T> {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+    fn write_thrift<W: std::io::Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+    ) -> Result<()> {
+        let num_pages = self.null_pages.len();
+        self.null_pages.write_thrift_field(writer, 1, 0)?;
+
+        // The min and max lists are written by hand, one binary entry per page;
+        // an empty slice is written wherever `min_value`/`max_value` is `None`.
+        writer.write_field_begin(FieldType::List, 2, 1)?;
+        writer.write_list_begin(ElementType::Binary, num_pages)?;
+        for page in 0..num_pages {
+            let min = self.min_value(page).map(|m| m.as_bytes()).unwrap_or(&[]);
+            min.write_thrift(writer)?;
+        }
+
+        writer.write_field_begin(FieldType::List, 3, 2)?;
+        writer.write_list_begin(ElementType::Binary, num_pages)?;
+        for page in 0..num_pages {
+            let max = self.max_value(page).map(|m| m.as_bytes()).unwrap_or(&[]);
+            max.write_thrift(writer)?;
+        }
+
+        // Each optional field is written only when present, and is handed the
+        // value returned by the previous `write_thrift_field` call.
+        let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?;
+        if let Some(null_counts) = &self.null_counts {
+            last_field_id = null_counts.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        if let Some(rep_hists) = &self.repetition_level_histograms {
+            last_field_id = rep_hists.write_thrift_field(writer, 6, last_field_id)?;
+        }
+        if let Some(def_hists) = &self.definition_level_histograms {
+            def_hists.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        writer.write_struct_end()
+    }
+}
+
+/// Column index for byte arrays (fixed length and variable)
+#[derive(Debug, Clone, PartialEq)]
+pub struct ByteArrayColumnIndex {
+    pub(crate) column_index: ColumnIndex,
+    // raw bytes for min and max values
+    pub(crate) min_bytes: Vec<u8>,
+    pub(crate) min_offsets: Vec<usize>,
+    pub(crate) max_bytes: Vec<u8>,
+    pub(crate) max_offsets: Vec<usize>,
+}
+
+impl ByteArrayColumnIndex {
+    /// Builds the index from the per-page min/max byte slices.
+    ///
+    /// Values of non-null pages are packed back to back into `min_bytes` and
+    /// `max_bytes`; `min_offsets`/`max_offsets` hold `len + 1` positions so page
+    /// `i` spans `offsets[i]..offsets[i + 1]`. Null pages contribute no bytes.
+    ///
+    /// Returns an error if `min_values` or `max_values` does not contain exactly
+    /// one entry per element of `null_pages`.
+    pub(crate) fn try_new(
+        null_pages: Vec<bool>,
+        boundary_order: BoundaryOrder,
+        null_counts: Option<Vec<i64>>,
+        repetition_level_histograms: Option<Vec<i64>>,
+        definition_level_histograms: Option<Vec<i64>>,
+        min_values: Vec<&[u8]>,
+        max_values: Vec<&[u8]>,
+    ) -> Result<Self> {
+        let len = null_pages.len();
+        // These lists may come from a decoded `ThriftColumnIndex`, so report a
+        // mismatch as an error rather than panicking on an out-of-bounds index.
+        if min_values.len() != len || max_values.len() != len {
+            return Err(general_err!(
+                "Column index min/max value counts do not match the number of pages"
+            ));
+        }
+
+        let min_len = min_values.iter().map(|&v| v.len()).sum();
+        let max_len = max_values.iter().map(|&v| v.len()).sum();
+        let mut min_bytes: Vec<u8> = Vec::with_capacity(min_len);
+        let mut max_bytes: Vec<u8> = Vec::with_capacity(max_len);
+        let mut min_offsets = Vec::with_capacity(len + 1);
+        let mut max_offsets = Vec::with_capacity(len + 1);
+
+        for (i, &is_null) in null_pages.iter().enumerate() {
+            min_offsets.push(min_bytes.len());
+            max_offsets.push(max_bytes.len());
+            if !is_null {
+                min_bytes.extend_from_slice(min_values[i]);
+                max_bytes.extend_from_slice(max_values[i]);
+            }
+        }
+        min_offsets.push(min_bytes.len());
+        max_offsets.push(max_bytes.len());
+
+        Ok(Self {
+            column_index: ColumnIndex {
+                null_pages,
+                boundary_order,
+                null_counts,
+                repetition_level_histograms,
+                definition_level_histograms,
+            },
+            min_bytes,
+            min_offsets,
+            max_bytes,
+            max_offsets,
+        })
+    }
+
+    pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result<Self> {
+        Self::try_new(
+            index.null_pages,
+            index.boundary_order,
+            index.null_counts,
+            index.repetition_level_histograms,
+            index.definition_level_histograms,
+            index.min_values,
+            index.max_values,
+        )
+    }
+
+    /// Returns the min value for the page indexed by `idx`
+    ///
+    /// It is `None` when all values are null
+    pub fn min_value(&self, idx: usize) -> Option<&[u8]> {
+        if self.null_pages[idx] {
+            None
+        } else {
+            let start = self.min_offsets[idx];
+            let end = self.min_offsets[idx + 1];
+            Some(&self.min_bytes[start..end])
+        }
+    }
+
+    /// Returns the max value for the page indexed by `idx`
+    ///
+    /// It is `None` when all values are null
+    pub fn max_value(&self, idx: usize) -> Option<&[u8]> {
+        if self.null_pages[idx] {
+            None
+        } else {
+            let start = self.max_offsets[idx];
+            let end = self.max_offsets[idx + 1];
+            Some(&self.max_bytes[start..end])
+        }
+    }
+
+    /// Returns an iterator over the min values.
+    ///
+    /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`.
+    pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> {
+        (0..self.num_pages() as usize).map(|i| {
+            if self.is_null_page(i) {
+                None
+            } else {
+                self.min_value(i)
+            }
+        })
+    }
+
+    /// Returns an iterator over the max values.
+    ///
+    /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`.
+    pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> {
+        (0..self.num_pages() as usize).map(|i| {
+            if self.is_null_page(i) {
+                None
+            } else {
+                self.max_value(i)
+            }
+        })
+    }
+}
+
+impl Deref for ByteArrayColumnIndex {
+    type Target = ColumnIndex;
+
+    fn deref(&self) -> &Self::Target {
+        &self.column_index
+    }
+}
+
+/// Thrift serialization for a byte array column index.
+///
+/// Writes a struct holding `null_pages`, the per-page min and max values,
+/// `boundary_order`, then whichever of `null_counts` and the
+/// repetition/definition level histograms are present.
+impl WriteThrift for ByteArrayColumnIndex {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+    fn write_thrift<W: std::io::Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+    ) -> Result<()> {
+        let num_pages = self.null_pages.len();
+        self.null_pages.write_thrift_field(writer, 1, 0)?;
+
+        // The min and max lists are written by hand, one binary entry per page;
+        // an empty slice is written wherever `min_value`/`max_value` is `None`.
+        writer.write_field_begin(FieldType::List, 2, 1)?;
+        writer.write_list_begin(ElementType::Binary, num_pages)?;
+        for page in 0..num_pages {
+            let min = self.min_value(page).unwrap_or(&[]);
+            min.write_thrift(writer)?;
+        }
+
+        writer.write_field_begin(FieldType::List, 3, 2)?;
+        writer.write_list_begin(ElementType::Binary, num_pages)?;
+        for page in 0..num_pages {
+            let max = self.max_value(page).unwrap_or(&[]);
+            max.write_thrift(writer)?;
+        }
+
+        // Each optional field is written only when present, and is handed the
+        // value returned by the previous `write_thrift_field` call.
+        let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?;
+        if let Some(null_counts) = &self.null_counts {
+            last_field_id = null_counts.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        if let Some(rep_hists) = &self.repetition_level_histograms {
+            last_field_id = rep_hists.write_thrift_field(writer, 6, last_field_id)?;
+        }
+        if let Some(def_hists) = &self.definition_level_histograms {
+            def_hists.write_thrift_field(writer, 7, last_field_id)?;
+        }
+        writer.write_struct_end()
+    }
+}
+
+// Macro to generate getter functions for ColumnIndexMetaData.
+macro_rules! colidx_enum_func {
+    ($self:ident, $func:ident, $arg:ident) => {{
+        match *$self {
+            Self::BOOLEAN(ref typed) => typed.$func($arg),
+            Self::INT32(ref typed) => typed.$func($arg),
+            Self::INT64(ref typed) => typed.$func($arg),
+            Self::INT96(ref typed) => typed.$func($arg),
+            Self::FLOAT(ref typed) => typed.$func($arg),
+            Self::DOUBLE(ref typed) => typed.$func($arg),
+            Self::BYTE_ARRAY(ref typed) => typed.$func($arg),
+            Self::FIXED_LEN_BYTE_ARRAY(ref typed) => typed.$func($arg),
+            _ => panic!(concat!(
+                "Cannot call ",
+                stringify!($func),
+                " on ColumnIndexMetaData::NONE"
+            )),
+        }
+    }};
+    ($self:ident, $func:ident) => {{
+        match *$self {
+            Self::BOOLEAN(ref typed) => typed.$func(),
+            Self::INT32(ref typed) => typed.$func(),
+            Self::INT64(ref typed) => typed.$func(),
+            Self::INT96(ref typed) => typed.$func(),
+            Self::FLOAT(ref typed) => typed.$func(),
+            Self::DOUBLE(ref typed) => typed.$func(),
+            Self::BYTE_ARRAY(ref typed) => typed.$func(),
+            Self::FIXED_LEN_BYTE_ARRAY(ref typed) => typed.$func(),
+            _ => panic!(concat!(
+                "Cannot call ",
+                stringify!($func),
+                " on ColumnIndexMetaData::NONE"
+            )),
+        }
+    }};
+}
+
+/// Parsed [`ColumnIndex`] information for a Parquet file.
+///
+/// See [`ParquetColumnIndex`] for more information.
+///
+/// [`ParquetColumnIndex`]: crate::file::metadata::ParquetColumnIndex
+/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+#[derive(Debug, Clone, PartialEq)]
+#[allow(non_camel_case_types)]
+pub enum ColumnIndexMetaData {
+    /// Sometimes reading page index from parquet file
+    /// will only return pageLocations without min_max index,
+    /// `NONE` represents this lack of index information
+    NONE,
+    /// Boolean type index
+    BOOLEAN(PrimitiveColumnIndex<bool>),
+    /// 32-bit integer type index
+    INT32(PrimitiveColumnIndex<i32>),
+    /// 64-bit integer type index
+    INT64(PrimitiveColumnIndex<i64>),
+    /// 96-bit integer type (timestamp) index
+    INT96(PrimitiveColumnIndex<Int96>),
+    /// 32-bit floating point type index
+    FLOAT(PrimitiveColumnIndex<f32>),
+    /// 64-bit floating point type index
+    DOUBLE(PrimitiveColumnIndex<f64>),
+    /// Byte array type index
+    BYTE_ARRAY(ByteArrayColumnIndex),
+    /// Fixed length byte array type index
+    FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex),
+}
+
+impl ColumnIndexMetaData {
+    /// Returns whether the min/max values of this index are ordered.
+    ///
+    /// This is `true` only when a boundary order is available and it is
+    /// anything other than [`BoundaryOrder::UNORDERED`]; for
+    /// [`ColumnIndexMetaData::NONE`] the result is `false`.
+    pub fn is_sorted(&self) -> bool {
+        let boundary_order = self.get_boundary_order();
+        matches!(boundary_order, Some(order) if order != BoundaryOrder::UNORDERED)
+    }
+
+    /// Get boundary_order of this page index.
+    pub fn get_boundary_order(&self) -> Option<BoundaryOrder> {
+        match self {
+            Self::NONE => None,
+            Self::BOOLEAN(index) => Some(index.boundary_order),
+            Self::INT32(index) => Some(index.boundary_order),
+            Self::INT64(index) => Some(index.boundary_order),
+            Self::INT96(index) => Some(index.boundary_order),
+            Self::FLOAT(index) => Some(index.boundary_order),
+            Self::DOUBLE(index) => Some(index.boundary_order),
+            Self::BYTE_ARRAY(index) => Some(index.boundary_order),
+            Self::FIXED_LEN_BYTE_ARRAY(index) => Some(index.boundary_order),
+        }
+    }
+
+    /// Returns the array of null counts, one per page.
+    ///
+    /// Returns `None` for [`Self::NONE`], or if no null counts have been set in the index
+    pub fn null_counts(&self) -> Option<&Vec<i64>> {
+        match self {
+            Self::NONE => None,
+            Self::BOOLEAN(index) => index.null_counts.as_ref(),
+            Self::INT32(index) => index.null_counts.as_ref(),
+            Self::INT64(index) => index.null_counts.as_ref(),
+            Self::INT96(index) => index.null_counts.as_ref(),
+            Self::FLOAT(index) => index.null_counts.as_ref(),
+            Self::DOUBLE(index) => index.null_counts.as_ref(),
+            Self::BYTE_ARRAY(index) => index.null_counts.as_ref(),
+            Self::FIXED_LEN_BYTE_ARRAY(index) => index.null_counts.as_ref(),
+        }
+    }
+
+    /// Returns the number of pages
+    pub fn num_pages(&self) -> u64 {
+        colidx_enum_func!(self, num_pages)
+    }
+
+    /// Returns the number of null values in the page indexed by `idx`
+    ///
+    /// Returns `None` if no null counts have been set in the index
+    pub fn null_count(&self, idx: usize) -> Option<i64> {
+        colidx_enum_func!(self, null_count, idx)
+    }
+
+    /// Returns the repetition level histogram for the page indexed by `idx`
+    pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
+        colidx_enum_func!(self, repetition_level_histogram, idx)
+    }
+
+    /// Returns the definition level histogram for the page indexed by `idx`
+    pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> {
+        colidx_enum_func!(self, definition_level_histogram, idx)
+    }
+
+    /// Returns whether the page indexed by `idx` consists of all null values
+    pub fn is_null_page(&self, idx: usize) -> bool {
+        colidx_enum_func!(self, is_null_page, idx)
+    }
+}
+
+/// Provides iterators over min and max values of a [`ColumnIndexMetaData`]
+pub trait ColumnIndexIterators {
+    /// Can be one of `bool`, `i32`, `i64`, `Int96`, `f32`, `f64`, [`ByteArray`],
+    /// or [`FixedLenByteArray`]
+    type Item;
+
+    /// Return iterator over the min values for the index
+    fn min_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>;
+
+    /// Return iterator over the max values for the index
+    fn max_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>;
+}
+
+macro_rules! column_index_iters {
+    ($item: ident, $variant: ident, $conv:expr) => {
+        impl ColumnIndexIterators for $item {
+            type Item = $item;
+
+            fn min_values_iter(
+                colidx: &ColumnIndexMetaData,
+            ) -> impl Iterator<Item = Option<Self::Item>> {
+                if let ColumnIndexMetaData::$variant(index) = colidx {
+                    index.min_values_iter().map($conv)
+                } else {
+                    panic!(concat!("Wrong type for ", stringify!($item), " iterator"))
+                }
+            }
+
+            fn max_values_iter(
+                colidx: &ColumnIndexMetaData,
+            ) -> impl Iterator<Item = Option<Self::Item>> {
+                if let ColumnIndexMetaData::$variant(index) = colidx {
+                    index.max_values_iter().map($conv)
+                } else {
+                    panic!(concat!("Wrong type for ", stringify!($item), " iterator"))
+                }
+            }
+        }
+    };
+}
+
+column_index_iters!(bool, BOOLEAN, |v| v.copied());
+column_index_iters!(i32, INT32, |v| v.copied());
+column_index_iters!(i64, INT64, |v| v.copied());
+column_index_iters!(Int96, INT96, |v| v.copied());
+column_index_iters!(f32, FLOAT, |v| v.copied());
+column_index_iters!(f64, DOUBLE, |v| v.copied());
+column_index_iters!(ByteArray, BYTE_ARRAY, |v| v
+    .map(|v| ByteArray::from(v.to_owned())));
+column_index_iters!(FixedLenByteArray, FIXED_LEN_BYTE_ARRAY, |v| v
+    .map(|v| FixedLenByteArray::from(v.to_owned())));
+
+impl WriteThrift for ColumnIndexMetaData {
+    const ELEMENT_TYPE: ElementType = ElementType::Struct;
+    fn write_thrift<W: std::io::Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+    ) -> Result<()> {
+        match self {
+            Self::BOOLEAN(typed) => typed.write_thrift(writer),
+            Self::INT32(typed) => typed.write_thrift(writer),
+            Self::INT64(typed) => typed.write_thrift(writer),
+            Self::INT96(typed) => typed.write_thrift(writer),
+            Self::FLOAT(typed) => typed.write_thrift(writer),
+            Self::DOUBLE(typed) => typed.write_thrift(writer),
+            Self::BYTE_ARRAY(typed) => typed.write_thrift(writer),
+            Self::FIXED_LEN_BYTE_ARRAY(typed) => typed.write_thrift(writer),
+            // `NONE` holds no typed index, so there is nothing to delegate to.
+            Self::NONE => Err(general_err!("Cannot serialize NONE index")),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_page_index_min_max_null() {
+        let column_index = PrimitiveColumnIndex {
+            column_index: ColumnIndex {
+                null_pages: vec![false],
+                boundary_order: BoundaryOrder::ASCENDING,
+                null_counts: Some(vec![0]),
+                repetition_level_histograms: Some(vec![1, 2]),
+                definition_level_histograms: Some(vec![1, 2, 3]),
+            },
+            min_values: vec![-123],
+            max_values: vec![234],
+        };
+
+        assert_eq!(column_index.min_value(0), Some(&-123));
+        assert_eq!(column_index.max_value(0), Some(&234));
+        assert_eq!(column_index.null_count(0), Some(0));
+        assert_eq!(column_index.repetition_level_histogram(0).unwrap(), &[1, 2]);
+        assert_eq!(
+            column_index.definition_level_histogram(0).unwrap(),
+            &[1, 2, 3]
+        );
+    }
+
+    #[test]
+    fn test_page_index_min_max_null_none() {
+        let column_index: PrimitiveColumnIndex<i32> = PrimitiveColumnIndex::<i32> {
+            column_index: ColumnIndex {
+                null_pages: vec![true],
+                boundary_order: BoundaryOrder::ASCENDING,
+                null_counts: Some(vec![1]),
+                repetition_level_histograms: None,
+                definition_level_histograms: Some(vec![1, 0]),
+            },
+            min_values: vec![Default::default()],
+            max_values: vec![Default::default()],
+        };
+
+        assert_eq!(column_index.min_value(0), None);
+        assert_eq!(column_index.max_value(0), None);
+        assert_eq!(column_index.null_count(0), Some(1));
+        assert_eq!(column_index.repetition_level_histogram(0), None);
+        assert_eq!(column_index.definition_level_histogram(0).unwrap(), &[1, 0]);
+    }
+
+    #[test]
+    fn test_invalid_column_index() {
+        let column_index = ThriftColumnIndex {
+            null_pages: vec![true, false],
+            min_values: vec![
+                &[],
+                &[], // this shouldn't be empty as null_pages[1] is false
+            ],
+            max_values: vec![
+                &[],
+                &[], // this shouldn't be empty as null_pages[1] is false
+            ],
+            null_counts: None,
+            repetition_level_histograms: None,
+            definition_level_histograms: None,
+            boundary_order: BoundaryOrder::UNORDERED,
+        };
+
+        let err = PrimitiveColumnIndex::<i32>::try_from_thrift(column_index).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: error converting value, expected 4 bytes got 0"
+        );
+    }
+}
diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs
deleted file mode 100644
index a66509e14c7a..000000000000
--- a/parquet/src/file/page_index/index.rs
+++ /dev/null
@@ -1,375 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-//! [`Index`] structures holding decoded [`ColumnIndex`] information
-
-use crate::basic::Type;
-use crate::data_type::private::ParquetValueType;
-use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96};
-use crate::errors::ParquetError;
-use crate::file::metadata::LevelHistogram;
-use crate::format::{BoundaryOrder, ColumnIndex};
-use std::fmt::Debug;
-
-/// Typed statistics for one data page
-///
-/// See [`NativeIndex`] for more details
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct PageIndex<T> {
-    /// The minimum value, It is None when all values are null
-    pub min: Option<T>,
-    /// The maximum value, It is None when all values are null
-    pub max: Option<T>,
-    /// Null values in the page
-    pub null_count: Option<i64>,
-    /// Repetition level histogram for the page
-    ///
-    /// `repetition_level_histogram[i]` is a count of how many values are at repetition level `i`.
-    /// For example, `repetition_level_histogram[0]` indicates how many rows the page contains.
-    pub repetition_level_histogram: Option<LevelHistogram>,
-    /// Definition level histogram for the page
-    ///
-    /// `definition_level_histogram[i]` is a count of how many values are at definition level `i`.
-    /// For example, `definition_level_histogram[max_definition_level]` indicates how many
-    /// non-null values are present in the page.
-    pub definition_level_histogram: Option<LevelHistogram>,
-}
-
-impl<T> PageIndex<T> {
-    /// Returns the minimum value in the page
-    ///
-    /// It is `None` when all values are null
-    pub fn min(&self) -> Option<&T> {
-        self.min.as_ref()
-    }
-
-    /// Returns the maximum value in the page
-    ///
-    /// It is `None` when all values are null
-    pub fn max(&self) -> Option<&T> {
-        self.max.as_ref()
-    }
-
-    /// Returns the number of null values in the page
-    pub fn null_count(&self) -> Option<i64> {
-        self.null_count
-    }
-
-    /// Returns the repetition level histogram for the page
-    pub fn repetition_level_histogram(&self) -> Option<&LevelHistogram> {
-        self.repetition_level_histogram.as_ref()
-    }
-
-    /// Returns the definition level histogram for the page
-    pub fn definition_level_histogram(&self) -> Option<&LevelHistogram> {
-        self.definition_level_histogram.as_ref()
-    }
-}
-
-impl<T> PageIndex<T>
-where
-    T: AsBytes,
-{
-    /// Returns the minimum value in the page as bytes
-    ///
-    /// It is `None` when all values are null
-    pub fn max_bytes(&self) -> Option<&[u8]> {
-        self.max.as_ref().map(|x| x.as_bytes())
-    }
-
-    /// Returns the maximum value in the page as bytes
-    ///
-    /// It is `None` when all values are null
-    pub fn min_bytes(&self) -> Option<&[u8]> {
-        self.min.as_ref().map(|x| x.as_bytes())
-    }
-}
-
-#[derive(Debug, Clone, PartialEq)]
-#[allow(non_camel_case_types)]
-/// Statistics for data pages in a column chunk.
-///
-/// See [`NativeIndex`] for more information
-pub enum Index {
-    /// Sometimes reading page index from parquet file
-    /// will only return pageLocations without min_max index,
-    /// `NONE` represents this lack of index information
-    NONE,
-    /// Boolean type index
-    BOOLEAN(NativeIndex<bool>),
-    /// 32-bit integer type index
-    INT32(NativeIndex<i32>),
-    /// 64-bit integer type index
-    INT64(NativeIndex<i64>),
-    /// 96-bit integer type (timestamp) index
-    INT96(NativeIndex<Int96>),
-    /// 32-bit floating point type index
-    FLOAT(NativeIndex<f32>),
-    /// 64-bit floating point type index
-    DOUBLE(NativeIndex<f64>),
-    /// Byte array type index
-    BYTE_ARRAY(NativeIndex<ByteArray>),
-    /// Fixed length byte array type index
-    FIXED_LEN_BYTE_ARRAY(NativeIndex<FixedLenByteArray>),
-}
-
-impl Index {
-    /// Return min/max elements inside ColumnIndex are ordered or not.
-    pub fn is_sorted(&self) -> bool {
-        // 0:UNORDERED, 1:ASCENDING ,2:DESCENDING,
-        if let Some(order) = self.get_boundary_order() {
-            order.0 > (BoundaryOrder::UNORDERED.0)
-        } else {
-            false
-        }
-    }
-
-    /// Get boundary_order of this page index.
-    pub fn get_boundary_order(&self) -> Option<BoundaryOrder> {
-        match self {
-            Index::NONE => None,
-            Index::BOOLEAN(index) => Some(index.boundary_order),
-            Index::INT32(index) => Some(index.boundary_order),
-            Index::INT64(index) => Some(index.boundary_order),
-            Index::INT96(index) => Some(index.boundary_order),
-            Index::FLOAT(index) => Some(index.boundary_order),
-            Index::DOUBLE(index) => Some(index.boundary_order),
-            Index::BYTE_ARRAY(index) => Some(index.boundary_order),
-            Index::FIXED_LEN_BYTE_ARRAY(index) => Some(index.boundary_order),
-        }
-    }
-}
-
-/// Strongly typed statistics for data pages in a column chunk.
-///
-/// This structure is a natively typed, in memory representation of the
-/// [`ColumnIndex`] structure in a parquet file footer, as described in the
-/// Parquet [PageIndex documentation]. The statistics stored in this structure
-/// can be used by query engines to skip decoding pages while reading parquet
-/// data.
-///
-/// # Differences with Row Group Level Statistics
-///
-/// One significant difference between `NativeIndex` and row group level
-/// [`Statistics`] is that page level statistics may not store actual column
-/// values as min and max (e.g. they may store truncated strings to save space)
-///
-/// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
-/// [`Statistics`]: crate::file::statistics::Statistics
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct NativeIndex<T: ParquetValueType> {
-    /// The actual column indexes, one item per page
-    pub indexes: Vec<PageIndex<T>>,
-    /// If the min/max elements are ordered, and if so in which
-    /// direction. See [source] for details.
-    ///
-    /// [source]: https://github.com/apache/parquet-format/blob/bfc549b93e6927cb1fc425466e4084f76edc6d22/src/main/thrift/parquet.thrift#L959-L964
-    pub boundary_order: BoundaryOrder,
-}
-
-impl<T: ParquetValueType> NativeIndex<T> {
-    /// The physical data type of the column
-    pub const PHYSICAL_TYPE: Type = T::PHYSICAL_TYPE;
-
-    /// Creates a new [`NativeIndex`]
-    pub(crate) fn try_new(index: ColumnIndex) -> Result<Self, ParquetError> {
-        let len = index.min_values.len();
-
-        let null_counts = index
-            .null_counts
-            .map(|x| x.into_iter().map(Some).collect::<Vec<_>>())
-            .unwrap_or_else(|| vec![None; len]);
-
-        // histograms are a 1D array encoding a 2D num_pages X num_levels matrix.
-        let to_page_histograms = |opt_hist: Option<Vec<i64>>| {
-            if let Some(hist) = opt_hist {
-                // TODO: should we assert (hist.len() % len) == 0?
-                let num_levels = hist.len() / len;
-                let mut res = Vec::with_capacity(len);
-                for i in 0..len {
-                    let page_idx = i * num_levels;
-                    let page_hist = hist[page_idx..page_idx + num_levels].to_vec();
-                    res.push(Some(LevelHistogram::from(page_hist)));
-                }
-                res
-            } else {
-                vec![None; len]
-            }
-        };
-
-        let rep_hists: Vec<Option<LevelHistogram>> =
-            to_page_histograms(index.repetition_level_histograms);
-        let def_hists: Vec<Option<LevelHistogram>> =
-            to_page_histograms(index.definition_level_histograms);
-
-        let indexes = index
-            .min_values
-            .iter()
-            .zip(index.max_values.iter())
-            .zip(index.null_pages.into_iter())
-            .zip(null_counts.into_iter())
-            .zip(rep_hists.into_iter())
-            .zip(def_hists.into_iter())
-            .map(
-                |(
-                    ((((min, max), is_null), null_count), repetition_level_histogram),
-                    definition_level_histogram,
-                )| {
-                    let (min, max) = if is_null {
-                        (None, None)
-                    } else {
-                        (
-                            Some(T::try_from_le_slice(min)?),
-                            Some(T::try_from_le_slice(max)?),
-                        )
-                    };
-                    Ok(PageIndex {
-                        min,
-                        max,
-                        null_count,
-                        repetition_level_histogram,
-                        definition_level_histogram,
-                    })
-                },
-            )
-            .collect::<Result<Vec<_>, ParquetError>>()?;
-
-        Ok(Self {
-            indexes,
-            boundary_order: index.boundary_order,
-        })
-    }
-
-    pub(crate) fn to_thrift(&self) -> ColumnIndex {
-        let min_values = self
-            .indexes
-            .iter()
-            .map(|x| x.min_bytes().unwrap_or(&[]).to_vec())
-            .collect::<Vec<_>>();
-
-        let max_values = self
-            .indexes
-            .iter()
-            .map(|x| x.max_bytes().unwrap_or(&[]).to_vec())
-            .collect::<Vec<_>>();
-
-        let null_counts = self
-            .indexes
-            .iter()
-            .map(|x| x.null_count())
-            .collect::<Option<Vec<_>>>();
-
-        // Concatenate page histograms into a single Option<Vec>
-        let repetition_level_histograms = self
-            .indexes
-            .iter()
-            .map(|x| x.repetition_level_histogram().map(|v| v.values()))
-            .collect::<Option<Vec<&[i64]>>>()
-            .map(|hists| hists.concat());
-
-        let definition_level_histograms = self
-            .indexes
-            .iter()
-            .map(|x| x.definition_level_histogram().map(|v| v.values()))
-            .collect::<Option<Vec<&[i64]>>>()
-            .map(|hists| hists.concat());
-
-        ColumnIndex::new(
-            self.indexes.iter().map(|x| x.min().is_none()).collect(),
-            min_values,
-            max_values,
-            self.boundary_order,
-            null_counts,
-            repetition_level_histograms,
-            definition_level_histograms,
-        )
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_page_index_min_max_null() {
-        let page_index = PageIndex {
-            min: Some(-123),
-            max: Some(234),
-            null_count: Some(0),
-            repetition_level_histogram: Some(LevelHistogram::from(vec![1, 2])),
-            definition_level_histogram: Some(LevelHistogram::from(vec![1, 2, 3])),
-        };
-
-        assert_eq!(page_index.min().unwrap(), &-123);
-        assert_eq!(page_index.max().unwrap(), &234);
-        assert_eq!(page_index.min_bytes().unwrap(), (-123).as_bytes());
-        assert_eq!(page_index.max_bytes().unwrap(), 234.as_bytes());
-        assert_eq!(page_index.null_count().unwrap(), 0);
-        assert_eq!(
-            page_index.repetition_level_histogram().unwrap().values(),
-            &vec![1, 2]
-        );
-        assert_eq!(
-            page_index.definition_level_histogram().unwrap().values(),
-            &vec![1, 2, 3]
-        );
-    }
-
-    #[test]
-    fn test_page_index_min_max_null_none() {
-        let page_index: PageIndex<i32> = PageIndex {
-            min: None,
-            max: None,
-            null_count: None,
-            repetition_level_histogram: None,
-            definition_level_histogram: None,
-        };
-
-        assert_eq!(page_index.min(), None);
-        assert_eq!(page_index.max(), None);
-        assert_eq!(page_index.min_bytes(), None);
-        assert_eq!(page_index.max_bytes(), None);
-        assert_eq!(page_index.null_count(), None);
-        assert_eq!(page_index.repetition_level_histogram(), None);
-        assert_eq!(page_index.definition_level_histogram(), None);
-    }
-
-    #[test]
-    fn test_invalid_column_index() {
-        let column_index = ColumnIndex {
-            null_pages: vec![true, false],
-            min_values: vec![
-                vec![],
-                vec![], // this shouldn't be empty as null_pages[1] is false
-            ],
-            max_values: vec![
-                vec![],
-                vec![], // this shouldn't be empty as null_pages[1] is false
-            ],
-            null_counts: None,
-            repetition_level_histograms: None,
-            definition_level_histograms: None,
-            boundary_order: BoundaryOrder::UNORDERED,
-        };
-
-        let err = NativeIndex::<i32>::try_new(column_index).unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "Parquet error: error converting value, expected 4 bytes got 0"
-        );
-    }
-}
diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs
index 368ede8b4094..ce7fc5fbaac5 100644
--- a/parquet/src/file/page_index/index_reader.rs
+++ b/parquet/src/file/page_index/index_reader.rs
@@ -15,17 +15,23 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Support for reading [`Index`] and [`PageLocation`] from parquet metadata.
+//! Support for reading [`ColumnIndexMetaData`] and [`OffsetIndexMetaData`] from parquet metadata.
 
-use crate::basic::Type;
+use crate::basic::{BoundaryOrder, Type};
 use crate::data_type::Int96;
-use crate::errors::ParquetError;
+use crate::errors::{ParquetError, Result};
 use crate::file::metadata::ColumnChunkMetaData;
-use crate::file::page_index::index::{Index, NativeIndex};
+use crate::file::page_index::column_index::{
+    ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
+};
 use crate::file::page_index::offset_index::OffsetIndexMetaData;
 use crate::file::reader::ChunkReader;
-use crate::format::{ColumnIndex, OffsetIndex, PageLocation};
-use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
+use crate::parquet_thrift::{
+    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
+    ThriftSliceInputProtocol, WriteThrift, WriteThriftField, read_thrift_vec,
+};
+use crate::thrift_struct;
+use std::io::Write;
 use std::ops::Range;
 
 /// Computes the covering range of two optional ranges
@@ -38,7 +44,7 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option<
     }
 }
 
-/// Reads per-column [`Index`] for all columns of a row group by
+/// Reads per-column [`ColumnIndexMetaData`] for all columns of a row group by
 /// decoding [`ColumnIndex`] .
 ///
 /// Returns a vector of `index[column_number]`.
@@ -48,6 +54,7 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option<
 /// See [Page Index Documentation] for more details.
 ///
 /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 #[deprecated(
     since = "55.2.0",
     note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
@@ -55,7 +62,7 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option<
 pub fn read_columns_indexes<R: ChunkReader>(
     reader: &R,
     chunks: &[ColumnChunkMetaData],
-) -> Result<Option<Vec<Index>>, ParquetError> {
+) -> Result<Option<Vec<ColumnIndexMetaData>>, ParquetError> {
     let fetch = chunks
         .iter()
         .fold(None, |range, c| acc_range(range, c.column_index_range()));
@@ -76,52 +83,13 @@ pub fn read_columns_indexes<R: ChunkReader>(
                         ..usize::try_from(r.end - fetch.start)?],
                     c.column_type(),
                 ),
-                None => Ok(Index::NONE),
+                None => Ok(ColumnIndexMetaData::NONE),
             })
             .collect(),
     )
     .transpose()
 }
 
-/// Reads [`OffsetIndex`],  per-page [`PageLocation`] for all columns of a row
-/// group.
-///
-/// Returns a vector of `location[column_number][page_number]`
-///
-/// Return an empty vector if this row group does not contain an
-/// [`OffsetIndex]`.
-///
-/// See [Page Index Documentation] for more details.
-///
-/// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
-#[deprecated(since = "53.0.0", note = "Use read_offset_indexes")]
-pub fn read_pages_locations<R: ChunkReader>(
-    reader: &R,
-    chunks: &[ColumnChunkMetaData],
-) -> Result<Vec<Vec<PageLocation>>, ParquetError> {
-    let fetch = chunks
-        .iter()
-        .fold(None, |range, c| acc_range(range, c.offset_index_range()));
-
-    let fetch = match fetch {
-        Some(r) => r,
-        None => return Ok(vec![]),
-    };
-
-    let bytes = reader.get_bytes(fetch.start as _, (fetch.end - fetch.start).try_into()?)?;
-
-    chunks
-        .iter()
-        .map(|c| match c.offset_index_range() {
-            Some(r) => decode_page_locations(
-                &bytes[usize::try_from(r.start - fetch.start)?
-                    ..usize::try_from(r.end - fetch.start)?],
-            ),
-            None => Err(general_err!("missing offset index")),
-        })
-        .collect()
-}
-
 /// Reads per-column [`OffsetIndexMetaData`] for all columns of a row group by
 /// decoding [`OffsetIndex`] .
 ///
@@ -132,6 +100,7 @@ pub fn read_pages_locations<R: ChunkReader>(
 /// See [Page Index Documentation] for more details.
 ///
 /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 #[deprecated(
     since = "55.2.0",
     note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
@@ -167,31 +136,64 @@ pub fn read_offset_indexes<R: ChunkReader>(
 }
 
 pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData, ParquetError> {
-    let mut prot = TCompactSliceInputProtocol::new(data);
-    let offset = OffsetIndex::read_from_in_protocol(&mut prot)?;
-    OffsetIndexMetaData::try_new(offset)
+    let mut prot = ThriftSliceInputProtocol::new(data);
+
+    // Try to read fast-path first. If that fails, fall back to slower but more robust
+    // decoder.
+    match OffsetIndexMetaData::try_from_fast(&mut prot) {
+        Ok(offset_index) => Ok(offset_index),
+        Err(_) => {
+            prot = ThriftSliceInputProtocol::new(data);
+            OffsetIndexMetaData::read_thrift(&mut prot)
+        }
+    }
 }
 
-pub(crate) fn decode_page_locations(data: &[u8]) -> Result<Vec<PageLocation>, ParquetError> {
-    let mut prot = TCompactSliceInputProtocol::new(data);
-    let offset = OffsetIndex::read_from_in_protocol(&mut prot)?;
-    Ok(offset.page_locations)
+// private struct only used for decoding then discarded
+thrift_struct!(
+pub(super) struct ThriftColumnIndex<'a> {
+  1: required list<bool> null_pages
+  2: required list<'a><binary> min_values
+  3: required list<'a><binary> max_values
+  4: required BoundaryOrder boundary_order
+  5: optional list<i64> null_counts
+  6: optional list<i64> repetition_level_histograms;
+  7: optional list<i64> definition_level_histograms;
 }
+);
 
-pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result<Index, ParquetError> {
-    let mut prot = TCompactSliceInputProtocol::new(data);
-
-    let index = ColumnIndex::read_from_in_protocol(&mut prot)?;
+pub(crate) fn decode_column_index(
+    data: &[u8],
+    column_type: Type,
+) -> Result<ColumnIndexMetaData, ParquetError> {
+    let mut prot = ThriftSliceInputProtocol::new(data);
+    let index = ThriftColumnIndex::read_thrift(&mut prot)?;
 
     let index = match column_type {
-        Type::BOOLEAN => Index::BOOLEAN(NativeIndex::<bool>::try_new(index)?),
-        Type::INT32 => Index::INT32(NativeIndex::<i32>::try_new(index)?),
-        Type::INT64 => Index::INT64(NativeIndex::<i64>::try_new(index)?),
-        Type::INT96 => Index::INT96(NativeIndex::<Int96>::try_new(index)?),
-        Type::FLOAT => Index::FLOAT(NativeIndex::<f32>::try_new(index)?),
-        Type::DOUBLE => Index::DOUBLE(NativeIndex::<f64>::try_new(index)?),
-        Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new(index)?),
-        Type::FIXED_LEN_BYTE_ARRAY => Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new(index)?),
+        Type::BOOLEAN => {
+            ColumnIndexMetaData::BOOLEAN(PrimitiveColumnIndex::<bool>::try_from_thrift(index)?)
+        }
+        Type::INT32 => {
+            ColumnIndexMetaData::INT32(PrimitiveColumnIndex::<i32>::try_from_thrift(index)?)
+        }
+        Type::INT64 => {
+            ColumnIndexMetaData::INT64(PrimitiveColumnIndex::<i64>::try_from_thrift(index)?)
+        }
+        Type::INT96 => {
+            ColumnIndexMetaData::INT96(PrimitiveColumnIndex::<Int96>::try_from_thrift(index)?)
+        }
+        Type::FLOAT => {
+            ColumnIndexMetaData::FLOAT(PrimitiveColumnIndex::<f32>::try_from_thrift(index)?)
+        }
+        Type::DOUBLE => {
+            ColumnIndexMetaData::DOUBLE(PrimitiveColumnIndex::<f64>::try_from_thrift(index)?)
+        }
+        Type::BYTE_ARRAY => {
+            ColumnIndexMetaData::BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?)
+        }
+        Type::FIXED_LEN_BYTE_ARRAY => {
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?)
+        }
     };
 
     Ok(index)
diff --git a/parquet/src/file/page_index/mod.rs b/parquet/src/file/page_index/mod.rs
index a8077896db34..71b8290d5d36 100644
--- a/parquet/src/file/page_index/mod.rs
+++ b/parquet/src/file/page_index/mod.rs
@@ -19,6 +19,6 @@
 //!
 //! [Column Index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 
-pub mod index;
+pub mod column_index;
 pub mod index_reader;
 pub mod offset_index;
diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs
index d48d1b6c083d..b1e30dd4590c 100644
--- a/parquet/src/file/page_index/offset_index.rs
+++ b/parquet/src/file/page_index/offset_index.rs
@@ -16,30 +16,52 @@
 // under the License.
 
 //! [`OffsetIndexMetaData`] structure holding decoded [`OffsetIndex`] information
+//!
+//! [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 
-use crate::errors::ParquetError;
-use crate::format::{OffsetIndex, PageLocation};
+use std::io::Write;
 
+use crate::parquet_thrift::{
+    ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
+    WriteThrift, WriteThriftField, read_thrift_vec,
+};
+use crate::{
+    errors::{ParquetError, Result},
+    thrift_struct,
+};
+
+thrift_struct!(
+/// Page location information for [`OffsetIndexMetaData`]
+pub struct PageLocation {
+  /// Offset of the page in the file
+  1: required i64 offset
+  /// Size of the page, including header. Sum of compressed_page_size and header length.
+  2: required i32 compressed_page_size
+  /// Index within the RowGroup of the first row of the page. When an
+  /// OffsetIndex is present, pages must begin on row boundaries
+  /// (repetition_level = 0).
+  3: required i64 first_row_index
+}
+);
+
+thrift_struct!(
 /// [`OffsetIndex`] information for a column chunk. Contains offsets and sizes for each page
 /// in the chunk. Optionally stores fully decoded page sizes for BYTE_ARRAY columns.
-#[derive(Debug, Clone, PartialEq)]
+///
+/// See [`ParquetOffsetIndex`] for more information.
+///
+/// [`ParquetOffsetIndex`]: crate::file::metadata::ParquetOffsetIndex
+/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 pub struct OffsetIndexMetaData {
-    /// Vector of [`PageLocation`] objects, one per page in the chunk.
-    pub page_locations: Vec<PageLocation>,
-    /// Optional vector of unencoded page sizes, one per page in the chunk.
-    /// Only defined for BYTE_ARRAY columns.
-    pub unencoded_byte_array_data_bytes: Option<Vec<i64>>,
+  /// Vector of [`PageLocation`] objects, one per page in the chunk.
+  1: required list<PageLocation> page_locations
+  /// Optional vector of unencoded page sizes, one per page in the chunk.
+  /// Only defined for BYTE_ARRAY columns.
+  2: optional list<i64> unencoded_byte_array_data_bytes
 }
+);
 
 impl OffsetIndexMetaData {
-    /// Creates a new [`OffsetIndexMetaData`] from an [`OffsetIndex`].
-    pub(crate) fn try_new(index: OffsetIndex) -> Result<Self, ParquetError> {
-        Ok(Self {
-            page_locations: index.page_locations,
-            unencoded_byte_array_data_bytes: index.unencoded_byte_array_data_bytes,
-        })
-    }
-
     /// Vector of [`PageLocation`] objects, one per page in the chunk.
     pub fn page_locations(&self) -> &Vec<PageLocation> {
         &self.page_locations
@@ -51,12 +73,126 @@ impl OffsetIndexMetaData {
         self.unencoded_byte_array_data_bytes.as_ref()
     }
 
-    // TODO: remove annotation after merge
-    #[allow(dead_code)]
-    pub(crate) fn to_thrift(&self) -> OffsetIndex {
-        OffsetIndex::new(
-            self.page_locations.clone(),
-            self.unencoded_byte_array_data_bytes.clone(),
-        )
+    // Fast-path read of offset index. This works because we expect all field deltas to be 1,
+    // and there's no nesting beyond PageLocation, so no need to save the last field id. Like
+    // read_page_location(), this will fail if absolute field ids are used.
+    pub(super) fn try_from_fast<'a, R: ThriftCompactInputProtocol<'a>>(
+        prot: &mut R,
+    ) -> Result<Self> {
+        // Offset index is a struct with 2 fields. First field is an array of PageLocations,
+        // the second an optional array of i64.
+
+        // read field 1 header, then list header, then vec of PageLocations
+        let (field_type, delta) = prot.read_field_header()?;
+        if delta != 1 || field_type != FieldType::List as u8 {
+            return Err(general_err!("error reading OffsetIndex::page_locations"));
+        }
+
+        // we have to do this manually because we want to use the fast PageLocation decoder
+        let list_ident = prot.read_list_begin()?;
+        let mut page_locations = Vec::with_capacity(list_ident.size as usize);
+        for _ in 0..list_ident.size {
+            page_locations.push(read_page_location(prot)?);
+        }
+
+        let mut unencoded_byte_array_data_bytes: Option<Vec<i64>> = None;
+
+        // read second field...if it's Stop we're done
+        let (mut field_type, delta) = prot.read_field_header()?;
+        if field_type == FieldType::List as u8 {
+            if delta != 1 {
+                return Err(general_err!(
+                    "encountered unknown field while reading OffsetIndex"
+                ));
+            }
+            let vec = read_thrift_vec::<i64, R>(&mut *prot)?;
+            unencoded_byte_array_data_bytes = Some(vec);
+
+            // this one should be Stop
+            (field_type, _) = prot.read_field_header()?;
+        }
+
+        if field_type != FieldType::Stop as u8 {
+            return Err(general_err!(
+                "encountered unknown field while reading OffsetIndex"
+            ));
+        }
+
+        Ok(Self {
+            page_locations,
+            unencoded_byte_array_data_bytes,
+        })
+    }
+}
+
+// hand coding this one because it is very time critical
+
+// Note: this will fail if the fields are out of order, or if a suboptimal
+// encoder doesn't use field deltas.
+fn read_page_location<'a, R: ThriftCompactInputProtocol<'a>>(prot: &mut R) -> Result<PageLocation> {
+    // there are 3 fields, all mandatory, so all field deltas should be 1
+    let (field_type, delta) = prot.read_field_header()?;
+    if delta != 1 || field_type != FieldType::I64 as u8 {
+        return Err(general_err!("error reading PageLocation::offset"));
+    }
+    let offset = prot.read_i64()?;
+
+    let (field_type, delta) = prot.read_field_header()?;
+    if delta != 1 || field_type != FieldType::I32 as u8 {
+        return Err(general_err!(
+            "error reading PageLocation::compressed_page_size"
+        ));
+    }
+    let compressed_page_size = prot.read_i32()?;
+
+    let (field_type, delta) = prot.read_field_header()?;
+    if delta != 1 || field_type != FieldType::I64 as u8 {
+        return Err(general_err!("error reading PageLocation::first_row_index"));
+    }
+    let first_row_index = prot.read_i64()?;
+
+    // read end of struct...return error if there are unknown fields present
+    let (field_type, _) = prot.read_field_header()?;
+    if field_type != FieldType::Stop as u8 {
+        return Err(general_err!("unexpected field in PageLocation"));
+    }
+
+    Ok(PageLocation {
+        offset,
+        compressed_page_size,
+        first_row_index,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parquet_thrift::tests::test_roundtrip;
+
+    #[test]
+    fn test_offset_idx_roundtrip() {
+        let page_locations = [
+            PageLocation {
+                offset: 0,
+                compressed_page_size: 10,
+                first_row_index: 0,
+            },
+            PageLocation {
+                offset: 10,
+                compressed_page_size: 20,
+                first_row_index: 100,
+            },
+        ]
+        .to_vec();
+        let unenc = [0i64, 100i64].to_vec();
+
+        test_roundtrip(OffsetIndexMetaData {
+            page_locations: page_locations.clone(),
+            unencoded_byte_array_data_bytes: Some(unenc),
+        });
+        test_roundtrip(OffsetIndexMetaData {
+            page_locations,
+            unencoded_byte_array_data_bytes: None,
+        });
     }
 }
diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 88425fd2b539..38a5a804c0b7 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -20,8 +20,7 @@ use crate::basic::{Compression, Encoding};
 use crate::compression::{CodecOptions, CodecOptionsBuilder};
 #[cfg(feature = "encryption")]
 use crate::encryption::encrypt::FileEncryptionProperties;
-use crate::file::metadata::KeyValue;
-use crate::format::SortingColumn;
+use crate::file::metadata::{KeyValue, SortingColumn};
 use crate::schema::types::ColumnPath;
 use std::str::FromStr;
 use std::{collections::HashMap, sync::Arc};
@@ -42,9 +41,8 @@ pub const DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT: usize = DEFAULT_PAGE_SIZE;
 pub const DEFAULT_DATA_PAGE_ROW_COUNT_LIMIT: usize = 20_000;
 /// Default value for [`WriterProperties::statistics_enabled`]
 pub const DEFAULT_STATISTICS_ENABLED: EnabledStatistics = EnabledStatistics::Page;
-/// Default value for [`WriterProperties::max_statistics_size`]
-#[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-pub const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096;
+/// Default value for [`WriterProperties::write_page_header_statistics`]
+pub const DEFAULT_WRITE_PAGE_HEADER_STATISTICS: bool = false;
 /// Default value for [`WriterProperties::max_row_group_size`]
 pub const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 1024 * 1024;
 /// Default value for [`WriterProperties::bloom_filter_position`]
@@ -58,7 +56,7 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
 /// Default value for [`BloomFilterProperties::ndv`]
 pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64;
 /// Default values for [`WriterProperties::statistics_truncate_length`]
-pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = None;
+pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = Some(64);
 /// Default value for [`WriterProperties::offset_index_disabled`]
 pub const DEFAULT_OFFSET_INDEX_DISABLED: bool = false;
 /// Default values for [`WriterProperties::coerce_types`]
@@ -93,7 +91,7 @@ impl FromStr for WriterVersion {
         match s {
             "PARQUET_1_0" | "parquet_1_0" => Ok(WriterVersion::PARQUET_1_0),
             "PARQUET_2_0" | "parquet_2_0" => Ok(WriterVersion::PARQUET_2_0),
-            _ => Err(format!("Invalid writer version: {}", s)),
+            _ => Err(format!("Invalid writer version: {s}")),
         }
     }
 }
@@ -171,7 +169,7 @@ pub struct WriterProperties {
     statistics_truncate_length: Option<usize>,
     coerce_types: bool,
     #[cfg(feature = "encryption")]
-    pub(crate) file_encryption_properties: Option<FileEncryptionProperties>,
+    pub(crate) file_encryption_properties: Option<Arc<FileEncryptionProperties>>,
 }
 
 impl Default for WriterProperties {
@@ -191,7 +189,13 @@ impl WriterProperties {
     /// Returns a new default [`WriterPropertiesBuilder`] for creating writer
     /// properties.
     pub fn builder() -> WriterPropertiesBuilder {
-        WriterPropertiesBuilder::with_defaults()
+        WriterPropertiesBuilder::default()
+    }
+
+    /// Converts this [`WriterProperties`] into a [`WriterPropertiesBuilder`].
+    /// Used for mutating existing property settings.
+    pub fn into_builder(self) -> WriterPropertiesBuilder {
+        self.into()
     }
 
     /// Returns data page size limit.
@@ -396,17 +400,20 @@ impl WriterProperties {
             .unwrap_or(DEFAULT_STATISTICS_ENABLED)
     }
 
-    /// Returns max size for statistics.
+    /// Returns `true` if [`Statistics`] are to be written to the page header for a column.
     ///
-    /// UNUSED
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    pub fn max_statistics_size(&self, col: &ColumnPath) -> usize {
-        #[allow(deprecated)]
+    /// For more details see [`WriterPropertiesBuilder::set_write_page_header_statistics`]
+    ///
+    /// [`Statistics`]: crate::file::statistics::Statistics
+    pub fn write_page_header_statistics(&self, col: &ColumnPath) -> bool {
         self.column_properties
             .get(col)
-            .and_then(|c| c.max_statistics_size())
-            .or_else(|| self.default_column_properties.max_statistics_size())
-            .unwrap_or(DEFAULT_MAX_STATISTICS_SIZE)
+            .and_then(|c| c.write_page_header_statistics())
+            .or_else(|| {
+                self.default_column_properties
+                    .write_page_header_statistics()
+            })
+            .unwrap_or(DEFAULT_WRITE_PAGE_HEADER_STATISTICS)
     }
 
     /// Returns the [`BloomFilterProperties`] for the given column
@@ -425,7 +432,7 @@ impl WriterProperties {
     ///
     /// For more details see [`WriterPropertiesBuilder::with_file_encryption_properties`]
     #[cfg(feature = "encryption")]
-    pub fn file_encryption_properties(&self) -> Option<&FileEncryptionProperties> {
+    pub fn file_encryption_properties(&self) -> Option<&Arc<FileEncryptionProperties>> {
         self.file_encryption_properties.as_ref()
     }
 }
@@ -433,6 +440,7 @@ impl WriterProperties {
 /// Builder for  [`WriterProperties`] Parquet writer configuration.
 ///
 /// See example on [`WriterProperties`]
+#[derive(Debug, Clone)]
 pub struct WriterPropertiesBuilder {
     data_page_size_limit: usize,
     data_page_row_count_limit: usize,
@@ -450,12 +458,12 @@ pub struct WriterPropertiesBuilder {
     statistics_truncate_length: Option<usize>,
     coerce_types: bool,
     #[cfg(feature = "encryption")]
-    file_encryption_properties: Option<FileEncryptionProperties>,
+    file_encryption_properties: Option<Arc<FileEncryptionProperties>>,
 }
 
-impl WriterPropertiesBuilder {
+impl Default for WriterPropertiesBuilder {
     /// Returns default state of the builder.
-    fn with_defaults() -> Self {
+    fn default() -> Self {
         Self {
             data_page_size_limit: DEFAULT_PAGE_SIZE,
             data_page_row_count_limit: DEFAULT_DATA_PAGE_ROW_COUNT_LIMIT,
@@ -476,7 +484,9 @@ impl WriterPropertiesBuilder {
             file_encryption_properties: None,
         }
     }
+}
 
+impl WriterPropertiesBuilder {
     /// Finalizes the configuration and returns immutable writer properties struct.
     pub fn build(self) -> WriterProperties {
         WriterProperties {
@@ -544,23 +554,6 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets best effort maximum dictionary page size, in bytes (defaults to `1024 * 1024`
-    /// via [`DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT`]).
-    ///
-    /// The parquet writer will attempt to limit the size of each
-    /// `DataPage` used to store dictionaries to this many
-    /// bytes. Reducing this value will result in larger parquet
-    /// files, but may improve the effectiveness of page index based
-    /// predicate pushdown during reading.
-    ///
-    /// Note: this is a best effort limit based on value of
-    /// [`set_write_batch_size`](Self::set_write_batch_size).
-    pub fn set_dictionary_page_size_limit(mut self, value: usize) -> Self {
-        self.default_column_properties
-            .set_dictionary_page_size_limit(value);
-        self
-    }
-
     /// Sets write batch size (defaults to 1024 via [`DEFAULT_WRITE_BATCH_SIZE`]).
     ///
     /// For performance reasons, data for each column is written in
@@ -646,10 +639,13 @@ impl WriterPropertiesBuilder {
     /// * If `Some`, must be greater than 0, otherwise will panic
     /// * If `None`, there's no effective limit.
     ///
-    /// [`Index`]: crate::file::page_index::index::Index
+    /// [`Index`]: crate::file::page_index::column_index::ColumnIndexMetaData
     pub fn set_column_index_truncate_length(mut self, max_length: Option<usize>) -> Self {
         if let Some(value) = max_length {
-            assert!(value > 0, "Cannot have a 0 column index truncate length. If you wish to disable min/max value truncation, set it to `None`.");
+            assert!(
+                value > 0,
+                "Cannot have a 0 column index truncate length. If you wish to disable min/max value truncation, set it to `None`."
+            );
         }
 
         self.column_index_truncate_length = max_length;
@@ -657,7 +653,7 @@ impl WriterPropertiesBuilder {
     }
 
     /// Sets the max length of min/max value fields in row group and data page header
-    /// [`Statistics`] (defaults to `None` (no limit) via [`DEFAULT_STATISTICS_TRUNCATE_LENGTH`]).
+    /// [`Statistics`] (defaults to `Some(64)` via [`DEFAULT_STATISTICS_TRUNCATE_LENGTH`]).
     ///
     /// # Notes
     /// Row group [`Statistics`] are written when [`Self::set_statistics_enabled`] is
@@ -675,7 +671,10 @@ impl WriterPropertiesBuilder {
     /// [`Statistics`]: crate::file::statistics::Statistics
     pub fn set_statistics_truncate_length(mut self, max_length: Option<usize>) -> Self {
         if let Some(value) = max_length {
-            assert!(value > 0, "Cannot have a 0 statistics truncate length. If you wish to disable min/max value truncation, set it to `None`.");
+            assert!(
+                value > 0,
+                "Cannot have a 0 statistics truncate length. If you wish to disable min/max value truncation, set it to `None`."
+            );
         }
 
         self.statistics_truncate_length = max_length;
@@ -710,7 +709,7 @@ impl WriterPropertiesBuilder {
     #[cfg(feature = "encryption")]
     pub fn with_file_encryption_properties(
         mut self,
-        file_encryption_properties: FileEncryptionProperties,
+        file_encryption_properties: Arc<FileEncryptionProperties>,
     ) -> Self {
         self.file_encryption_properties = Some(file_encryption_properties);
         self
@@ -753,7 +752,24 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets default statistics level for all columns (defaults to [`Page`] via
+    /// Sets best effort maximum dictionary page size, in bytes (defaults to `1024 * 1024`
+    /// via [`DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT`]).
+    ///
+    /// The parquet writer will attempt to limit the size of each
+    /// `DataPage` used to store dictionaries to this many
+    /// bytes. Reducing this value will result in larger parquet
+    /// files, but may improve the effectiveness of page index based
+    /// predicate pushdown during reading.
+    ///
+    /// Note: this is a best effort limit based on value of
+    /// [`set_write_batch_size`](Self::set_write_batch_size).
+    pub fn set_dictionary_page_size_limit(mut self, value: usize) -> Self {
+        self.default_column_properties
+            .set_dictionary_page_size_limit(value);
+        self
+    }
+
+    /// Sets default [`EnabledStatistics`] level for all columns (defaults to [`Page`] via
     /// [`DEFAULT_STATISTICS_ENABLED`]).
     ///
     /// [`Page`]: EnabledStatistics::Page
@@ -762,15 +778,30 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets default max statistics size for all columns (defaults to `4096` via
-    /// [`DEFAULT_MAX_STATISTICS_SIZE`]).
+    /// Enables or disables writing [`Statistics`] in the page header
+    /// (defaults to `false` via [`DEFAULT_WRITE_PAGE_HEADER_STATISTICS`]).
+    ///
+    /// Only applicable if [`Page`] level statistics are gathered.
+    ///
+    /// Setting this value to `true` can greatly increase the size of the resulting Parquet
+    /// file while yielding very little added benefit. Most modern Parquet implementations
+    /// will use the min/max values stored in the [`ParquetColumnIndex`] rather than
+    /// those in the page header.
+    ///
+    /// # Note
+    ///
+    /// Prior to version 56.0.0, the `parquet` crate always wrote these
+    /// statistics (the equivalent of setting this option to `true`). This was
+    /// changed in 56.0.0 to follow the recommendation in the Parquet
+    /// specification. See [issue #7580] for more details.
     ///
-    /// Applicable only if statistics are enabled.
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    pub fn set_max_statistics_size(mut self, value: usize) -> Self {
-        #[allow(deprecated)]
+    /// [`Statistics`]: crate::file::statistics::Statistics
+    /// [`ParquetColumnIndex`]: crate::file::metadata::ParquetColumnIndex
+    /// [`Page`]: EnabledStatistics::Page
+    /// [issue #7580]: https://github.com/apache/arrow-rs/issues/7580
+    pub fn set_write_page_header_statistics(mut self, value: bool) -> Self {
         self.default_column_properties
-            .set_max_statistics_size(value);
+            .set_write_page_header_statistics(value);
         self
     }
 
@@ -867,7 +898,7 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets statistics level for a specific column
+    /// Sets [`EnabledStatistics`] level for a specific column.
     ///
     /// Takes precedence over [`Self::set_statistics_enabled`].
     pub fn set_column_statistics_enabled(
@@ -879,13 +910,14 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets max size for statistics for a specific column.
+    /// Sets whether to write [`Statistics`] in the page header for a specific column.
     ///
-    /// Takes precedence over [`Self::set_max_statistics_size`].
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    pub fn set_column_max_statistics_size(mut self, col: ColumnPath, value: usize) -> Self {
-        #[allow(deprecated)]
-        self.get_mut_props(col).set_max_statistics_size(value);
+    /// Takes precedence over [`Self::set_write_page_header_statistics`].
+    ///
+    /// [`Statistics`]: crate::file::statistics::Statistics
+    pub fn set_column_write_page_header_statistics(mut self, col: ColumnPath, value: bool) -> Self {
+        self.get_mut_props(col)
+            .set_write_page_header_statistics(value);
         self
     }
 
@@ -914,6 +946,30 @@ impl WriterPropertiesBuilder {
     }
 }
 
+impl From<WriterProperties> for WriterPropertiesBuilder {
+    fn from(props: WriterProperties) -> Self {
+        WriterPropertiesBuilder {
+            data_page_size_limit: props.data_page_size_limit,
+            data_page_row_count_limit: props.data_page_row_count_limit,
+            write_batch_size: props.write_batch_size,
+            max_row_group_size: props.max_row_group_size,
+            bloom_filter_position: props.bloom_filter_position,
+            writer_version: props.writer_version,
+            created_by: props.created_by,
+            offset_index_disabled: props.offset_index_disabled,
+            key_value_metadata: props.key_value_metadata,
+            default_column_properties: props.default_column_properties,
+            column_properties: props.column_properties,
+            sorting_columns: props.sorting_columns,
+            column_index_truncate_length: props.column_index_truncate_length,
+            statistics_truncate_length: props.statistics_truncate_length,
+            coerce_types: props.coerce_types,
+            #[cfg(feature = "encryption")]
+            file_encryption_properties: props.file_encryption_properties,
+        }
+    }
+}
+
 /// Controls the level of statistics to be computed by the writer and stored in
 /// the parquet file.
 ///
@@ -936,8 +992,12 @@ pub enum EnabledStatistics {
     /// Compute page-level and column chunk-level statistics.
     ///
     /// Setting this option will store one set of statistics for each relevant
-    /// column for each page and row group. The more row groups and the more
-    /// pages written, the more statistics will be stored.
+    /// column for each row group. In addition, this will enable the writing
+    /// of the column index (the offset index is always written regardless of
+    /// this setting). See [`ParquetColumnIndex`] for
+    /// more information.
+    ///
+    /// [`ParquetColumnIndex`]: crate::file::metadata::ParquetColumnIndex
     Page,
 }
 
@@ -949,7 +1009,7 @@ impl FromStr for EnabledStatistics {
             "NONE" | "none" => Ok(EnabledStatistics::None),
             "CHUNK" | "chunk" => Ok(EnabledStatistics::Chunk),
             "PAGE" | "page" => Ok(EnabledStatistics::Page),
-            _ => Err(format!("Invalid statistics arg: {}", s)),
+            _ => Err(format!("Invalid statistics arg: {s}")),
         }
     }
 }
@@ -1008,8 +1068,7 @@ struct ColumnProperties {
     dictionary_page_size_limit: Option<usize>,
     dictionary_enabled: Option<bool>,
     statistics_enabled: Option<EnabledStatistics>,
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    max_statistics_size: Option<usize>,
+    write_page_header_statistics: Option<bool>,
     /// bloom filter related properties
     bloom_filter_properties: Option<BloomFilterProperties>,
 }
@@ -1051,11 +1110,9 @@ impl ColumnProperties {
         self.statistics_enabled = Some(enabled);
     }
 
-    /// Sets max size for statistics for this column.
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    #[allow(deprecated)]
-    fn set_max_statistics_size(&mut self, value: usize) {
-        self.max_statistics_size = Some(value);
+    /// Sets whether to write statistics in the page header for this column.
+    fn set_write_page_header_statistics(&mut self, enabled: bool) {
+        self.write_page_header_statistics = Some(enabled);
     }
 
     /// If `value` is `true`, sets bloom filter properties to default values if not previously set,
@@ -1122,11 +1179,12 @@ impl ColumnProperties {
         self.statistics_enabled
     }
 
-    /// Returns optional max size in bytes for statistics.
-    #[deprecated(since = "54.0.0", note = "Unused; will be removed in 56.0.0")]
-    fn max_statistics_size(&self) -> Option<usize> {
-        #[allow(deprecated)]
-        self.max_statistics_size
+    /// Returns `Some(true)` if [`Statistics`] are to be written to the page header for this
+    /// column.
+    ///
+    /// [`Statistics`]: crate::file::statistics::Statistics
+    fn write_page_header_statistics(&self) -> Option<bool> {
+        self.write_page_header_statistics
     }
 
     /// Returns the bloom filter properties, or `None` if not enabled
@@ -1139,6 +1197,7 @@ impl ColumnProperties {
 pub type ReaderPropertiesPtr = Arc<ReaderProperties>;
 
 const DEFAULT_READ_BLOOM_FILTER: bool = false;
+const DEFAULT_READ_PAGE_STATS: bool = false;
 
 /// Configuration settings for reading parquet files.
 ///
@@ -1161,6 +1220,7 @@ const DEFAULT_READ_BLOOM_FILTER: bool = false;
 pub struct ReaderProperties {
     codec_options: CodecOptions,
     read_bloom_filter: bool,
+    read_page_stats: bool,
 }
 
 impl ReaderProperties {
@@ -1178,6 +1238,11 @@ impl ReaderProperties {
     pub(crate) fn read_bloom_filter(&self) -> bool {
         self.read_bloom_filter
     }
+
+    /// Returns whether to read page level statistics
+    pub(crate) fn read_page_stats(&self) -> bool {
+        self.read_page_stats
+    }
 }
 
 /// Builder for parquet file reader configuration. See example on
@@ -1185,6 +1250,7 @@ impl ReaderProperties {
 pub struct ReaderPropertiesBuilder {
     codec_options_builder: CodecOptionsBuilder,
     read_bloom_filter: Option<bool>,
+    read_page_stats: Option<bool>,
 }
 
 /// Reader properties builder.
@@ -1194,6 +1260,7 @@ impl ReaderPropertiesBuilder {
         Self {
             codec_options_builder: CodecOptionsBuilder::default(),
             read_bloom_filter: None,
+            read_page_stats: None,
         }
     }
 
@@ -1202,6 +1269,7 @@ impl ReaderPropertiesBuilder {
         ReaderProperties {
             codec_options: self.codec_options_builder.build(),
             read_bloom_filter: self.read_bloom_filter.unwrap_or(DEFAULT_READ_BLOOM_FILTER),
+            read_page_stats: self.read_page_stats.unwrap_or(DEFAULT_READ_PAGE_STATS),
         }
     }
 
@@ -1230,6 +1298,20 @@ impl ReaderPropertiesBuilder {
         self.read_bloom_filter = Some(value);
         self
     }
+
+    /// Enable/disable reading page-level statistics
+    ///
+    /// If set to `true`, then the reader will decode and populate the [`Statistics`] for
+    /// each page, if present.
+    /// If set to `false`, then the reader will skip decoding the statistics.
+    ///
+    /// By default statistics will not be decoded.
+    ///
+    /// [`Statistics`]: crate::file::statistics::Statistics
+    pub fn set_read_page_statistics(mut self, value: bool) -> Self {
+        self.read_page_stats = Some(value);
+        self
+    }
 }
 
 #[cfg(test)]
@@ -1269,9 +1351,11 @@ mod tests {
             props.statistics_enabled(&ColumnPath::from("col")),
             DEFAULT_STATISTICS_ENABLED
         );
-        assert!(props
-            .bloom_filter_properties(&ColumnPath::from("col"))
-            .is_none());
+        assert!(
+            props
+                .bloom_filter_properties(&ColumnPath::from("col"))
+                .is_none()
+        );
     }
 
     #[test]
@@ -1355,50 +1439,59 @@ mod tests {
             .set_column_bloom_filter_fpp(ColumnPath::from("col"), 0.1)
             .build();
 
-        assert_eq!(props.writer_version(), WriterVersion::PARQUET_2_0);
-        assert_eq!(props.data_page_size_limit(), 10);
-        assert_eq!(props.dictionary_page_size_limit(), 20);
-        assert_eq!(props.write_batch_size(), 30);
-        assert_eq!(props.max_row_group_size(), 40);
-        assert_eq!(props.created_by(), "default");
-        assert_eq!(
-            props.key_value_metadata(),
-            Some(&vec![
-                KeyValue::new("key".to_string(), "value".to_string(),)
-            ])
-        );
+        fn test_props(props: &WriterProperties) {
+            assert_eq!(props.writer_version(), WriterVersion::PARQUET_2_0);
+            assert_eq!(props.data_page_size_limit(), 10);
+            assert_eq!(props.dictionary_page_size_limit(), 20);
+            assert_eq!(props.write_batch_size(), 30);
+            assert_eq!(props.max_row_group_size(), 40);
+            assert_eq!(props.created_by(), "default");
+            assert_eq!(
+                props.key_value_metadata(),
+                Some(&vec![
+                    KeyValue::new("key".to_string(), "value".to_string(),)
+                ])
+            );
 
-        assert_eq!(
-            props.encoding(&ColumnPath::from("a")),
-            Some(Encoding::DELTA_BINARY_PACKED)
-        );
-        assert_eq!(
-            props.compression(&ColumnPath::from("a")),
-            Compression::GZIP(Default::default())
-        );
-        assert!(!props.dictionary_enabled(&ColumnPath::from("a")));
-        assert_eq!(
-            props.statistics_enabled(&ColumnPath::from("a")),
-            EnabledStatistics::None
-        );
+            assert_eq!(
+                props.encoding(&ColumnPath::from("a")),
+                Some(Encoding::DELTA_BINARY_PACKED)
+            );
+            assert_eq!(
+                props.compression(&ColumnPath::from("a")),
+                Compression::GZIP(Default::default())
+            );
+            assert!(!props.dictionary_enabled(&ColumnPath::from("a")));
+            assert_eq!(
+                props.statistics_enabled(&ColumnPath::from("a")),
+                EnabledStatistics::None
+            );
 
-        assert_eq!(
-            props.encoding(&ColumnPath::from("col")),
-            Some(Encoding::RLE)
-        );
-        assert_eq!(
-            props.compression(&ColumnPath::from("col")),
-            Compression::SNAPPY
-        );
-        assert!(props.dictionary_enabled(&ColumnPath::from("col")));
-        assert_eq!(
-            props.statistics_enabled(&ColumnPath::from("col")),
-            EnabledStatistics::Chunk
-        );
-        assert_eq!(
-            props.bloom_filter_properties(&ColumnPath::from("col")),
-            Some(&BloomFilterProperties { fpp: 0.1, ndv: 100 })
-        );
+            assert_eq!(
+                props.encoding(&ColumnPath::from("col")),
+                Some(Encoding::RLE)
+            );
+            assert_eq!(
+                props.compression(&ColumnPath::from("col")),
+                Compression::SNAPPY
+            );
+            assert!(props.dictionary_enabled(&ColumnPath::from("col")));
+            assert_eq!(
+                props.statistics_enabled(&ColumnPath::from("col")),
+                EnabledStatistics::Chunk
+            );
+            assert_eq!(
+                props.bloom_filter_properties(&ColumnPath::from("col")),
+                Some(&BloomFilterProperties { fpp: 0.1, ndv: 100 })
+            );
+        }
+
+        // Test direct build of properties
+        test_props(&props);
+
+        // Test that into_builder() gives the same result
+        let props_into_builder_and_back = props.into_builder().build();
+        test_props(&props_into_builder_and_back);
     }
 
     #[test]
diff --git a/parquet/src/file/reader.rs b/parquet/src/file/reader.rs
index 400441f0c9cd..3adf10fac220 100644
--- a/parquet/src/file/reader.rs
+++ b/parquet/src/file/reader.rs
@@ -48,11 +48,12 @@ pub trait Length {
 /// Generates [`Read`]ers to read chunks of a Parquet data source.
 ///
 /// The Parquet reader uses [`ChunkReader`] to access Parquet data, allowing
-/// multiple decoders to read concurrently from different locations in the same file.
+/// multiple decoders to read concurrently from different locations in the same
+/// file.
 ///
-/// The trait provides:
-/// * random access (via [`Self::get_bytes`])
-/// * sequential (via [`Self::get_read`])
+/// The trait functions both as a reader and a factory for readers.
+/// * random access via [`Self::get_bytes`]
+/// * sequential access via the reader returned by the factory method [`Self::get_read`]
 ///
 /// # Provided Implementations
 /// * [`File`] for reading from local file system
@@ -123,11 +124,25 @@ impl ChunkReader for Bytes {
 
     fn get_read(&self, start: u64) -> Result<Self::T> {
         let start = start as usize;
+        if start > self.len() {
+            return Err(eof_err!(
+                "Expected to read at offset {start}, while file has length {}",
+                self.len()
+            ));
+        }
         Ok(self.slice(start..).reader())
     }
 
     fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes> {
         let start = start as usize;
+        if start > self.len() || start + length > self.len() {
+            return Err(eof_err!(
+                "Expected to read {} bytes at offset {}, while file has length {}",
+                length,
+                start,
+                self.len()
+            ));
+        }
         Ok(self.slice(start..start + length))
     }
 }
@@ -153,7 +168,7 @@ pub trait FileReader: Send + Sync {
     ///
     /// Projected schema can be a subset of or equal to the file schema, when it is None,
     /// full file schema is assumed.
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter>;
+    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter<'_>>;
 }
 
 /// Parquet row group reader API. With this, user can get metadata information about the
@@ -211,7 +226,7 @@ pub trait RowGroupReader: Send + Sync {
     ///
     /// Projected schema can be a subset of or equal to the file schema, when it is None,
     /// full file schema is assumed.
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter>;
+    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter<'_>>;
 }
 
 // ----------------------------------------------------------------------
@@ -273,3 +288,34 @@ impl Iterator for FilePageIterator {
 }
 
 impl PageIterator for FilePageIterator {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_bytes_chunk_reader_get_read_out_of_bounds() {
+        let data = Bytes::from(vec![0, 1, 2, 3]);
+        let err = data.get_read(5).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "EOF: Expected to read at offset 5, while file has length 4"
+        );
+    }
+
+    #[test]
+    fn test_bytes_chunk_reader_get_bytes_out_of_bounds() {
+        let data = Bytes::from(vec![0, 1, 2, 3]);
+        let err = data.get_bytes(5, 1).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "EOF: Expected to read 1 bytes at offset 5, while file has length 4"
+        );
+
+        let err = data.get_bytes(2, 3).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "EOF: Expected to read 3 bytes at offset 2, while file has length 4"
+        );
+    }
+}
diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs
index ac43381ae8b9..b3b6383f78bb 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -18,32 +18,30 @@
 //! Contains implementations of the reader traits FileReader, RowGroupReader and PageReader
 //! Also contains implementations of the ChunkReader for files (with buffering) and byte arrays (RAM)
 
-use crate::basic::{Encoding, Type};
+use crate::basic::{PageType, Type};
 use crate::bloom_filter::Sbbf;
 use crate::column::page::{Page, PageMetadata, PageReader};
-use crate::compression::{create_codec, Codec};
+use crate::compression::{Codec, create_codec};
 #[cfg(feature = "encryption")]
-use crate::encryption::decrypt::{read_and_decrypt, CryptoContext};
+use crate::encryption::decrypt::{CryptoContext, read_and_decrypt};
 use crate::errors::{ParquetError, Result};
-use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::file::metadata::thrift::PageHeader;
+use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation};
+use crate::file::statistics;
 use crate::file::{
     metadata::*,
     properties::{ReaderProperties, ReaderPropertiesPtr},
     reader::*,
-    statistics,
 };
-use crate::format::{PageHeader, PageLocation, PageType};
-use crate::record::reader::RowIter;
-use crate::record::Row;
-use crate::schema::types::Type as SchemaType;
 #[cfg(feature = "encryption")]
-use crate::thrift::TCompactSliceInputProtocol;
-use crate::thrift::TSerializable;
+use crate::parquet_thrift::ThriftSliceInputProtocol;
+use crate::parquet_thrift::{ReadThrift, ThriftReadInputProtocol};
+use crate::record::Row;
+use crate::record::reader::RowIter;
+use crate::schema::types::{SchemaDescPtr, Type as SchemaType};
 use bytes::Bytes;
 use std::collections::VecDeque;
-use std::iter;
 use std::{fs::File, io::Read, path::Path, sync::Arc};
-use thrift::protocol::TCompactInputProtocol;
 
 impl TryFrom<File> for SerializedFileReader<File> {
     type Error = ParquetError;
@@ -112,6 +110,7 @@ pub struct ReadOptionsBuilder {
     predicates: Vec<ReadGroupPredicate>,
     enable_page_index: bool,
     props: Option<ReaderProperties>,
+    metadata_options: ParquetMetaDataOptions,
 }
 
 impl ReadOptionsBuilder {
@@ -154,6 +153,53 @@ impl ReadOptionsBuilder {
         self
     }
 
+    /// Provide a Parquet schema to use when decoding the metadata. The schema in the Parquet
+    /// footer will be skipped.
+    pub fn with_parquet_schema(mut self, schema: SchemaDescPtr) -> Self {
+        self.metadata_options.set_schema(schema);
+        self
+    }
+
+    /// Set whether to convert the [`encoding_stats`] in the Parquet `ColumnMetaData` to a bitmask
+    /// (defaults to `false`).
+    ///
+    /// See [`ColumnChunkMetaData::page_encoding_stats_mask`] for an explanation of why this
+    /// might be desirable.
+    ///
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn with_encoding_stats_as_mask(mut self, val: bool) -> Self {
+        self.metadata_options.set_encoding_stats_as_mask(val);
+        self
+    }
+
+    /// Sets the decoding policy for [`encoding_stats`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`encoding_stats`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L917
+    pub fn with_encoding_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_encoding_stats_policy(policy);
+        self
+    }
+
+    /// Sets the decoding policy for [`statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L912
+    pub fn with_column_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_column_stats_policy(policy);
+        self
+    }
+
+    /// Sets the decoding policy for [`size_statistics`] in the Parquet `ColumnMetaData`.
+    ///
+    /// [`size_statistics`]:
+    /// https://github.com/apache/parquet-format/blob/786142e26740487930ddc3ec5e39d780bd930907/src/main/thrift/parquet.thrift#L936
+    pub fn with_size_stats_policy(mut self, policy: ParquetStatisticsPolicy) -> Self {
+        self.metadata_options.set_size_stats_policy(policy);
+        self
+    }
+
     /// Seal the builder and return the read options
     pub fn build(self) -> ReadOptions {
         let props = self
@@ -163,18 +209,20 @@ impl ReadOptionsBuilder {
             predicates: self.predicates,
             enable_page_index: self.enable_page_index,
             props,
+            metadata_options: self.metadata_options,
         }
     }
 }
 
 /// A collection of options for reading a Parquet file.
 ///
-/// Currently, only predicates on row group metadata are supported.
+/// Predicates are currently only supported on row group metadata.
 /// All predicates will be chained using 'AND' to filter the row groups.
 pub struct ReadOptions {
     predicates: Vec<ReadGroupPredicate>,
     enable_page_index: bool,
     props: ReaderProperties,
+    metadata_options: ParquetMetaDataOptions,
 }
 
 impl<R: 'static + ChunkReader> SerializedFileReader<R> {
@@ -192,8 +240,10 @@ impl<R: 'static + ChunkReader> SerializedFileReader<R> {
 
     /// Creates file reader from a Parquet file with read options.
     /// Returns an error if the Parquet file does not exist or is corrupt.
+    #[allow(deprecated)]
     pub fn new_with_options(chunk_reader: R, options: ReadOptions) -> Result<Self> {
         let mut metadata_builder = ParquetMetaDataReader::new()
+            .with_metadata_options(Some(options.metadata_options.clone()))
             .parse_and_finish(&chunk_reader)?
             .into_builder();
         let mut predicates = options.predicates;
@@ -264,7 +314,7 @@ impl<R: 'static + ChunkReader> FileReader for SerializedFileReader<R> {
         )?))
     }
 
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter> {
+    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter<'_>> {
         RowIter::from_file(projection, self)
     }
 }
@@ -293,7 +343,7 @@ impl<'a, R: ChunkReader> SerializedRowGroupReader<'a, R> {
                 .map(|col| Sbbf::read_from_column_chunk(col, &*chunk_reader))
                 .collect::<Result<Vec<_>>>()?
         } else {
-            iter::repeat(None).take(metadata.columns().len()).collect()
+            std::iter::repeat_n(None, metadata.columns().len()).collect()
         };
         Ok(Self {
             chunk_reader,
@@ -335,7 +385,7 @@ impl<R: 'static + ChunkReader> RowGroupReader for SerializedRowGroupReader<'_, R
         self.bloom_filters[i].as_ref()
     }
 
-    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter> {
+    fn get_row_iter(&self, projection: Option<SchemaType>) -> Result<RowIter<'_>> {
         RowIter::from_row_group(projection, self)
     }
 }
@@ -388,16 +438,19 @@ pub(crate) fn decode_page(
         can_decompress = header_v2.is_compressed.unwrap_or(true);
     }
 
-    // TODO: page header could be huge because of statistics. We should set a
-    // maximum page header size and abort if that is exceeded.
     let buffer = match decompressor {
         Some(decompressor) if can_decompress => {
             let uncompressed_page_size = usize::try_from(page_header.uncompressed_page_size)?;
+            if offset > buffer.len() || offset > uncompressed_page_size {
+                return Err(general_err!("Invalid page header"));
+            }
             let decompressed_size = uncompressed_page_size - offset;
             let mut decompressed = Vec::with_capacity(uncompressed_page_size);
-            decompressed.extend_from_slice(&buffer.as_ref()[..offset]);
+            decompressed.extend_from_slice(&buffer[..offset]);
+            // decompressed size of zero corresponds to a page with no non-null values
+            // see https://github.com/apache/parquet-format/blob/master/README.md#data-pages
             if decompressed_size > 0 {
-                let compressed = &buffer.as_ref()[offset..];
+                let compressed = &buffer[offset..];
                 decompressor.decompress(compressed, &mut decompressed, Some(decompressed_size))?;
             }
 
@@ -414,7 +467,7 @@ pub(crate) fn decode_page(
         _ => buffer,
     };
 
-    let result = match page_header.type_ {
+    let result = match page_header.r#type {
         PageType::DICTIONARY_PAGE => {
             let dict_header = page_header.dictionary_page_header.as_ref().ok_or_else(|| {
                 ParquetError::General("Missing dictionary page header".to_string())
@@ -423,7 +476,7 @@ pub(crate) fn decode_page(
             Page::DictionaryPage {
                 buf: buffer,
                 num_values: dict_header.num_values.try_into()?,
-                encoding: Encoding::try_from(dict_header.encoding)?,
+                encoding: dict_header.encoding,
                 is_sorted,
             }
         }
@@ -434,10 +487,10 @@ pub(crate) fn decode_page(
             Page::DataPage {
                 buf: buffer,
                 num_values: header.num_values.try_into()?,
-                encoding: Encoding::try_from(header.encoding)?,
-                def_level_encoding: Encoding::try_from(header.definition_level_encoding)?,
-                rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?,
-                statistics: statistics::from_thrift(physical_type, header.statistics)?,
+                encoding: header.encoding,
+                def_level_encoding: header.definition_level_encoding,
+                rep_level_encoding: header.repetition_level_encoding,
+                statistics: statistics::from_thrift_page_stats(physical_type, header.statistics)?,
             }
         }
         PageType::DATA_PAGE_V2 => {
@@ -448,18 +501,21 @@ pub(crate) fn decode_page(
             Page::DataPageV2 {
                 buf: buffer,
                 num_values: header.num_values.try_into()?,
-                encoding: Encoding::try_from(header.encoding)?,
+                encoding: header.encoding,
                 num_nulls: header.num_nulls.try_into()?,
                 num_rows: header.num_rows.try_into()?,
                 def_levels_byte_len: header.definition_levels_byte_length.try_into()?,
                 rep_levels_byte_len: header.repetition_levels_byte_length.try_into()?,
                 is_compressed,
-                statistics: statistics::from_thrift(physical_type, header.statistics)?,
+                statistics: statistics::from_thrift_page_stats(physical_type, header.statistics)?,
             }
         }
         _ => {
             // For unknown page type (e.g., INDEX_PAGE), skip and read next.
-            unimplemented!("Page type {:?} is not supported", page_header.type_)
+            return Err(general_err!(
+                "Page type {:?} is not supported",
+                page_header.r#type
+            ));
         }
     };
 
@@ -469,10 +525,12 @@ pub(crate) fn decode_page(
 enum SerializedPageReaderState {
     Values {
         /// The current byte offset in the reader
-        offset: usize,
+        /// Note that offset is u64 (i.e., not usize) to support 32-bit architectures such as WASM
+        offset: u64,
 
         /// The length of the chunk in bytes
-        remaining_bytes: usize,
+        /// Note that remaining_bytes is u64 (i.e., not usize) to support 32-bit architectures such as WASM
+        remaining_bytes: u64,
 
         // If the next page header has already been "peeked", we will cache it and it`s length here
         next_page_header: Option<Box<PageHeader>>,
@@ -497,6 +555,8 @@ enum SerializedPageReaderState {
 
 #[derive(Default)]
 struct SerializedPageReaderContext {
+    /// Controls decoding of page-level statistics
+    read_stats: bool,
     /// Crypto context carrying objects required for decryption
     #[cfg(feature = "encryption")]
     crypto_context: Option<Arc<CryptoContext>>,
@@ -601,19 +661,23 @@ impl<R: ChunkReader> SerializedPageReader<R> {
                 }
             }
             None => SerializedPageReaderState::Values {
-                offset: usize::try_from(start)?,
-                remaining_bytes: usize::try_from(len)?,
+                offset: start,
+                remaining_bytes: len,
                 next_page_header: None,
                 page_index: 0,
                 require_dictionary: meta.dictionary_page_offset().is_some(),
             },
         };
+        let mut context = SerializedPageReaderContext::default();
+        if props.read_page_stats() {
+            context.read_stats = true;
+        }
         Ok(Self {
             reader,
             decompressor,
             state,
             physical_type: meta.column_type(),
-            context: Default::default(),
+            context,
         })
     }
 
@@ -623,7 +687,7 @@ impl<R: ChunkReader> SerializedPageReader<R> {
     /// This is used when we need to read parquet with row-filter, and we don't want to decompress the page twice.
     /// This function allows us to check if the next page is being cached or read previously.
     #[cfg(test)]
-    fn peek_next_page_offset(&mut self) -> Result<Option<usize>> {
+    fn peek_next_page_offset(&mut self) -> Result<Option<u64>> {
         match &mut self.state {
             SerializedPageReaderState::Values {
                 offset,
@@ -645,15 +709,15 @@ impl<R: ChunkReader> SerializedPageReader<R> {
                             continue;
                         }
                     } else {
-                        let mut read = self.reader.get_read(*offset as u64)?;
+                        let mut read = self.reader.get_read(*offset)?;
                         let (header_len, header) = Self::read_page_header_len(
                             &self.context,
                             &mut read,
                             *page_index,
                             *require_dictionary,
                         )?;
-                        *offset += header_len;
-                        *remaining_bytes -= header_len;
+                        *offset += header_len as u64;
+                        *remaining_bytes -= header_len as u64;
                         let page_meta = if let Ok(_page_meta) = PageMetadata::try_from(&header) {
                             Ok(Some(*offset))
                         } else {
@@ -671,9 +735,9 @@ impl<R: ChunkReader> SerializedPageReader<R> {
                 ..
             } => {
                 if let Some(page) = dictionary_page {
-                    Ok(Some(usize::try_from(page.offset)?))
+                    Ok(Some(page.offset as u64))
                 } else if let Some(page) = page_locations.front() {
-                    Ok(Some(usize::try_from(page.offset)?))
+                    Ok(Some(page.offset as u64))
                 } else {
                     Ok(None)
                 }
@@ -730,8 +794,12 @@ impl SerializedPageReaderContext {
         _page_index: usize,
         _dictionary_page: bool,
     ) -> Result<PageHeader> {
-        let mut prot = TCompactInputProtocol::new(input);
-        Ok(PageHeader::read_from_in_protocol(&mut prot)?)
+        let mut prot = ThriftReadInputProtocol::new(input);
+        if self.read_stats {
+            Ok(PageHeader::read_thrift(&mut prot)?)
+        } else {
+            Ok(PageHeader::read_thrift_without_stats(&mut prot)?)
+        }
     }
 
     fn decrypt_page_data<T>(
@@ -754,8 +822,14 @@ impl SerializedPageReaderContext {
     ) -> Result<PageHeader> {
         match self.page_crypto_context(page_index, dictionary_page) {
             None => {
-                let mut prot = TCompactInputProtocol::new(input);
-                Ok(PageHeader::read_from_in_protocol(&mut prot)?)
+                let mut prot = ThriftReadInputProtocol::new(input);
+                if self.read_stats {
+                    Ok(PageHeader::read_thrift(&mut prot)?)
+                } else {
+                    use crate::file::metadata::thrift::PageHeader;
+
+                    Ok(PageHeader::read_thrift_without_stats(&mut prot)?)
+                }
             }
             Some(page_crypto_context) => {
                 let data_decryptor = page_crypto_context.data_decryptor();
@@ -768,8 +842,12 @@ impl SerializedPageReaderContext {
                     ))
                 })?;
 
-                let mut prot = TCompactSliceInputProtocol::new(buf.as_slice());
-                Ok(PageHeader::read_from_in_protocol(&mut prot)?)
+                let mut prot = ThriftSliceInputProtocol::new(buf.as_slice());
+                if self.read_stats {
+                    Ok(PageHeader::read_thrift(&mut prot)?)
+                } else {
+                    Ok(PageHeader::read_thrift_without_stats(&mut prot)?)
+                }
             }
         }
     }
@@ -813,8 +891,8 @@ impl<R: ChunkReader> Iterator for SerializedPageReader<R> {
     }
 }
 
-fn verify_page_header_len(header_len: usize, remaining_bytes: usize) -> Result<()> {
-    if header_len > remaining_bytes {
+fn verify_page_header_len(header_len: usize, remaining_bytes: u64) -> Result<()> {
+    if header_len as u64 > remaining_bytes {
         return Err(eof_err!("Invalid page header"));
     }
     Ok(())
@@ -823,12 +901,12 @@ fn verify_page_header_len(header_len: usize, remaining_bytes: usize) -> Result<(
 fn verify_page_size(
     compressed_size: i32,
     uncompressed_size: i32,
-    remaining_bytes: usize,
+    remaining_bytes: u64,
 ) -> Result<()> {
     // The page's compressed size should not exceed the remaining bytes that are
     // available to read. The page's uncompressed size is the expected size
     // after decompression, which can never be negative.
-    if compressed_size < 0 || compressed_size as usize > remaining_bytes || uncompressed_size < 0 {
+    if compressed_size < 0 || compressed_size as u64 > remaining_bytes || uncompressed_size < 0 {
         return Err(eof_err!("Invalid page header"));
     }
     Ok(())
@@ -849,7 +927,7 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                         return Ok(None);
                     }
 
-                    let mut read = self.reader.get_read(*offset as u64)?;
+                    let mut read = self.reader.get_read(*offset)?;
                     let header = if let Some(header) = next_page_header.take() {
                         *header
                     } else {
@@ -860,8 +938,8 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                             *require_dictionary,
                         )?;
                         verify_page_header_len(header_len, *remaining)?;
-                        *offset += header_len;
-                        *remaining -= header_len;
+                        *offset += header_len as u64;
+                        *remaining -= header_len as u64;
                         header
                     };
                     verify_page_size(
@@ -870,23 +948,15 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                         *remaining,
                     )?;
                     let data_len = header.compressed_page_size as usize;
-                    *offset += data_len;
-                    *remaining -= data_len;
+                    let data_start = *offset;
+                    *offset += data_len as u64;
+                    *remaining -= data_len as u64;
 
-                    if header.type_ == PageType::INDEX_PAGE {
+                    if header.r#type == PageType::INDEX_PAGE {
                         continue;
                     }
 
-                    let mut buffer = Vec::with_capacity(data_len);
-                    let read = read.take(data_len as u64).read_to_end(&mut buffer)?;
-
-                    if read != data_len {
-                        return Err(eof_err!(
-                            "Expected to read {} bytes of page, read only {}",
-                            data_len,
-                            read
-                        ));
-                    }
+                    let buffer = self.reader.get_bytes(data_start, data_len)?;
 
                     let buffer =
                         self.context
@@ -894,7 +964,7 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
 
                     let page = decode_page(
                         header,
-                        Bytes::from(buffer),
+                        buffer,
                         self.physical_type,
                         self.decompressor.as_mut(),
                     )?;
@@ -971,7 +1041,7 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                             continue;
                         }
                     } else {
-                        let mut read = self.reader.get_read(*offset as u64)?;
+                        let mut read = self.reader.get_read(*offset)?;
                         let (header_len, header) = Self::read_page_header_len(
                             &self.context,
                             &mut read,
@@ -979,8 +1049,8 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                             *require_dictionary,
                         )?;
                         verify_page_header_len(header_len, *remaining_bytes)?;
-                        *offset += header_len;
-                        *remaining_bytes -= header_len;
+                        *offset += header_len as u64;
+                        *remaining_bytes -= header_len as u64;
                         let page_meta = if let Ok(page_meta) = (&header).try_into() {
                             Ok(Some(page_meta))
                         } else {
@@ -1038,10 +1108,10 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                         *remaining_bytes,
                     )?;
                     // The next page header has already been peeked, so just advance the offset
-                    *offset += buffered_header.compressed_page_size as usize;
-                    *remaining_bytes -= buffered_header.compressed_page_size as usize;
+                    *offset += buffered_header.compressed_page_size as u64;
+                    *remaining_bytes -= buffered_header.compressed_page_size as u64;
                 } else {
-                    let mut read = self.reader.get_read(*offset as u64)?;
+                    let mut read = self.reader.get_read(*offset)?;
                     let (header_len, header) = Self::read_page_header_len(
                         &self.context,
                         &mut read,
@@ -1054,9 +1124,9 @@ impl<R: ChunkReader> PageReader for SerializedPageReader<R> {
                         header.uncompressed_page_size,
                         *remaining_bytes,
                     )?;
-                    let data_page_size = header.compressed_page_size as usize;
-                    *offset += header_len + data_page_size;
-                    *remaining_bytes -= header_len + data_page_size;
+                    let data_page_size = header.compressed_page_size as u64;
+                    *offset += header_len as u64 + data_page_size;
+                    *remaining_bytes -= header_len as u64 + data_page_size;
                 }
                 if *require_dictionary {
                     *require_dictionary = false;
@@ -1100,14 +1170,16 @@ mod tests {
 
     use bytes::Buf;
 
+    use crate::file::page_index::column_index::{
+        ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex,
+    };
     use crate::file::properties::{EnabledStatistics, WriterProperties};
-    use crate::format::BoundaryOrder;
 
-    use crate::basic::{self, ColumnOrder, SortOrder};
+    use crate::basic::{self, BoundaryOrder, ColumnOrder, Encoding, SortOrder};
     use crate::column::reader::ColumnReader;
     use crate::data_type::private::ParquetValueType;
     use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type};
-    use crate::file::page_index::index::{Index, NativeIndex};
+    use crate::file::metadata::thrift::DataPageHeaderV2;
     #[allow(deprecated)]
     use crate::file::page_index::index_reader::{read_columns_indexes, read_offset_indexes};
     use crate::file::writer::SerializedFileWriter;
@@ -1117,6 +1189,72 @@ mod tests {
 
     use super::*;
 
+    #[test]
+    fn test_decode_page_invalid_offset() {
+        // The v2 header declares 11 bytes of definition levels for a page of
+        // only 10 bytes; the test expects `decode_page` to reject it.
+        let v2_header = DataPageHeaderV2 {
+            num_values: 0,
+            num_nulls: 0,
+            num_rows: 0,
+            encoding: Encoding::PLAIN,
+            definition_levels_byte_length: 11,
+            repetition_levels_byte_length: 0,
+            is_compressed: None,
+            statistics: None,
+        };
+        let page_header = PageHeader {
+            r#type: PageType::DATA_PAGE_V2,
+            uncompressed_page_size: 10,
+            compressed_page_size: 10,
+            data_page_header: None,
+            index_page_header: None,
+            dictionary_page_header: None,
+            crc: None,
+            data_page_header_v2: Some(v2_header),
+        };
+
+        let result = decode_page(page_header, Bytes::new(), Type::INT32, None);
+        let message = result.unwrap_err().to_string();
+        assert!(message.contains("DataPage v2 header contains implausible values"));
+    }
+
+    #[test]
+    fn test_decode_unsupported_page() {
+        // An INDEX_PAGE header with no type specific sub-header attached.
+        let index_page = PageHeader {
+            r#type: PageType::INDEX_PAGE,
+            uncompressed_page_size: 10,
+            compressed_page_size: 10,
+            data_page_header: None,
+            index_page_header: None,
+            dictionary_page_header: None,
+            crc: None,
+            data_page_header_v2: None,
+        };
+        let empty = Bytes::new();
+        let result = decode_page(index_page.clone(), empty.clone(), Type::INT32, None);
+        let message = result.unwrap_err().to_string();
+        assert_eq!(message, "Parquet error: Page type INDEX_PAGE is not supported");
+
+        // Same header, now with a v2 data page header whose definition level
+        // length (11) is larger than the declared page size (10).
+        let mut with_v2 = index_page;
+        with_v2.data_page_header_v2 = Some(DataPageHeaderV2 {
+            num_values: 0,
+            num_nulls: 0,
+            num_rows: 0,
+            encoding: Encoding::PLAIN,
+            definition_levels_byte_length: 11,
+            repetition_levels_byte_length: 0,
+            is_compressed: None,
+            statistics: None,
+        });
+        let result = decode_page(with_v2, empty, Type::INT32, None);
+        let message = result.unwrap_err().to_string();
+        assert!(message.contains("DataPage v2 header contains implausible values"));
+    }
+
     #[test]
     fn test_cursor_and_file_has_the_same_behaviour() {
         let mut buf: Vec<u8> = Vec::new();
@@ -1393,7 +1531,7 @@ mod tests {
                     assert_eq!(def_levels_byte_len, 2);
                     assert_eq!(rep_levels_byte_len, 0);
                     assert!(is_compressed);
-                    assert!(statistics.is_some());
+                    assert!(statistics.is_none()); // page stats are no longer read
                     true
                 }
                 _ => false,
@@ -1495,7 +1633,7 @@ mod tests {
                     assert_eq!(def_levels_byte_len, 2);
                     assert_eq!(rep_levels_byte_len, 0);
                     assert!(is_compressed);
-                    assert!(statistics.is_some());
+                    assert!(statistics.is_none()); // page stats are no longer read
                     true
                 }
                 _ => false,
@@ -1652,9 +1790,9 @@ mod tests {
                             ..
                         } => {
                             if let Some(page) = dictionary_page {
-                                assert_eq!(page.offset as usize, page_offset);
+                                assert_eq!(page.offset as u64, page_offset);
                             } else if let Some(page) = page_locations.front() {
-                                assert_eq!(page.offset as usize, page_offset);
+                                assert_eq!(page.offset as u64, page_offset);
                             } else {
                                 unreachable!()
                             }
@@ -1735,7 +1873,10 @@ mod tests {
     fn test_file_reader_optional_metadata() {
         // file with optional metadata: bloom filters, encoding stats, column index and offset index.
         let file = get_test_file("data_index_bloom_encoding_stats.parquet");
-        let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
+        let options = ReadOptionsBuilder::new()
+            .with_encoding_stats_as_mask(false)
+            .build();
+        let file_reader = Arc::new(SerializedFileReader::new_with_options(file, options).unwrap());
 
         let row_group_metadata = file_reader.metadata.row_group(0);
         let col0_metadata = row_group_metadata.column(0);
@@ -1759,6 +1900,103 @@ mod tests {
         assert_eq!(col0_metadata.offset_index_length().unwrap(), 11);
     }
 
+    #[test]
+    fn test_file_reader_page_stats_mask() {
+        // Request page encoding statistics in their bitmask form.
+        let options = ReadOptionsBuilder::new()
+            .with_encoding_stats_as_mask(true)
+            .build();
+        let file = get_test_file("alltypes_tiny_pages.parquet");
+        let reader = SerializedFileReader::new_with_options(file, options).unwrap();
+        let file_reader = Arc::new(reader);
+
+        let row_group = file_reader.metadata.row_group(0);
+
+        // Pairs of (column index, the only encoding its mask should report).
+        let expected = [(0, Encoding::PLAIN), (2, Encoding::PLAIN_DICTIONARY)];
+        for (column_idx, encoding) in expected {
+            let mask = row_group
+                .column(column_idx)
+                .page_encoding_stats_mask()
+                .unwrap();
+            assert!(mask.is_only(encoding));
+        }
+    }
+
+    #[test]
+    fn test_file_reader_page_stats_skipped() {
+        let file = get_test_file("alltypes_tiny_pages.parquet");
+
+        // Policy 1: skip encoding stats and column statistics for every column.
+        let skip_all = ReadOptionsBuilder::new()
+            .with_encoding_stats_policy(ParquetStatisticsPolicy::SkipAll)
+            .with_column_stats_policy(ParquetStatisticsPolicy::SkipAll)
+            .build();
+        let handle = file.try_clone().unwrap();
+        let reader = Arc::new(SerializedFileReader::new_with_options(handle, skip_all).unwrap());
+
+        let row_group = reader.metadata.row_group(0);
+        for column in row_group.columns() {
+            assert!(column.page_encoding_stats().is_none());
+            assert!(column.page_encoding_stats_mask().is_none());
+            assert!(column.statistics().is_none());
+        }
+
+        // Policy 2: keep both kinds of statistics for column 0 only, with the
+        // encoding stats requested as a mask.
+        let keep_one = ReadOptionsBuilder::new()
+            .with_encoding_stats_as_mask(true)
+            .with_encoding_stats_policy(ParquetStatisticsPolicy::skip_except(&[0]))
+            .with_column_stats_policy(ParquetStatisticsPolicy::skip_except(&[0]))
+            .build();
+        let handle = file.try_clone().unwrap();
+        let reader = Arc::new(SerializedFileReader::new_with_options(handle, keep_one).unwrap());
+
+        let row_group = reader.metadata.row_group(0);
+        for (idx, column) in row_group.columns().iter().enumerate() {
+            let kept = idx == 0;
+            assert!(column.page_encoding_stats().is_none());
+            assert_eq!(column.page_encoding_stats_mask().is_some(), kept);
+            assert_eq!(column.statistics().is_some(), kept);
+        }
+    }
+
+    #[test]
+    fn test_file_reader_size_stats_skipped() {
+        let file = get_test_file("repeated_primitive_no_list.parquet");
+
+        // Policy 1: skip the size statistics of every column.
+        let skip_all = ReadOptionsBuilder::new()
+            .with_size_stats_policy(ParquetStatisticsPolicy::SkipAll)
+            .build();
+        let handle = file.try_clone().unwrap();
+        let reader = Arc::new(SerializedFileReader::new_with_options(handle, skip_all).unwrap());
+
+        let row_group = reader.metadata.row_group(0);
+        for column in row_group.columns() {
+            assert!(column.repetition_level_histogram().is_none());
+            assert!(column.definition_level_histogram().is_none());
+            assert!(column.unencoded_byte_array_data_bytes().is_none());
+        }
+
+        // Policy 2: keep size statistics for column 1 only (the encoding stats
+        // mask flag is set as well, but is not inspected here).
+        let keep_one = ReadOptionsBuilder::new()
+            .with_encoding_stats_as_mask(true)
+            .with_size_stats_policy(ParquetStatisticsPolicy::skip_except(&[1]))
+            .build();
+        let handle = file.try_clone().unwrap();
+        let reader = Arc::new(SerializedFileReader::new_with_options(handle, keep_one).unwrap());
+
+        let row_group = reader.metadata.row_group(0);
+        for (idx, column) in row_group.columns().iter().enumerate() {
+            let kept = idx == 1;
+            assert_eq!(column.repetition_level_histogram().is_some(), kept);
+            assert_eq!(column.definition_level_histogram().is_some(), kept);
+            assert_eq!(column.unencoded_byte_array_data_bytes().is_some(), kept);
+        }
+    }
+
     #[test]
     fn test_file_reader_with_no_filter() -> Result<()> {
         let test_file = get_test_file("alltypes_plain.parquet");
@@ -1874,7 +2112,7 @@ mod tests {
         let ret = SerializedFileReader::new(Bytes::copy_from_slice(&data));
         assert_eq!(
             ret.err().unwrap().to_string(),
-            "Parquet error: Could not parse metadata: bad data"
+            "Parquet error: Received empty union from remote ColumnOrder"
         );
     }
 
@@ -1911,21 +2149,19 @@ mod tests {
 
         // only one row group
         assert_eq!(column_index.len(), 1);
-        let index = if let Index::BYTE_ARRAY(index) = &column_index[0][0] {
+        let index = if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][0] {
             index
         } else {
             unreachable!()
         };
 
         assert_eq!(index.boundary_order, BoundaryOrder::ASCENDING);
-        let index_in_pages = &index.indexes;
 
         //only one page group
-        assert_eq!(index_in_pages.len(), 1);
+        assert_eq!(index.num_pages(), 1);
 
-        let page0 = &index_in_pages[0];
-        let min = page0.min.as_ref().unwrap();
-        let max = page0.max.as_ref().unwrap();
+        let min = index.min_value(0).unwrap();
+        let max = index.max_value(0).unwrap();
         assert_eq!(b"Hello", min.as_bytes());
         assert_eq!(b"today", max.as_bytes());
 
@@ -1990,7 +2226,7 @@ mod tests {
         let boundary_order = &column_index[0][0].get_boundary_order();
         assert!(boundary_order.is_some());
         matches!(boundary_order.unwrap(), BoundaryOrder::UNORDERED);
-        if let Index::INT32(index) = &column_index[0][0] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][0] {
             check_native_page_index(
                 index,
                 325,
@@ -2003,15 +2239,15 @@ mod tests {
         };
         //col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0]
         assert!(&column_index[0][1].is_sorted());
-        if let Index::BOOLEAN(index) = &column_index[0][1] {
-            assert_eq!(index.indexes.len(), 82);
+        if let ColumnIndexMetaData::BOOLEAN(index) = &column_index[0][1] {
+            assert_eq!(index.num_pages(), 82);
             assert_eq!(row_group_offset_indexes[1].page_locations.len(), 82);
         } else {
             unreachable!()
         };
         //col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
         assert!(&column_index[0][2].is_sorted());
-        if let Index::INT32(index) = &column_index[0][2] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][2] {
             check_native_page_index(
                 index,
                 325,
@@ -2024,7 +2260,7 @@ mod tests {
         };
         //col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
         assert!(&column_index[0][3].is_sorted());
-        if let Index::INT32(index) = &column_index[0][3] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][3] {
             check_native_page_index(
                 index,
                 325,
@@ -2037,7 +2273,7 @@ mod tests {
         };
         //col5->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
         assert!(&column_index[0][4].is_sorted());
-        if let Index::INT32(index) = &column_index[0][4] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][4] {
             check_native_page_index(
                 index,
                 325,
@@ -2050,7 +2286,7 @@ mod tests {
         };
         //col6->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, num_nulls: 0]
         assert!(!&column_index[0][5].is_sorted());
-        if let Index::INT64(index) = &column_index[0][5] {
+        if let ColumnIndexMetaData::INT64(index) = &column_index[0][5] {
             check_native_page_index(
                 index,
                 528,
@@ -2063,7 +2299,7 @@ mod tests {
         };
         //col7->float_col: FLOAT UNCOMPRESSED DO:0 FPO:223924 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 9.9, num_nulls: 0]
         assert!(&column_index[0][6].is_sorted());
-        if let Index::FLOAT(index) = &column_index[0][6] {
+        if let ColumnIndexMetaData::FLOAT(index) = &column_index[0][6] {
             check_native_page_index(
                 index,
                 325,
@@ -2076,7 +2312,7 @@ mod tests {
         };
         //col8->double_col: DOUBLE UNCOMPRESSED DO:0 FPO:261249 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 90.89999999999999, num_nulls: 0]
         assert!(!&column_index[0][7].is_sorted());
-        if let Index::DOUBLE(index) = &column_index[0][7] {
+        if let ColumnIndexMetaData::DOUBLE(index) = &column_index[0][7] {
             check_native_page_index(
                 index,
                 528,
@@ -2089,8 +2325,8 @@ mod tests {
         };
         //col9->date_string_col: BINARY UNCOMPRESSED DO:0 FPO:332847 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 01/01/09, max: 12/31/10, num_nulls: 0]
         assert!(!&column_index[0][8].is_sorted());
-        if let Index::BYTE_ARRAY(index) = &column_index[0][8] {
-            check_native_page_index(
+        if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][8] {
+            check_byte_array_page_index(
                 index,
                 974,
                 get_row_group_min_max_bytes(row_group_metadata, 8),
@@ -2102,8 +2338,8 @@ mod tests {
         };
         //col10->string_col: BINARY UNCOMPRESSED DO:0 FPO:444795 SZ:45298/45298/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0]
         assert!(&column_index[0][9].is_sorted());
-        if let Index::BYTE_ARRAY(index) = &column_index[0][9] {
-            check_native_page_index(
+        if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][9] {
+            check_byte_array_page_index(
                 index,
                 352,
                 get_row_group_min_max_bytes(row_group_metadata, 9),
@@ -2116,14 +2352,14 @@ mod tests {
         //col11->timestamp_col: INT96 UNCOMPRESSED DO:0 FPO:490093 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[num_nulls: 0, min/max not defined]
         //Notice: min_max values for each page for this col not exits.
         assert!(!&column_index[0][10].is_sorted());
-        if let Index::NONE = &column_index[0][10] {
+        if let ColumnIndexMetaData::NONE = &column_index[0][10] {
             assert_eq!(row_group_offset_indexes[10].page_locations.len(), 974);
         } else {
             unreachable!()
         };
         //col12->year: INT32 UNCOMPRESSED DO:0 FPO:602041 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 2009, max: 2010, num_nulls: 0]
         assert!(&column_index[0][11].is_sorted());
-        if let Index::INT32(index) = &column_index[0][11] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][11] {
             check_native_page_index(
                 index,
                 325,
@@ -2136,7 +2372,7 @@ mod tests {
         };
         //col13->month: INT32 UNCOMPRESSED DO:0 FPO:639366 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 1, max: 12, num_nulls: 0]
         assert!(!&column_index[0][12].is_sorted());
-        if let Index::INT32(index) = &column_index[0][12] {
+        if let ColumnIndexMetaData::INT32(index) = &column_index[0][12] {
             check_native_page_index(
                 index,
                 325,
@@ -2150,17 +2386,31 @@ mod tests {
     }
 
     fn check_native_page_index<T: ParquetValueType>(
-        row_group_index: &NativeIndex<T>,
+        row_group_index: &PrimitiveColumnIndex<T>,
         page_size: usize,
         min_max: (&[u8], &[u8]),
         boundary_order: BoundaryOrder,
     ) {
-        assert_eq!(row_group_index.indexes.len(), page_size);
+        assert_eq!(row_group_index.num_pages() as usize, page_size);
         assert_eq!(row_group_index.boundary_order, boundary_order);
-        row_group_index.indexes.iter().all(|x| {
-            x.min.as_ref().unwrap() >= &T::try_from_le_slice(min_max.0).unwrap()
-                && x.max.as_ref().unwrap() <= &T::try_from_le_slice(min_max.1).unwrap()
-        });
+        let lo = &T::try_from_le_slice(min_max.0).unwrap(); // expected lower bound, parsed once
+        let hi = &T::try_from_le_slice(min_max.1).unwrap(); // expected upper bound, parsed once
+        let in_range = |values: &[T]| values.iter().all(|x| x >= lo && x <= hi);
+        assert!(in_range(row_group_index.min_values()) && in_range(row_group_index.max_values()));
+    }
+
+    fn check_byte_array_page_index(
+        row_group_index: &ByteArrayColumnIndex,
+        page_size: usize,
+        min_max: (&[u8], &[u8]),
+        boundary_order: BoundaryOrder,
+    ) {
+        // Page count, boundary order and every page minimum must match expectations.
+        let pages = row_group_index.num_pages() as usize;
+        assert_eq!(pages, page_size);
+        assert_eq!(row_group_index.boundary_order, boundary_order);
+        let mut minima = (0..pages).map(|i| row_group_index.min_value(i).unwrap());
+        assert!(minima.all(|x| x >= min_max.0 && x <= min_max.1));
     }
 
     fn get_row_group_min_max_bytes(r: &RowGroupMetaData, col_num: usize) -> (&[u8], &[u8]) {
@@ -2401,12 +2651,11 @@ mod tests {
         assert_eq!(c.len(), 1);
 
         match &c[0] {
-            Index::FIXED_LEN_BYTE_ARRAY(v) => {
-                assert_eq!(v.indexes.len(), 1);
-                let page_idx = &v.indexes[0];
-                assert_eq!(page_idx.null_count.unwrap(), 1);
-                assert_eq!(page_idx.min.as_ref().unwrap().as_ref(), &[0; 11]);
-                assert_eq!(page_idx.max.as_ref().unwrap().as_ref(), &[5; 11]);
+            ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(v) => {
+                assert_eq!(v.num_pages(), 1);
+                assert_eq!(v.null_count(0).unwrap(), 1);
+                assert_eq!(v.min_value(0).unwrap(), &[0; 11]);
+                assert_eq!(v.max_value(0).unwrap(), &[5; 11]);
             }
             _ => unreachable!(),
         }
@@ -2507,8 +2756,8 @@ mod tests {
         }
         let file_metadata = file_writer.close().unwrap();
 
-        assert_eq!(file_metadata.num_rows, 25);
-        assert_eq!(file_metadata.row_groups.len(), 5);
+        assert_eq!(file_metadata.file_metadata().num_rows(), 25);
+        assert_eq!(file_metadata.num_row_groups(), 5);
 
         // read only the 3rd row group
         let read_options = ReadOptionsBuilder::new()
@@ -2537,11 +2786,11 @@ mod tests {
 
         // test that we got the index matching the row group
         match pg_idx {
-            Index::INT32(int_idx) => {
+            ColumnIndexMetaData::INT32(int_idx) => {
                 let min = col_stats.min_bytes_opt().unwrap().get_i32_le();
                 let max = col_stats.max_bytes_opt().unwrap().get_i32_le();
-                assert_eq!(int_idx.indexes[0].min(), Some(min).as_ref());
-                assert_eq!(int_idx.indexes[0].max(), Some(max).as_ref());
+                assert_eq!(int_idx.min_value(0), Some(min).as_ref());
+                assert_eq!(int_idx.max_value(0), Some(max).as_ref());
             }
             _ => panic!("wrong stats type"),
         }
@@ -2582,11 +2831,11 @@ mod tests {
 
             // test that we got the index matching the row group
             match pg_idx {
-                Index::INT32(int_idx) => {
+                ColumnIndexMetaData::INT32(int_idx) => {
                     let min = col_stats.min_bytes_opt().unwrap().get_i32_le();
                     let max = col_stats.max_bytes_opt().unwrap().get_i32_le();
-                    assert_eq!(int_idx.indexes[0].min(), Some(min).as_ref());
-                    assert_eq!(int_idx.indexes[0].max(), Some(max).as_ref());
+                    assert_eq!(int_idx.min_value(0), Some(min).as_ref());
+                    assert_eq!(int_idx.max_value(0), Some(max).as_ref());
                 }
                 _ => panic!("wrong stats type"),
             }
@@ -2598,4 +2847,51 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn test_reuse_schema() {
+        // First pass: read normally, keep the parsed metadata and its schema.
+        let file = get_test_file("alltypes_plain.parquet");
+        let first = SerializedFileReader::new(file.try_clone().unwrap()).unwrap();
+        let schema = first.metadata().file_metadata().schema_descr_ptr();
+        let expected = first.metadata;
+
+        // Second pass: hand that schema to the reader through the options.
+        let options = ReadOptionsBuilder::new().with_parquet_schema(schema).build();
+        let second = SerializedFileReader::new_with_options(file, options).unwrap();
+        let actual = second.metadata;
+
+        assert_eq!(expected.as_ref(), actual.as_ref());
+        // Should have used the same schema instance
+        let expected_schema = expected.file_metadata().schema_descr_ptr();
+        let actual_schema = actual.file_metadata().schema_descr_ptr();
+        assert!(Arc::ptr_eq(&expected_schema, &actual_schema));
+    }
+
+    #[test]
+    fn test_read_unknown_logical_type() {
+        let file = get_test_file("unknown-logical-type.parquet");
+        let reader = SerializedFileReader::new(file).expect("Error opening file");
+
+        // Column 0 has a known logical type; column 1 has one that is reported
+        // as `_Unknown` with `field_id: 2555`.
+        let schema = reader.metadata().file_metadata().schema_descr();
+        let known = schema.column(0);
+        let unknown = schema.column(1);
+        assert_eq!(known.logical_type_ref(), Some(&basic::LogicalType::String));
+        assert_eq!(
+            unknown.logical_type_ref(),
+            Some(&basic::LogicalType::_Unknown { field_id: 2555 })
+        );
+        assert_eq!(unknown.physical_type(), Type::BYTE_ARRAY);
+
+        // Rows must still be readable; count them and compare with the footer.
+        let rows = reader
+            .get_row_iter(None)
+            .expect("Failed to create row iterator");
+
+        let mut num_rows = 0;
+        rows.for_each(|_| num_rows += 1);
+        assert_eq!(num_rows, reader.metadata().file_metadata().num_rows());
+    }
 }
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index b7522a76f0fc..a813e82d13f2 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -41,12 +41,11 @@
 
 use std::fmt;
 
-use crate::format::Statistics as TStatistics;
-
 use crate::basic::Type;
 use crate::data_type::private::ParquetValueType;
 use crate::data_type::*;
 use crate::errors::{ParquetError, Result};
+use crate::file::metadata::thrift::PageStatistics;
 use crate::util::bit_util::FromBytes;
 
 pub(crate) mod private {
@@ -120,9 +119,9 @@ macro_rules! statistics_enum_func {
 }
 
 /// Converts Thrift definition into `Statistics`.
-pub fn from_thrift(
+pub(crate) fn from_thrift_page_stats(
     physical_type: Type,
-    thrift_stats: Option<TStatistics>,
+    thrift_stats: Option<PageStatistics>,
 ) -> Result<Option<Statistics>> {
     Ok(match thrift_stats {
         Some(stats) => {
@@ -133,8 +132,7 @@ pub fn from_thrift(
 
             if null_count < 0 {
                 return Err(ParquetError::General(format!(
-                    "Statistics null count is negative {}",
-                    null_count
+                    "Statistics null count is negative {null_count}",
                 )));
             }
 
@@ -211,16 +209,22 @@ pub fn from_thrift(
                 ),
                 Type::INT96 => {
                     // INT96 statistics may not be correct, because comparison is signed
-                    // byte-wise, not actual timestamps. It is recommended to ignore
-                    // min/max statistics for INT96 columns.
                     let min = if let Some(data) = min {
-                        assert_eq!(data.len(), 12);
+                        if data.len() != 12 {
+                            return Err(ParquetError::General(
+                                "Incorrect Int96 min statistics".to_string(),
+                            ));
+                        }
                         Some(Int96::try_from_le_slice(&data)?)
                     } else {
                         None
                     };
                     let max = if let Some(data) = max {
-                        assert_eq!(data.len(), 12);
+                        if data.len() != 12 {
+                            return Err(ParquetError::General(
+                                "Incorrect Int96 max statistics".to_string(),
+                            ));
+                        }
                         Some(Int96::try_from_le_slice(&data)?)
                     } else {
                         None
@@ -272,7 +276,7 @@ pub fn from_thrift(
 }
 
 /// Convert Statistics into Thrift definition.
-pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
+pub(crate) fn page_stats_to_thrift(stats: Option<&Statistics>) -> Option<PageStatistics> {
     let stats = stats?;
 
     // record null count if it can fit in i64
@@ -285,7 +289,7 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
         .distinct_count_opt()
         .and_then(|value| i64::try_from(value).ok());
 
-    let mut thrift_stats = TStatistics {
+    let mut thrift_stats = PageStatistics {
         max: None,
         min: None,
         null_count,
@@ -322,15 +326,14 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
 
 /// Strongly typed statistics for a column chunk within a row group.
 ///
-/// This structure is a natively typed, in memory representation of the
-/// [`Statistics`] structure in a parquet file footer. The statistics stored in
+/// This structure is a natively typed, in memory representation of the thrift
+/// `Statistics` structure in a Parquet file footer. The statistics stored in
 /// this structure can be used by query engines to skip decoding pages while
 /// reading parquet data.
 ///
-/// Page level statistics are stored separately, in [NativeIndex].
+/// Page level statistics are stored separately, in [ColumnIndexMetaData].
 ///
-/// [`Statistics`]: crate::format::Statistics
-/// [NativeIndex]: crate::file::page_index::index::NativeIndex
+/// [ColumnIndexMetaData]: crate::file::page_index::column_index::ColumnIndexMetaData
 #[derive(Debug, Clone, PartialEq)]
 pub enum Statistics {
     /// Statistics for Boolean column
@@ -419,36 +422,12 @@ impl Statistics {
         statistics_enum_func![self, is_min_max_backwards_compatible]
     }
 
-    /// Returns optional value of number of distinct values occurring.
-    /// When it is `None`, the value should be ignored.
-    #[deprecated(since = "53.0.0", note = "Use `distinct_count_opt` method instead")]
-    pub fn distinct_count(&self) -> Option<u64> {
-        self.distinct_count_opt()
-    }
-
     /// Returns optional value of number of distinct values occurring.
     /// When it is `None`, the value should be ignored.
     pub fn distinct_count_opt(&self) -> Option<u64> {
         statistics_enum_func![self, distinct_count]
     }
 
-    /// Returns number of null values for the column.
-    /// Note that this includes all nulls when column is part of the complex type.
-    ///
-    /// Note this API returns 0 if the null count is not available.
-    #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method instead")]
-    pub fn null_count(&self) -> u64 {
-        // 0 to remain consistent behavior prior to `null_count_opt`
-        self.null_count_opt().unwrap_or(0)
-    }
-
-    /// Returns `true` if statistics collected any null values, `false` otherwise.
-    #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method instead")]
-    #[allow(deprecated)]
-    pub fn has_nulls(&self) -> bool {
-        self.null_count() > 0
-    }
-
     /// Returns number of null values for the column, if known.
     /// Note that this includes all nulls when column is part of the complex type.
     ///
@@ -459,16 +438,6 @@ impl Statistics {
         statistics_enum_func![self, null_count_opt]
     }
 
-    /// Whether or not min and max values are set.
-    /// Normally both min/max values will be set to `Some(value)` or `None`.
-    #[deprecated(
-        since = "53.0.0",
-        note = "Use `min_bytes_opt` and `max_bytes_opt` methods instead"
-    )]
-    pub fn has_min_max_set(&self) -> bool {
-        statistics_enum_func![self, _internal_has_min_max_set]
-    }
-
     /// Returns `true` if the min value is set, and is an exact min value.
     pub fn min_is_exact(&self) -> bool {
         statistics_enum_func![self, min_is_exact]
@@ -484,25 +453,11 @@ impl Statistics {
         statistics_enum_func![self, min_bytes_opt]
     }
 
-    /// Returns slice of bytes that represent min value.
-    /// Panics if min value is not set.
-    #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
-    pub fn min_bytes(&self) -> &[u8] {
-        self.min_bytes_opt().unwrap()
-    }
-
     /// Returns slice of bytes that represent max value, if max value is known.
     pub fn max_bytes_opt(&self) -> Option<&[u8]> {
         statistics_enum_func![self, max_bytes_opt]
     }
 
-    /// Returns slice of bytes that represent max value.
-    /// Panics if max value is not set.
-    #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
-    pub fn max_bytes(&self) -> &[u8] {
-        self.max_bytes_opt().unwrap()
-    }
-
     /// Returns physical type associated with statistics.
     pub fn physical_type(&self) -> Type {
         match self {
@@ -560,7 +515,7 @@ pub struct ValueStatistics<T> {
     is_min_max_backwards_compatible: bool,
 }
 
-impl<T: ParquetValueType> ValueStatistics<T> {
+impl<T> ValueStatistics<T> {
     /// Creates new typed statistics.
     pub fn new(
         min: Option<T>,
@@ -615,69 +570,16 @@ impl<T: ParquetValueType> ValueStatistics<T> {
         }
     }
 
-    /// Returns min value of the statistics.
-    ///
-    /// Panics if min value is not set, e.g. all values are `null`.
-    /// Use `has_min_max_set` method to check that.
-    #[deprecated(since = "53.0.0", note = "Use `min_opt` instead")]
-    pub fn min(&self) -> &T {
-        self.min.as_ref().unwrap()
-    }
-
     /// Returns min value of the statistics, if known.
     pub fn min_opt(&self) -> Option<&T> {
         self.min.as_ref()
     }
 
-    /// Returns max value of the statistics.
-    ///
-    /// Panics if max value is not set, e.g. all values are `null`.
-    /// Use `has_min_max_set` method to check that.
-    #[deprecated(since = "53.0.0", note = "Use `max_opt` instead")]
-    pub fn max(&self) -> &T {
-        self.max.as_ref().unwrap()
-    }
-
     /// Returns max value of the statistics, if known.
     pub fn max_opt(&self) -> Option<&T> {
         self.max.as_ref()
     }
 
-    /// Returns min value as bytes of the statistics, if min value is known.
-    pub fn min_bytes_opt(&self) -> Option<&[u8]> {
-        self.min_opt().map(AsBytes::as_bytes)
-    }
-
-    /// Returns min value as bytes of the statistics.
-    ///
-    /// Panics if min value is not set, use `has_min_max_set` method to check
-    /// if values are set.
-    #[deprecated(since = "53.0.0", note = "Use `min_bytes_opt` instead")]
-    pub fn min_bytes(&self) -> &[u8] {
-        self.min_bytes_opt().unwrap()
-    }
-
-    /// Returns max value as bytes of the statistics, if max value is known.
-    pub fn max_bytes_opt(&self) -> Option<&[u8]> {
-        self.max_opt().map(AsBytes::as_bytes)
-    }
-
-    /// Returns max value as bytes of the statistics.
-    ///
-    /// Panics if max value is not set, use `has_min_max_set` method to check
-    /// if values are set.
-    #[deprecated(since = "53.0.0", note = "Use `max_bytes_opt` instead")]
-    pub fn max_bytes(&self) -> &[u8] {
-        self.max_bytes_opt().unwrap()
-    }
-
-    /// Whether or not min and max values are set.
-    /// Normally both min/max values will be set to `Some(value)` or `None`.
-    #[deprecated(since = "53.0.0", note = "Use `min_opt` and `max_opt` methods instead")]
-    pub fn has_min_max_set(&self) -> bool {
-        self._internal_has_min_max_set()
-    }
-
     /// Whether or not min and max values are set.
     /// Normally both min/max values will be set to `Some(value)` or `None`.
     pub(crate) fn _internal_has_min_max_set(&self) -> bool {
@@ -699,14 +601,6 @@ impl<T: ParquetValueType> ValueStatistics<T> {
         self.distinct_count
     }
 
-    /// Returns number of null values for the column.
-    /// Note that this includes all nulls when column is part of the complex type.
-    #[deprecated(since = "53.0.0", note = "Use `null_count_opt` method instead")]
-    pub fn null_count(&self) -> u64 {
-        // 0 to remain consistent behavior prior to `null_count_opt`
-        self.null_count_opt().unwrap_or(0)
-    }
-
     /// Returns null count.
     pub fn null_count_opt(&self) -> Option<u64> {
         self.null_count
@@ -732,6 +626,18 @@ impl<T: ParquetValueType> ValueStatistics<T> {
     }
 }
 
+impl<T: AsBytes> ValueStatistics<T> {
+    /// Byte view of the minimum value, or `None` when no minimum is recorded.
+    pub fn min_bytes_opt(&self) -> Option<&[u8]> {
+        self.min_opt().map(|value| value.as_bytes())
+    }
+
+    /// Byte view of the maximum value, or `None` when no maximum is recorded.
+    pub fn max_bytes_opt(&self) -> Option<&[u8]> {
+        self.max_opt().map(|value| value.as_bytes())
+    }
+}
+
 impl<T: ParquetValueType> fmt::Display for ValueStatistics<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{{")?;
@@ -804,7 +710,7 @@ mod tests {
     #[test]
     #[should_panic(expected = "General(\"Statistics null count is negative -10\")")]
     fn test_statistics_negative_null_count() {
-        let thrift_stats = TStatistics {
+        let thrift_stats = PageStatistics {
             max: None,
             min: None,
             null_count: Some(-10),
@@ -815,13 +721,16 @@ mod tests {
             is_min_value_exact: None,
         };
 
-        from_thrift(Type::INT32, Some(thrift_stats)).unwrap();
+        from_thrift_page_stats(Type::INT32, Some(thrift_stats)).unwrap();
     }
 
     #[test]
     fn test_statistics_thrift_none() {
-        assert_eq!(from_thrift(Type::INT32, None).unwrap(), None);
-        assert_eq!(from_thrift(Type::BYTE_ARRAY, None).unwrap(), None);
+        assert_eq!(from_thrift_page_stats(Type::INT32, None).unwrap(), None);
+        assert_eq!(
+            from_thrift_page_stats(Type::BYTE_ARRAY, None).unwrap(),
+            None
+        );
     }
 
     #[test]
@@ -966,8 +875,11 @@ mod tests {
         // Helper method to check statistics conversion.
         fn check_stats(stats: Statistics) {
             let tpe = stats.physical_type();
-            let thrift_stats = to_thrift(Some(&stats));
-            assert_eq!(from_thrift(tpe, thrift_stats).unwrap(), Some(stats));
+            let thrift_stats = page_stats_to_thrift(Some(&stats));
+            assert_eq!(
+                from_thrift_page_stats(tpe, thrift_stats).unwrap(),
+                Some(stats)
+            );
         }
 
         check_stats(Statistics::boolean(
@@ -1103,7 +1015,7 @@ mod tests {
     fn test_count_encoding_distinct_too_large() {
         // statistics are stored using i64, so test trying to store larger values
         let statistics = make_bool_stats(Some(u64::MAX), Some(100));
-        let thrift_stats = to_thrift(Some(&statistics)).unwrap();
+        let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap();
         assert_eq!(thrift_stats.distinct_count, None); // can't store u64 max --> null
         assert_eq!(thrift_stats.null_count, Some(100));
     }
@@ -1112,18 +1024,24 @@ mod tests {
     fn test_count_encoding_null_too_large() {
         // statistics are stored using i64, so test trying to store larger values
         let statistics = make_bool_stats(Some(100), Some(u64::MAX));
-        let thrift_stats = to_thrift(Some(&statistics)).unwrap();
+        let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap();
         assert_eq!(thrift_stats.distinct_count, Some(100));
         assert_eq!(thrift_stats.null_count, None); // can' store u64 max --> null
     }
 
     #[test]
     fn test_count_decoding_null_invalid() {
-        let tstatistics = TStatistics {
+        let tstatistics = PageStatistics {
             null_count: Some(-42),
-            ..Default::default()
+            max: None,
+            min: None,
+            distinct_count: None,
+            max_value: None,
+            min_value: None,
+            is_max_value_exact: None,
+            is_min_value_exact: None,
         };
-        let err = from_thrift(Type::BOOLEAN, Some(tstatistics)).unwrap_err();
+        let err = from_thrift_page_stats(Type::BOOLEAN, Some(tstatistics)).unwrap_err();
         assert_eq!(
             err.to_string(),
             "Parquet error: Statistics null count is negative -42"
@@ -1136,14 +1054,14 @@ mod tests {
     fn statistics_count_test(distinct_count: Option<u64>, null_count: Option<u64>) {
         let statistics = make_bool_stats(distinct_count, null_count);
 
-        let thrift_stats = to_thrift(Some(&statistics)).unwrap();
+        let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap();
         assert_eq!(thrift_stats.null_count.map(|c| c as u64), null_count);
         assert_eq!(
             thrift_stats.distinct_count.map(|c| c as u64),
             distinct_count
         );
 
-        let round_tripped = from_thrift(Type::BOOLEAN, Some(thrift_stats))
+        let round_tripped = from_thrift_page_stats(Type::BOOLEAN, Some(thrift_stats))
             .unwrap()
             .unwrap();
         // TODO: remove branch when we no longer support assuming null_count==None in the thrift
@@ -1177,4 +1095,54 @@ mod tests {
             is_min_max_deprecated,
         ))
     }
+
+    #[test]
+    fn test_int96_invalid_statistics() {
+        let mut thrift_stats = PageStatistics {
+            max: None,
+            min: Some((0..13).collect()),
+            null_count: Some(0),
+            distinct_count: None,
+            max_value: None,
+            min_value: None,
+            is_max_value_exact: None,
+            is_min_value_exact: None,
+        };
+
+        let err = from_thrift_page_stats(Type::INT96, Some(thrift_stats.clone())).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Incorrect Int96 min statistics"
+        );
+
+        thrift_stats.min = None;
+        thrift_stats.max = Some((0..13).collect());
+        let err = from_thrift_page_stats(Type::INT96, Some(thrift_stats)).unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Incorrect Int96 max statistics"
+        );
+    }
+
+    // Ensures that we can call ValueStatistics::min_opt from a
+    // generic function without relying on a bound to a private trait.
+    fn generic_statistics_handler<T: std::fmt::Display>(stats: ValueStatistics<T>) -> String {
+        match stats.min_opt() {
+            Some(s) => format!("min: {s}"),
+            None => "min: NA".to_string(),
+        }
+    }
+
+    #[test]
+    fn test_generic_access() {
+        let stats = Statistics::int32(Some(12), Some(45), None, Some(11), false);
+
+        match stats {
+            Statistics::Int32(v) => {
+                let stats_string = generic_statistics_handler(v);
+                assert_eq!(&stats_string, "min: 12");
+            }
+            _ => unreachable!(),
+        }
+    }
 }
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index 31a3344db66c..35948af022f1 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -15,35 +15,34 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Contains file writer API, and provides methods to write row groups and columns by
-//! using row group writers and column writers respectively.
+//! [`SerializedFileWriter`]: Low level Parquet writer API
 
 use crate::bloom_filter::Sbbf;
-use crate::format as parquet;
-use crate::format::{ColumnIndex, OffsetIndex};
-use crate::thrift::TSerializable;
+use crate::file::metadata::thrift::PageHeader;
+use crate::file::page_index::column_index::ColumnIndexMetaData;
+use crate::file::page_index::offset_index::OffsetIndexMetaData;
+use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift};
 use std::fmt::Debug;
 use std::io::{BufWriter, IoSlice, Read};
 use std::{io::Write, sync::Arc};
-use thrift::protocol::TCompactOutputProtocol;
 
 use crate::column::page_encryption::PageEncryptor;
-use crate::column::writer::{get_typed_column_writer_mut, ColumnCloseResult, ColumnWriterImpl};
+use crate::column::writer::{ColumnCloseResult, ColumnWriterImpl, get_typed_column_writer_mut};
 use crate::column::{
     page::{CompressedPage, PageWriteSpec, PageWriter},
-    writer::{get_column_writer, ColumnWriter},
+    writer::{ColumnWriter, get_column_writer},
 };
 use crate::data_type::DataType;
 #[cfg(feature = "encryption")]
 use crate::encryption::encrypt::{
-    get_column_crypto_metadata, FileEncryptionProperties, FileEncryptor,
+    FileEncryptionProperties, FileEncryptor, get_column_crypto_metadata,
 };
 use crate::errors::{ParquetError, Result};
-use crate::file::properties::{BloomFilterPosition, WriterPropertiesPtr};
-use crate::file::reader::ChunkReader;
 #[cfg(feature = "encryption")]
 use crate::file::PARQUET_MAGIC_ENCR_FOOTER;
-use crate::file::{metadata::*, PARQUET_MAGIC};
+use crate::file::properties::{BloomFilterPosition, WriterPropertiesPtr};
+use crate::file::reader::ChunkReader;
+use crate::file::{PARQUET_MAGIC, metadata::*};
 use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, SchemaDescriptor, TypePtr};
 
 /// A wrapper around a [`Write`] that keeps track of the number
@@ -128,8 +127,8 @@ pub type OnCloseRowGroup<'a, W> = Box<
             &'a mut TrackedWrite<W>,
             RowGroupMetaData,
             Vec<Option<Sbbf>>,
-            Vec<Option<ColumnIndex>>,
-            Vec<Option<OffsetIndex>>,
+            Vec<Option<ColumnIndexMetaData>>,
+            Vec<Option<OffsetIndexMetaData>>,
         ) -> Result<()>
         + 'a
         + Send,
@@ -139,7 +138,14 @@ pub type OnCloseRowGroup<'a, W> = Box<
 // Serialized impl for file & row group writers
 
 /// Parquet file writer API.
-/// Provides methods to write row groups sequentially.
+///
+/// This is a low level API for writing Parquet files directly, and handles
+/// tracking the location of file structures such as row groups and column
+/// chunks, and writing the metadata and file footer.
+///
+/// Data is written to row groups using [`SerializedRowGroupWriter`] and
+/// columns using [`SerializedColumnWriter`]. The `SerializedFileWriter` tracks
+/// where all the data is written, and assembles the final file metadata.
 ///
 /// The main workflow should be as following:
 /// - Create file writer, this will open a new file and potentially write some metadata.
@@ -149,13 +155,12 @@ pub type OnCloseRowGroup<'a, W> = Box<
 /// - After all row groups have been written, close the file writer using `close` method.
 pub struct SerializedFileWriter<W: Write> {
     buf: TrackedWrite<W>,
-    schema: TypePtr,
     descr: SchemaDescPtr,
     props: WriterPropertiesPtr,
     row_groups: Vec<RowGroupMetaData>,
     bloom_filters: Vec<Vec<Option<Sbbf>>>,
-    column_indexes: Vec<Vec<Option<ColumnIndex>>>,
-    offset_indexes: Vec<Vec<Option<OffsetIndex>>>,
+    column_indexes: Vec<Vec<Option<ColumnIndexMetaData>>>,
+    offset_indexes: Vec<Vec<Option<OffsetIndexMetaData>>>,
     row_group_index: usize,
     // kv_metadatas will be appended to `props` when `write_metadata`
     kv_metadatas: Vec<KeyValue>,
@@ -189,7 +194,6 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         Self::start_file(&properties, &mut buf)?;
         Ok(Self {
             buf,
-            schema,
             descr: Arc::new(schema_descriptor),
             props: properties,
             row_groups: vec![],
@@ -209,23 +213,25 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         properties: &WriterPropertiesPtr,
         schema_descriptor: &SchemaDescriptor,
     ) -> Result<Option<Arc<FileEncryptor>>> {
-        if let Some(file_encryption_properties) = &properties.file_encryption_properties {
+        if let Some(file_encryption_properties) = properties.file_encryption_properties() {
             file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?;
 
-            Ok(Some(Arc::new(FileEncryptor::new(
-                file_encryption_properties.clone(),
-            )?)))
+            Ok(Some(Arc::new(FileEncryptor::new(Arc::clone(
+                file_encryption_properties,
+            ))?)))
         } else {
             Ok(None)
         }
     }
 
     /// Creates new row group from this file writer.
-    /// In case of IO error or Thrift error, returns `Err`.
     ///
-    /// There can be at most 2^15 row groups in a file; and row groups have
-    /// to be written sequentially. Every time the next row group is requested, the
-    /// previous row group must be finalised and closed using `RowGroupWriter::close` method.
+    /// Note: Parquet files are limited to at most 2^15 row groups in a file; and row groups must
+    /// be written sequentially.
+    ///
+    /// Every time the next row group is requested, the previous row group must
+    /// be finalised and closed using the [`SerializedRowGroupWriter::close`]
+    /// method or an error will be returned.
     pub fn next_row_group(&mut self) -> Result<SerializedRowGroupWriter<'_, W>> {
         self.assert_previous_writer_closed()?;
         let ordinal = self.row_group_index;
@@ -290,7 +296,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
     /// Unlike [`Self::close`] this does not consume self
     ///
     /// Attempting to write after calling finish will result in an error
-    pub fn finish(&mut self) -> Result<parquet::FileMetaData> {
+    pub fn finish(&mut self) -> Result<ParquetMetaData> {
         self.assert_previous_writer_closed()?;
         let metadata = self.write_metadata()?;
         self.buf.flush()?;
@@ -298,7 +304,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
     }
 
     /// Closes and finalises file writer, returning the file metadata.
-    pub fn close(mut self) -> Result<parquet::FileMetaData> {
+    pub fn close(mut self) -> Result<ParquetMetaData> {
         self.finish()
     }
 
@@ -318,8 +324,9 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         Ok(())
     }
 
-    /// Assembles and writes metadata at the end of the file.
-    fn write_metadata(&mut self) -> Result<parquet::FileMetaData> {
+    /// Assembles and writes metadata at the end of the file. This will take ownership
+    /// of `row_groups` and the page index structures.
+    fn write_metadata(&mut self) -> Result<ParquetMetaData> {
         self.finished = true;
 
         // write out any remaining bloom filters after all row groups
@@ -333,15 +340,13 @@ impl<W: Write + Send> SerializedFileWriter<W> {
             None => Some(self.kv_metadatas.clone()),
         };
 
-        let row_groups = self
-            .row_groups
-            .iter()
-            .map(|v| v.to_thrift())
-            .collect::<Vec<_>>();
+        // take ownership of metadata
+        let row_groups = std::mem::take(&mut self.row_groups);
+        let column_indexes = std::mem::take(&mut self.column_indexes);
+        let offset_indexes = std::mem::take(&mut self.offset_indexes);
 
         let mut encoder = ThriftMetadataWriter::new(
             &mut self.buf,
-            &self.schema,
             &self.descr,
             row_groups,
             Some(self.props.created_by().to_string()),
@@ -356,8 +361,11 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         if let Some(key_value_metadata) = key_value_metadata {
             encoder = encoder.with_key_value_metadata(key_value_metadata)
         }
-        encoder = encoder.with_column_indexes(&self.column_indexes);
-        encoder = encoder.with_offset_indexes(&self.offset_indexes);
+
+        encoder = encoder.with_column_indexes(column_indexes);
+        if !self.props.offset_index_disabled() {
+            encoder = encoder.with_offset_indexes(offset_indexes);
+        }
         encoder.finish()
     }
 
@@ -384,6 +392,12 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         &self.descr
     }
 
+    /// Returns a reference to schema descriptor Arc.
+    #[cfg(feature = "arrow")]
+    pub(crate) fn schema_descr_ptr(&self) -> &SchemaDescPtr {
+        &self.descr
+    }
+
     /// Returns a reference to the writer properties
     pub fn properties(&self) -> &WriterPropertiesPtr {
         &self.props
@@ -396,8 +410,8 @@ impl<W: Write + Send> SerializedFileWriter<W> {
 
     /// Writes the given buf bytes to the internal buffer.
     ///
-    /// This can be used to write raw data to an in-progress parquet file, for
-    ///  example, custom index structures or other payloads. Other parquet readers
+    /// This can be used to write raw data to an in-progress Parquet file, for
+    /// example, custom index structures or other payloads. Other Parquet readers
     /// will skip this data when reading the files.
     ///
     /// It's safe to use this method to write data to the underlying writer,
@@ -406,10 +420,15 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         self.buf.write_all(buf)
     }
 
+    /// Flushes underlying writer
+    pub fn flush(&mut self) -> std::io::Result<()> {
+        self.buf.flush()
+    }
+
     /// Returns a mutable reference to the underlying writer.
     ///
     /// **Warning**: if you write directly to this writer, you will skip
-    /// the `TrackedWrite` buffering and byte‐counting layers. That’ll cause
+    /// the `TrackedWrite` buffering and byte‐counting layers, which can cause
     /// the file footer’s recorded offsets and sizes to diverge from reality,
     /// resulting in an unreadable or corrupted Parquet file.
     ///
@@ -478,6 +497,7 @@ fn write_bloom_filters<W: Write + Send>(
 }
 
 /// Parquet row group writer API.
+///
 /// Provides methods to access column writers in an iterator-like fashion, order is
 /// guaranteed to match the order of schema leaves (column descriptors).
 ///
@@ -486,7 +506,7 @@ fn write_bloom_filters<W: Write + Send>(
 ///   more columns are available to write.
 /// - Once done writing a column, close column writer with `close`
 /// - Once all columns have been written, close row group writer with `close`
-///   method. THe close method will return row group metadata and is no-op
+///   method. The close method will return row group metadata and is no-op
 ///   on already closed row group.
 pub struct SerializedRowGroupWriter<'a, W: Write> {
     descr: SchemaDescPtr,
@@ -499,8 +519,8 @@ pub struct SerializedRowGroupWriter<'a, W: Write> {
     row_group_metadata: Option<RowGroupMetaDataPtr>,
     column_chunks: Vec<ColumnChunkMetaData>,
     bloom_filters: Vec<Option<Sbbf>>,
-    column_indexes: Vec<Option<ColumnIndex>>,
-    offset_indexes: Vec<Option<OffsetIndex>>,
+    column_indexes: Vec<Option<ColumnIndexMetaData>>,
+    offset_indexes: Vec<Option<OffsetIndexMetaData>>,
     row_group_index: i16,
     file_offset: i64,
     on_close: Option<OnCloseRowGroup<'a, W>>,
@@ -645,12 +665,20 @@ impl<'a, W: Write + Send> SerializedRowGroupWriter<'a, W> {
         })
     }
 
-    /// Append an encoded column chunk from another source without decoding it
+    /// Append an encoded column chunk from `reader` directly to the underlying
+    /// writer.
     ///
-    /// This can be used for efficiently concatenating or projecting parquet data,
-    /// or encoding parquet data to temporary in-memory buffers
+    /// This method can be used for efficiently concatenating or projecting
+    /// Parquet data, or encoding Parquet data to temporary in-memory buffers.
     ///
-    /// See [`Self::next_column`] for writing data that isn't already encoded
+    /// Arguments:
+    /// - `reader`: a [`ChunkReader`] containing the encoded column data
+    /// - `close`: the [`ColumnCloseResult`] metadata returned from closing
+    ///   the column writer that wrote the data in `reader`.
+    ///
+    /// See Also:
+    /// 1. [`get_column_writer`] for creating writers that can encode data.
+    /// 2. [`Self::next_column`] for writing data that isn't already encoded
     pub fn append_column<R: ChunkReader>(
         &mut self,
         reader: &R,
@@ -689,7 +717,7 @@ impl<'a, W: Write + Send> SerializedRowGroupWriter<'a, W> {
         let map_offset = |x| x - src_offset + write_offset as i64;
         let mut builder = ColumnChunkMetaData::builder(metadata.column_descr_ptr())
             .set_compression(metadata.compression())
-            .set_encodings(metadata.encodings().clone())
+            .set_encodings_mask(*metadata.encodings_mask())
             .set_total_compressed_size(metadata.compressed_size())
             .set_total_uncompressed_size(metadata.uncompressed_size())
             .set_num_values(metadata.num_values())
@@ -706,6 +734,9 @@ impl<'a, W: Write + Send> SerializedRowGroupWriter<'a, W> {
         if let Some(statistics) = metadata.statistics() {
             builder = builder.set_statistics(statistics.clone())
         }
+        if let Some(geo_statistics) = metadata.geo_statistics() {
+            builder = builder.set_geo_statistics(Box::new(geo_statistics.clone()))
+        }
         if let Some(page_encoding_stats) = metadata.page_encoding_stats() {
             builder = builder.set_page_encoding_stats(page_encoding_stats.clone())
         }
@@ -901,15 +932,15 @@ impl<'a, W: Write> SerializedPageWriter<'a, W> {
     /// Serializes page header into Thrift.
     /// Returns number of bytes that have been written into the sink.
     #[inline]
-    fn serialize_page_header(&mut self, header: parquet::PageHeader) -> Result<usize> {
+    fn serialize_page_header(&mut self, header: PageHeader) -> Result<usize> {
         let start_pos = self.sink.bytes_written();
         match self.page_encryptor_and_sink_mut() {
             Some((page_encryptor, sink)) => {
                 page_encryptor.encrypt_page_header(&header, sink)?;
             }
             None => {
-                let mut protocol = TCompactOutputProtocol::new(&mut self.sink);
-                header.write_to_out_protocol(&mut protocol)?;
+                let mut protocol = ThriftCompactOutputProtocol::new(&mut self.sink);
+                header.write_thrift(&mut protocol)?;
             }
         }
         Ok(self.sink.bytes_written() - start_pos)
@@ -958,7 +989,7 @@ impl<W: Write + Send> PageWriter for SerializedPageWriter<'_, W> {
         let page_type = page.page_type();
         let start_pos = self.sink.bytes_written() as u64;
 
-        let page_header = page.to_thrift_header();
+        let page_header = page.to_thrift_header()?;
         let header_size = self.serialize_page_header(page_header)?;
 
         self.sink.write_all(page.data())?;
@@ -989,7 +1020,7 @@ impl<W: Write + Send> PageWriter for SerializedPageWriter<'_, W> {
 /// as a Parquet file.
 #[cfg(feature = "encryption")]
 pub(crate) fn get_file_magic(
-    file_encryption_properties: Option<&FileEncryptionProperties>,
+    file_encryption_properties: Option<&Arc<FileEncryptionProperties>>,
 ) -> &'static [u8; 4] {
     match file_encryption_properties.as_ref() {
         Some(encryption_properties) if encryption_properties.encrypt_footer() => {
@@ -1013,30 +1044,31 @@ mod tests {
     use bytes::Bytes;
     use std::fs::File;
 
-    #[cfg(feature = "arrow")]
-    use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
     #[cfg(feature = "arrow")]
     use crate::arrow::ArrowWriter;
+    #[cfg(feature = "arrow")]
+    use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
     use crate::basic::{
         ColumnOrder, Compression, ConvertedType, Encoding, LogicalType, Repetition, SortOrder, Type,
     };
     use crate::column::page::{Page, PageReader};
     use crate::column::reader::get_typed_column_reader;
-    use crate::compression::{create_codec, Codec, CodecOptionsBuilder};
+    use crate::compression::{Codec, CodecOptionsBuilder, create_codec};
     use crate::data_type::{BoolType, ByteArrayType, Int32Type};
-    use crate::file::page_index::index::Index;
+    use crate::file::page_index::column_index::ColumnIndexMetaData;
     use crate::file::properties::EnabledStatistics;
     use crate::file::serialized_reader::ReadOptionsBuilder;
+    use crate::file::statistics::{from_thrift_page_stats, page_stats_to_thrift};
     use crate::file::{
         properties::{ReaderProperties, WriterProperties, WriterVersion},
         reader::{FileReader, SerializedFileReader, SerializedPageReader},
-        statistics::{from_thrift, to_thrift, Statistics},
+        statistics::Statistics,
     };
-    use crate::format::SortingColumn;
     use crate::record::{Row, RowAccessor};
     use crate::schema::parser::parse_message_type;
     use crate::schema::types;
     use crate::schema::types::{ColumnDescriptor, ColumnPath};
+    use crate::util::test_common::file_util::get_test_file;
     use crate::util::test_common::rand_gen::RandGen;
 
     #[test]
@@ -1390,7 +1422,7 @@ mod tests {
 
     #[test]
     fn test_page_writer_data_pages() {
-        let pages = vec![
+        let pages = [
             Page::DataPage {
                 buf: Bytes::from(vec![1, 2, 3, 4, 5, 6, 7, 8]),
                 num_values: 10,
@@ -1418,7 +1450,7 @@ mod tests {
 
     #[test]
     fn test_page_writer_dict_pages() {
-        let pages = vec![
+        let pages = [
             Page::DictionaryPage {
                 buf: Bytes::from(vec![1, 2, 3, 4, 5]),
                 num_values: 5,
@@ -1482,8 +1514,11 @@ mod tests {
                         encoding,
                         def_level_encoding,
                         rep_level_encoding,
-                        statistics: from_thrift(physical_type, to_thrift(statistics.as_ref()))
-                            .unwrap(),
+                        statistics: from_thrift_page_stats(
+                            physical_type,
+                            page_stats_to_thrift(statistics.as_ref()),
+                        )
+                        .unwrap(),
                     }
                 }
                 Page::DataPageV2 {
@@ -1512,8 +1547,11 @@ mod tests {
                         def_levels_byte_len,
                         rep_levels_byte_len,
                         is_compressed: compressor.is_some(),
-                        statistics: from_thrift(physical_type, to_thrift(statistics.as_ref()))
-                            .unwrap(),
+                        statistics: from_thrift_page_stats(
+                            physical_type,
+                            page_stats_to_thrift(statistics.as_ref()),
+                        )
+                        .unwrap(),
                     }
                 }
                 Page::DictionaryPage {
@@ -1565,6 +1603,7 @@ mod tests {
 
             let props = ReaderProperties::builder()
                 .set_backward_compatible_lz4(false)
+                .set_read_page_statistics(true)
                 .build();
             let mut page_reader = SerializedPageReader::new_with_properties(
                 Arc::new(reader),
@@ -1603,7 +1642,10 @@ mod tests {
         assert_eq!(&left.buffer(), &right.buffer());
         assert_eq!(left.num_values(), right.num_values());
         assert_eq!(left.encoding(), right.encoding());
-        assert_eq!(to_thrift(left.statistics()), to_thrift(right.statistics()));
+        assert_eq!(
+            page_stats_to_thrift(left.statistics()),
+            page_stats_to_thrift(right.statistics())
+        );
     }
 
     /// Tests roundtrip of i32 data written using `W` and read using `R`
@@ -1611,7 +1653,7 @@ mod tests {
         file: W,
         data: Vec<Vec<i32>>,
         compression: Compression,
-    ) -> crate::format::FileMetaData
+    ) -> ParquetMetaData
     where
         W: Write + Send,
         R: ChunkReader + From<W> + 'static,
@@ -1626,7 +1668,7 @@ mod tests {
         data: Vec<Vec<D::T>>,
         value: F,
         compression: Compression,
-    ) -> crate::format::FileMetaData
+    ) -> ParquetMetaData
     where
         W: Write + Send,
         R: ChunkReader + From<W> + 'static,
@@ -1697,7 +1739,7 @@ mod tests {
 
     /// File write-read roundtrip.
     /// `data` consists of arrays of values for each row group.
-    fn test_file_roundtrip(file: File, data: Vec<Vec<i32>>) -> crate::format::FileMetaData {
+    fn test_file_roundtrip(file: File, data: Vec<Vec<i32>>) -> ParquetMetaData {
         test_roundtrip_i32::<File, File>(file, data, Compression::UNCOMPRESSED)
     }
 
@@ -1772,13 +1814,12 @@ mod tests {
     fn test_column_offset_index_file() {
         let file = tempfile::tempfile().unwrap();
         let file_metadata = test_file_roundtrip(file, vec![vec![1, 2, 3, 4, 5]]);
-        file_metadata.row_groups.iter().for_each(|row_group| {
-            row_group.columns.iter().for_each(|column_chunk| {
-                assert_ne!(None, column_chunk.column_index_offset);
-                assert_ne!(None, column_chunk.column_index_length);
-
-                assert_ne!(None, column_chunk.offset_index_offset);
-                assert_ne!(None, column_chunk.offset_index_length);
+        file_metadata.row_groups().iter().for_each(|row_group| {
+            row_group.columns().iter().for_each(|column_chunk| {
+                assert!(column_chunk.column_index_offset().is_some());
+                assert!(column_chunk.column_index_length().is_some());
+                assert!(column_chunk.offset_index_offset().is_some());
+                assert!(column_chunk.offset_index_length().is_some());
             })
         });
     }
@@ -1871,29 +1912,22 @@ mod tests {
         let metadata = row_group_writer.close().unwrap();
         writer.close().unwrap();
 
-        let thrift = metadata.to_thrift();
-        let encoded_stats: Vec<_> = thrift
-            .columns
-            .into_iter()
-            .map(|x| x.meta_data.unwrap().statistics.unwrap())
-            .collect();
-
         // decimal
-        let s = &encoded_stats[0];
+        let s = page_stats_to_thrift(metadata.column(0).statistics()).unwrap();
         assert_eq!(s.min.as_deref(), Some(1_i32.to_le_bytes().as_ref()));
         assert_eq!(s.max.as_deref(), Some(3_i32.to_le_bytes().as_ref()));
         assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref()));
         assert_eq!(s.max_value.as_deref(), Some(3_i32.to_le_bytes().as_ref()));
 
         // i32
-        let s = &encoded_stats[1];
+        let s = page_stats_to_thrift(metadata.column(1).statistics()).unwrap();
         assert_eq!(s.min.as_deref(), Some(1_i32.to_le_bytes().as_ref()));
         assert_eq!(s.max.as_deref(), Some(3_i32.to_le_bytes().as_ref()));
         assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref()));
         assert_eq!(s.max_value.as_deref(), Some(3_i32.to_le_bytes().as_ref()));
 
         // u32
-        let s = &encoded_stats[2];
+        let s = page_stats_to_thrift(metadata.column(2).statistics()).unwrap();
         assert_eq!(s.min.as_deref(), None);
         assert_eq!(s.max.as_deref(), None);
         assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref()));
@@ -2019,15 +2053,15 @@ mod tests {
         row_group_writer.close().unwrap();
 
         let metadata = file_writer.finish().unwrap();
-        assert_eq!(metadata.row_groups.len(), 1);
-        let row_group = &metadata.row_groups[0];
-        assert_eq!(row_group.columns.len(), 2);
+        assert_eq!(metadata.num_row_groups(), 1);
+        let row_group = metadata.row_group(0);
+        assert_eq!(row_group.num_columns(), 2);
         // Column "a" has both offset and column index, as requested
-        assert!(row_group.columns[0].offset_index_offset.is_some());
-        assert!(row_group.columns[0].column_index_offset.is_some());
+        assert!(row_group.column(0).offset_index_offset().is_some());
+        assert!(row_group.column(0).column_index_offset().is_some());
         // Column "b" should only have offset index
-        assert!(row_group.columns[1].offset_index_offset.is_some());
-        assert!(row_group.columns[1].column_index_offset.is_none());
+        assert!(row_group.column(1).offset_index_offset().is_some());
+        assert!(row_group.column(1).column_index_offset().is_none());
 
         let err = file_writer.next_row_group().err().unwrap().to_string();
         assert_eq!(err, "Parquet error: SerializedFileWriter already finished");
@@ -2046,9 +2080,9 @@ mod tests {
         assert_eq!(column_index[0].len(), 2); // 2 column
 
         let a_idx = &column_index[0][0];
-        assert!(matches!(a_idx, Index::INT32(_)), "{a_idx:?}");
+        assert!(matches!(a_idx, ColumnIndexMetaData::INT32(_)), "{a_idx:?}");
         let b_idx = &column_index[0][1];
-        assert!(matches!(b_idx, Index::NONE), "{b_idx:?}");
+        assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}");
     }
 
     #[test]
@@ -2081,9 +2115,8 @@ mod tests {
         row_group_writer.close().unwrap();
         let file_metadata = writer.close().unwrap();
 
-        assert_eq!(file_metadata.row_groups.len(), 1);
-        assert_eq!(file_metadata.row_groups[0].columns.len(), 1);
-        assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some());
+        assert_eq!(file_metadata.num_row_groups(), 1);
+        assert_eq!(file_metadata.row_group(0).num_columns(), 1);
 
         let check_def_hist = |def_hist: &[i64]| {
             assert_eq!(def_hist.len(), 2);
@@ -2091,29 +2124,26 @@ mod tests {
             assert_eq!(def_hist[1], 7);
         };
 
-        assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some());
-        let meta_data = file_metadata.row_groups[0].columns[0]
-            .meta_data
-            .as_ref()
-            .unwrap();
-        assert!(meta_data.size_statistics.is_some());
-        let size_stats = meta_data.size_statistics.as_ref().unwrap();
+        let meta_data = file_metadata.row_group(0).column(0);
 
-        assert!(size_stats.repetition_level_histogram.is_none());
-        assert!(size_stats.definition_level_histogram.is_some());
-        assert!(size_stats.unencoded_byte_array_data_bytes.is_some());
+        assert!(meta_data.repetition_level_histogram().is_none());
+        assert!(meta_data.definition_level_histogram().is_some());
+        assert!(meta_data.unencoded_byte_array_data_bytes().is_some());
         assert_eq!(
             unenc_size,
-            size_stats.unencoded_byte_array_data_bytes.unwrap()
+            meta_data.unencoded_byte_array_data_bytes().unwrap()
         );
-        check_def_hist(size_stats.definition_level_histogram.as_ref().unwrap());
+        check_def_hist(meta_data.definition_level_histogram().unwrap().values());
 
         // check that the read metadata is also correct
         let options = ReadOptionsBuilder::new().with_page_index().build();
         let reader = SerializedFileReader::new_with_options(file, options).unwrap();
 
         let rfile_metadata = reader.metadata().file_metadata();
-        assert_eq!(rfile_metadata.num_rows(), file_metadata.num_rows);
+        assert_eq!(
+            rfile_metadata.num_rows(),
+            file_metadata.file_metadata().num_rows()
+        );
         assert_eq!(reader.num_row_groups(), 1);
         let rowgroup = reader.get_row_group(0).unwrap();
         assert_eq!(rowgroup.num_columns(), 1);
@@ -2132,16 +2162,16 @@ mod tests {
         let column_index = reader.metadata().column_index().unwrap();
         assert_eq!(column_index.len(), 1);
         assert_eq!(column_index[0].len(), 1);
-        let col_idx = if let Index::BYTE_ARRAY(index) = &column_index[0][0] {
-            assert_eq!(index.indexes.len(), 1);
-            &index.indexes[0]
+        let col_idx = if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][0] {
+            assert_eq!(index.num_pages(), 1);
+            index
         } else {
             unreachable!()
         };
 
-        assert!(col_idx.repetition_level_histogram().is_none());
-        assert!(col_idx.definition_level_histogram().is_some());
-        check_def_hist(col_idx.definition_level_histogram().unwrap().values());
+        assert!(col_idx.repetition_level_histogram(0).is_none());
+        assert!(col_idx.definition_level_histogram(0).is_some());
+        check_def_hist(col_idx.definition_level_histogram(0).unwrap());
 
         assert!(reader.metadata().offset_index().is_some());
         let offset_index = reader.metadata().offset_index().unwrap();
@@ -2233,9 +2263,8 @@ mod tests {
         row_group_writer.close().unwrap();
         let file_metadata = writer.close().unwrap();
 
-        assert_eq!(file_metadata.row_groups.len(), 1);
-        assert_eq!(file_metadata.row_groups[0].columns.len(), 1);
-        assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some());
+        assert_eq!(file_metadata.num_row_groups(), 1);
+        assert_eq!(file_metadata.row_group(0).num_columns(), 1);
 
         let check_def_hist = |def_hist: &[i64]| {
             assert_eq!(def_hist.len(), 4);
@@ -2253,25 +2282,22 @@ mod tests {
 
         // check that histograms are set properly in the write and read metadata
         // also check that unencoded_byte_array_data_bytes is not set
-        assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some());
-        let meta_data = file_metadata.row_groups[0].columns[0]
-            .meta_data
-            .as_ref()
-            .unwrap();
-        assert!(meta_data.size_statistics.is_some());
-        let size_stats = meta_data.size_statistics.as_ref().unwrap();
-        assert!(size_stats.repetition_level_histogram.is_some());
-        assert!(size_stats.definition_level_histogram.is_some());
-        assert!(size_stats.unencoded_byte_array_data_bytes.is_none());
-        check_def_hist(size_stats.definition_level_histogram.as_ref().unwrap());
-        check_rep_hist(size_stats.repetition_level_histogram.as_ref().unwrap());
+        let meta_data = file_metadata.row_group(0).column(0);
+        assert!(meta_data.repetition_level_histogram().is_some());
+        assert!(meta_data.definition_level_histogram().is_some());
+        assert!(meta_data.unencoded_byte_array_data_bytes().is_none());
+        check_def_hist(meta_data.definition_level_histogram().unwrap().values());
+        check_rep_hist(meta_data.repetition_level_histogram().unwrap().values());
 
         // check that the read metadata is also correct
         let options = ReadOptionsBuilder::new().with_page_index().build();
         let reader = SerializedFileReader::new_with_options(file, options).unwrap();
 
         let rfile_metadata = reader.metadata().file_metadata();
-        assert_eq!(rfile_metadata.num_rows(), file_metadata.num_rows);
+        assert_eq!(
+            rfile_metadata.num_rows(),
+            file_metadata.file_metadata().num_rows()
+        );
         assert_eq!(reader.num_row_groups(), 1);
         let rowgroup = reader.get_row_group(0).unwrap();
         assert_eq!(rowgroup.num_columns(), 1);
@@ -2287,15 +2313,15 @@ mod tests {
         let column_index = reader.metadata().column_index().unwrap();
         assert_eq!(column_index.len(), 1);
         assert_eq!(column_index[0].len(), 1);
-        let col_idx = if let Index::INT32(index) = &column_index[0][0] {
-            assert_eq!(index.indexes.len(), 1);
-            &index.indexes[0]
+        let col_idx = if let ColumnIndexMetaData::INT32(index) = &column_index[0][0] {
+            assert_eq!(index.num_pages(), 1);
+            index
         } else {
             unreachable!()
         };
 
-        check_def_hist(col_idx.definition_level_histogram().unwrap().values());
-        check_rep_hist(col_idx.repetition_level_histogram().unwrap().values());
+        check_def_hist(col_idx.definition_level_histogram(0).unwrap());
+        check_rep_hist(col_idx.repetition_level_histogram(0).unwrap());
 
         assert!(reader.metadata().offset_index().is_some());
         let offset_index = reader.metadata().offset_index().unwrap();
@@ -2371,11 +2397,14 @@ mod tests {
 
         // Make sure byte_stream_split encoding was used
         let check_encoding = |x: usize, filemeta: &ParquetMetaData| {
-            assert!(filemeta
-                .row_group(0)
-                .column(x)
-                .encodings()
-                .contains(&Encoding::BYTE_STREAM_SPLIT));
+            assert!(
+                filemeta
+                    .row_group(0)
+                    .column(x)
+                    .encodings()
+                    .collect::<Vec<_>>()
+                    .contains(&Encoding::BYTE_STREAM_SPLIT)
+            );
         };
 
         check_encoding(1, filemeta);
@@ -2414,4 +2443,74 @@ mod tests {
             start += 1;
         }
     }
+
+    #[test]
+    fn test_rewrite_no_page_indexes() {
+        let file = get_test_file("alltypes_tiny_pages.parquet");
+        let metadata = ParquetMetaDataReader::new()
+            .with_page_index_policy(PageIndexPolicy::Optional)
+            .parse_and_finish(&file)
+            .unwrap();
+
+        let props = Arc::new(WriterProperties::builder().build());
+        let schema = metadata.file_metadata().schema_descr().root_schema_ptr();
+        let output = Vec::<u8>::new();
+        let mut writer = SerializedFileWriter::new(output, schema, props).unwrap();
+
+        for rg in metadata.row_groups() {
+            let mut rg_out = writer.next_row_group().unwrap();
+            for column in rg.columns() {
+                let result = ColumnCloseResult {
+                    bytes_written: column.compressed_size() as _,
+                    rows_written: rg.num_rows() as _,
+                    metadata: column.clone(),
+                    bloom_filter: None,
+                    column_index: None,
+                    offset_index: None,
+                };
+                rg_out.append_column(&file, result).unwrap();
+            }
+            rg_out.close().unwrap();
+        }
+        writer.close().unwrap();
+    }
+
+    #[test]
+    fn test_rewrite_missing_column_index() {
+        // this file has an INT96 column that lacks a column index entry
+        let file = get_test_file("alltypes_tiny_pages.parquet");
+        let metadata = ParquetMetaDataReader::new()
+            .with_page_index_policy(PageIndexPolicy::Optional)
+            .parse_and_finish(&file)
+            .unwrap();
+
+        let props = Arc::new(WriterProperties::builder().build());
+        let schema = metadata.file_metadata().schema_descr().root_schema_ptr();
+        let output = Vec::<u8>::new();
+        let mut writer = SerializedFileWriter::new(output, schema, props).unwrap();
+
+        let column_indexes = metadata.column_index();
+        let offset_indexes = metadata.offset_index();
+
+        for (rg_idx, rg) in metadata.row_groups().iter().enumerate() {
+            let rg_column_indexes = column_indexes.and_then(|ci| ci.get(rg_idx));
+            let rg_offset_indexes = offset_indexes.and_then(|oi| oi.get(rg_idx));
+            let mut rg_out = writer.next_row_group().unwrap();
+            for (col_idx, column) in rg.columns().iter().enumerate() {
+                let column_index = rg_column_indexes.and_then(|row| row.get(col_idx)).cloned();
+                let offset_index = rg_offset_indexes.and_then(|row| row.get(col_idx)).cloned();
+                let result = ColumnCloseResult {
+                    bytes_written: column.compressed_size() as _,
+                    rows_written: rg.num_rows() as _,
+                    metadata: column.clone(),
+                    bloom_filter: None,
+                    column_index,
+                    offset_index,
+                };
+                rg_out.append_column(&file, result).unwrap();
+            }
+            rg_out.close().unwrap();
+        }
+        writer.close().unwrap();
+    }
 }
diff --git a/parquet/src/format.rs b/parquet/src/format.rs
index 287d08b7a95c..101799d00350 100644
--- a/parquet/src/format.rs
+++ b/parquet/src/format.rs
@@ -1,5 +1,5 @@
 //! See [`crate::file`] for easier to use APIs.
-// Autogenerated by Thrift Compiler (0.20.0)
+// Autogenerated by Thrift Compiler (0.21.0)
 // DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
 
 #![allow(dead_code)]
@@ -341,6 +341,67 @@ impl From<&FieldRepetitionType> for i32 {
   }
 }
 
+/// Edge interpolation algorithm for Geography logical type
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct EdgeInterpolationAlgorithm(pub i32);
+
+impl EdgeInterpolationAlgorithm {
+  pub const SPHERICAL: EdgeInterpolationAlgorithm = EdgeInterpolationAlgorithm(0);
+  pub const VINCENTY: EdgeInterpolationAlgorithm = EdgeInterpolationAlgorithm(1);
+  pub const THOMAS: EdgeInterpolationAlgorithm = EdgeInterpolationAlgorithm(2);
+  pub const ANDOYER: EdgeInterpolationAlgorithm = EdgeInterpolationAlgorithm(3);
+  pub const KARNEY: EdgeInterpolationAlgorithm = EdgeInterpolationAlgorithm(4);
+  pub const ENUM_VALUES: &'static [Self] = &[
+    Self::SPHERICAL,
+    Self::VINCENTY,
+    Self::THOMAS,
+    Self::ANDOYER,
+    Self::KARNEY,
+  ];
+}
+
+impl crate::thrift::TSerializable for EdgeInterpolationAlgorithm {
+  #[allow(clippy::trivially_copy_pass_by_ref)]
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    o_prot.write_i32(self.0)
+  }
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<EdgeInterpolationAlgorithm> {
+    let enum_value = i_prot.read_i32()?;
+    Ok(EdgeInterpolationAlgorithm::from(enum_value))
+  }
+}
+
+impl From<i32> for EdgeInterpolationAlgorithm {
+  fn from(i: i32) -> Self {
+    match i {
+      0 => EdgeInterpolationAlgorithm::SPHERICAL,
+      1 => EdgeInterpolationAlgorithm::VINCENTY,
+      2 => EdgeInterpolationAlgorithm::THOMAS,
+      3 => EdgeInterpolationAlgorithm::ANDOYER,
+      4 => EdgeInterpolationAlgorithm::KARNEY,
+      _ => EdgeInterpolationAlgorithm(i)
+    }
+  }
+}
+
+impl From<&i32> for EdgeInterpolationAlgorithm {
+  fn from(i: &i32) -> Self {
+    EdgeInterpolationAlgorithm::from(*i)
+  }
+}
+
+impl From<EdgeInterpolationAlgorithm> for i32 {
+  fn from(e: EdgeInterpolationAlgorithm) -> i32 {
+    e.0
+  }
+}
+
+impl From<&EdgeInterpolationAlgorithm> for i32 {
+  fn from(e: &EdgeInterpolationAlgorithm) -> i32 {
+    e.0
+  }
+}
+
 /// Encodings supported by Parquet.  Not all encodings are valid for all types.  These
 /// enums are also used to specify the encoding of definition and repetition levels.
 /// See the accompanying doc for the details of the more complicated encodings.
@@ -774,6 +835,235 @@ impl crate::thrift::TSerializable for SizeStatistics {
   }
 }
 
+//
+// BoundingBox
+//
+
+/// Bounding box for GEOMETRY or GEOGRAPHY type in the representation of min/max
+/// value pair of coordinates from each axis.
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct BoundingBox {
+  pub xmin: OrderedFloat<f64>,
+  pub xmax: OrderedFloat<f64>,
+  pub ymin: OrderedFloat<f64>,
+  pub ymax: OrderedFloat<f64>,
+  pub zmin: Option<OrderedFloat<f64>>,
+  pub zmax: Option<OrderedFloat<f64>>,
+  pub mmin: Option<OrderedFloat<f64>>,
+  pub mmax: Option<OrderedFloat<f64>>,
+}
+
+impl BoundingBox {
+  pub fn new<F5, F6, F7, F8>(xmin: OrderedFloat<f64>, xmax: OrderedFloat<f64>, ymin: OrderedFloat<f64>, ymax: OrderedFloat<f64>, zmin: F5, zmax: F6, mmin: F7, mmax: F8) -> BoundingBox where F5: Into<Option<OrderedFloat<f64>>>, F6: Into<Option<OrderedFloat<f64>>>, F7: Into<Option<OrderedFloat<f64>>>, F8: Into<Option<OrderedFloat<f64>>> {
+    BoundingBox {
+      xmin,
+      xmax,
+      ymin,
+      ymax,
+      zmin: zmin.into(),
+      zmax: zmax.into(),
+      mmin: mmin.into(),
+      mmax: mmax.into(),
+    }
+  }
+}
+
+impl crate::thrift::TSerializable for BoundingBox {
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<BoundingBox> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<OrderedFloat<f64>> = None;
+    let mut f_2: Option<OrderedFloat<f64>> = None;
+    let mut f_3: Option<OrderedFloat<f64>> = None;
+    let mut f_4: Option<OrderedFloat<f64>> = None;
+    let mut f_5: Option<OrderedFloat<f64>> = None;
+    let mut f_6: Option<OrderedFloat<f64>> = None;
+    let mut f_7: Option<OrderedFloat<f64>> = None;
+    let mut f_8: Option<OrderedFloat<f64>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = OrderedFloat::from(i_prot.read_double()?);
+          f_8 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("BoundingBox.xmin", &f_1)?;
+    verify_required_field_exists("BoundingBox.xmax", &f_2)?;
+    verify_required_field_exists("BoundingBox.ymin", &f_3)?;
+    verify_required_field_exists("BoundingBox.ymax", &f_4)?;
+    let ret = BoundingBox {
+      xmin: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      xmax: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      ymin: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      ymax: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      zmin: f_5,
+      zmax: f_6,
+      mmin: f_7,
+      mmax: f_8,
+    };
+    Ok(ret)
+  }
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BoundingBox");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("xmin", TType::Double, 1))?;
+    o_prot.write_double(self.xmin.into())?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("xmax", TType::Double, 2))?;
+    o_prot.write_double(self.xmax.into())?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("ymin", TType::Double, 3))?;
+    o_prot.write_double(self.ymin.into())?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("ymax", TType::Double, 4))?;
+    o_prot.write_double(self.ymax.into())?;
+    o_prot.write_field_end()?;
+    if let Some(fld_var) = self.zmin {
+      o_prot.write_field_begin(&TFieldIdentifier::new("zmin", TType::Double, 5))?;
+      o_prot.write_double(fld_var.into())?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.zmax {
+      o_prot.write_field_begin(&TFieldIdentifier::new("zmax", TType::Double, 6))?;
+      o_prot.write_double(fld_var.into())?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.mmin {
+      o_prot.write_field_begin(&TFieldIdentifier::new("mmin", TType::Double, 7))?;
+      o_prot.write_double(fld_var.into())?;
+      o_prot.write_field_end()?
+    }
+    if let Some(fld_var) = self.mmax {
+      o_prot.write_field_begin(&TFieldIdentifier::new("mmax", TType::Double, 8))?;
+      o_prot.write_double(fld_var.into())?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// GeospatialStatistics
+//
+
+/// Statistics specific to Geometry and Geography logical types
+#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct GeospatialStatistics {
+  /// A bounding box of geospatial instances
+  pub bbox: Option<BoundingBox>,
+  /// Geospatial type codes of all instances, or an empty list if not known
+  pub geospatial_types: Option<Vec<i32>>,
+}
+
+impl GeospatialStatistics {
+  pub fn new<F1, F2>(bbox: F1, geospatial_types: F2) -> GeospatialStatistics where F1: Into<Option<BoundingBox>>, F2: Into<Option<Vec<i32>>> {
+    GeospatialStatistics {
+      bbox: bbox.into(),
+      geospatial_types: geospatial_types.into(),
+    }
+  }
+}
+
+impl crate::thrift::TSerializable for GeospatialStatistics {
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<GeospatialStatistics> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<BoundingBox> = None;
+    let mut f_2: Option<Vec<i32>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = BoundingBox::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<i32> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            let list_elem_2 = i_prot.read_i32()?;
+            val.push(list_elem_2);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = GeospatialStatistics {
+      bbox: f_1,
+      geospatial_types: f_2,
+    };
+    Ok(ret)
+  }
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("GeospatialStatistics");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.bbox {
+      o_prot.write_field_begin(&TFieldIdentifier::new("bbox", TType::Struct, 1))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.geospatial_types {
+      o_prot.write_field_begin(&TFieldIdentifier::new("geospatial_types", TType::List, 2))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::I32, fld_var.len() as i32))?;
+      for e in fld_var {
+        o_prot.write_i32(*e)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
 //
 // Statistics
 //
@@ -795,7 +1085,12 @@ pub struct Statistics {
   /// signed.
   pub max: Option<Vec<u8>>,
   pub min: Option<Vec<u8>>,
-  /// count of null value in the column
+  /// Count of null values in the column.
+  /// 
+  /// Writers SHOULD always write this field even if it is zero (i.e. no null value)
+  /// or the column is not nullable.
+  /// Readers MUST distinguish between null_count not being present and null_count == 0.
+  /// If null_count is not present, readers MUST NOT assume null_count == 0.
   pub null_count: Option<i64>,
   /// count of distinct values occurring
   pub distinct_count: Option<i64>,
@@ -1834,6 +2129,218 @@ impl crate::thrift::TSerializable for BsonType {
   }
 }
 
+//
+// VariantType
+//
+
+/// Embedded Variant logical type annotation
+#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct VariantType {
+  pub specification_version: Option<i8>,
+}
+
+impl VariantType {
+  pub fn new<F1>(specification_version: F1) -> VariantType where F1: Into<Option<i8>> {
+    VariantType {
+      specification_version: specification_version.into(),
+    }
+  }
+}
+
+impl crate::thrift::TSerializable for VariantType {
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<VariantType> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i8> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i8()?;
+          f_1 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = VariantType {
+      specification_version: f_1,
+    };
+    Ok(ret)
+  }
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("VariantType");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(fld_var) = self.specification_version {
+      o_prot.write_field_begin(&TFieldIdentifier::new("specification_version", TType::I08, 1))?;
+      o_prot.write_i8(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// GeometryType
+//
+
+/// Embedded Geometry logical type annotation
+/// 
+/// Geospatial features in the Well-Known Binary (WKB) format and edges interpolation
+/// is always linear/planar.
+/// 
+/// A custom CRS can be set by the crs field. If unset, it defaults to "OGC:CRS84",
+/// which means that the geometries must be stored in longitude, latitude based on
+/// the WGS84 datum.
+/// 
+/// Allowed for physical type: BYTE_ARRAY.
+/// 
+/// See Geospatial.md for details.
+#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct GeometryType {
+  pub crs: Option<String>,
+}
+
+impl GeometryType {
+  pub fn new<F1>(crs: F1) -> GeometryType where F1: Into<Option<String>> {
+    GeometryType {
+      crs: crs.into(),
+    }
+  }
+}
+
+impl crate::thrift::TSerializable for GeometryType {
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<GeometryType> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<String> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_string()?;
+          f_1 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = GeometryType {
+      crs: f_1,
+    };
+    Ok(ret)
+  }
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("GeometryType");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.crs {
+      o_prot.write_field_begin(&TFieldIdentifier::new("crs", TType::String, 1))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// GeographyType
+//
+
+/// Embedded Geography logical type annotation
+/// 
+/// Geospatial features in the WKB format with an explicit (non-linear/non-planar)
+/// edges interpolation algorithm.
+/// 
+/// A custom geographic CRS can be set by the crs field, where longitudes are
+/// bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS
+/// defaults to "OGC:CRS84".
+/// 
+/// An optional algorithm can be set to correctly interpret edges interpolation
+/// of the geometries. If unset, the algorithm defaults to SPHERICAL.
+/// 
+/// Allowed for physical type: BYTE_ARRAY.
+/// 
+/// See Geospatial.md for details.
+#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct GeographyType {
+  pub crs: Option<String>,
+  pub algorithm: Option<EdgeInterpolationAlgorithm>,
+}
+
+impl GeographyType {
+  pub fn new<F1, F2>(crs: F1, algorithm: F2) -> GeographyType where F1: Into<Option<String>>, F2: Into<Option<EdgeInterpolationAlgorithm>> {
+    GeographyType {
+      crs: crs.into(),
+      algorithm: algorithm.into(),
+    }
+  }
+}
+
+impl crate::thrift::TSerializable for GeographyType {
+  fn read_from_in_protocol<T: TInputProtocol>(i_prot: &mut T) -> thrift::Result<GeographyType> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<String> = None;
+    let mut f_2: Option<EdgeInterpolationAlgorithm> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_string()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = EdgeInterpolationAlgorithm::read_from_in_protocol(i_prot)?;
+          f_2 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = GeographyType {
+      crs: f_1,
+      algorithm: f_2,
+    };
+    Ok(ret)
+  }
+  fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("GeographyType");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.crs {
+      o_prot.write_field_begin(&TFieldIdentifier::new("crs", TType::String, 1))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?
+    }
+    if let Some(ref fld_var) = self.algorithm {
+      o_prot.write_field_begin(&TFieldIdentifier::new("algorithm", TType::I32, 2))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
 //
 // LogicalType
 //
@@ -1854,6 +2361,9 @@ pub enum LogicalType {
   BSON(BsonType),
   UUID(UUIDType),
   FLOAT16(Float16Type),
+  VARIANT(VariantType),
+  GEOMETRY(GeometryType),
+  GEOGRAPHY(GeographyType),
 }
 
 impl crate::thrift::TSerializable for LogicalType {
@@ -1966,6 +2476,27 @@ impl crate::thrift::TSerializable for LogicalType {
           }
           received_field_count += 1;
         },
+        16 => {
+          let val = VariantType::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(LogicalType::VARIANT(val));
+          }
+          received_field_count += 1;
+        },
+        17 => {
+          let val = GeometryType::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(LogicalType::GEOMETRY(val));
+          }
+          received_field_count += 1;
+        },
+        18 => {
+          let val = GeographyType::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(LogicalType::GEOGRAPHY(val));
+          }
+          received_field_count += 1;
+        },
         _ => {
           i_prot.skip(field_ident.field_type)?;
           received_field_count += 1;
@@ -2070,6 +2601,21 @@ impl crate::thrift::TSerializable for LogicalType {
         f.write_to_out_protocol(o_prot)?;
         o_prot.write_field_end()?;
       },
+      LogicalType::VARIANT(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("VARIANT", TType::Struct, 16))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+      LogicalType::GEOMETRY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("GEOMETRY", TType::Struct, 17))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+      LogicalType::GEOGRAPHY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("GEOGRAPHY", TType::Struct, 18))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
     }
     o_prot.write_field_stop()?;
     o_prot.write_struct_end()
@@ -2081,13 +2627,9 @@ impl crate::thrift::TSerializable for LogicalType {
 //
 
 /// Represents a element inside a schema definition.
-///
-///  - if it is a group (inner node) then type is undefined and num_children
-///    is defined
-///  - if it is a primitive type (leaf) then type is defined and
-///    num_children is undefined
-///
-/// Note the  nodes are listed in depth first traversal order.
+///  - if it is a group (inner node) then type is undefined and num_children is defined
+///  - if it is a primitive type (leaf) then type is defined and num_children is undefined
+/// the nodes are listed in depth first traversal order.
 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
 pub struct SchemaElement {
   /// Data type for this field. Not set if the current element is a non-leaf node
@@ -3574,10 +4116,12 @@ pub struct ColumnMetaData {
   /// also be useful in some cases for more fine-grained nullability/list length
   /// filter pushdown.
   pub size_statistics: Option<SizeStatistics>,
+  /// Optional statistics specific for Geometry and Geography logical types
+  pub geospatial_statistics: Option<GeospatialStatistics>,
 }
 
 impl ColumnMetaData {
-  pub fn new<F8, F10, F11, F12, F13, F14, F15, F16>(type_: Type, encodings: Vec<Encoding>, path_in_schema: Vec<String>, codec: CompressionCodec, num_values: i64, total_uncompressed_size: i64, total_compressed_size: i64, key_value_metadata: F8, data_page_offset: i64, index_page_offset: F10, dictionary_page_offset: F11, statistics: F12, encoding_stats: F13, bloom_filter_offset: F14, bloom_filter_length: F15, size_statistics: F16) -> ColumnMetaData where F8: Into<Option<Vec<KeyValue>>>, F10: Into<Option<i64>>, F11: Into<Option<i64>>, F12: Into<Option<Statistics>>, F13: Into<Option<Vec<PageEncodingStats>>>, F14: Into<Option<i64>>, F15: Into<Option<i32>>, F16: Into<Option<SizeStatistics>> {
+  pub fn new<F8, F10, F11, F12, F13, F14, F15, F16, F17>(type_: Type, encodings: Vec<Encoding>, path_in_schema: Vec<String>, codec: CompressionCodec, num_values: i64, total_uncompressed_size: i64, total_compressed_size: i64, key_value_metadata: F8, data_page_offset: i64, index_page_offset: F10, dictionary_page_offset: F11, statistics: F12, encoding_stats: F13, bloom_filter_offset: F14, bloom_filter_length: F15, size_statistics: F16, geospatial_statistics: F17) -> ColumnMetaData where F8: Into<Option<Vec<KeyValue>>>, F10: Into<Option<i64>>, F11: Into<Option<i64>>, F12: Into<Option<Statistics>>, F13: Into<Option<Vec<PageEncodingStats>>>, F14: Into<Option<i64>>, F15: Into<Option<i32>>, F16: Into<Option<SizeStatistics>>, F17: Into<Option<GeospatialStatistics>> {
     ColumnMetaData {
       type_,
       encodings,
@@ -3595,6 +4139,7 @@ impl ColumnMetaData {
       bloom_filter_offset: bloom_filter_offset.into(),
       bloom_filter_length: bloom_filter_length.into(),
       size_statistics: size_statistics.into(),
+      geospatial_statistics: geospatial_statistics.into(),
     }
   }
 }
@@ -3618,6 +4163,7 @@ impl crate::thrift::TSerializable for ColumnMetaData {
     let mut f_14: Option<i64> = None;
     let mut f_15: Option<i32> = None;
     let mut f_16: Option<SizeStatistics> = None;
+    let mut f_17: Option<GeospatialStatistics> = None;
     loop {
       let field_ident = i_prot.read_field_begin()?;
       if field_ident.field_type == TType::Stop {
@@ -3633,8 +4179,8 @@ impl crate::thrift::TSerializable for ColumnMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<Encoding> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_2 = Encoding::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_2);
+            let list_elem_3 = Encoding::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_3);
           }
           i_prot.read_list_end()?;
           f_2 = Some(val);
@@ -3643,8 +4189,8 @@ impl crate::thrift::TSerializable for ColumnMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_3 = i_prot.read_string()?;
-            val.push(list_elem_3);
+            let list_elem_4 = i_prot.read_string()?;
+            val.push(list_elem_4);
           }
           i_prot.read_list_end()?;
           f_3 = Some(val);
@@ -3669,8 +4215,8 @@ impl crate::thrift::TSerializable for ColumnMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_4 = KeyValue::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_4);
+            let list_elem_5 = KeyValue::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_5);
           }
           i_prot.read_list_end()?;
           f_8 = Some(val);
@@ -3695,8 +4241,8 @@ impl crate::thrift::TSerializable for ColumnMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<PageEncodingStats> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_5 = PageEncodingStats::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_5);
+            let list_elem_6 = PageEncodingStats::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_6);
           }
           i_prot.read_list_end()?;
           f_13 = Some(val);
@@ -3713,6 +4259,10 @@ impl crate::thrift::TSerializable for ColumnMetaData {
           let val = SizeStatistics::read_from_in_protocol(i_prot)?;
           f_16 = Some(val);
         },
+        17 => {
+          let val = GeospatialStatistics::read_from_in_protocol(i_prot)?;
+          f_17 = Some(val);
+        },
         _ => {
           i_prot.skip(field_ident.field_type)?;
         },
@@ -3745,6 +4295,7 @@ impl crate::thrift::TSerializable for ColumnMetaData {
       bloom_filter_offset: f_14,
       bloom_filter_length: f_15,
       size_statistics: f_16,
+      geospatial_statistics: f_17,
     };
     Ok(ret)
   }
@@ -3831,6 +4382,11 @@ impl crate::thrift::TSerializable for ColumnMetaData {
       fld_var.write_to_out_protocol(o_prot)?;
       o_prot.write_field_end()?
     }
+    if let Some(ref fld_var) = self.geospatial_statistics {
+      o_prot.write_field_begin(&TFieldIdentifier::new("geospatial_statistics", TType::Struct, 17))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?
+    }
     o_prot.write_field_stop()?;
     o_prot.write_struct_end()
   }
@@ -3910,8 +4466,8 @@ impl crate::thrift::TSerializable for EncryptionWithColumnKey {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_6 = i_prot.read_string()?;
-            val.push(list_elem_6);
+            let list_elem_7 = i_prot.read_string()?;
+            val.push(list_elem_7);
           }
           i_prot.read_list_end()?;
           f_1 = Some(val);
@@ -4284,8 +4840,8 @@ impl crate::thrift::TSerializable for RowGroup {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<ColumnChunk> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_7 = ColumnChunk::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_7);
+            let list_elem_8 = ColumnChunk::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_8);
           }
           i_prot.read_list_end()?;
           f_1 = Some(val);
@@ -4302,8 +4858,8 @@ impl crate::thrift::TSerializable for RowGroup {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<SortingColumn> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_8 = SortingColumn::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_8);
+            let list_elem_9 = SortingColumn::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_9);
           }
           i_prot.read_list_end()?;
           f_4 = Some(val);
@@ -4629,8 +5185,8 @@ impl crate::thrift::TSerializable for OffsetIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<PageLocation> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_9 = PageLocation::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_9);
+            let list_elem_10 = PageLocation::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_10);
           }
           i_prot.read_list_end()?;
           f_1 = Some(val);
@@ -4639,8 +5195,8 @@ impl crate::thrift::TSerializable for OffsetIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_10 = i_prot.read_i64()?;
-            val.push(list_elem_10);
+            let list_elem_11 = i_prot.read_i64()?;
+            val.push(list_elem_11);
           }
           i_prot.read_list_end()?;
           f_2 = Some(val);
@@ -4718,7 +5274,14 @@ pub struct ColumnIndex {
   /// lists. Readers cannot assume that max_values\[i\] <= min_values\[i+1\], even
   /// if the lists are ordered.
   pub boundary_order: BoundaryOrder,
-  /// A list containing the number of null values for each page *
+  /// A list containing the number of null values for each page
+  /// 
+  /// Writers SHOULD always write this field even if no null values
+  /// are present or the column is not nullable.
+  /// Readers MUST distinguish between null_counts not being present
+  /// and null_count being 0.
+  /// If null_counts are not present, readers MUST NOT assume all
+  /// null counts are 0.
   pub null_counts: Option<Vec<i64>>,
   /// Contains repetition level histograms for each page
   /// concatenated together.  The repetition_level_histogram field on
@@ -4772,8 +5335,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<bool> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_11 = i_prot.read_bool()?;
-            val.push(list_elem_11);
+            let list_elem_12 = i_prot.read_bool()?;
+            val.push(list_elem_12);
           }
           i_prot.read_list_end()?;
           f_1 = Some(val);
@@ -4782,8 +5345,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_12 = i_prot.read_bytes()?;
-            val.push(list_elem_12);
+            let list_elem_13 = i_prot.read_bytes()?;
+            val.push(list_elem_13);
           }
           i_prot.read_list_end()?;
           f_2 = Some(val);
@@ -4792,8 +5355,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_13 = i_prot.read_bytes()?;
-            val.push(list_elem_13);
+            let list_elem_14 = i_prot.read_bytes()?;
+            val.push(list_elem_14);
           }
           i_prot.read_list_end()?;
           f_3 = Some(val);
@@ -4806,8 +5369,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_14 = i_prot.read_i64()?;
-            val.push(list_elem_14);
+            let list_elem_15 = i_prot.read_i64()?;
+            val.push(list_elem_15);
           }
           i_prot.read_list_end()?;
           f_5 = Some(val);
@@ -4816,8 +5379,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_15 = i_prot.read_i64()?;
-            val.push(list_elem_15);
+            let list_elem_16 = i_prot.read_i64()?;
+            val.push(list_elem_16);
           }
           i_prot.read_list_end()?;
           f_6 = Some(val);
@@ -4826,8 +5389,8 @@ impl crate::thrift::TSerializable for ColumnIndex {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_16 = i_prot.read_i64()?;
-            val.push(list_elem_16);
+            let list_elem_17 = i_prot.read_i64()?;
+            val.push(list_elem_17);
           }
           i_prot.read_list_end()?;
           f_7 = Some(val);
@@ -5267,8 +5830,8 @@ impl crate::thrift::TSerializable for FileMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<SchemaElement> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_17 = SchemaElement::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_17);
+            let list_elem_18 = SchemaElement::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_18);
           }
           i_prot.read_list_end()?;
           f_2 = Some(val);
@@ -5281,8 +5844,8 @@ impl crate::thrift::TSerializable for FileMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<RowGroup> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_18 = RowGroup::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_18);
+            let list_elem_19 = RowGroup::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_19);
           }
           i_prot.read_list_end()?;
           f_4 = Some(val);
@@ -5291,8 +5854,8 @@ impl crate::thrift::TSerializable for FileMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_19 = KeyValue::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_19);
+            let list_elem_20 = KeyValue::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_20);
           }
           i_prot.read_list_end()?;
           f_5 = Some(val);
@@ -5305,8 +5868,8 @@ impl crate::thrift::TSerializable for FileMetaData {
           let list_ident = i_prot.read_list_begin()?;
           let mut val: Vec<ColumnOrder> = Vec::with_capacity(list_ident.size as usize);
           for _ in 0..list_ident.size {
-            let list_elem_20 = ColumnOrder::read_from_in_protocol(i_prot)?;
-            val.push(list_elem_20);
+            let list_elem_21 = ColumnOrder::read_from_in_protocol(i_prot)?;
+            val.push(list_elem_21);
           }
           i_prot.read_list_end()?;
           f_7 = Some(val);
diff --git a/parquet/src/geospatial/accumulator.rs b/parquet/src/geospatial/accumulator.rs
new file mode 100644
index 000000000000..d25a47930f6e
--- /dev/null
+++ b/parquet/src/geospatial/accumulator.rs
@@ -0,0 +1,387 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides implementations and traits for building [`GeospatialStatistics`]
+
+use std::sync::{Arc, OnceLock};
+
+use crate::{
+    basic::LogicalType, errors::ParquetError, geospatial::statistics::GeospatialStatistics,
+    schema::types::ColumnDescPtr,
+};
+
+/// Create a new [`GeoStatsAccumulator`] instance if `descr` represents a Geometry or
+/// Geography [`LogicalType`]
+///
+/// Returns a suitable [`GeoStatsAccumulator`] if `descr` represents a geospatial
+/// (Geometry or Geography) type, or `None` otherwise.
+pub fn try_new_geo_stats_accumulator(
+    descr: &ColumnDescPtr,
+) -> Option<Box<dyn GeoStatsAccumulator>> {
+    if !matches!(
+        descr.logical_type_ref(),
+        Some(LogicalType::Geometry { .. }) | Some(LogicalType::Geography { .. })
+    ) {
+        return None;
+    }
+
+    Some(
+        ACCUMULATOR_FACTORY
+            .get_or_init(|| Arc::new(DefaultGeoStatsAccumulatorFactory::default()))
+            .new_accumulator(descr),
+    )
+}
+
+/// Initialize the global [`GeoStatsAccumulatorFactory`]
+///
+/// This may only be done once, and only before any call to [`try_new_geo_stats_accumulator`].
+/// Clients may use this to implement support for builds of the Parquet crate without
+/// geospatial support or to implement support for Geography bounding using external
+/// dependencies.
+pub fn init_geo_stats_accumulator_factory(
+    factory: Arc<dyn GeoStatsAccumulatorFactory>,
+) -> Result<(), ParquetError> {
+    if ACCUMULATOR_FACTORY.set(factory).is_err() {
+        Err(ParquetError::General(
+            "Global GeoStatsAccumulatorFactory already set".to_string(),
+        ))
+    } else {
+        Ok(())
+    }
+}
+
+/// Global accumulator factory instance
+static ACCUMULATOR_FACTORY: OnceLock<Arc<dyn GeoStatsAccumulatorFactory>> = OnceLock::new();
+
+/// Factory for [`GeospatialStatistics`] accumulators
+///
+/// The GeoStatsAccumulatorFactory is a trait implemented by the global factory that
+/// generates new instances of a [`GeoStatsAccumulator`] when constructing new
+/// encoders for a Geometry or Geography logical type.
+pub trait GeoStatsAccumulatorFactory: Send + Sync {
+    /// Create a new [`GeoStatsAccumulator`] appropriate for the logical type of a given
+    /// [`ColumnDescPtr`]
+    fn new_accumulator(&self, descr: &ColumnDescPtr) -> Box<dyn GeoStatsAccumulator>;
+}
+
+/// Dynamic [`GeospatialStatistics`] accumulator
+///
+/// The GeoStatsAccumulator is a trait whose implementors can ingest the (non-null)
+/// elements of a column and return compliant [`GeospatialStatistics`] (or `None`).
+/// When built with geospatial support this will usually be the
+/// [`ParquetGeoStatsAccumulator`]
+pub trait GeoStatsAccumulator: Send {
+    /// Returns true if this instance can return [`GeospatialStatistics`] from
+    /// [`GeoStatsAccumulator::finish`].
+    ///
+    /// This method returns false when this crate is built without geospatial support
+    /// (i.e., from the [`VoidGeoStatsAccumulator`]) or if the accumulator encountered
+    /// invalid or unsupported elements for which it cannot compute valid statistics.
+    fn is_valid(&self) -> bool;
+
+    /// Update with a single slice of WKB-encoded values
+    ///
+    /// This method is infallible; however, in the event of improperly encoded values,
+    /// implementations must ensure that [`GeoStatsAccumulator::finish`] returns `None`.
+    fn update_wkb(&mut self, wkb: &[u8]);
+
+    /// Compute the final statistics and reset internal state
+    fn finish(&mut self) -> Option<Box<GeospatialStatistics>>;
+}
+
+/// Default [`GeoStatsAccumulatorFactory`] for [`GeospatialStatistics`] accumulators
+///
+/// When this crate is built with geospatial support, this factory constructs a
+/// [`ParquetGeoStatsAccumulator`] that ensures Geometry columns are written with
+/// statistics when statistics for that column are enabled. Otherwise, this factory
+/// returns a [`VoidGeoStatsAccumulator`] that never adds any geospatial statistics.
+///
+/// Bounding for Geography columns is not currently implemented by parquet-geospatial,
+/// so for Geography columns this factory always returns a [`VoidGeoStatsAccumulator`].
+#[derive(Debug, Default)]
+pub struct DefaultGeoStatsAccumulatorFactory {}
+
+impl GeoStatsAccumulatorFactory for DefaultGeoStatsAccumulatorFactory {
+    fn new_accumulator(&self, _descr: &ColumnDescPtr) -> Box<dyn GeoStatsAccumulator> {
+        #[cfg(feature = "geospatial")]
+        if let Some(crate::basic::LogicalType::Geometry { .. }) = _descr.logical_type_ref() {
+            Box::new(ParquetGeoStatsAccumulator::default())
+        } else {
+            Box::new(VoidGeoStatsAccumulator::default())
+        }
+
+        #[cfg(not(feature = "geospatial"))]
+        return Box::new(VoidGeoStatsAccumulator::default());
+    }
+}
+
+/// A [`GeoStatsAccumulator`] that never computes any [`GeospatialStatistics`]
+#[derive(Debug, Default)]
+pub struct VoidGeoStatsAccumulator {}
+
+impl GeoStatsAccumulator for VoidGeoStatsAccumulator {
+    fn is_valid(&self) -> bool {
+        false
+    }
+
+    fn update_wkb(&mut self, _wkb: &[u8]) {}
+
+    fn finish(&mut self) -> Option<Box<GeospatialStatistics>> {
+        None
+    }
+}
+
+/// A [`GeoStatsAccumulator`] that uses the parquet-geospatial crate to compute Geometry statistics
+///
+/// Note that this accumulator only supports Geometry types and will return invalid statistics for
+/// non-point Geography input ([`GeoStatsAccumulatorFactory::new_accumulator`] is responsible
+/// for ensuring an appropriate accumulator based on the logical type).
+#[cfg(feature = "geospatial")]
+#[derive(Debug)]
+pub struct ParquetGeoStatsAccumulator {
+    bounder: parquet_geospatial::bounding::GeometryBounder,
+    invalid: bool,
+}
+
+#[cfg(feature = "geospatial")]
+impl Default for ParquetGeoStatsAccumulator {
+    fn default() -> Self {
+        Self {
+            bounder: parquet_geospatial::bounding::GeometryBounder::empty(),
+            invalid: false,
+        }
+    }
+}
+
+#[cfg(feature = "geospatial")]
+impl GeoStatsAccumulator for ParquetGeoStatsAccumulator {
+    fn is_valid(&self) -> bool {
+        !self.invalid
+    }
+
+    fn update_wkb(&mut self, wkb: &[u8]) {
+        if self.bounder.update_wkb(wkb).is_err() {
+            self.invalid = true; // stays set until the next `finish()`
+        }
+    }
+
+    fn finish(&mut self) -> Option<Box<GeospatialStatistics>> {
+        use parquet_geospatial::interval::IntervalTrait;
+
+        use crate::geospatial::bounding_box::BoundingBox;
+
+        if self.invalid {
+            // Report no statistics, then reset both fields so the accumulator can be reused
+            self.invalid = false;
+            self.bounder = parquet_geospatial::bounding::GeometryBounder::empty();
+            return None;
+        }
+
+        let bbox = if self.bounder.x().is_empty() || self.bounder.y().is_empty() {
+            None // no bounding box unless both X and Y have bounds
+        } else {
+            let mut bbox = BoundingBox::new(
+                self.bounder.x().lo(),
+                self.bounder.x().hi(),
+                self.bounder.y().lo(),
+                self.bounder.y().hi(),
+            );
+
+            if !self.bounder.z().is_empty() {
+                bbox = bbox.with_zrange(self.bounder.z().lo(), self.bounder.z().hi());
+            }
+
+            if !self.bounder.m().is_empty() {
+                bbox = bbox.with_mrange(self.bounder.m().lo(), self.bounder.m().hi());
+            }
+
+            Some(bbox)
+        };
+
+        let bounder_geometry_types = self.bounder.geometry_types();
+        let geometry_types = if bounder_geometry_types.is_empty() {
+            None // an empty type list is reported as "no information"
+        } else {
+            Some(bounder_geometry_types)
+        };
+
+        // Reset the bounder so the next round of accumulation starts from empty
+        self.bounder = parquet_geospatial::bounding::GeometryBounder::empty();
+
+        Some(Box::new(GeospatialStatistics::new(bbox, geometry_types)))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_void_accumulator() {
+        let mut accumulator = VoidGeoStatsAccumulator {};
+        assert!(!accumulator.is_valid());
+        accumulator.update_wkb(&[0x01, 0x02, 0x03]);
+        assert!(accumulator.finish().is_none());
+    }
+
+    #[cfg(feature = "geospatial")]
+    #[test]
+    fn test_default_accumulator_geospatial_factory() {
+        use std::sync::Arc;
+
+        use parquet_geospatial::testing::wkb_point_xy;
+
+        use crate::{
+            basic::LogicalType,
+            geospatial::bounding_box::BoundingBox,
+            schema::types::{ColumnDescriptor, ColumnPath, Type},
+        };
+
+        // Check that we have a working accumulator for Geometry
+        let parquet_type = Type::primitive_type_builder("geom", crate::basic::Type::BYTE_ARRAY)
+            .with_logical_type(Some(LogicalType::Geometry { crs: None }))
+            .build()
+            .unwrap();
+        let column_descr =
+            ColumnDescriptor::new(Arc::new(parquet_type), 0, 0, ColumnPath::new(vec![]));
+        let mut accumulator = try_new_geo_stats_accumulator(&Arc::new(column_descr)).unwrap();
+
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(1.0, 2.0));
+        accumulator.update_wkb(&wkb_point_xy(11.0, 12.0));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(
+            stats.bounding_box().unwrap(),
+            &BoundingBox::new(1.0, 11.0, 2.0, 12.0)
+        );
+
+        // Check that we have a void accumulator for Geography
+        let parquet_type = Type::primitive_type_builder("geom", crate::basic::Type::BYTE_ARRAY)
+            .with_logical_type(Some(LogicalType::Geography {
+                crs: None,
+                algorithm: None,
+            }))
+            .build()
+            .unwrap();
+        let column_descr =
+            ColumnDescriptor::new(Arc::new(parquet_type), 0, 0, ColumnPath::new(vec![]));
+        let mut accumulator = try_new_geo_stats_accumulator(&Arc::new(column_descr)).unwrap();
+
+        assert!(!accumulator.is_valid());
+        assert!(accumulator.finish().is_none());
+
+        // Check that we return None if the type is not geometry or geography
+        let parquet_type = Type::primitive_type_builder("geom", crate::basic::Type::BYTE_ARRAY)
+            .build()
+            .unwrap();
+        let column_descr =
+            ColumnDescriptor::new(Arc::new(parquet_type), 0, 0, ColumnPath::new(vec![]));
+        assert!(try_new_geo_stats_accumulator(&Arc::new(column_descr)).is_none());
+
+        // We should not be able to initialize a global accumulator after we've initialized at least
+        // one accumulator
+        assert!(
+            init_geo_stats_accumulator_factory(Arc::new(
+                DefaultGeoStatsAccumulatorFactory::default()
+            ))
+            .is_err()
+        )
+    }
+
+    #[cfg(feature = "geospatial")]
+    #[test]
+    fn test_geometry_accumulator() {
+        use parquet_geospatial::testing::{wkb_point_xy, wkb_point_xyzm};
+
+        use crate::geospatial::bounding_box::BoundingBox;
+
+        let mut accumulator = ParquetGeoStatsAccumulator::default();
+
+        // A fresh instance should be able to bound input
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(1.0, 2.0));
+        accumulator.update_wkb(&wkb_point_xy(11.0, 12.0));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(stats.geospatial_types().unwrap(), &vec![1]);
+        assert_eq!(
+            stats.bounding_box().unwrap(),
+            &BoundingBox::new(1.0, 11.0, 2.0, 12.0)
+        );
+
+        // finish() should have reset the bounder such that the first values
+        // aren't included when computing the next round of statistics.
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(21.0, 22.0));
+        accumulator.update_wkb(&wkb_point_xy(31.0, 32.0));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(stats.geospatial_types().unwrap(), &vec![1]);
+        assert_eq!(
+            stats.bounding_box().unwrap(),
+            &BoundingBox::new(21.0, 31.0, 22.0, 32.0)
+        );
+
+        // When an accumulator encounters invalid input, it reports is_valid() false
+        // and does not compute subsequent statistics
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(41.0, 42.0));
+        accumulator.update_wkb("these bytes are not WKB".as_bytes());
+        assert!(!accumulator.is_valid());
+        assert!(accumulator.finish().is_none());
+
+        // Subsequent rounds of accumulation should work as expected
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(41.0, 42.0));
+        accumulator.update_wkb(&wkb_point_xy(51.0, 52.0));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(stats.geospatial_types().unwrap(), &vec![1]);
+        assert_eq!(
+            stats.bounding_box().unwrap(),
+            &BoundingBox::new(41.0, 51.0, 42.0, 52.0)
+        );
+
+        // When there was no input at all (occurs in the all null case), both geometry
+        // types and bounding box will be None. This is because Parquet Thrift statistics
+        // have no mechanism to communicate "empty". (The all null situation may be determined
+        // from the null count in this case).
+        assert!(accumulator.is_valid());
+        let stats = accumulator.finish().unwrap();
+        assert!(stats.geospatial_types().is_none());
+        assert!(stats.bounding_box().is_none());
+
+        // When there was 100% "empty" input (i.e., non-null geometries without
+        // coordinates), there should be statistics with geometry types but no
+        // bounding box.
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xy(f64::NAN, f64::NAN));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(stats.geospatial_types().unwrap(), &vec![1]);
+        assert!(stats.bounding_box().is_none());
+
+        // If Z and/or M are present, they should be reported in the bounding box
+        assert!(accumulator.is_valid());
+        accumulator.update_wkb(&wkb_point_xyzm(1.0, 2.0, 3.0, 4.0));
+        accumulator.update_wkb(&wkb_point_xyzm(5.0, 6.0, 7.0, 8.0));
+        let stats = accumulator.finish().unwrap();
+        assert_eq!(stats.geospatial_types().unwrap(), &vec![3001]);
+        assert_eq!(
+            stats.bounding_box().unwrap(),
+            &BoundingBox::new(1.0, 5.0, 2.0, 6.0)
+                .with_zrange(3.0, 7.0)
+                .with_mrange(4.0, 8.0)
+        );
+    }
+}
diff --git a/parquet/src/geospatial/bounding_box.rs b/parquet/src/geospatial/bounding_box.rs
new file mode 100644
index 000000000000..59a4dfb50154
--- /dev/null
+++ b/parquet/src/geospatial/bounding_box.rs
@@ -0,0 +1,224 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Bounding box for GEOMETRY or GEOGRAPHY type in the representation of min/max
+//! value pair of coordinates from each axis.
+//!
+//! Derived from the parquet format spec: <https://github.com/apache/parquet-format/blob/master/Geospatial.md>
+//!
+//!
+
+use crate::file::metadata::HeapSize;
+
+/// A geospatial instance has at least two coordinate dimensions: X and Y for 2D coordinates of each point.
+/// X represents longitude/easting and Y represents latitude/northing. A geospatial instance can optionally
+/// have Z and/or M values associated with each point.
+///
+/// The Z values introduce the third dimension coordinate, typically used to indicate height or elevation.
+///
+/// M values allow tracking a value in a fourth dimension. These can represent:
+/// - Linear reference values (e.g., highway milepost)
+/// - Timestamps
+/// - Other values defined by the CRS
+///
+/// The bounding box is defined as min/max value pairs of coordinates from each axis. X and Y values are
+/// always present, while Z and M are omitted for 2D geospatial instances.
+///
+/// When calculating a bounding box:
+/// - Null or NaN values in a coordinate dimension are skipped
+/// - If a dimension has only null/NaN values, that dimension is omitted
+/// - If either X or Y dimension is missing, no bounding box is produced
+/// - Example: POINT (1 NaN) contributes to X but not to Y, Z, or M dimensions
+///
+/// Special cases:
+/// - For X values only, xmin may exceed xmax. In this case, a point matches if x >= xmin OR x <= xmax
+/// - This wraparound can occur when the bounding box crosses the antimeridian line.
+/// - In geographic terms: xmin=westernmost, xmax=easternmost, ymin=southernmost, ymax=northernmost
+///
+/// For GEOGRAPHY types:
+/// - X values must be within [-180, 180] (longitude)
+/// - Y values must be within [-90, 90] (latitude)
+///
+/// Derived from the parquet format [spec][bounding-box-spec]
+///
+/// # Examples
+///
+/// ```
+/// use parquet::geospatial::bounding_box::BoundingBox;
+///
+/// // 2D bounding box; arguments are (xmin, xmax, ymin, ymax)
+/// let bbox_2d = BoundingBox::new(0.0, 100.0, 0.0, 100.0);
+///
+/// // 3D bounding box with elevation
+/// let bbox_3d = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
+///     .with_zrange(0.0, 1000.0);
+///
+/// // 3D bounding box with elevation and measured value
+/// let bbox_3d_m = BoundingBox::new(0.0, 100.0, 0.0, 100.0)
+///     .with_zrange(0.0, 1000.0)
+///     .with_mrange(0.0, 1000.0);
+/// ```
+///
+/// [bounding-box-spec]: https://github.com/apache/parquet-format/blob/master/Geospatial.md#bounding-box
+#[derive(Clone, Debug, PartialEq)]
+pub struct BoundingBox {
+    /// X coordinates (longitude or easting): (min, max)
+    x_range: (f64, f64),
+    /// Y coordinates (latitude or northing): (min, max)
+    y_range: (f64, f64),
+    /// Z coordinates (elevation/height): (min, max), if present
+    z_range: Option<(f64, f64)>,
+    /// M coordinates (measured value): (min, max), if present
+    m_range: Option<(f64, f64)>,
+}
+
+impl BoundingBox {
+    /// Creates a new 2D bounding box from `xmin`, `xmax`, `ymin`, `ymax`; the Z and M ranges start unset.
+    pub fn new(xmin: f64, xmax: f64, ymin: f64, ymax: f64) -> Self {
+        Self {
+            x_range: (xmin, xmax),
+            y_range: (ymin, ymax),
+            z_range: None,
+            m_range: None,
+        }
+    }
+
+    /// Returns this bounding box with its X-coordinate range replaced by `(xmin, xmax)`.
+    pub fn with_xrange(mut self, xmin: f64, xmax: f64) -> Self {
+        self.x_range = (xmin, xmax);
+        self
+    }
+
+    /// Returns this bounding box with its Y-coordinate range replaced by `(ymin, ymax)`.
+    pub fn with_yrange(mut self, ymin: f64, ymax: f64) -> Self {
+        self.y_range = (ymin, ymax);
+        self
+    }
+
+    /// Returns this bounding box with its Z-coordinate range set to `(zmin, zmax)`.
+    pub fn with_zrange(mut self, zmin: f64, zmax: f64) -> Self {
+        self.z_range = Some((zmin, zmax));
+        self
+    }
+
+    /// Returns this bounding box with its M-coordinate range set to `(mmin, mmax)`.
+    pub fn with_mrange(mut self, mmin: f64, mmax: f64) -> Self {
+        self.m_range = Some((mmin, mmax));
+        self
+    }
+
+    /// Returns the minimum x-coordinate.
+    pub fn get_xmin(&self) -> f64 {
+        self.x_range.0
+    }
+
+    /// Returns the maximum x-coordinate.
+    pub fn get_xmax(&self) -> f64 {
+        self.x_range.1
+    }
+
+    /// Returns the minimum y-coordinate.
+    pub fn get_ymin(&self) -> f64 {
+        self.y_range.0
+    }
+
+    /// Returns the maximum y-coordinate.
+    pub fn get_ymax(&self) -> f64 {
+        self.y_range.1
+    }
+
+    /// Returns the minimum z-coordinate, if present.
+    pub fn get_zmin(&self) -> Option<f64> {
+        self.z_range.map(|z| z.0)
+    }
+
+    /// Returns the maximum z-coordinate, if present.
+    pub fn get_zmax(&self) -> Option<f64> {
+        self.z_range.map(|z| z.1)
+    }
+
+    /// Returns the minimum m-value (measure), if present.
+    pub fn get_mmin(&self) -> Option<f64> {
+        self.m_range.map(|m| m.0)
+    }
+
+    /// Returns the maximum m-value (measure), if present.
+    pub fn get_mmax(&self) -> Option<f64> {
+        self.m_range.map(|m| m.1)
+    }
+
+    /// Returns `true` if a Z range (both zmin and zmax) is present.
+    pub fn is_z_valid(&self) -> bool {
+        self.z_range.is_some()
+    }
+
+    /// Returns `true` if an M range (both mmin and mmax) is present.
+    pub fn is_m_valid(&self) -> bool {
+        self.m_range.is_some()
+    }
+}
+
+impl HeapSize for BoundingBox {
+    fn heap_size(&self) -> usize {
+        0 // every field is an inline `f64` tuple, so there are no heap allocations
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_bounding_box() {
+        let bbox = BoundingBox::new(0.0, 5.0, 10.0, 20.0); // min != max on every axis
+        assert_eq!(bbox.get_xmin(), 0.0);
+        assert_eq!(bbox.get_xmax(), 5.0);
+        assert_eq!(bbox.get_ymin(), 10.0);
+        assert_eq!(bbox.get_ymax(), 20.0);
+        assert_eq!(bbox.get_zmin(), None);
+        assert_eq!(bbox.get_zmax(), None);
+        assert_eq!(bbox.get_mmin(), None);
+        assert_eq!(bbox.get_mmax(), None);
+        assert!(!bbox.is_z_valid());
+        assert!(!bbox.is_m_valid());
+
+        // a Z range alone must not make M valid
+        let bbox_z = BoundingBox::new(0.0, 5.0, 10.0, 20.0).with_zrange(5.0, 15.0);
+        assert_eq!(bbox_z.get_zmin(), Some(5.0));
+        assert_eq!(bbox_z.get_zmax(), Some(15.0));
+        assert!(bbox_z.is_z_valid());
+        assert!(!bbox_z.is_m_valid());
+
+        // an M range alone must not make Z valid
+        let bbox_m = BoundingBox::new(0.0, 5.0, 10.0, 20.0).with_mrange(10.0, 20.0);
+        assert_eq!(bbox_m.get_mmin(), Some(10.0));
+        assert_eq!(bbox_m.get_mmax(), Some(20.0));
+        assert!(!bbox_m.is_z_valid());
+        assert!(bbox_m.is_m_valid());
+
+        // Z and M ranges can be combined on the same box
+        let bbox_zm = BoundingBox::new(0.0, 5.0, 10.0, 20.0)
+            .with_zrange(5.0, 15.0)
+            .with_mrange(10.0, 20.0);
+        assert_eq!(bbox_zm.get_zmin(), Some(5.0));
+        assert_eq!(bbox_zm.get_zmax(), Some(15.0));
+        assert_eq!(bbox_zm.get_mmin(), Some(10.0));
+        assert_eq!(bbox_zm.get_mmax(), Some(20.0));
+        assert!(bbox_zm.is_z_valid());
+        assert!(bbox_zm.is_m_valid());
+    }
+}
diff --git a/parquet/src/geospatial/mod.rs b/parquet/src/geospatial/mod.rs
new file mode 100644
index 000000000000..9d55fca89d46
--- /dev/null
+++ b/parquet/src/geospatial/mod.rs
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module provides functionality for working with geospatial data in Parquet files as defined in the [spec][parquet-geo-spec].
+//!
+//! * [`GeospatialStatistics`]: describes the geospatial statistics for a Parquet column.
+//! * [`BoundingBox`]: describes the bounding box values for a geospatial column.
+//!
+//! [`GeospatialStatistics`] describes the geospatial statistics for a Parquet column.
+//! * bbox: the [`BoundingBox`] for the geospatial data
+//! * geospatial_types: the geospatial types for the geospatial data as specified in [specification][geo-types].
+//!
+//! Geospatial bounding box describes the spatial extent of the geospatial data within a Parquet row group.
+//! * xmin, xmax: the minimum and maximum longitude values
+//! * ymin, ymax: the minimum and maximum latitude values
+//! * zmin, zmax: (optional) the minimum and maximum elevation values
+//! * mmin, mmax: (optional) the minimum and maximum linear reference values
+//!
+//! In 2D representation, where x are points:
+//! ```text
+//!  ymax +-----------------------+
+//!       |               x       |
+//!       |      x                |
+//!       |              x        |
+//!       |      x                |
+//!  ymin +-----------------------+
+//!       xmin                    xmax
+//! ```
+//!
+//! [`GeospatialStatistics`]: crate::geospatial::statistics::GeospatialStatistics
+//! [`BoundingBox`]: crate::geospatial::bounding_box::BoundingBox
+//! [parquet-geo-spec]: https://github.com/apache/parquet-format/blob/master/Geospatial.md
+//! [geo-types]: https://github.com/apache/parquet-format/blob/master/Geospatial.md#geospatial-types
+
+pub mod accumulator;
+pub mod bounding_box;
+pub mod statistics;
diff --git a/parquet/src/geospatial/statistics.rs b/parquet/src/geospatial/statistics.rs
new file mode 100644
index 000000000000..edabfea52d75
--- /dev/null
+++ b/parquet/src/geospatial/statistics.rs
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Geospatial statistics for Parquet files.
+//!
+//! This module provides functionality for working with geospatial statistics in Parquet files.
+//! It includes support for bounding boxes and geospatial statistics in column chunk metadata.
+
+use crate::{file::metadata::HeapSize, geospatial::bounding_box::BoundingBox};
+
+// ----------------------------------------------------------------------
+// Geospatial Statistics
+
+/// Represents geospatial statistics for a Parquet column or dataset.
+///
+/// This struct contains metadata about the spatial characteristics of geospatial data,
+/// including bounding box information and the types of geospatial geometries present.
+/// It's used to optimize spatial queries and provide spatial context for data analysis.
+///
+/// # Examples
+///
+/// ```
+/// use parquet::geospatial::statistics::GeospatialStatistics;
+/// use parquet::geospatial::bounding_box::BoundingBox;
+///
+/// // Statistics with a bounding box; `BoundingBox::new` takes (xmin, xmax, ymin, ymax)
+/// let bbox = BoundingBox::new(0.0, 100.0, 0.0, 100.0);
+/// let stats = GeospatialStatistics::new(Some(bbox), Some(vec![1, 2, 3]));
+/// ```
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct GeospatialStatistics {
+    bbox: Option<BoundingBox>,
+    geospatial_types: Option<Vec<i32>>,
+}
+
+impl GeospatialStatistics {
+    /// Creates a new geospatial statistics instance with the specified data.
+    pub fn new(bbox: Option<BoundingBox>, geospatial_types: Option<Vec<i32>>) -> Self {
+        Self {
+            bbox,
+            geospatial_types,
+        }
+    }
+
+    /// Optional list of geometry type identifiers, where `None` represents a lack of information.
+    pub fn geospatial_types(&self) -> Option<&Vec<i32>> {
+        self.geospatial_types.as_ref()
+    }
+
+    /// Optional bounding box defining the spatial extent, where `None` represents a lack of information.
+    pub fn bounding_box(&self) -> Option<&BoundingBox> {
+        self.bbox.as_ref()
+    }
+}
+
+impl HeapSize for GeospatialStatistics {
+    fn heap_size(&self) -> usize {
+        self.bbox.heap_size() + self.geospatial_types.heap_size()
+    }
+}
diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs
index f814ddeb0737..98106a2c1059 100644
--- a/parquet/src/lib.rs
+++ b/parquet/src/lib.rs
@@ -55,25 +55,28 @@
 //! ## Reading and Writing Arrow (`arrow` feature)
 //!
 //! The [`arrow`] module supports reading and writing Parquet data to/from
-//! Arrow `RecordBatch`es. Using Arrow is simple and performant, and allows workloads
+//! Arrow [`RecordBatch`]es. Using Arrow is simple and performant, and allows workloads
 //! to leverage the wide range of data transforms provided by the [arrow] crate, and by the
 //! ecosystem of [Arrow] compatible systems.
 //!
 //! Most users will use [`ArrowWriter`] for writing and [`ParquetRecordBatchReaderBuilder`] for
-//! reading.
+//! reading from synchronous IO sources such as files or in-memory buffers.
 //!
-//! Lower level APIs include [`ArrowColumnWriter`] for writing using multiple
-//! threads, and [`RowFilter`] to apply filters during decode.
+//! Lower level APIs include
+//! * [`ParquetPushDecoder`] for fine-grained control over interleaving of IO and CPU.
+//! * [`ArrowColumnWriter`] for writing using multiple threads,
+//! * [`RowFilter`] to apply filters during decode
 //!
 //! [`ArrowWriter`]: arrow::arrow_writer::ArrowWriter
 //! [`ParquetRecordBatchReaderBuilder`]: arrow::arrow_reader::ParquetRecordBatchReaderBuilder
+//! [`ParquetPushDecoder`]: arrow::push_decoder::ParquetPushDecoder
 //! [`ArrowColumnWriter`]: arrow::arrow_writer::ArrowColumnWriter
 //! [`RowFilter`]: arrow::arrow_reader::RowFilter
 //!
-//! ## `async` Reading and Writing Arrow (`async` feature)
+//! ## `async` Reading and Writing Arrow (`arrow` feature + `async` feature)
 //!
 //! The [`async_reader`] and [`async_writer`] modules provide async APIs to
-//! read and write `RecordBatch`es  asynchronously.
+//! read and write [`RecordBatch`]es  asynchronously.
 //!
 //! Most users will use [`AsyncArrowWriter`] for writing and [`ParquetRecordBatchStreamBuilder`]
 //! for reading. When the `object_store` feature is enabled, [`ParquetObjectReader`]
@@ -86,6 +89,14 @@
 //! [`ParquetRecordBatchStreamBuilder`]: arrow::async_reader::ParquetRecordBatchStreamBuilder
 //! [`ParquetObjectReader`]: arrow::async_reader::ParquetObjectReader
 //!
+//! ## Variant Logical Type (`variant_experimental` feature)
+//!
+//! The [`variant`] module supports reading and writing Parquet files
+//! with the [Variant Binary Encoding] logical type, which can represent
+//! semi-structured data such as JSON efficiently.
+//!
+//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
+//!
 //! ## Read/Write Parquet Directly
 //!
 //! Workloads needing finer-grained control, or to avoid a dependence on arrow,
@@ -96,6 +107,7 @@
 //!
 //! [arrow]: https://docs.rs/arrow/latest/arrow/index.html
 //! [Arrow]: https://arrow.apache.org/
+//! [`RecordBatch`]: https://docs.rs/arrow/latest/arrow/array/struct.RecordBatch.html
 //! [CSV]: https://en.wikipedia.org/wiki/Comma-separated_values
 //! [Dremel]: https://research.google/pubs/pub36632/
 //! [Logical Types]: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
@@ -105,7 +117,7 @@
     html_logo_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg",
     html_favicon_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 /// Defines a an item with an experimental public API
 ///
@@ -130,6 +142,14 @@ macro_rules! experimental {
     }
 }
 
+#[cfg(all(
+    feature = "flate2",
+    not(any(feature = "flate2-zlib-rs", feature = "flate2-rust_backened"))
+))]
+compile_error!(
+    "When enabling `flate2` you must enable one of the features: `flate2-zlib-rs` or `flate2-rust_backened`."
+);
+
 #[macro_use]
 pub mod errors;
 pub mod basic;
@@ -144,11 +164,17 @@ pub mod basic;
 // Don't try clippy and format auto generated code
 #[allow(clippy::all, missing_docs)]
 #[rustfmt::skip]
+#[deprecated(
+    since = "57.0.0",
+    note = "The `format` module is no longer maintained, and will be removed in `59.0.0`"
+)]
 pub mod format;
 
 #[macro_use]
 pub mod data_type;
 
+use std::fmt::Debug;
+use std::ops::Range;
 // Exported for external use, such as benchmarks
 #[cfg(feature = "experimental")]
 #[doc(hidden)]
@@ -172,4 +198,24 @@ pub mod file;
 pub mod record;
 pub mod schema;
 
+mod parquet_macros;
+mod parquet_thrift;
 pub mod thrift;
+/// What data is needed to read the next item from a decoder.
+///
+/// This is used to communicate between the decoder and the caller
+/// to indicate what data is needed next, or what the result of decoding is.
+#[derive(Debug)]
+pub enum DecodeResult<T: Debug> {
+    /// The ranges of data necessary to proceed
+    // TODO: distinguish between minimum needed to make progress and what could be used?
+    NeedsData(Vec<Range<u64>>),
+    /// The decoder produced an output item
+    Data(T),
+    /// The decoder finished processing
+    Finished,
+}
+
+#[cfg(feature = "variant_experimental")]
+pub mod variant;
+experimental!(pub mod geospatial);
diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs
new file mode 100644
index 000000000000..714015e10e32
--- /dev/null
+++ b/parquet/src/parquet_macros.rs
@@ -0,0 +1,534 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// These macros are adapted from Jörn Horstmann's thrift macros at
+// https://github.com/jhorstmann/compact-thrift
+// They allow for pasting sections of the Parquet thrift IDL file
+// into a macro to generate rust structures and implementations.
+
+//! This is a collection of macros used to parse Thrift IDL descriptions of structs,
+//! unions, and enums into their corresponding Rust types. These macros will also
+//! generate the code necessary to serialize and deserialize to/from the [Thrift compact]
+//! protocol.
+//!
+//! Further details of how to use them (and other aspects of the Thrift serialization process)
+//! can be found in [THRIFT.md].
+//!
+//! [Thrift compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+//! [THRIFT.md]: https://github.com/apache/arrow-rs/blob/main/parquet/THRIFT.md
+
+#[doc(hidden)]
+#[macro_export]
+#[allow(clippy::crate_in_macro_def)]
+/// Macro used to generate rust enums from a Thrift `enum` definition.
+///
+/// Note:
+///  - All enums generated with this macro will have `pub` visibility.
+///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
+macro_rules! thrift_enum {
+    ($(#[$($def_attrs:tt)*])* enum $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_name:ident = $field_value:literal;)* }) => {
+        $(#[$($def_attrs)*])*
+        #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+        #[allow(non_camel_case_types)]
+        #[allow(missing_docs)]
+        pub enum $identifier {
+            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name = $field_value,)*
+        }
+
+        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier {
+            #[allow(deprecated)]
+            fn read_thrift(prot: &mut R) -> Result<Self> {
+                let val = prot.read_i32()?;
+                match val {
+                    $($field_value => Ok(Self::$field_name),)*
+                    _ => Err(general_err!("Unexpected {} {}", stringify!($identifier), val)),
+                }
+            }
+        }
+
+        impl fmt::Display for $identifier {
+            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+                write!(f, "{self:?}")
+            }
+        }
+
+        impl WriteThrift for $identifier {
+            const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+                writer.write_i32(*self as i32)
+            }
+        }
+
+        impl WriteThriftField for $identifier {
+            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
+                writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
+
+        impl $identifier {
+            #[allow(deprecated)]
+            #[doc = "Returns a slice containing every variant of this enum."]
+            #[allow(dead_code)]
+            pub const VARIANTS: &'static [Self] = &[
+                $(Self::$field_name),*
+            ];
+
+            #[allow(deprecated)]
+            const fn max_discriminant_impl() -> i32 { // computes the value stored in `MAX_DISCRIMINANT`
+                let values: &[i32] = &[$($field_value),*]; // every discriminant literal given to the macro
+                let mut max = values[0]; // out of bounds if the enum was declared with no variants
+                let mut idx = 1;
+                while idx < values.len() { // manual index loop: `for` is not allowed in a `const fn`
+                    let candidate = values[idx];
+                    if candidate > max {
+                        max = candidate;
+                    }
+                    idx += 1;
+                }
+                max
+            }
+
+            #[allow(deprecated)]
+            #[doc = "Returns the largest discriminant value defined for this enum."]
+            #[allow(dead_code)]
+            pub const MAX_DISCRIMINANT: i32 = Self::max_discriminant_impl();
+        }
+    }
+}
+
+/// Macro used to generate Rust enums for Thrift unions in which all variants are typed with empty
+/// structs.
+///
+/// Because the compact protocol does not write any struct type information, these empty structs
+/// become a single `0` (end-of-fields marker) upon serialization. Rather than trying to deserialize
+/// an empty struct, we can instead simply read the `0` and discard it.
+///
+/// The resulting Rust enum will have all unit variants.
+///
+/// Note:
+///  - All enums generated with this macro will have `pub` visibility.
+///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
+#[doc(hidden)]
+#[macro_export]
+#[allow(clippy::crate_in_macro_def)]
+macro_rules! thrift_union_all_empty {
+    ($(#[$($def_attrs:tt)*])* union $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $field_type:ident $(< $element_type:ident >)? $field_name:ident $(;)?)* }) => {
+        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
+        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
+        #[allow(non_camel_case_types)]
+        #[allow(non_snake_case)]
+        #[allow(missing_docs)]
+        pub enum $identifier {
+            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name),*
+        }
+
+        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier {
+            fn read_thrift(prot: &mut R) -> Result<Self> { // decodes a union whose variants are all empty structs
+                let field_ident = prot.read_field_begin(0)?; // header of the single field that selects the variant
+                if field_ident.field_type == FieldType::Stop { // Stop before any field: no variant was set
+                    return Err(general_err!("Received empty union from remote {}", stringify!($identifier)));
+                }
+                let ret = match field_ident.id {
+                    $($field_id => {
+                        prot.skip_empty_struct()?; // the variant carries no payload; consume its empty struct
+                        Self::$field_name
+                    }
+                    )*
+                    _ => { // unknown field id: rejected with an error rather than skipped
+                        return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id));
+                    }
+                };
+                let field_ident = prot.read_field_begin(field_ident.id)?; // next header must be Stop
+                if field_ident.field_type != FieldType::Stop { // a second field means more than one variant was set
+                    return Err(general_err!(
+                        "Received multiple fields for union from remote {}", stringify!($identifier)
+                    ));
+                }
+                Ok(ret)
+            }
+        }
+
+        impl WriteThrift for $identifier {
+            const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+                match *self {
+                    $(Self::$field_name => writer.write_empty_struct($field_id, 0)?,)*
+                };
+                // write end of struct for this union
+                writer.write_struct_end()
+            }
+        }
+
+        impl WriteThriftField for $identifier {
+            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
+                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
+    }
+}
+
+/// Macro used to generate Rust enums for Thrift unions where variants are a mix of unit and
+/// tuple types.
+///
+/// Use of this macro requires modifying the thrift IDL. For variants with empty structs as their
+/// type, delete the typename (i.e. `1: EmptyStruct Var1;` becomes `1: Var1`). For variants with a
+/// non-empty type, the typename must be contained within parens (e.g. `1: MyType Var1;` becomes
+/// `1: (MyType) Var1;`).
+///
+/// Note:
+///  - All enums generated with this macro will have `pub` visibility.
+///  - This macro allows for specifying lifetime annotations for the resulting `enum` and its fields.
+///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
+#[doc(hidden)]
+#[macro_export]
+#[allow(clippy::crate_in_macro_def)]
+macro_rules! thrift_union {
+    ($(#[$($def_attrs:tt)*])* union $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? $(< $field_lt:lifetime >)?) )? $field_name:ident $(;)?)* }) => {
+        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
+        #[derive(Clone, Debug, Eq, PartialEq)]
+        #[allow(non_camel_case_types)]
+        #[allow(non_snake_case)]
+        #[allow(missing_docs)]
+        pub enum $identifier $(<$lt>)? {
+            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($field_lt)? $($element_type)?} ) )?),*
+        }
+
+        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? {
+            fn read_thrift(prot: &mut R) -> Result<Self> { // decodes a union with unit and/or tuple variants
+                let field_ident = prot.read_field_begin(0)?; // header of the single field that selects the variant
+                if field_ident.field_type == FieldType::Stop { // Stop before any field: no variant was set
+                    return Err(general_err!("Received empty union from remote {}", stringify!($identifier)));
+                }
+                let ret = match field_ident.id {
+                    $($field_id => {
+                        let val = $crate::__thrift_read_variant!(prot, $field_name $($field_type $($element_type)?)?);
+                        val
+                    })*
+                    _ => { // unknown field id: rejected with an error rather than skipped
+                        return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id));
+                    }
+                };
+                let field_ident = prot.read_field_begin(field_ident.id)?; // next header must be Stop
+                if field_ident.field_type != FieldType::Stop { // a second field means more than one variant was set
+                    return Err(general_err!(
+                        "Received multiple fields for union from remote {}", stringify!($identifier)
+                    ));
+                }
+                Ok(ret)
+            }
+        }
+
+        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
+            const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+                match self {
+                    $($crate::__thrift_write_variant_lhs!($field_name $($field_type)?, variant_val) =>
+                      $crate::__thrift_write_variant_rhs!($field_id $($field_type)?, writer, variant_val),)*
+                };
+                writer.write_struct_end()
+            }
+        }
+
+        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
+                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
+    }
+}
+
+/// Macro used to generate Rust structs from a Thrift `struct` definition.
+///
+/// Note:
+///  - This macro allows for specifying the visibility of the resulting `struct` and its fields.
+///    + The `struct` and all fields will have the same visibility.
+///  - This macro allows for specifying lifetime annotations for the resulting `struct` and its fields.
+///  - When utilizing this macro the Thrift serialization traits and structs need to be in scope.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! thrift_struct {
+    ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => {
+        $(#[cfg_attr(not(doctest), $($def_attrs)*)])*
+        #[derive(Clone, Debug, Eq, PartialEq)]
+        #[allow(non_camel_case_types)]
+        #[allow(non_snake_case)]
+        #[allow(missing_docs)]
+        $vis struct $identifier $(<$lt>)? {
+            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $vis $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?))),*
+        }
+
+        impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? {
+            fn read_thrift(prot: &mut R) -> Result<Self> { // every declared field starts as `None` and is filled as decoded
+                $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)*
+                let mut last_field_id = 0i16; // id of the previous field, handed to `read_field_begin` for each header
+                loop {
+                    let field_ident = prot.read_field_begin(last_field_id)?;
+                    if field_ident.field_type == FieldType::Stop { // Stop ends the field loop
+                        break;
+                    }
+                    match field_ident.id {
+                        $($field_id => {
+                            let val = $crate::__thrift_read_field!(prot, field_ident, $field_type $($field_lt)? $($element_type)?);
+                            $field_name = Some(val);
+                        })*
+                        _ => { // field id not declared to the macro: skip its value
+                            prot.skip(field_ident.field_type)?;
+                        }
+                    };
+                    last_field_id = field_ident.id;
+                }
+                $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)* // required: unwrap or error
+                Ok(Self {
+                    $($field_name),*
+                })
+            }
+        }
+
+        impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? {
+            const ELEMENT_TYPE: ElementType = ElementType::Struct;
+
+            #[allow(unused_assignments)]
+            fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+                #[allow(unused_mut, unused_variables)]
+                let mut last_field_id = 0i16;
+                $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, last_field_id);)*
+                writer.write_struct_end()
+            }
+        }
+
+        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
+                writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
+    }
+}
+
+#[doc(hidden)]
+#[macro_export]
+/// Generate `WriteThriftField` implementation for a struct.
+macro_rules! write_thrift_field {
+    ($identifier:ident $(< $lt:lifetime >)?, $fld_type:expr) => {
+        impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+            fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> {
+                writer.write_field_begin($fld_type, field_id, last_field_id)?;
+                self.write_thrift(writer)?;
+                Ok(field_id)
+            }
+        }
+    }
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_write_required_or_optional_field {
+    (required $field_name:ident, $field_id:literal, $field_type:ident, $self:tt, $writer:tt, $last_id:tt) => {
+        $crate::__thrift_write_required_field!(
+            $field_type,
+            $field_name,
+            $field_id,
+            $self,
+            $writer,
+            $last_id
+        )
+    };
+    (optional $field_name:ident, $field_id:literal, $field_type:ident, $self:tt, $writer:tt, $last_id:tt) => {
+        $crate::__thrift_write_optional_field!(
+            $field_type,
+            $field_name,
+            $field_id,
+            $self,
+            $writer,
+            $last_id
+        )
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_write_required_field {
+    (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => {
+        $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?;
+        $writer.write_bytes($self.$field_name)?;
+        $last_id = $field_id;
+    };
+    ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => {
+        $last_id = $self
+            .$field_name
+            .write_thrift_field($writer, $field_id, $last_id)?;
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_write_optional_field {
+    (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => {
+        if $self.$field_name.is_some() {
+            $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?;
+            $writer.write_bytes($self.$field_name.as_ref().unwrap())?;
+            $last_id = $field_id;
+        }
+    };
+    ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => {
+        if $self.$field_name.is_some() {
+            $last_id = $self
+                .$field_name
+                .as_ref()
+                .unwrap()
+                .write_thrift_field($writer, $field_id, $last_id)?;
+        }
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_required_or_optional {
+    (required $field_type:ty) => { $field_type };
+    (optional $field_type:ty) => { Option<$field_type> };
+}
+
+// Performance note: using `expect` here is about 4% faster on the page index bench,
+// but we want to propagate errors. Using `ok_or` is *much* slower.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_result_required_or_optional {
+    (required $field_name:ident) => {
+        let Some($field_name) = $field_name else {
+            return Err(general_err!(concat!(
+                "Required field ",
+                stringify!($field_name),
+                " is missing",
+            )));
+        };
+    };
+    (optional $field_name:ident) => {};
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_read_field { // expands to the expression that decodes one field value of the given Thrift type
+    ($prot:tt, $field_ident:tt, list $lt:lifetime binary) => {
+        read_thrift_vec::<&'a [u8], R>(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, list $lt:lifetime $element_type:ident) => {
+        read_thrift_vec::<$element_type, R>(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, list string) => {
+        read_thrift_vec::<String, R>(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, list $element_type:ident) => {
+        read_thrift_vec::<$element_type, R>(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, string $lt:lifetime) => {
+        <&$lt str>::read_thrift(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, binary $lt:lifetime) => {
+        <&$lt [u8]>::read_thrift(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, $field_type:ident $lt:lifetime) => {
+        $field_type::read_thrift(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, string) => {
+        String::read_thrift(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, binary) => {
+        // this one needs to not conflict with `list<i8>`
+        $prot.read_bytes_owned()?
+    };
+    ($prot:tt, $field_ident:tt, double) => {
+        $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)?
+    };
+    ($prot:tt, $field_ident:tt, bool) => { // the value comes from the field header; error (not panic) if it is absent
+        $field_ident.bool_val.ok_or_else(|| general_err!("Expected boolean field type for field {}", $field_ident.id))?
+    };
+    ($prot:tt, $field_ident:tt, $field_type:ident) => {
+        $field_type::read_thrift(&mut *$prot)?
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_field_type {
+    (binary $lt:lifetime) => { &$lt [u8] };
+    (string $lt:lifetime) => { &$lt str };
+    ($field_type:ident $lt:lifetime) => { $field_type<$lt> };
+    (list $lt:lifetime $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type $lt) > };
+    (list string) => { Vec<String> };
+    (list $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type) > };
+    (binary) => { Vec<u8> };
+    (string) => { String };
+    (double) => { $crate::parquet_thrift::OrderedF64 };
+    ($field_type:ty) => { $field_type };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_union_type {
+    (binary $lt:lifetime) => { &$lt [u8] };
+    (string $lt:lifetime) => { &$lt str };
+    ($field_type:ident $lt:lifetime) => { $field_type<$lt> };
+    ($field_type:ident) => { $field_type };
+    (list $field_type:ident) => { Vec<$field_type> };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_read_variant {
+    ($prot:tt, $field_name:ident $field_type:ident) => {
+        Self::$field_name($field_type::read_thrift(&mut *$prot)?)
+    };
+    ($prot:tt, $field_name:ident list $field_type:ident) => {
+        Self::$field_name(Vec::<$field_type>::read_thrift(&mut *$prot)?)
+    };
+    ($prot:tt, $field_name:ident) => {{
+        $prot.skip_empty_struct()?;
+        Self::$field_name
+    }};
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_write_variant_lhs {
+    ($field_name:ident $field_type:ident, $val:tt) => {
+        Self::$field_name($val)
+    };
+    ($field_name:ident, $val:tt) => {
+        Self::$field_name
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_write_variant_rhs {
+    ($field_id:literal $field_type:ident, $writer:tt, $val:ident) => {
+        $val.write_thrift_field($writer, $field_id, 0)?
+    };
+    ($field_id:literal, $writer:tt, $val:tt) => {
+        $writer.write_empty_struct($field_id, 0)?
+    };
+}
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs
new file mode 100644
index 000000000000..6c82a0bf2c07
--- /dev/null
+++ b/parquet/src/parquet_thrift.rs
@@ -0,0 +1,1109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Structs used for encoding and decoding Parquet Thrift objects.
+//!
+//! These include:
+//! * [`ThriftCompactInputProtocol`]: Trait implemented by Thrift decoders.
+//!     * [`ThriftSliceInputProtocol`]: Thrift decoder that takes a slice of bytes as input.
+//!     * [`ThriftReadInputProtocol`]: Thrift decoder that takes a [`Read`] as input.
+//! * [`ReadThrift`]: Trait implemented by serializable objects.
+//! * [`ThriftCompactOutputProtocol`]: Thrift encoder.
+//! * [`WriteThrift`]: Trait implemented by serializable objects.
+//! * [`WriteThriftField`]: Trait implemented by serializable objects that are fields in Thrift structs.
+
+use std::{
+    cmp::Ordering,
+    io::{Read, Write},
+};
+
+use crate::{
+    errors::{ParquetError, Result},
+    write_thrift_field,
+};
+use std::io::Error;
+use std::str::Utf8Error;
+
+#[derive(Debug)]
+pub(crate) enum ThriftProtocolError { // low-level decode errors; converted into `ParquetError` via `From`
+    Eof, // reported as "Unexpected EOF"
+    IO(Error), // wraps a `std::io::Error`
+    InvalidFieldType(u8), // value that is not a valid struct field type
+    InvalidElementType(u8), // value that is not a valid list/set element type
+    FieldDeltaOverflow { field_delta: u8, last_field_id: i16 }, // `field_delta` could not be added to `last_field_id`
+    InvalidBoolean(u8), // value that cannot be converted into a bool
+    Utf8Error, // invalid UTF-8; the `Utf8Error` payload is dropped to keep this enum small
+    SkipDepth(FieldType), // NOTE(review): presumably a nesting limit reached while skipping — confirm
+    SkipUnsupportedType(FieldType), // field type that cannot be skipped
+}
+
+impl From<ThriftProtocolError> for ParquetError {
+    #[inline(never)]
+    fn from(e: ThriftProtocolError) -> Self {
+        match e {
+            ThriftProtocolError::Eof => eof_err!("Unexpected EOF"),
+            ThriftProtocolError::IO(e) => e.into(),
+            ThriftProtocolError::InvalidFieldType(value) => {
+                general_err!("Unexpected struct field type {}", value)
+            }
+            ThriftProtocolError::InvalidElementType(value) => {
+                general_err!("Unexpected list/set element type {}", value)
+            }
+            ThriftProtocolError::FieldDeltaOverflow {
+                field_delta,
+                last_field_id,
+            } => general_err!("cannot add {} to {}", field_delta, last_field_id),
+            ThriftProtocolError::InvalidBoolean(value) => {
+                general_err!("cannot convert {} into bool", value)
+            }
+            ThriftProtocolError::Utf8Error => general_err!("invalid utf8"),
+            ThriftProtocolError::SkipDepth(field_type) => {
+                general_err!("cannot parse past {:?}", field_type)
+            }
+            ThriftProtocolError::SkipUnsupportedType(field_type) => {
+                general_err!("cannot skip field type {:?}", field_type)
+            }
+        }
+    }
+}
+
+impl From<Utf8Error> for ThriftProtocolError {
+    fn from(_: Utf8Error) -> Self {
+        // ignore error payload to reduce the size of ThriftProtocolError
+        Self::Utf8Error
+    }
+}
+
+impl From<Error> for ThriftProtocolError {
+    fn from(e: Error) -> Self {
+        Self::IO(e)
+    }
+}
+
+pub type ThriftProtocolResult<T> = Result<T, ThriftProtocolError>;
+
+/// Wrapper for thrift `double` fields that supplies `Eq` and `Ord` for floats.
+/// `Ord`/`PartialOrd` use IEEE 754 total order (`f64::total_cmp`). NOTE(review): the derived
+/// `PartialEq` still compares with `f64` `==` (so NaN != NaN, `0.0 == -0.0`) — confirm intended.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct OrderedF64(f64);
+
+impl From<f64> for OrderedF64 {
+    fn from(value: f64) -> Self {
+        Self(value)
+    }
+}
+
+impl From<OrderedF64> for f64 {
+    fn from(value: OrderedF64) -> Self {
+        value.0
+    }
+}
+
+impl Eq for OrderedF64 {} // Marker trait, requires PartialEq
+
+impl Ord for OrderedF64 {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.0.total_cmp(&other.0)
+    }
+}
+
+impl PartialOrd for OrderedF64 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+// Thrift compact protocol types for struct fields (the discriminant is the on-wire type value).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum FieldType {
+    Stop = 0, // end-of-fields marker that terminates a struct
+    BooleanTrue = 1, // boolean field whose value is `true`; the type itself carries the value
+    BooleanFalse = 2, // boolean field whose value is `false`; the type itself carries the value
+    Byte = 3,
+    I16 = 4,
+    I32 = 5,
+    I64 = 6,
+    Double = 7,
+    Binary = 8,
+    List = 9,
+    Set = 10,
+    Map = 11,
+    Struct = 12,
+}
+
+impl TryFrom<u8> for FieldType {
+    type Error = ThriftProtocolError;
+    fn try_from(value: u8) -> ThriftProtocolResult<Self> {
+        match value {
+            0 => Ok(Self::Stop),
+            1 => Ok(Self::BooleanTrue),
+            2 => Ok(Self::BooleanFalse),
+            3 => Ok(Self::Byte),
+            4 => Ok(Self::I16),
+            5 => Ok(Self::I32),
+            6 => Ok(Self::I64),
+            7 => Ok(Self::Double),
+            8 => Ok(Self::Binary),
+            9 => Ok(Self::List),
+            10 => Ok(Self::Set),
+            11 => Ok(Self::Map),
+            12 => Ok(Self::Struct),
+            _ => Err(ThriftProtocolError::InvalidFieldType(value)),
+        }
+    }
+}
+
+impl TryFrom<ElementType> for FieldType { // maps a list element type onto the corresponding struct field type
+    type Error = ThriftProtocolError;
+    fn try_from(value: ElementType) -> std::result::Result<Self, Self::Error> {
+        match value {
+            ElementType::Bool => Ok(Self::BooleanTrue), // `FieldType` has no plain bool; every bool maps to `BooleanTrue`
+            ElementType::Byte => Ok(Self::Byte),
+            ElementType::I16 => Ok(Self::I16),
+            ElementType::I32 => Ok(Self::I32),
+            ElementType::I64 => Ok(Self::I64),
+            ElementType::Double => Ok(Self::Double),
+            ElementType::Binary => Ok(Self::Binary),
+            ElementType::List => Ok(Self::List),
+            ElementType::Struct => Ok(Self::Struct),
+            _ => Err(ThriftProtocolError::InvalidFieldType(value as u8)), // remaining variants: `Set` and `Map`
+        }
+    }
+}
+
+// Thrift compact protocol types for list elements (the discriminant is the on-wire type value).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum ElementType {
+    Bool = 2, // `TryFrom<u8>` also accepts 1 for `Bool` when decoding
+    Byte = 3,
+    I16 = 4,
+    I32 = 5,
+    I64 = 6,
+    Double = 7,
+    Binary = 8,
+    List = 9,
+    Set = 10,
+    Map = 11,
+    Struct = 12,
+}
+
+impl TryFrom<u8> for ElementType {
+    type Error = ThriftProtocolError;
+    fn try_from(value: u8) -> ThriftProtocolResult<Self> {
+        match value {
+            // For historical and compatibility reasons, a reader should be able to deal with both cases.
+            // The only valid value in the original spec was 2, but due to a widespread implementation bug
+            // the de facto standard across large parts of the library became 1 instead.
+            // As a result, both values are now allowed.
+            // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+            1 | 2 => Ok(Self::Bool),
+            3 => Ok(Self::Byte),
+            4 => Ok(Self::I16),
+            5 => Ok(Self::I32),
+            6 => Ok(Self::I64),
+            7 => Ok(Self::Double),
+            8 => Ok(Self::Binary),
+            9 => Ok(Self::List),
+            10 => Ok(Self::Set),
+            11 => Ok(Self::Map),
+            12 => Ok(Self::Struct),
+            _ => Err(ThriftProtocolError::InvalidElementType(value)),
+        }
+    }
+}
+
+/// Struct used to describe a [thrift struct] field during decoding.
+///
+/// [thrift struct]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding
+pub(crate) struct FieldIdentifier {
+    /// The type for the field.
+    pub(crate) field_type: FieldType,
+    /// The field's `id`. May be computed from delta or directly decoded.
+    pub(crate) id: i16,
+    /// Stores the value for booleans.
+    ///
+    /// Boolean fields store no data, instead the field type is either boolean true, or
+    /// boolean false.
+    pub(crate) bool_val: Option<bool>,
+}
+
+/// Struct used to describe a [thrift list].
+///
+/// [thrift list]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct ListIdentifier {
+    /// The type for each element in the list.
+    pub(crate) element_type: ElementType,
+    /// Number of elements contained in the list.
+    pub(crate) size: i32,
+}
+
+/// Low-level object used to deserialize structs encoded with the Thrift [compact] protocol.
+///
+/// Implementations of this trait must provide the low-level functions `read_byte`, `read_bytes`,
+/// `read_bytes_owned`, `skip_bytes`, and `read_double`. These primitives are used by the default
+/// functions provided here to perform deserialization.
+///
+/// [compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
+pub(crate) trait ThriftCompactInputProtocol<'a> {
+    /// Read a single byte from the input.
+    fn read_byte(&mut self) -> ThriftProtocolResult<u8>;
+
+    /// Read a Thrift encoded [binary] from the input.
+    ///
+    /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding
+    fn read_bytes(&mut self) -> ThriftProtocolResult<&'a [u8]>;
+
+    /// Read a Thrift encoded [binary] from the input, returning the bytes as an owned `Vec<u8>`
+    /// instead of a slice borrowed from the input.
+    ///
+    /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding
+    fn read_bytes_owned(&mut self) -> ThriftProtocolResult<Vec<u8>>;
+
+    /// Skip the next `n` bytes of input.
+    fn skip_bytes(&mut self, n: usize) -> ThriftProtocolResult<()>;
+
+    /// Decode an unsigned ULEB128 varint from the input.
+    ///
+    /// Each byte contributes its low 7 bits, least-significant group first; a set high bit
+    /// means another byte follows.
+    fn read_vlq(&mut self) -> ThriftProtocolResult<u64> {
+        // fast path: values below 0x80 fit in a single byte
+        let first = self.read_byte()?;
+        if first < 0x80 {
+            return Ok(u64::from(first));
+        }
+        let mut value = u64::from(first & 0x7f);
+        let mut shift = 7;
+        loop {
+            let next = self.read_byte()?;
+            value |= u64::from(next & 0x7f).wrapping_shl(shift);
+            if next < 0x80 {
+                return Ok(value);
+            }
+            shift += 7;
+        }
+    }
+
+    /// Read a zig-zag encoded signed varint from the input.
+    fn read_zig_zag(&mut self) -> ThriftProtocolResult<i64> {
+        let val = self.read_vlq()?;
+        Ok((val >> 1) as i64 ^ -((val & 1) as i64))
+    }
+
+    /// Read the [`ListIdentifier`] (element type and element count) that begins a Thrift list.
+    fn read_list_begin(&mut self) -> ThriftProtocolResult<ListIdentifier> {
+        let header = self.read_byte()?;
+        // Some parquet writers emit a zero header byte (element type 0) for an empty list.
+        // Tolerate that by reporting an arbitrary but valid element type with no elements.
+        if header == 0 {
+            return Ok(ListIdentifier {
+                element_type: ElementType::Byte,
+                size: 0,
+            });
+        }
+
+        // low nibble: element type; high nibble: the size, or 0xF when a varint size follows
+        let element_type = ElementType::try_from(header & 0x0f)?;
+        let size = match header >> 4 {
+            0x0f => self.read_vlq()? as i32,
+            short => i32::from(short),
+        };
+
+        Ok(ListIdentifier { element_type, size })
+    }
+
+    // Full field ids are uncommon.
+    // Not inlining this method reduces the code size of `read_field_begin`, which then ideally gets
+    // inlined everywhere.
+    #[cold]
+    fn read_full_field_id(&mut self) -> ThriftProtocolResult<i16> {
+        self.read_i16()
+    }
+
+    /// Read the [`FieldIdentifier`] for the next field of a Thrift encoded struct.
+    ///
+    /// `last_field_id` is the id of the previously read field and is the base for delta encoded
+    /// ids. A stop field is reported with an `id` of 0.
+    fn read_field_begin(&mut self, last_field_id: i16) -> ThriftProtocolResult<FieldIdentifier> {
+        // The header byte holds the type in the low nibble and, for short form ids, the
+        // delta from the previous field id in the high nibble.
+        let header = self.read_byte()?;
+        let field_delta = header >> 4;
+        let field_type = FieldType::try_from(header & 0xf)?;
+
+        if field_type == FieldType::Stop {
+            return Ok(FieldIdentifier {
+                field_type: FieldType::Stop,
+                id: 0,
+                bool_val: None,
+            });
+        }
+
+        // boolean fields carry their value in the field type itself
+        let bool_val = match field_type {
+            FieldType::BooleanFalse => Some(false),
+            FieldType::BooleanTrue => Some(true),
+            _ => None,
+        };
+
+        let id = if field_delta == 0 {
+            // a zero delta means the full id follows the header
+            self.read_full_field_id()?
+        } else {
+            last_field_id.checked_add(field_delta as i16).ok_or(
+                ThriftProtocolError::FieldDeltaOverflow {
+                    field_delta,
+                    last_field_id,
+                },
+            )?
+        };
+
+        Ok(FieldIdentifier {
+            field_type,
+            id,
+            bool_val,
+        })
+    }
+
+    /// This is a specialized version of [`Self::read_field_begin`], solely for use in parsing
+    /// simple structs. This function assumes that the delta field will always be less than 0xf,
+    /// fields will be in order, and no boolean fields will be read.
+    /// This also skips validation of the field type.
+    ///
+    /// Returns a tuple of `(field_type, field_delta)`.
+    fn read_field_header(&mut self) -> ThriftProtocolResult<(u8, u8)> {
+        let field_type = self.read_byte()?;
+        let field_delta = (field_type & 0xf0) >> 4;
+        let field_type = field_type & 0xf;
+        Ok((field_type, field_delta))
+    }
+
+    /// Read a boolean that is stored as a collection element (one byte per value). Booleans that
+    /// are struct fields are not read with this; their value is in [`FieldIdentifier::bool_val`].
+    fn read_bool(&mut self) -> ThriftProtocolResult<bool> {
+        // Older revisions of the thrift specification documented 0 and 1 as the in-collection
+        // encoding, which did not match existing implementations. The specification was updated in
+        // https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8, but at
+        // least the go implementation seems to have followed the older text, so 0 is accepted
+        // as false alongside 2.
+        let raw = self.read_byte()?;
+        if raw == 0x01 {
+            Ok(true)
+        } else if raw == 0x00 || raw == 0x02 {
+            Ok(false)
+        } else {
+            Err(ThriftProtocolError::InvalidBoolean(raw))
+        }
+    }
+
+    /// Read a Thrift [binary] as a UTF-8 encoded string.
+    ///
+    /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding
+    fn read_string(&mut self) -> ThriftProtocolResult<&'a str> {
+        let slice = self.read_bytes()?;
+        Ok(std::str::from_utf8(slice)?)
+    }
+
+    /// Read an `i8`.
+    fn read_i8(&mut self) -> ThriftProtocolResult<i8> {
+        Ok(self.read_byte()? as _)
+    }
+
+    /// Read an `i16`.
+    fn read_i16(&mut self) -> ThriftProtocolResult<i16> {
+        Ok(self.read_zig_zag()? as _)
+    }
+
+    /// Read an `i32`.
+    fn read_i32(&mut self) -> ThriftProtocolResult<i32> {
+        Ok(self.read_zig_zag()? as _)
+    }
+
+    /// Read an `i64`.
+    fn read_i64(&mut self) -> ThriftProtocolResult<i64> {
+        self.read_zig_zag()
+    }
+
+    /// Read a Thrift `double` as `f64`.
+    fn read_double(&mut self) -> ThriftProtocolResult<f64>;
+
+    /// Skip a ULEB128 encoded varint.
+    fn skip_vlq(&mut self) -> ThriftProtocolResult<()> {
+        loop {
+            let byte = self.read_byte()?;
+            if byte & 0x80 == 0 {
+                return Ok(());
+            }
+        }
+    }
+
+    /// Skip a thrift [binary].
+    ///
+    /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding
+    fn skip_binary(&mut self) -> ThriftProtocolResult<()> {
+        let len = self.read_vlq()? as usize;
+        self.skip_bytes(len)
+    }
+
+    /// Skip a field with type `field_type` recursively until the default
+    /// maximum skip depth (currently 64) is reached.
+    fn skip(&mut self, field_type: FieldType) -> ThriftProtocolResult<()> {
+        const DEFAULT_SKIP_DEPTH: i8 = 64;
+        self.skip_till_depth(field_type, DEFAULT_SKIP_DEPTH)
+    }
+
+    /// Skip an empty struct, as found in unions, which is encoded as a lone 0 byte (the field
+    /// stop record). This avoids incurring the cost of processing a [`FieldIdentifier`].
+    /// Returns an error if the next byte is non-zero, i.e. the struct is not actually empty.
+    fn skip_empty_struct(&mut self) -> Result<()> {
+        match self.read_byte()? {
+            0 => Ok(()),
+            _ => Err(general_err!("Empty struct has fields")),
+        }
+    }
+
+    /// Skip a field with type `field_type` recursively up to `depth` levels.
+    ///
+    /// Returns `ThriftProtocolError::SkipDepth` once the depth budget is used up, and
+    /// `ThriftProtocolError::SkipUnsupportedType` for field types that cannot be skipped.
+    fn skip_till_depth(&mut self, field_type: FieldType, depth: i8) -> ThriftProtocolResult<()> {
+        // `<=` rather than `==` so that a negative budget cannot underflow on `depth - 1`
+        if depth <= 0 {
+            return Err(ThriftProtocolError::SkipDepth(field_type));
+        }
+
+        match field_type {
+            // a boolean struct field carries its value in the field header and has no data
+            FieldType::BooleanFalse | FieldType::BooleanTrue => Ok(()),
+            FieldType::Byte => self.read_i8().map(|_| ()),
+            // all integer widths are stored as varints
+            FieldType::I16 | FieldType::I32 | FieldType::I64 => self.skip_vlq(),
+            FieldType::Double => self.skip_bytes(8),
+            FieldType::Binary => self.skip_binary(),
+            FieldType::Struct => {
+                let mut last_field_id = 0i16;
+                loop {
+                    let field_ident = self.read_field_begin(last_field_id)?;
+                    if field_ident.field_type == FieldType::Stop {
+                        break;
+                    }
+                    self.skip_till_depth(field_ident.field_type, depth - 1)?;
+                    last_field_id = field_ident.id;
+                }
+                Ok(())
+            }
+            FieldType::List => {
+                let list_ident = self.read_list_begin()?;
+                if list_ident.element_type == ElementType::Bool {
+                    // Unlike boolean struct fields, boolean list elements occupy one byte each
+                    // (see `read_bool`), so those bytes have to be consumed here.
+                    for _ in 0..list_ident.size {
+                        self.read_byte()?;
+                    }
+                    return Ok(());
+                }
+                for _ in 0..list_ident.size {
+                    let element_type = FieldType::try_from(list_ident.element_type)?;
+                    self.skip_till_depth(element_type, depth - 1)?;
+                }
+                Ok(())
+            }
+            // no set or map types in parquet format
+            _ => Err(ThriftProtocolError::SkipUnsupportedType(field_type)),
+        }
+    }
+}
+
+/// A high performance Thrift reader that reads from a slice of bytes.
+pub(crate) struct ThriftSliceInputProtocol<'a> {
+    buf: &'a [u8],
+}
+
+impl<'a> ThriftSliceInputProtocol<'a> {
+    /// Create a new `ThriftSliceInputProtocol` using the bytes in `buf`.
+    pub fn new(buf: &'a [u8]) -> Self {
+        Self { buf }
+    }
+
+    /// Return the current buffer as a slice.
+    pub fn as_slice(&self) -> &'a [u8] {
+        self.buf
+    }
+}
+
+impl<'b, 'a: 'b> ThriftCompactInputProtocol<'b> for ThriftSliceInputProtocol<'a> {
+    /// Pop the first byte off the remaining input, or report `Eof` when none is left.
+    #[inline]
+    fn read_byte(&mut self) -> ThriftProtocolResult<u8> {
+        match self.buf.split_first() {
+            Some((&byte, rest)) => {
+                self.buf = rest;
+                Ok(byte)
+            }
+            None => Err(ThriftProtocolError::Eof),
+        }
+    }
+
+    /// Read a varint length followed by that many bytes, returned as a slice of the input.
+    /// Reports `Eof` if fewer than that many bytes remain.
+    fn read_bytes(&mut self) -> ThriftProtocolResult<&'b [u8]> {
+        let len = self.read_vlq()? as usize;
+        if len > self.buf.len() {
+            return Err(ThriftProtocolError::Eof);
+        }
+        let (bytes, rest) = self.buf.split_at(len);
+        self.buf = rest;
+        Ok(bytes)
+    }
+
+    /// Same as `read_bytes`, but copies the bytes into a new `Vec<u8>`.
+    fn read_bytes_owned(&mut self) -> ThriftProtocolResult<Vec<u8>> {
+        self.read_bytes().map(|bytes| bytes.to_vec())
+    }
+
+    /// Drop the next `n` bytes of input, or report `Eof` if fewer than `n` remain.
+    #[inline]
+    fn skip_bytes(&mut self, n: usize) -> ThriftProtocolResult<()> {
+        match self.buf.get(n..) {
+            Some(rest) => {
+                self.buf = rest;
+                Ok(())
+            }
+            None => Err(ThriftProtocolError::Eof),
+        }
+    }
+
+    /// Read the next 8 bytes as a little-endian `f64`, or report `Eof` if fewer remain.
+    fn read_double(&mut self) -> ThriftProtocolResult<f64> {
+        if self.buf.len() < 8 {
+            return Err(ThriftProtocolError::Eof);
+        }
+        let (head, rest) = self.buf.split_at(8);
+        self.buf = rest;
+        let mut raw = [0_u8; 8];
+        raw.copy_from_slice(head);
+        Ok(f64::from_le_bytes(raw))
+    }
+}
+
+/// A Thrift input protocol that wraps a [`Read`] object.
+///
+/// Note that this is only intended for use in reading Parquet page headers. This will panic
+/// if Thrift `binary` data is encountered because a slice of that data cannot be returned.
+pub(crate) struct ThriftReadInputProtocol<R: Read> {
+    reader: R,
+}
+
+impl<R: Read> ThriftReadInputProtocol<R> {
+    pub(crate) fn new(reader: R) -> Self {
+        Self { reader }
+    }
+}
+
+impl<'a, R: Read> ThriftCompactInputProtocol<'a> for ThriftReadInputProtocol<R> {
+    /// Read one byte from the wrapped reader. A reader that is already at end of input yields
+    /// an I/O error.
+    #[inline]
+    fn read_byte(&mut self) -> ThriftProtocolResult<u8> {
+        let mut buf = [0_u8; 1];
+        self.reader.read_exact(&mut buf)?;
+        Ok(buf[0])
+    }
+
+    /// Not supported: a borrowed slice cannot be produced from a `Read` source, so this
+    /// panics. Use `read_bytes_owned` instead.
+    fn read_bytes(&mut self) -> ThriftProtocolResult<&'a [u8]> {
+        unimplemented!()
+    }
+
+    /// Read a varint length followed by exactly that many bytes into a new `Vec<u8>`.
+    ///
+    /// Fails with an `UnexpectedEof` I/O error if the reader ends before the whole payload
+    /// has been read.
+    fn read_bytes_owned(&mut self) -> ThriftProtocolResult<Vec<u8>> {
+        // The length comes straight from the (possibly corrupt) input, so bound the up-front
+        // reservation; the vector still grows as needed while copying.
+        const MAX_PREALLOC: usize = 64 * 1024;
+        let len = self.read_vlq()? as usize;
+        let mut v = Vec::with_capacity(len.min(MAX_PREALLOC));
+        let copied = std::io::copy(&mut self.reader.by_ref().take(len as u64), &mut v)?;
+        if copied != len as u64 {
+            // `io::copy` stops quietly at end of input, which would hand back truncated data
+            return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
+        }
+        Ok(v)
+    }
+
+    /// Discard the next `n` bytes of the reader, failing with an `UnexpectedEof` I/O error
+    /// if the reader ends first.
+    fn skip_bytes(&mut self, n: usize) -> ThriftProtocolResult<()> {
+        let skipped = std::io::copy(
+            &mut self.reader.by_ref().take(n as u64),
+            &mut std::io::sink(),
+        )?;
+        if skipped != n as u64 {
+            // a short copy means the input ended early; do not report the skip as done
+            return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
+        }
+        Ok(())
+    }
+
+    /// Read the next 8 bytes from the reader as a little-endian `f64`.
+    fn read_double(&mut self) -> ThriftProtocolResult<f64> {
+        let mut buf = [0_u8; 8];
+        self.reader.read_exact(&mut buf)?;
+        Ok(f64::from_le_bytes(buf))
+    }
+}
+
+/// Trait implemented for objects that can be deserialized from a Thrift input stream.
+/// Implementations are provided for Thrift primitive types.
+pub(crate) trait ReadThrift<'a, R: ThriftCompactInputProtocol<'a>> {
+    /// Read an object of type `Self` from the input protocol object.
+    fn read_thrift(prot: &mut R) -> Result<Self>
+    where
+        Self: Sized;
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for bool {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_bool()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i8 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_i8()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i16 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_i16()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i32 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_i32()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i64 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_i64()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for OrderedF64 {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(OrderedF64(prot.read_double()?))
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a str {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_string()?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for String {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(String::from_utf8(prot.read_bytes_owned()?)?)
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a [u8] {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        Ok(prot.read_bytes()?)
+    }
+}
+
+/// Read a Thrift encoded [list] from the input protocol object.
+///
+/// The element count comes from the input and is signed; a negative (corrupt) count is read as
+/// an empty list rather than being used as an allocation size.
+///
+/// [list]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+pub(crate) fn read_thrift_vec<'a, T, R>(prot: &mut R) -> Result<Vec<T>>
+where
+    R: ThriftCompactInputProtocol<'a>,
+    T: ReadThrift<'a, R>,
+{
+    let list_ident = prot.read_list_begin()?;
+    // Casting a negative size straight to `usize` would request an enormous capacity and
+    // panic inside `Vec::with_capacity`, so treat it as an empty list.
+    let len = usize::try_from(list_ident.size).unwrap_or(0);
+    let mut res = Vec::with_capacity(len);
+    for _ in 0..len {
+        res.push(T::read_thrift(prot)?);
+    }
+    Ok(res)
+}
+
+/////////////////////////
+// thrift compact output
+
+/// Low-level object used to serialize structs to the Thrift [compact output] protocol.
+///
+/// This struct serves as a wrapper around a [`Write`] object, to which thrift encoded data
+/// will be written. The implementation provides functions to write Thrift primitive types, as well
+/// as functions used in the encoding of lists and structs. This should rarely be used directly,
+/// but is instead intended for use by implementers of [`WriteThrift`] and [`WriteThriftField`].
+///
+/// [compact output]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
+pub(crate) struct ThriftCompactOutputProtocol<W: Write> {
+    writer: W,
+}
+
+impl<W: Write> ThriftCompactOutputProtocol<W> {
+    /// Create a new `ThriftCompactOutputProtocol` wrapping the byte sink `writer`.
+    pub(crate) fn new(writer: W) -> Self {
+        Self { writer }
+    }
+
+    /// Write a single byte to the output stream.
+    fn write_byte(&mut self, b: u8) -> Result<()> {
+        self.writer.write_all(&[b])?;
+        Ok(())
+    }
+
+    /// Encode `val` as a ULEB128 varint: 7 bits per byte, least-significant group first, with
+    /// the high bit set on every byte except the last.
+    fn write_vlq(&mut self, val: u64) -> Result<()> {
+        let mut remaining = val;
+        loop {
+            let low_bits = (remaining & 0x7f) as u8;
+            remaining >>= 7;
+            if remaining == 0 {
+                // final group: high bit clear marks the end of the varint
+                return self.write_byte(low_bits);
+            }
+            self.write_byte(low_bits | 0x80)?;
+        }
+    }
+
+    /// Write the given `i64` as a zig-zag encoded varint.
+    fn write_zig_zag(&mut self, val: i64) -> Result<()> {
+        let s = (val < 0) as i64;
+        self.write_vlq((((val ^ -s) << 1) + s) as u64)
+    }
+
+    /// Used to mark the start of a Thrift struct field of type `field_type`. `last_field_id`
+    /// is used to compute a delta to the given `field_id` per the compact protocol [spec].
+    ///
+    /// [spec]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding
+    pub(crate) fn write_field_begin(
+        &mut self,
+        field_type: FieldType,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<()> {
+        let delta = field_id.wrapping_sub(last_field_id);
+        if delta > 0 && delta <= 0xf {
+            self.write_byte((delta as u8) << 4 | field_type as u8)
+        } else {
+            self.write_byte(field_type as u8)?;
+            self.write_i16(field_id)
+        }
+    }
+
+    /// Used to indicate the start of a list of `element_type` elements.
+    pub(crate) fn write_list_begin(&mut self, element_type: ElementType, len: usize) -> Result<()> {
+        if len < 15 {
+            self.write_byte((len as u8) << 4 | element_type as u8)
+        } else {
+            self.write_byte(0xf0u8 | element_type as u8)?;
+            self.write_vlq(len as _)
+        }
+    }
+
+    /// Used to mark the end of a struct. This must be called after all fields of the struct have
+    /// been written.
+    pub(crate) fn write_struct_end(&mut self) -> Result<()> {
+        self.write_byte(0)
+    }
+
+    /// Serialize a slice of `u8`s. This will encode a length, and then write the bytes without
+    /// further encoding.
+    pub(crate) fn write_bytes(&mut self, val: &[u8]) -> Result<()> {
+        self.write_vlq(val.len() as u64)?;
+        self.writer.write_all(val)?;
+        Ok(())
+    }
+
+    /// Short-cut method used to encode structs that have no fields (often used in Thrift unions).
+    /// This writes the field header for `field_id` and then immediately writes the
+    /// end-of-struct marker.
+    ///
+    /// Returns `field_id`, the id callers should pass as `last_field_id` for the next field.
+    pub(crate) fn write_empty_struct(&mut self, field_id: i16, last_field_id: i16) -> Result<i16> {
+        self.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+        self.write_struct_end()?;
+        // Hand back the id just written (not the stale `last_field_id`) so that chaining the
+        // result into the next field produces the correct delta.
+        Ok(field_id)
+    }
+
+    /// Write a boolean value.
+    pub(crate) fn write_bool(&mut self, val: bool) -> Result<()> {
+        match val {
+            true => self.write_byte(1),
+            false => self.write_byte(2),
+        }
+    }
+
+    /// Write an `i8` value as a single byte. Unlike the wider integer types, no zig-zag or
+    /// varint encoding is applied.
+    pub(crate) fn write_i8(&mut self, val: i8) -> Result<()> {
+        self.write_byte(val as u8)
+    }
+
+    /// Write a zig-zag encoded `i16` value.
+    pub(crate) fn write_i16(&mut self, val: i16) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    /// Write a zig-zag encoded `i32` value.
+    pub(crate) fn write_i32(&mut self, val: i32) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    /// Write a zig-zag encoded `i64` value.
+    pub(crate) fn write_i64(&mut self, val: i64) -> Result<()> {
+        self.write_zig_zag(val as _)
+    }
+
+    /// Write a double value.
+    pub(crate) fn write_double(&mut self, val: f64) -> Result<()> {
+        self.writer.write_all(&val.to_le_bytes())?;
+        Ok(())
+    }
+}
+
+/// Trait implemented by objects that are to be serialized to a Thrift [compact output] protocol
+/// stream. Implementations are also provided for primitive Thrift types.
+///
+/// [compact output]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
+pub(crate) trait WriteThrift {
+    /// The [`ElementType`] to use when a list of this object is written.
+    const ELEMENT_TYPE: ElementType;
+
+    /// Serialize this object to the given `writer`.
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()>;
+}
+
+/// Serialize a vector of [`WriteThrift`] objects as a Thrift list: the list header (element
+/// type and length) is written first, followed by each element in order.
+impl<T> WriteThrift for Vec<T>
+where
+    T: WriteThrift,
+{
+    const ELEMENT_TYPE: ElementType = ElementType::List;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_list_begin(T::ELEMENT_TYPE, self.len())?;
+        // stops at, and returns, the first element error
+        self.iter().try_for_each(|item| item.write_thrift(writer))
+    }
+}
+
+impl WriteThrift for bool {
+    const ELEMENT_TYPE: ElementType = ElementType::Bool;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_bool(*self)
+    }
+}
+
+impl WriteThrift for i8 {
+    const ELEMENT_TYPE: ElementType = ElementType::Byte;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_i8(*self)
+    }
+}
+
+impl WriteThrift for i16 {
+    const ELEMENT_TYPE: ElementType = ElementType::I16;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_i16(*self)
+    }
+}
+
+impl WriteThrift for i32 {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_i32(*self)
+    }
+}
+
+impl WriteThrift for i64 {
+    const ELEMENT_TYPE: ElementType = ElementType::I64;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_i64(*self)
+    }
+}
+
+impl WriteThrift for OrderedF64 {
+    const ELEMENT_TYPE: ElementType = ElementType::Double;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_double(self.0)
+    }
+}
+
+impl WriteThrift for f64 {
+    const ELEMENT_TYPE: ElementType = ElementType::Double;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_double(*self)
+    }
+}
+
+impl WriteThrift for &[u8] {
+    const ELEMENT_TYPE: ElementType = ElementType::Binary;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_bytes(self)
+    }
+}
+
+impl WriteThrift for &str {
+    const ELEMENT_TYPE: ElementType = ElementType::Binary;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_bytes(self.as_bytes())
+    }
+}
+
+impl WriteThrift for String {
+    const ELEMENT_TYPE: ElementType = ElementType::Binary;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        writer.write_bytes(self.as_bytes())
+    }
+}
+
+/// Trait implemented by objects that are fields of Thrift structs.
+///
+/// For example, given the Thrift struct definition
+/// ```ignore
+/// struct MyStruct {
+///   1: required i32 field1
+///   2: optional bool field2
+///   3: optional OtherStruct field3
+/// }
+/// ```
+///
+/// which becomes in Rust
+/// ```no_run
+/// # struct OtherStruct {}
+/// struct MyStruct {
+///   field1: i32,
+///   field2: Option<bool>,
+///   field3: Option<OtherStruct>,
+/// }
+/// ```
+/// the impl of `WriteThrift` for `MyStruct` will use the `WriteThriftField` impls for `i32`,
+/// `bool`, and `OtherStruct`.
+///
+/// ```ignore
+/// impl WriteThrift for MyStruct {
+///   fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+///     let mut last_field_id = 0i16;
+///     last_field_id = self.field1.write_thrift_field(writer, 1, last_field_id)?;
+///     if self.field2.is_some() {
+///       // if field2 is `None` then this assignment won't happen and last_field_id will remain
+///       // `1` when writing `field3`
+///       last_field_id = self.field2.write_thrift_field(writer, 2, last_field_id)?;
+///     }
+///     if self.field3.is_some() {
+///       // no need to assign last_field_id since this is the final field.
+///       self.field3.write_thrift_field(writer, 3, last_field_id)?;
+///     }
+///     writer.write_struct_end()
+///   }
+/// }
+/// ```
+///
+pub(crate) trait WriteThriftField {
+    /// Used to write struct fields (which may be primitive or IDL defined types). This will
+    /// write the field marker for the given `field_id`, using `last_field_id` to compute the
+    /// field delta used by the Thrift [compact protocol]. On success this will return `field_id`
+    /// to be used in chaining.
+    ///
+    /// [compact protocol]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16>;
+}
+
+// A bool struct field has no payload: its value is carried by the field type in the header,
+// which differs from how plain bool values are written.
+impl WriteThriftField for bool {
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        let field_type = if *self {
+            FieldType::BooleanTrue
+        } else {
+            FieldType::BooleanFalse
+        };
+        // the field header is the entire encoding of a boolean field
+        writer.write_field_begin(field_type, field_id, last_field_id)?;
+        Ok(field_id)
+    }
+}
+
+write_thrift_field!(i8, FieldType::Byte);
+write_thrift_field!(i16, FieldType::I16);
+write_thrift_field!(i32, FieldType::I32);
+write_thrift_field!(i64, FieldType::I64);
+write_thrift_field!(OrderedF64, FieldType::Double);
+write_thrift_field!(f64, FieldType::Double);
+write_thrift_field!(String, FieldType::Binary);
+
+impl WriteThriftField for &[u8] {
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?;
+        writer.write_bytes(self)?;
+        Ok(field_id)
+    }
+}
+
+impl WriteThriftField for &str {
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?;
+        writer.write_bytes(self.as_bytes())?;
+        Ok(field_id)
+    }
+}
+
+impl<T> WriteThriftField for Vec<T>
+where
+    T: WriteThrift,
+{
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        writer.write_field_begin(FieldType::List, field_id, last_field_id)?;
+        self.write_thrift(writer)?;
+        Ok(field_id)
+    }
+}
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use crate::basic::{TimeUnit, Type};
+
+    use super::*;
+    use std::fmt::Debug;
+
+    pub(crate) fn test_roundtrip<T>(val: T)
+    where
+        T: for<'a> ReadThrift<'a, ThriftSliceInputProtocol<'a>> + WriteThrift + PartialEq + Debug,
+    {
+        let mut buf = Vec::<u8>::new();
+        {
+            let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+            val.write_thrift(&mut writer).unwrap();
+        }
+
+        let mut prot = ThriftSliceInputProtocol::new(&buf);
+        let read_val = T::read_thrift(&mut prot).unwrap();
+        assert_eq!(val, read_val);
+    }
+
+    #[test]
+    fn test_enum_roundtrip() {
+        test_roundtrip(Type::BOOLEAN);
+        test_roundtrip(Type::INT32);
+        test_roundtrip(Type::INT64);
+        test_roundtrip(Type::INT96);
+        test_roundtrip(Type::FLOAT);
+        test_roundtrip(Type::DOUBLE);
+        test_roundtrip(Type::BYTE_ARRAY);
+        test_roundtrip(Type::FIXED_LEN_BYTE_ARRAY);
+    }
+
+    #[test]
+    fn test_union_all_empty_roundtrip() {
+        test_roundtrip(TimeUnit::MILLIS);
+        test_roundtrip(TimeUnit::MICROS);
+        test_roundtrip(TimeUnit::NANOS);
+    }
+
+    #[test]
+    fn test_decode_empty_list() {
+        let data = vec![0u8; 1];
+        let mut prot = ThriftSliceInputProtocol::new(&data);
+        let header = prot.read_list_begin().expect("error reading list header");
+        assert_eq!(header.size, 0);
+        assert_eq!(header.element_type, ElementType::Byte);
+    }
+}
diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs
index 0f84fe60854b..33a1464fa3d8 100644
--- a/parquet/src/record/api.rs
+++ b/parquet/src/record/api.rs
@@ -21,8 +21,8 @@ use std::fmt;
 
 use chrono::{TimeZone, Utc};
 use half::f16;
-use num::traits::Float;
 use num_bigint::{BigInt, Sign};
+use num_traits::Float;
 
 use crate::basic::{ConvertedType, LogicalType, Type as PhysicalType};
 use crate::data_type::{ByteArray, Decimal, Int96};
@@ -98,7 +98,7 @@ impl Row {
     ///     println!("column index: {}, column name: {}, column value: {}", idx, name, field);
     /// }
     /// ```
-    pub fn get_column_iter(&self) -> RowColumnIter {
+    pub fn get_column_iter(&self) -> RowColumnIter<'_> {
         RowColumnIter {
             fields: &self.fields,
             curr: 0,
@@ -397,8 +397,8 @@ macro_rules! list_primitive_accessor {
 macro_rules! list_complex_accessor {
     ($METHOD:ident, $VARIANT:ident, $TY:ty) => {
         fn $METHOD(&self, i: usize) -> Result<&$TY> {
-            match self.elements[i] {
-                Field::$VARIANT(ref v) => Ok(v),
+            match &self.elements[i] {
+                Field::$VARIANT(v) => Ok(&v),
                 _ => Err(general_err!(
                     "Cannot access {} as {}",
                     self.elements[i].get_type_name(),
@@ -602,6 +602,12 @@ pub enum Field {
     /// Date without a time of day, stores the number of days from the
     /// Unix epoch, 1 January 1970.
     Date(i32),
+
+    /// The total number of milliseconds since midnight.
+    TimeMillis(i32),
+    /// The total number of microseconds since midnight.
+    TimeMicros(i64),
+
     /// Milliseconds from the Unix epoch, 1 January 1970.
     TimestampMillis(i64),
     /// Microseconds from the Unix epoch, 1 January 1970.
@@ -638,6 +644,8 @@ impl Field {
             Field::Date(_) => "Date",
             Field::Str(_) => "Str",
             Field::Bytes(_) => "Bytes",
+            Field::TimeMillis(_) => "TimeMillis",
+            Field::TimeMicros(_) => "TimeMicros",
             Field::TimestampMillis(_) => "TimestampMillis",
             Field::TimestampMicros(_) => "TimestampMicros",
             Field::Group(_) => "Group",
@@ -671,7 +679,7 @@ impl Field {
             ConvertedType::UINT_16 => Field::UShort(value as u16),
             ConvertedType::UINT_32 => Field::UInt(value as u32),
             ConvertedType::DATE => Field::Date(value),
-            ConvertedType::TIME_MILLIS => Field::TimestampMillis(value as i64),
+            ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
             ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
                 value,
                 descr.type_precision(),
@@ -687,6 +695,7 @@ impl Field {
         match descr.converted_type() {
             ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
             ConvertedType::UINT_64 => Field::ULong(value as u64),
+            ConvertedType::TIME_MICROS => Field::TimeMicros(value),
             ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
             ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
             ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
@@ -747,7 +756,7 @@ impl Field {
                     descr.type_precision(),
                     descr.type_scale(),
                 )),
-                ConvertedType::NONE if descr.logical_type() == Some(LogicalType::Float16) => {
+                ConvertedType::NONE if descr.logical_type_ref() == Some(&LogicalType::Float16) => {
                     if value.len() != 2 {
                         return Err(general_err!(
                             "Error reading FIXED_LEN_BYTE_ARRAY as FLOAT16. Length must be 2, got {}",
@@ -768,8 +777,8 @@ impl Field {
     /// Converts the Parquet field into a JSON [`Value`].
     #[cfg(any(feature = "json", test))]
     pub fn to_json_value(&self) -> Value {
-        use base64::prelude::BASE64_STANDARD;
         use base64::Engine;
+        use base64::prelude::BASE64_STANDARD;
 
         match &self {
             Field::Null => Value::Null,
@@ -795,6 +804,8 @@ impl Field {
             Field::Str(s) => Value::String(s.to_owned()),
             Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
             Field::Date(d) => Value::String(convert_date_to_string(*d)),
+            Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
+            Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
             Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
             Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
             Field::Group(row) => row.to_json_value(),
@@ -864,6 +875,12 @@ impl fmt::Display for Field {
             Field::Str(ref value) => write!(f, "\"{value}\""),
             Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
             Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
+            Field::TimeMillis(value) => {
+                write!(f, "{}", convert_time_millis_to_string(value))
+            }
+            Field::TimeMicros(value) => {
+                write!(f, "{}", convert_time_micros_to_string(value))
+            }
             Field::TimestampMillis(value) => {
                 write!(f, "{}", convert_timestamp_millis_to_string(value))
             }
@@ -911,29 +928,48 @@ fn convert_date_to_string(value: i32) -> String {
     format!("{}", dt.format("%Y-%m-%d"))
 }
 
-/// Helper method to convert Parquet timestamp into a string.
-/// Input `value` is a number of seconds since the epoch in UTC.
-/// Datetime is displayed in local timezone.
-#[inline]
-fn convert_timestamp_secs_to_string(value: i64) -> String {
-    let dt = Utc.timestamp_opt(value, 0).unwrap();
-    format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"))
-}
-
 /// Helper method to convert Parquet timestamp into a string.
 /// Input `value` is a number of milliseconds since the epoch in UTC.
-/// Datetime is displayed in local timezone.
+/// Datetime is displayed in UTC with millisecond precision; panics if `value` is unrepresentable.
 #[inline]
 fn convert_timestamp_millis_to_string(value: i64) -> String {
-    convert_timestamp_secs_to_string(value / 1000)
+    let dt = Utc.timestamp_millis_opt(value).unwrap();
+    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.3f %:z"))
 }
 
 /// Helper method to convert Parquet timestamp into a string.
 /// Input `value` is a number of microseconds since the epoch in UTC.
-/// Datetime is displayed in local timezone.
+/// Datetime is displayed in UTC with microsecond precision; panics if `value` is unrepresentable.
 #[inline]
 fn convert_timestamp_micros_to_string(value: i64) -> String {
-    convert_timestamp_secs_to_string(value / 1000000)
+    let dt = Utc.timestamp_micros(value).unwrap();
+    format!("{}", dt.format("%Y-%m-%d %H:%M:%S%.6f %:z"))
+}
+
+/// Formats a Parquet time of day, given as milliseconds since midnight, as `HH:MM:SS.sss`.
+/// `value` is not range-checked: hours are zero-padded to two digits but never wrapped at 24,
+/// and a negative `value` is turned into a huge count by the `as u64` cast (garbage output).
+#[inline]
+fn convert_time_millis_to_string(value: i32) -> String {
+    let total_ms = value as u64;
+    let hours = total_ms / (60 * 60 * 1000);
+    let minutes = (total_ms % (60 * 60 * 1000)) / (60 * 1000);
+    let seconds = (total_ms % (60 * 1000)) / 1000;
+    let millis = total_ms % 1000;
+    format!("{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
+}
+
+/// Formats a Parquet time of day, given as microseconds since midnight, as `HH:MM:SS.ssssss`.
+/// `value` is not range-checked: hours are zero-padded to two digits but never wrapped at 24,
+/// and a negative `value` is turned into a huge count by the `as u64` cast (garbage output).
+#[inline]
+fn convert_time_micros_to_string(value: i64) -> String {
+    let total_us = value as u64;
+    let hours = total_us / (60 * 60 * 1000 * 1000);
+    let minutes = (total_us % (60 * 60 * 1000 * 1000)) / (60 * 1000 * 1000);
+    let seconds = (total_us % (60 * 1000 * 1000)) / (1000 * 1000);
+    let micros = total_us % (1000 * 1000);
+    format!("{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
 }
 
 /// Helper method to convert Parquet decimal into a string.
@@ -1057,7 +1093,7 @@ mod tests {
 
         let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
         let row = Field::convert_int32(&descr, 14611);
-        assert_eq!(row, Field::TimestampMillis(14611));
+        assert_eq!(row, Field::TimeMillis(14611));
 
         let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
         let row = Field::convert_int32(&descr, 444);
@@ -1082,6 +1118,10 @@ mod tests {
         let row = Field::convert_int64(&descr, 1541186529153123);
         assert_eq!(row, Field::TimestampMicros(1541186529153123));
 
+        let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
+        let row = Field::convert_int64(&descr, 47445123456);
+        assert_eq!(row, Field::TimeMicros(47445123456));
+
         let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
         let row = Field::convert_int64(&descr, 2222);
         assert_eq!(row, Field::Long(2222));
@@ -1231,44 +1271,75 @@ mod tests {
 
     #[test]
     fn test_convert_timestamp_millis_to_string() {
-        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
+        fn check_datetime_conversion(
+            (y, m, d, h, mi, s, milli): (u32, u32, u32, u32, u32, u32, u32),
+            exp: &str,
+        ) {
             let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
                 .unwrap()
-                .and_hms_opt(h, mi, s)
+                .and_hms_milli_opt(h, mi, s, milli)
                 .unwrap();
             let dt = Utc.from_utc_datetime(&datetime);
             let res = convert_timestamp_millis_to_string(dt.timestamp_millis());
-            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
             assert_eq!(res, exp);
         }
 
-        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
-        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
-        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
-        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
-        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
-        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
+        check_datetime_conversion((1969, 9, 10, 1, 2, 3, 4), "1969-09-10 01:02:03.004 +00:00");
+        check_datetime_conversion(
+            (2010, 1, 2, 13, 12, 54, 42),
+            "2010-01-02 13:12:54.042 +00:00",
+        );
+        check_datetime_conversion((2011, 1, 3, 8, 23, 1, 27), "2011-01-03 08:23:01.027 +00:00");
+        check_datetime_conversion((2012, 4, 5, 11, 6, 32, 0), "2012-04-05 11:06:32.000 +00:00");
+        check_datetime_conversion(
+            (2013, 5, 12, 16, 38, 0, 15),
+            "2013-05-12 16:38:00.015 +00:00",
+        );
+        check_datetime_conversion(
+            (2014, 11, 28, 21, 15, 12, 59),
+            "2014-11-28 21:15:12.059 +00:00",
+        );
     }
 
     #[test]
     fn test_convert_timestamp_micros_to_string() {
-        fn check_datetime_conversion(y: u32, m: u32, d: u32, h: u32, mi: u32, s: u32) {
+        fn check_datetime_conversion(
+            (y, m, d, h, mi, s, micro): (u32, u32, u32, u32, u32, u32, u32),
+            exp: &str,
+        ) {
             let datetime = chrono::NaiveDate::from_ymd_opt(y as i32, m, d)
                 .unwrap()
-                .and_hms_opt(h, mi, s)
+                .and_hms_micro_opt(h, mi, s, micro)
                 .unwrap();
             let dt = Utc.from_utc_datetime(&datetime);
             let res = convert_timestamp_micros_to_string(dt.timestamp_micros());
-            let exp = format!("{}", dt.format("%Y-%m-%d %H:%M:%S %:z"));
             assert_eq!(res, exp);
         }
 
-        check_datetime_conversion(1969, 9, 10, 1, 2, 3);
-        check_datetime_conversion(2010, 1, 2, 13, 12, 54);
-        check_datetime_conversion(2011, 1, 3, 8, 23, 1);
-        check_datetime_conversion(2012, 4, 5, 11, 6, 32);
-        check_datetime_conversion(2013, 5, 12, 16, 38, 0);
-        check_datetime_conversion(2014, 11, 28, 21, 15, 12);
+        check_datetime_conversion(
+            (1969, 9, 10, 1, 2, 3, 4),
+            "1969-09-10 01:02:03.000004 +00:00",
+        );
+        check_datetime_conversion(
+            (2010, 1, 2, 13, 12, 54, 42),
+            "2010-01-02 13:12:54.000042 +00:00",
+        );
+        check_datetime_conversion(
+            (2011, 1, 3, 8, 23, 1, 27),
+            "2011-01-03 08:23:01.000027 +00:00",
+        );
+        check_datetime_conversion(
+            (2012, 4, 5, 11, 6, 32, 0),
+            "2012-04-05 11:06:32.000000 +00:00",
+        );
+        check_datetime_conversion(
+            (2013, 5, 12, 16, 38, 0, 15),
+            "2013-05-12 16:38:00.000015 +00:00",
+        );
+        check_datetime_conversion(
+            (2014, 11, 28, 21, 15, 12, 59),
+            "2014-11-28 21:15:12.000059 +00:00",
+        );
     }
 
     #[test]
@@ -1435,28 +1506,34 @@ mod tests {
         assert!(Field::Decimal(Decimal::from_i32(4, 8, 2)).is_primitive());
 
         // complex types
-        assert!(!Field::Group(Row::new(vec![
-            ("x".to_string(), Field::Null),
-            ("Y".to_string(), Field::Int(2)),
-            ("z".to_string(), Field::Float(3.1)),
-            ("a".to_string(), Field::Str("abc".to_string()))
-        ]))
-        .is_primitive());
+        assert!(
+            !Field::Group(Row::new(vec![
+                ("x".to_string(), Field::Null),
+                ("Y".to_string(), Field::Int(2)),
+                ("z".to_string(), Field::Float(3.1)),
+                ("a".to_string(), Field::Str("abc".to_string()))
+            ]))
+            .is_primitive()
+        );
 
-        assert!(!Field::ListInternal(make_list(vec![
-            Field::Int(2),
-            Field::Int(1),
-            Field::Null,
-            Field::Int(12)
-        ]))
-        .is_primitive());
+        assert!(
+            !Field::ListInternal(make_list(vec![
+                Field::Int(2),
+                Field::Int(1),
+                Field::Null,
+                Field::Int(12)
+            ]))
+            .is_primitive()
+        );
 
-        assert!(!Field::MapInternal(make_map(vec![
-            (Field::Int(1), Field::Float(1.2)),
-            (Field::Int(2), Field::Float(4.5)),
-            (Field::Int(3), Field::Float(2.3))
-        ]))
-        .is_primitive());
+        assert!(
+            !Field::MapInternal(make_map(vec![
+                (Field::Int(1), Field::Float(1.2)),
+                (Field::Int(2), Field::Float(4.5)),
+                (Field::Int(3), Field::Float(2.3))
+            ]))
+            .is_primitive()
+        );
     }
 
     #[test]
@@ -1953,11 +2030,19 @@ mod tests {
         );
         assert_eq!(
             Field::TimestampMillis(12345678).to_json_value(),
-            Value::String("1970-01-01 03:25:45 +00:00".to_string())
+            Value::String("1970-01-01 03:25:45.678 +00:00".to_string())
         );
         assert_eq!(
             Field::TimestampMicros(12345678901).to_json_value(),
-            Value::String(convert_timestamp_micros_to_string(12345678901))
+            Value::String("1970-01-01 03:25:45.678901 +00:00".to_string())
+        );
+        assert_eq!(
+            Field::TimeMillis(47445123).to_json_value(),
+            Value::String(String::from("13:10:45.123"))
+        );
+        assert_eq!(
+            Field::TimeMicros(47445123456).to_json_value(),
+            Value::String(String::from("13:10:45.123456"))
         );
 
         let fields = vec![
@@ -1994,7 +2079,7 @@ mod tests {
 #[cfg(test)]
 #[allow(clippy::many_single_char_names)]
 mod api_tests {
-    use super::{make_list, make_map, Row};
+    use super::{Row, make_list, make_map};
     use crate::record::Field;
 
     #[test]
diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs
index 9e70f7a980db..a6b8d2d54cd7 100644
--- a/parquet/src/record/reader.rs
+++ b/parquet/src/record/reader.rs
@@ -24,7 +24,7 @@ use crate::basic::{ConvertedType, Repetition};
 use crate::errors::{ParquetError, Result};
 use crate::file::reader::{FileReader, RowGroupReader};
 use crate::record::{
-    api::{make_list, make_map, Field, Row},
+    api::{Field, Row, make_list, make_map},
     triplet::TripletIter,
 };
 use crate::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr};
@@ -522,7 +522,7 @@ impl Reader {
             Reader::PrimitiveReader(ref field, _) => field.name(),
             Reader::OptionReader(_, ref reader) => reader.field_name(),
             Reader::GroupReader(ref opt, ..) => match opt {
-                Some(ref field) => field.name(),
+                Some(field) => field.name(),
                 None => panic!("Field is None for group reader"),
             },
             Reader::RepeatedReader(ref field, ..) => field.name(),
@@ -536,7 +536,7 @@ impl Reader {
             Reader::PrimitiveReader(ref field, _) => field.get_basic_info().repetition(),
             Reader::OptionReader(_, ref reader) => reader.repetition(),
             Reader::GroupReader(ref opt, ..) => match opt {
-                Some(ref field) => field.get_basic_info().repetition(),
+                Some(field) => field.get_basic_info().repetition(),
                 None => panic!("Field is None for group reader"),
             },
             Reader::RepeatedReader(ref field, ..) => field.get_basic_info().repetition(),
diff --git a/parquet/src/record/triplet.rs b/parquet/src/record/triplet.rs
index 902641c08bcf..8244dfb12823 100644
--- a/parquet/src/record/triplet.rs
+++ b/parquet/src/record/triplet.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::basic::Type as PhysicalType;
-use crate::column::reader::{get_typed_column_reader, ColumnReader, ColumnReaderImpl};
+use crate::column::reader::{ColumnReader, ColumnReaderImpl, get_typed_column_reader};
 use crate::data_type::*;
 use crate::errors::{ParquetError, Result};
 use crate::record::api::Field;
diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs
index 0a67250476c7..36cf2dc5175e 100644
--- a/parquet/src/schema/parser.rs
+++ b/parquet/src/schema/parser.rs
@@ -178,9 +178,9 @@ fn parse_timeunit(
     value
         .ok_or_else(|| general_err!(not_found_msg))
         .and_then(|v| match v.to_uppercase().as_str() {
-            "MILLIS" => Ok(TimeUnit::MILLIS(Default::default())),
-            "MICROS" => Ok(TimeUnit::MICROS(Default::default())),
-            "NANOS" => Ok(TimeUnit::NANOS(Default::default())),
+            "MILLIS" => Ok(TimeUnit::MILLIS),
+            "MICROS" => Ok(TimeUnit::MICROS),
+            "NANOS" => Ok(TimeUnit::NANOS),
             _ => Err(general_err!(parse_fail_msg)),
         })
 }
@@ -311,187 +311,188 @@ impl Parser<'_> {
             .ok_or_else(|| general_err!("Expected name, found None"))?;
 
         // Parse converted type
-        let (logical_type, converted_type, precision, scale) =
-            if let Some("(") = self.tokenizer.next() {
-                let (mut logical, mut converted) = self
-                    .tokenizer
-                    .next()
-                    .ok_or_else(|| general_err!("Expected logical or converted type, found None"))
-                    .and_then(|v| {
-                        let upper = v.to_uppercase();
-                        let logical = upper.parse::<LogicalType>();
-                        match logical {
-                            Ok(logical) => {
-                                Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
-                            }
-                            Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
+        let (logical_type, converted_type, precision, scale) = if let Some("(") =
+            self.tokenizer.next()
+        {
+            let (mut logical, mut converted) = self
+                .tokenizer
+                .next()
+                .ok_or_else(|| general_err!("Expected logical or converted type, found None"))
+                .and_then(|v| {
+                    let upper = v.to_uppercase();
+                    let logical = upper.parse::<LogicalType>();
+                    match logical {
+                        Ok(logical) => {
+                            Ok((Some(logical.clone()), ConvertedType::from(Some(logical))))
                         }
-                    })?;
-
-                // Parse precision and scale for decimals
-                let mut precision: i32 = -1;
-                let mut scale: i32 = -1;
-
-                // Parse the concrete logical type
-                if let Some(tpe) = &logical {
-                    match tpe {
-                        LogicalType::Decimal { .. } => {
-                            if let Some("(") = self.tokenizer.next() {
-                                precision = parse_i32(
+                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
+                    }
+                })?;
+
+            // Parse precision and scale for decimals
+            let mut precision: i32 = -1;
+            let mut scale: i32 = -1;
+
+            // Parse the concrete logical type
+            if let Some(tpe) = &logical {
+                match tpe {
+                    LogicalType::Decimal { .. } => {
+                        if let Some("(") = self.tokenizer.next() {
+                            precision = parse_i32(
+                                self.tokenizer.next(),
+                                "Expected precision, found None",
+                                "Failed to parse precision for DECIMAL type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                scale = parse_i32(
                                     self.tokenizer.next(),
-                                    "Expected precision, found None",
-                                    "Failed to parse precision for DECIMAL type",
+                                    "Expected scale, found None",
+                                    "Failed to parse scale for DECIMAL type",
                                 )?;
-                                if let Some(",") = self.tokenizer.next() {
-                                    scale = parse_i32(
-                                        self.tokenizer.next(),
-                                        "Expected scale, found None",
-                                        "Failed to parse scale for DECIMAL type",
-                                    )?;
-                                    assert_token(self.tokenizer.next(), ")")?;
-                                } else {
-                                    scale = 0
-                                }
-                                logical = Some(LogicalType::Decimal { scale, precision });
-                                converted = ConvertedType::from(logical.clone());
+                                assert_token(self.tokenizer.next(), ")")?;
+                            } else {
+                                scale = 0
                             }
+                            logical = Some(LogicalType::Decimal { scale, precision });
+                            converted = ConvertedType::from(logical.clone());
                         }
-                        LogicalType::Time { .. } => {
-                            if let Some("(") = self.tokenizer.next() {
-                                let unit = parse_timeunit(
+                    }
+                    LogicalType::Time { .. } => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let unit = parse_timeunit(
+                                self.tokenizer.next(),
+                                "Invalid timeunit found",
+                                "Failed to parse timeunit for TIME type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                let is_adjusted_to_u_t_c = parse_bool(
                                     self.tokenizer.next(),
-                                    "Invalid timeunit found",
-                                    "Failed to parse timeunit for TIME type",
+                                    "Invalid boolean found",
+                                    "Failed to parse timezone info for TIME type",
                                 )?;
-                                if let Some(",") = self.tokenizer.next() {
-                                    let is_adjusted_to_u_t_c = parse_bool(
-                                        self.tokenizer.next(),
-                                        "Invalid boolean found",
-                                        "Failed to parse timezone info for TIME type",
-                                    )?;
-                                    assert_token(self.tokenizer.next(), ")")?;
-                                    logical = Some(LogicalType::Time {
-                                        is_adjusted_to_u_t_c,
-                                        unit,
-                                    });
-                                    converted = ConvertedType::from(logical.clone());
-                                } else {
-                                    // Invalid token for unit
-                                    self.tokenizer.backtrack();
-                                }
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = Some(LogicalType::Time {
+                                    is_adjusted_to_u_t_c,
+                                    unit,
+                                });
+                                converted = ConvertedType::from(logical.clone());
+                            } else {
+                                // Invalid token for unit
+                                self.tokenizer.backtrack();
                             }
                         }
-                        LogicalType::Timestamp { .. } => {
-                            if let Some("(") = self.tokenizer.next() {
-                                let unit = parse_timeunit(
+                    }
+                    LogicalType::Timestamp { .. } => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let unit = parse_timeunit(
+                                self.tokenizer.next(),
+                                "Invalid timeunit found",
+                                "Failed to parse timeunit for TIMESTAMP type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                let is_adjusted_to_u_t_c = parse_bool(
                                     self.tokenizer.next(),
-                                    "Invalid timeunit found",
-                                    "Failed to parse timeunit for TIMESTAMP type",
+                                    "Invalid boolean found",
+                                    "Failed to parse timezone info for TIMESTAMP type",
                                 )?;
-                                if let Some(",") = self.tokenizer.next() {
-                                    let is_adjusted_to_u_t_c = parse_bool(
-                                        self.tokenizer.next(),
-                                        "Invalid boolean found",
-                                        "Failed to parse timezone info for TIMESTAMP type",
-                                    )?;
-                                    assert_token(self.tokenizer.next(), ")")?;
-                                    logical = Some(LogicalType::Timestamp {
-                                        is_adjusted_to_u_t_c,
-                                        unit,
-                                    });
-                                    converted = ConvertedType::from(logical.clone());
-                                } else {
-                                    // Invalid token for unit
-                                    self.tokenizer.backtrack();
-                                }
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = Some(LogicalType::Timestamp {
+                                    is_adjusted_to_u_t_c,
+                                    unit,
+                                });
+                                converted = ConvertedType::from(logical.clone());
+                            } else {
+                                // Invalid token for unit
+                                self.tokenizer.backtrack();
                             }
                         }
-                        LogicalType::Integer { .. } => {
-                            if let Some("(") = self.tokenizer.next() {
-                                let bit_width = parse_i32(
-                                    self.tokenizer.next(),
-                                    "Invalid bit_width found",
-                                    "Failed to parse bit_width for INTEGER type",
-                                )? as i8;
-                                match physical_type {
-                                    PhysicalType::INT32 => match bit_width {
-                                        8 | 16 | 32 => {}
-                                        _ => {
-                                            return Err(general_err!(
-                                                "Incorrect bit width {} for INT32",
-                                                bit_width
-                                            ))
-                                        }
-                                    },
-                                    PhysicalType::INT64 => {
-                                        if bit_width != 64 {
-                                            return Err(general_err!(
-                                                "Incorrect bit width {} for INT64",
-                                                bit_width
-                                            ));
-                                        }
-                                    }
+                    }
+                    LogicalType::Integer { .. } => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let bit_width = parse_i32(
+                                self.tokenizer.next(),
+                                "Invalid bit_width found",
+                                "Failed to parse bit_width for INTEGER type",
+                            )? as i8;
+                            match physical_type {
+                                PhysicalType::INT32 => match bit_width {
+                                    8 | 16 | 32 => {}
                                     _ => {
                                         return Err(general_err!(
-                                        "Logical type Integer cannot be used with physical type {}",
-                                        physical_type
-                                    ))
+                                            "Incorrect bit width {} for INT32",
+                                            bit_width
+                                        ));
+                                    }
+                                },
+                                PhysicalType::INT64 => {
+                                    if bit_width != 64 {
+                                        return Err(general_err!(
+                                            "Incorrect bit width {} for INT64",
+                                            bit_width
+                                        ));
                                     }
                                 }
-                                if let Some(",") = self.tokenizer.next() {
-                                    let is_signed = parse_bool(
-                                        self.tokenizer.next(),
-                                        "Invalid boolean found",
-                                        "Failed to parse is_signed for INTEGER type",
-                                    )?;
-                                    assert_token(self.tokenizer.next(), ")")?;
-                                    logical = Some(LogicalType::Integer {
-                                        bit_width,
-                                        is_signed,
-                                    });
-                                    converted = ConvertedType::from(logical.clone());
-                                } else {
-                                    // Invalid token for unit
-                                    self.tokenizer.backtrack();
+                                _ => {
+                                    return Err(general_err!(
+                                        "Logical type Integer cannot be used with physical type {}",
+                                        physical_type
+                                    ));
                                 }
                             }
+                            if let Some(",") = self.tokenizer.next() {
+                                let is_signed = parse_bool(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failed to parse is_signed for INTEGER type",
+                                )?;
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = Some(LogicalType::Integer {
+                                    bit_width,
+                                    is_signed,
+                                });
+                                converted = ConvertedType::from(logical.clone());
+                            } else {
+                                // Invalid token for unit
+                                self.tokenizer.backtrack();
+                            }
                         }
-                        _ => {}
                     }
-                } else if converted == ConvertedType::DECIMAL {
-                    if let Some("(") = self.tokenizer.next() {
-                        // Parse precision
-                        precision = parse_i32(
+                    _ => {}
+                }
+            } else if converted == ConvertedType::DECIMAL {
+                if let Some("(") = self.tokenizer.next() {
+                    // Parse precision
+                    precision = parse_i32(
+                        self.tokenizer.next(),
+                        "Expected precision, found None",
+                        "Failed to parse precision for DECIMAL type",
+                    )?;
+
+                    // Parse scale
+                    scale = if let Some(",") = self.tokenizer.next() {
+                        parse_i32(
                             self.tokenizer.next(),
-                            "Expected precision, found None",
-                            "Failed to parse precision for DECIMAL type",
-                        )?;
-
-                        // Parse scale
-                        scale = if let Some(",") = self.tokenizer.next() {
-                            parse_i32(
-                                self.tokenizer.next(),
-                                "Expected scale, found None",
-                                "Failed to parse scale for DECIMAL type",
-                            )?
-                        } else {
-                            // Scale is not provided, set it to 0.
-                            self.tokenizer.backtrack();
-                            0
-                        };
-
-                        assert_token(self.tokenizer.next(), ")")?;
+                            "Expected scale, found None",
+                            "Failed to parse scale for DECIMAL type",
+                        )?
                     } else {
+                        // Scale is not provided, set it to 0.
                         self.tokenizer.backtrack();
-                    }
+                        0
+                    };
+
+                    assert_token(self.tokenizer.next(), ")")?;
+                } else {
+                    self.tokenizer.backtrack();
                 }
+            }
 
-                assert_token(self.tokenizer.next(), ")")?;
-                (logical, converted, precision, scale)
-            } else {
-                self.tokenizer.backtrack();
-                (None, ConvertedType::NONE, -1, -1)
-            };
+            assert_token(self.tokenizer.next(), ")")?;
+            (logical, converted, precision, scale)
+        } else {
+            self.tokenizer.backtrack();
+            (None, ConvertedType::NONE, -1, -1)
+        };
 
         // Parse optional id
         let id = if let Some("=") = self.tokenizer.next() {
@@ -1075,7 +1076,7 @@ mod tests {
             Arc::new(
                 Type::primitive_type_builder("_6", PhysicalType::INT32)
                     .with_logical_type(Some(LogicalType::Time {
-                        unit: TimeUnit::MILLIS(Default::default()),
+                        unit: TimeUnit::MILLIS,
                         is_adjusted_to_u_t_c: false,
                     }))
                     .build()
@@ -1084,7 +1085,7 @@ mod tests {
             Arc::new(
                 Type::primitive_type_builder("_7", PhysicalType::INT64)
                     .with_logical_type(Some(LogicalType::Time {
-                        unit: TimeUnit::MICROS(Default::default()),
+                        unit: TimeUnit::MICROS,
                         is_adjusted_to_u_t_c: true,
                     }))
                     .build()
@@ -1093,7 +1094,7 @@ mod tests {
             Arc::new(
                 Type::primitive_type_builder("_8", PhysicalType::INT64)
                     .with_logical_type(Some(LogicalType::Timestamp {
-                        unit: TimeUnit::MILLIS(Default::default()),
+                        unit: TimeUnit::MILLIS,
                         is_adjusted_to_u_t_c: true,
                     }))
                     .build()
@@ -1102,7 +1103,7 @@ mod tests {
             Arc::new(
                 Type::primitive_type_builder("_9", PhysicalType::INT64)
                     .with_logical_type(Some(LogicalType::Timestamp {
-                        unit: TimeUnit::NANOS(Default::default()),
+                        unit: TimeUnit::NANOS,
                         is_adjusted_to_u_t_c: false,
                     }))
                     .build()
diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs
index 44c742fca66e..4ad3b6b93e00 100644
--- a/parquet/src/schema/printer.rs
+++ b/parquet/src/schema/printer.rs
@@ -171,11 +171,7 @@ fn print_row_group_metadata(out: &mut dyn io::Write, rg_metadata: &RowGroupMetaD
 fn print_column_chunk_metadata(out: &mut dyn io::Write, cc_metadata: &ColumnChunkMetaData) {
     writeln!(out, "column type: {}", cc_metadata.column_type());
     writeln!(out, "column path: {}", cc_metadata.column_path());
-    let encoding_strs: Vec<_> = cc_metadata
-        .encodings()
-        .iter()
-        .map(|e| format!("{e}"))
-        .collect();
+    let encoding_strs: Vec<_> = cc_metadata.encodings().map(|e| format!("{e}")).collect();
     writeln!(out, "encodings: {}", encoding_strs.join(" "));
     let file_path_str = cc_metadata.file_path().unwrap_or("N/A");
     writeln!(out, "file path: {file_path_str}");
@@ -277,9 +273,9 @@ impl<'a> Printer<'a> {
 #[inline]
 fn print_timeunit(unit: &TimeUnit) -> &str {
     match unit {
-        TimeUnit::MILLIS(_) => "MILLIS",
-        TimeUnit::MICROS(_) => "MICROS",
-        TimeUnit::NANOS(_) => "NANOS",
+        TimeUnit::MILLIS => "MILLIS",
+        TimeUnit::MICROS => "MICROS",
+        TimeUnit::NANOS => "NANOS",
     }
 }
 
@@ -326,7 +322,26 @@ fn print_logical_and_converted(
             LogicalType::List => "LIST".to_string(),
             LogicalType::Map => "MAP".to_string(),
             LogicalType::Float16 => "FLOAT16".to_string(),
+            LogicalType::Variant {
+                specification_version,
+            } => format!("VARIANT({specification_version:?})"),
+            LogicalType::Geometry { crs } => {
+                if let Some(crs) = crs {
+                    format!("GEOMETRY({crs})")
+                } else {
+                    "GEOMETRY".to_string()
+                }
+            }
+            LogicalType::Geography { crs, algorithm } => {
+                let algorithm = algorithm.unwrap_or_default();
+                if let Some(crs) = crs {
+                    format!("GEOGRAPHY({algorithm}, {crs})")
+                } else {
+                    format!("GEOGRAPHY({algorithm})")
+                }
+            }
             LogicalType::Unknown => "UNKNOWN".to_string(),
+            LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
         },
         None => {
             // Also print converted type if it is available
@@ -385,13 +400,13 @@ impl Printer<'_> {
                 // Also print logical type if it is available
                 // If there is a logical type, do not print converted type
                 let logical_type_str = print_logical_and_converted(
-                    basic_info.logical_type().as_ref(),
+                    basic_info.logical_type_ref(),
                     basic_info.converted_type(),
                     precision,
                     scale,
                 );
                 if !logical_type_str.is_empty() {
-                    write!(self.output, " ({});", logical_type_str);
+                    write!(self.output, " ({logical_type_str});");
                 } else {
                     write!(self.output, ";");
                 }
@@ -411,7 +426,7 @@ impl Printer<'_> {
                         write!(self.output, "[{}] ", basic_info.id());
                     }
                     let logical_str = print_logical_and_converted(
-                        basic_info.logical_type().as_ref(),
+                        basic_info.logical_type_ref(),
                         basic_info.converted_type(),
                         0,
                         0,
@@ -446,7 +461,7 @@ mod tests {
 
     use std::sync::Arc;
 
-    use crate::basic::{Repetition, Type as PhysicalType};
+    use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType};
     use crate::errors::Result;
     use crate::schema::parser::parse_message_type;
 
@@ -642,7 +657,7 @@ mod tests {
                     PhysicalType::INT64,
                     Some(LogicalType::Timestamp {
                         is_adjusted_to_u_t_c: true,
-                        unit: TimeUnit::MILLIS(Default::default()),
+                        unit: TimeUnit::MILLIS,
                     }),
                     ConvertedType::NONE,
                     Repetition::REQUIRED,
@@ -668,7 +683,7 @@ mod tests {
                     None,
                     PhysicalType::INT32,
                     Some(LogicalType::Time {
-                        unit: TimeUnit::MILLIS(Default::default()),
+                        unit: TimeUnit::MILLIS,
                         is_adjusted_to_u_t_c: false,
                     }),
                     ConvertedType::TIME_MILLIS,
@@ -683,7 +698,7 @@ mod tests {
                     Some(42),
                     PhysicalType::INT32,
                     Some(LogicalType::Time {
-                        unit: TimeUnit::MILLIS(Default::default()),
+                        unit: TimeUnit::MILLIS,
                         is_adjusted_to_u_t_c: false,
                     }),
                     ConvertedType::TIME_MILLIS,
@@ -776,6 +791,62 @@ mod tests {
                 .unwrap(),
                 "REQUIRED BYTE_ARRAY field [42] (STRING);",
             ),
+            (
+                build_primitive_type(
+                    "field",
+                    None,
+                    PhysicalType::BYTE_ARRAY,
+                    Some(LogicalType::Geometry { crs: None }),
+                    ConvertedType::NONE,
+                    Repetition::REQUIRED,
+                )
+                .unwrap(),
+                "REQUIRED BYTE_ARRAY field (GEOMETRY);",
+            ),
+            (
+                build_primitive_type(
+                    "field",
+                    None,
+                    PhysicalType::BYTE_ARRAY,
+                    Some(LogicalType::Geometry {
+                        crs: Some("non-missing CRS".to_string()),
+                    }),
+                    ConvertedType::NONE,
+                    Repetition::REQUIRED,
+                )
+                .unwrap(),
+                "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
+            ),
+            (
+                build_primitive_type(
+                    "field",
+                    None,
+                    PhysicalType::BYTE_ARRAY,
+                    Some(LogicalType::Geography {
+                        crs: None,
+                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
+                    }),
+                    ConvertedType::NONE,
+                    Repetition::REQUIRED,
+                )
+                .unwrap(),
+                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
+            ),
+            (
+                build_primitive_type(
+                    "field",
+                    None,
+                    PhysicalType::BYTE_ARRAY,
+                    Some(LogicalType::Geography {
+                        crs: Some("non-missing CRS".to_string()),
+                        algorithm: Some(EdgeInterpolationAlgorithm::default()),
+                    }),
+                    ConvertedType::NONE,
+                    Repetition::REQUIRED,
+                )
+                .unwrap(),
+                "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
+            ),
         ];
 
         types_and_strings.into_iter().for_each(|(field, expected)| {
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 68492e19f437..85f3ed48972c 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -17,10 +17,11 @@
 
 //! Contains structs and methods to build Parquet schema and schema descriptors.
 
+use std::vec::IntoIter;
 use std::{collections::HashMap, fmt, sync::Arc};
 
 use crate::file::metadata::HeapSize;
-use crate::format::SchemaElement;
+use crate::file::metadata::thrift::SchemaElement;
 
 use crate::basic::{
     ColumnOrder, ConvertedType, LogicalType, Repetition, SortOrder, TimeUnit, Type as PhysicalType,
@@ -78,12 +79,15 @@ impl HeapSize for Type {
 
 impl Type {
     /// Creates primitive type builder with provided field name and physical type.
-    pub fn primitive_type_builder(name: &str, physical_type: PhysicalType) -> PrimitiveTypeBuilder {
+    pub fn primitive_type_builder(
+        name: &str,
+        physical_type: PhysicalType,
+    ) -> PrimitiveTypeBuilder<'_> {
         PrimitiveTypeBuilder::new(name, physical_type)
     }
 
     /// Creates group type builder with provided column name.
-    pub fn group_type_builder(name: &str) -> GroupTypeBuilder {
+    pub fn group_type_builder(name: &str) -> GroupTypeBuilder<'_> {
         GroupTypeBuilder::new(name)
     }
 
@@ -207,8 +211,8 @@ impl Type {
     pub(crate) fn is_list(&self) -> bool {
         if self.is_group() {
             let basic_info = self.get_basic_info();
-            if let Some(logical_type) = basic_info.logical_type() {
-                return logical_type == LogicalType::List;
+            if let Some(logical_type) = basic_info.logical_type_ref() {
+                return logical_type == &LogicalType::List;
             }
             return basic_info.converted_type() == ConvertedType::LIST;
         }
@@ -375,13 +379,13 @@ impl<'a> PrimitiveTypeBuilder<'a> {
                 (LogicalType::Date, PhysicalType::INT32) => {}
                 (
                     LogicalType::Time {
-                        unit: TimeUnit::MILLIS(_),
+                        unit: TimeUnit::MILLIS,
                         ..
                     },
                     PhysicalType::INT32,
                 ) => {}
                 (LogicalType::Time { unit, .. }, PhysicalType::INT64) => {
-                    if *unit == TimeUnit::MILLIS(Default::default()) {
+                    if *unit == TimeUnit::MILLIS {
                         return Err(general_err!(
                             "Cannot use millisecond unit on INT64 type for field '{}'",
                             self.name
@@ -398,28 +402,31 @@ impl<'a> PrimitiveTypeBuilder<'a> {
                 (LogicalType::String, PhysicalType::BYTE_ARRAY) => {}
                 (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {}
                 (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {}
+                (LogicalType::Geometry { .. }, PhysicalType::BYTE_ARRAY) => {}
+                (LogicalType::Geography { .. }, PhysicalType::BYTE_ARRAY) => {}
                 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {}
                 (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
                     return Err(general_err!(
                         "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field",
                         self.name
-                    ))
+                    ));
                 }
-                (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                    if self.length == 2 => {}
+                (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 2 => {}
                 (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => {
                     return Err(general_err!(
                         "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field",
                         self.name
-                    ))
+                    ));
                 }
+                // unknown logical type means just use physical type
+                (LogicalType::_Unknown { .. }, _) => {}
                 (a, b) => {
                     return Err(general_err!(
                         "Cannot annotate {:?} from {} for field '{}'",
                         a,
                         b,
                         self.name
-                    ))
+                    ));
                 }
             }
         }
@@ -700,11 +707,23 @@ impl BasicTypeInfo {
     }
 
     /// Returns [`LogicalType`] value for the type.
+    ///
+    /// Note that this function will clone the `LogicalType`. If performance is a concern,
+    /// use [`Self::logical_type_ref`] instead.
+    #[deprecated(
+        since = "57.1.0",
+        note = "use `BasicTypeInfo::logical_type_ref` instead (LogicalType cloning is non trivial)"
+    )]
     pub fn logical_type(&self) -> Option<LogicalType> {
         // Unlike ConvertedType, LogicalType cannot implement Copy, thus we clone it
         self.logical_type.clone()
     }
 
+    /// Return a reference to the [`LogicalType`] value for the type.
+    pub fn logical_type_ref(&self) -> Option<&LogicalType> {
+        self.logical_type.as_ref()
+    }
+
     /// Returns `true` if id is set, `false` otherwise.
     pub fn has_id(&self) -> bool {
         self.id.is_some()
@@ -840,7 +859,9 @@ pub struct ColumnDescriptor {
 
 impl HeapSize for ColumnDescriptor {
     fn heap_size(&self) -> usize {
-        self.primitive_type.heap_size() + self.path.heap_size()
+        // Don't include the heap size of primitive_type, this is already
+        // accounted for via SchemaDescriptor::schema
+        self.path.heap_size()
     }
 }
 
@@ -899,8 +920,23 @@ impl ColumnDescriptor {
     }
 
     /// Returns [`LogicalType`] for this column.
+    ///
+    /// Note that this function will clone the `LogicalType`. If performance is a concern,
+    /// use [`Self::logical_type_ref`] instead.
+    #[deprecated(
+        since = "57.1.0",
+        note = "use `ColumnDescriptor::logical_type_ref` instead (LogicalType cloning is non trivial)"
+    )]
     pub fn logical_type(&self) -> Option<LogicalType> {
-        self.primitive_type.get_basic_info().logical_type()
+        self.primitive_type
+            .get_basic_info()
+            .logical_type_ref()
+            .cloned()
+    }
+
+    /// Returns a reference to the [`LogicalType`] for this column.
+    pub fn logical_type_ref(&self) -> Option<&LogicalType> {
+        self.primitive_type.get_basic_info().logical_type_ref()
     }
 
     /// Returns physical type for this column.
@@ -941,8 +977,8 @@ impl ColumnDescriptor {
 
     /// Returns the sort order for this column
     pub fn sort_order(&self) -> SortOrder {
-        ColumnOrder::get_sort_order(
-            self.logical_type(),
+        ColumnOrder::sort_order_for_type(
+            self.logical_type_ref(),
             self.converted_type(),
             self.physical_type(),
         )
@@ -979,7 +1015,7 @@ impl ColumnDescriptor {
 ///   )
 /// );
 /// ```
-#[derive(PartialEq)]
+#[derive(PartialEq, Clone)]
 pub struct SchemaDescriptor {
     /// The top-level logical schema (the "message" type).
     ///
@@ -1024,11 +1060,15 @@ impl HeapSize for SchemaDescriptor {
 impl SchemaDescriptor {
     /// Creates new schema descriptor from Parquet schema.
     pub fn new(tp: TypePtr) -> Self {
+        const INIT_SCHEMA_DEPTH: usize = 16;
         assert!(tp.is_group(), "SchemaDescriptor should take a GroupType");
-        let mut leaves = vec![];
-        let mut leaf_to_base = Vec::new();
+        // unwrap should be safe since we just asserted tp is a group
+        let n_leaves = num_leaves(&tp).unwrap();
+        let mut leaves = Vec::with_capacity(n_leaves);
+        let mut leaf_to_base = Vec::with_capacity(n_leaves);
+        let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH);
         for (root_idx, f) in tp.get_fields().iter().enumerate() {
-            let mut path = vec![];
+            path.clear();
             build_tree(f, root_idx, 0, 0, &mut leaves, &mut leaf_to_base, &mut path);
         }
 
@@ -1107,6 +1147,50 @@ impl SchemaDescriptor {
     }
 }
 
+/// Counts all nodes in the schema rooted at `tp`, root included; errors if `tp` is not a group.
+pub(crate) fn num_nodes(tp: &TypePtr) -> Result<usize> {
+    if !tp.is_group() {
+        return Err(general_err!("Root schema must be Group type"));
+    }
+    let mut n_nodes = 1usize; // start at 1 to account for the root itself
+    for f in tp.get_fields().iter() {
+        count_nodes(f, &mut n_nodes); // adds `f` and everything nested below it
+    }
+    Ok(n_nodes)
+}
+
+pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) {
+    *n_nodes += 1; // count `tp` itself, whether it is a group or a primitive
+    if let Type::GroupType { fields, .. } = tp.as_ref() {
+        for f in fields {
+            count_nodes(f, n_nodes); // recurse: each child adds itself and its descendants
+        }
+    }
+}
+
+/// Counts the primitive (leaf) nodes under `tp`, used to pre-size the `SchemaDescriptor` vectors.
+fn num_leaves(tp: &TypePtr) -> Result<usize> {
+    if !tp.is_group() {
+        return Err(general_err!("Root schema must be Group type"));
+    }
+    let mut n_leaves = 0usize; // the root is a group, so it is never counted as a leaf
+    for f in tp.get_fields().iter() {
+        count_leaves(f, &mut n_leaves);
+    }
+    Ok(n_leaves)
+}
+
+fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) {
+    match tp.as_ref() {
+        Type::PrimitiveType { .. } => *n_leaves += 1, // only primitive nodes are leaves
+        Type::GroupType { fields, .. } => {
+            for f in fields {
+                count_leaves(f, n_leaves); // groups add nothing themselves; descend into children
+            }
+        }
+    }
+}
+
 fn build_tree<'a>(
     tp: &'a TypePtr,
     root_idx: usize,
@@ -1142,7 +1226,7 @@ fn build_tree<'a>(
             )));
             leaf_to_base.push(root_idx);
         }
-        Type::GroupType { ref fields, .. } => {
+        Type::GroupType { fields, .. } => {
             for f in fields {
                 build_tree(
                     f,
@@ -1159,12 +1243,30 @@ fn build_tree<'a>(
     }
 }
 
-/// Method to convert from Thrift.
-pub fn from_thrift(elements: &[SchemaElement]) -> Result<TypePtr> {
+/// Validates `logical_type`: an `Integer` type must have a bit width of 8, 16, 32 or 64.
+fn check_logical_type(logical_type: &Option<LogicalType>) -> Result<()> {
+    if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type {
+        if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 {
+            return Err(general_err!(
+                "Bit width must be 8, 16, 32, or 64 for Integer logical type"
+            ));
+        }
+    }
+    Ok(()) // nothing else is checked: `None` and non-Integer types always pass
+}
+
+// convert thrift decoded array of `SchemaElement` into this crate's representation of
+// parquet types. this function consumes `elements`.
+pub(crate) fn parquet_schema_from_array<'a>(elements: Vec<SchemaElement<'a>>) -> Result<TypePtr> {
     let mut index = 0;
-    let mut schema_nodes = Vec::new();
-    while index < elements.len() {
-        let t = from_thrift_helper(elements, index)?;
+    let num_elements = elements.len();
+    let mut schema_nodes = Vec::with_capacity(1); // there should only be one element when done
+
+    // turn into iterator so we can take ownership of elements of the vector
+    let mut elements = elements.into_iter();
+
+    while index < num_elements {
+        let t = schema_from_array_helper(&mut elements, num_elements, index)?;
         index = t.0;
         schema_nodes.push(t.1);
     }
@@ -1182,54 +1284,40 @@ pub fn from_thrift(elements: &[SchemaElement]) -> Result<TypePtr> {
     Ok(schema_nodes.remove(0))
 }
 
-/// Checks if the logical type is valid.
-fn check_logical_type(logical_type: &Option<LogicalType>) -> Result<()> {
-    if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type {
-        if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 {
-            return Err(general_err!(
-                "Bit width must be 8, 16, 32, or 64 for Integer logical type"
-            ));
-        }
-    }
-    Ok(())
-}
-
-/// Constructs a new Type from the `elements`, starting at index `index`.
-/// The first result is the starting index for the next Type after this one. If it is
-/// equal to `elements.len()`, then this Type is the last one.
-/// The second result is the result Type.
-fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize, TypePtr)> {
+// recursive helper function for schema conversion
+fn schema_from_array_helper<'a>(
+    elements: &mut IntoIter<SchemaElement<'a>>,
+    num_elements: usize,
+    index: usize,
+) -> Result<(usize, TypePtr)> {
     // Whether or not the current node is root (message type).
     // There is only one message type node in the schema tree.
     let is_root_node = index == 0;
 
-    if index >= elements.len() {
+    if index >= num_elements {
         return Err(general_err!(
             "Index out of bound, index = {}, len = {}",
             index,
-            elements.len()
+            num_elements
         ));
     }
-    let element = &elements[index];
+    let element = elements.next().expect("schema vector should not be empty");
 
     // Check for empty schema
     if let (true, None | Some(0)) = (is_root_node, element.num_children) {
-        let builder = Type::group_type_builder(&element.name);
+        let builder = Type::group_type_builder(element.name);
         return Ok((index + 1, Arc::new(builder.build().unwrap())));
     }
 
-    let converted_type = ConvertedType::try_from(element.converted_type)?;
-    // LogicalType is only present in v2 Parquet files. ConvertedType is always
-    // populated, regardless of the version of the file (v1 or v2).
-    let logical_type = element
-        .logical_type
-        .as_ref()
-        .map(|value| LogicalType::from(value.clone()));
+    let converted_type = element.converted_type.unwrap_or(ConvertedType::NONE);
+
+    // LogicalType is preferred to ConvertedType, but both may be present.
+    let logical_type = element.logical_type;
 
     check_logical_type(&logical_type)?;
 
-    let field_id = elements[index].field_id;
-    match elements[index].num_children {
+    let field_id = element.field_id;
+    match element.num_children {
         // From parquet-format:
         //   The children count is used to construct the nested relationship.
         //   This field is not set when the element is a primitive type
@@ -1237,18 +1325,17 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
         // have to handle this case too.
         None | Some(0) => {
             // primitive type
-            if elements[index].repetition_type.is_none() {
+            if element.repetition_type.is_none() {
                 return Err(general_err!(
                     "Repetition level must be defined for a primitive type"
                 ));
             }
-            let repetition = Repetition::try_from(elements[index].repetition_type.unwrap())?;
-            if let Some(type_) = elements[index].type_ {
-                let physical_type = PhysicalType::try_from(type_)?;
-                let length = elements[index].type_length.unwrap_or(-1);
-                let scale = elements[index].scale.unwrap_or(-1);
-                let precision = elements[index].precision.unwrap_or(-1);
-                let name = &elements[index].name;
+            let repetition = element.repetition_type.unwrap();
+            if let Some(physical_type) = element.r#type {
+                let length = element.type_length.unwrap_or(-1);
+                let scale = element.scale.unwrap_or(-1);
+                let precision = element.precision.unwrap_or(-1);
+                let name = element.name;
                 let builder = Type::primitive_type_builder(name, physical_type)
                     .with_repetition(repetition)
                     .with_converted_type(converted_type)
@@ -1259,7 +1346,7 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
                     .with_id(field_id);
                 Ok((index + 1, Arc::new(builder.build()?)))
             } else {
-                let mut builder = Type::group_type_builder(&elements[index].name)
+                let mut builder = Type::group_type_builder(element.name)
                     .with_converted_type(converted_type)
                     .with_logical_type(logical_type)
                     .with_id(field_id);
@@ -1277,122 +1364,38 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize
             }
         }
         Some(n) => {
-            let repetition = elements[index]
-                .repetition_type
-                .map(Repetition::try_from)
-                .transpose()?;
+            let repetition = element.repetition_type;
 
-            let mut fields = vec![];
+            let mut fields = Vec::with_capacity(n as usize);
             let mut next_index = index + 1;
             for _ in 0..n {
-                let child_result = from_thrift_helper(elements, next_index)?;
+                let child_result = schema_from_array_helper(elements, num_elements, next_index)?;
                 next_index = child_result.0;
                 fields.push(child_result.1);
             }
 
-            let mut builder = Type::group_type_builder(&elements[index].name)
+            let mut builder = Type::group_type_builder(element.name)
                 .with_converted_type(converted_type)
                 .with_logical_type(logical_type)
                 .with_fields(fields)
                 .with_id(field_id);
-            if let Some(rep) = repetition {
-                // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or
-                // REPEATED for root node.
-                //
-                // We only set repetition for group types that are not top-level message
-                // type. According to parquet-format:
-                //   Root of the schema does not have a repetition_type.
-                //   All other types must have one.
-                if !is_root_node {
-                    builder = builder.with_repetition(rep);
-                }
-            }
-            Ok((next_index, Arc::new(builder.build().unwrap())))
-        }
-    }
-}
-
-/// Method to convert to Thrift.
-pub fn to_thrift(schema: &Type) -> Result<Vec<SchemaElement>> {
-    if !schema.is_group() {
-        return Err(general_err!("Root schema must be Group type"));
-    }
-    let mut elements: Vec<SchemaElement> = Vec::new();
-    to_thrift_helper(schema, &mut elements);
-    Ok(elements)
-}
 
-/// Constructs list of `SchemaElement` from the schema using depth-first traversal.
-/// Here we assume that schema is always valid and starts with group type.
-fn to_thrift_helper(schema: &Type, elements: &mut Vec<SchemaElement>) {
-    match *schema {
-        Type::PrimitiveType {
-            ref basic_info,
-            physical_type,
-            type_length,
-            scale,
-            precision,
-        } => {
-            let element = SchemaElement {
-                type_: Some(physical_type.into()),
-                type_length: if type_length >= 0 {
-                    Some(type_length)
-                } else {
-                    None
-                },
-                repetition_type: Some(basic_info.repetition().into()),
-                name: basic_info.name().to_owned(),
-                num_children: None,
-                converted_type: basic_info.converted_type().into(),
-                scale: if scale >= 0 { Some(scale) } else { None },
-                precision: if precision >= 0 {
-                    Some(precision)
-                } else {
-                    None
-                },
-                field_id: if basic_info.has_id() {
-                    Some(basic_info.id())
-                } else {
-                    None
-                },
-                logical_type: basic_info.logical_type().map(|value| value.into()),
-            };
-
-            elements.push(element);
-        }
-        Type::GroupType {
-            ref basic_info,
-            ref fields,
-        } => {
-            let repetition = if basic_info.has_repetition() {
-                Some(basic_info.repetition().into())
-            } else {
-                None
-            };
-
-            let element = SchemaElement {
-                type_: None,
-                type_length: None,
-                repetition_type: repetition,
-                name: basic_info.name().to_owned(),
-                num_children: Some(fields.len() as i32),
-                converted_type: basic_info.converted_type().into(),
-                scale: None,
-                precision: None,
-                field_id: if basic_info.has_id() {
-                    Some(basic_info.id())
-                } else {
-                    None
-                },
-                logical_type: basic_info.logical_type().map(|value| value.into()),
-            };
-
-            elements.push(element);
-
-            // Add child elements for a group
-            for field in fields {
-                to_thrift_helper(field, elements);
+            // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or
+            // REPEATED for root node.
+            //
+            // We only set repetition for group types that are not top-level message
+            // type. According to parquet-format:
+            //   Root of the schema does not have a repetition_type.
+            //   All other types must have one.
+            if !is_root_node {
+                let Some(rep) = repetition else {
+                    return Err(general_err!(
+                        "Repetition level must be defined for non-root types"
+                    ));
+                };
+                builder = builder.with_repetition(rep);
             }
+            Ok((next_index, Arc::new(builder.build()?)))
         }
     }
 }
@@ -1401,7 +1404,10 @@ fn to_thrift_helper(schema: &Type, elements: &mut Vec<SchemaElement>) {
 mod tests {
     use super::*;
 
-    use crate::schema::parser::parse_message_type;
+    use crate::{
+        file::metadata::thrift::tests::{buf_to_schema_list, roundtrip_schema, schema_to_buf},
+        schema::parser::parse_message_type,
+    };
 
     // TODO: add tests for v2 types
 
@@ -1422,8 +1428,8 @@ mod tests {
             let basic_info = tp.get_basic_info();
             assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
             assert_eq!(
-                basic_info.logical_type(),
-                Some(LogicalType::Integer {
+                basic_info.logical_type_ref(),
+                Some(&LogicalType::Integer {
                     bit_width: 32,
                     is_signed: true
                 })
@@ -1737,6 +1743,12 @@ mod tests {
                 "Parquet error: UUID cannot annotate field 'foo' because it is not a FIXED_LEN_BYTE_ARRAY(16) field"
             );
         }
+
+        // test unknown logical types are ok
+        result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
+            .with_logical_type(Some(LogicalType::_Unknown { field_id: 100 }))
+            .build();
+        assert!(result.is_ok());
     }
 
     #[test]
@@ -1767,7 +1779,7 @@ mod tests {
         assert!(tp.is_group());
         assert!(!tp.is_primitive());
         assert_eq!(basic_info.repetition(), Repetition::REPEATED);
-        assert_eq!(basic_info.logical_type(), Some(LogicalType::List));
+        assert_eq!(basic_info.logical_type_ref(), Some(&LogicalType::List));
         assert_eq!(basic_info.converted_type(), ConvertedType::LIST);
         assert_eq!(basic_info.id(), 1);
         assert_eq!(tp.get_fields().len(), 2);
@@ -2064,9 +2076,11 @@ mod tests {
         let f2 = test_new_group_type(
             "f",
             Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f2", PhysicalType::INT64)
-                .build()
-                .unwrap()],
+            vec![
+                Type::primitive_type_builder("f2", PhysicalType::INT64)
+                    .build()
+                    .unwrap(),
+            ],
         );
         assert!(f1.check_contains(&f2));
 
@@ -2129,9 +2143,11 @@ mod tests {
         let f2 = test_new_group_type(
             "f",
             Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f3", PhysicalType::INT32)
-                .build()
-                .unwrap()],
+            vec![
+                Type::primitive_type_builder("f3", PhysicalType::INT32)
+                    .build()
+                    .unwrap(),
+            ],
         );
         assert!(!f1.check_contains(&f2));
     }
@@ -2150,9 +2166,11 @@ mod tests {
         let f1 = test_new_group_type(
             "f",
             Repetition::REPEATED,
-            vec![Type::primitive_type_builder("f1", PhysicalType::INT32)
-                .build()
-                .unwrap()],
+            vec![
+                Type::primitive_type_builder("f1", PhysicalType::INT32)
+                    .build()
+                    .unwrap(),
+            ],
         );
         let f2 = Type::primitive_type_builder("f1", PhysicalType::INT32)
             .build()
@@ -2168,9 +2186,11 @@ mod tests {
                 test_new_group_type(
                     "b",
                     Repetition::REPEATED,
-                    vec![Type::primitive_type_builder("c", PhysicalType::INT32)
-                        .build()
-                        .unwrap()],
+                    vec![
+                        Type::primitive_type_builder("c", PhysicalType::INT32)
+                            .build()
+                            .unwrap(),
+                    ],
                 ),
                 Type::primitive_type_builder("d", PhysicalType::INT64)
                     .build()
@@ -2186,9 +2206,11 @@ mod tests {
             vec![test_new_group_type(
                 "b",
                 Repetition::REPEATED,
-                vec![Type::primitive_type_builder("c", PhysicalType::INT32)
-                    .build()
-                    .unwrap()],
+                vec![
+                    Type::primitive_type_builder("c", PhysicalType::INT32)
+                        .build()
+                        .unwrap(),
+                ],
             )],
         );
         assert!(f1.check_contains(&f2)); // should match
@@ -2200,7 +2222,8 @@ mod tests {
         let schema = Type::primitive_type_builder("col", PhysicalType::INT32)
             .build()
             .unwrap();
-        let thrift_schema = to_thrift(&schema);
+        let schema = Arc::new(schema);
+        let thrift_schema = schema_to_buf(&schema);
         assert!(thrift_schema.is_err());
         if let Err(e) = thrift_schema {
             assert_eq!(
@@ -2260,8 +2283,7 @@ mod tests {
     }
     ";
         let expected_schema = parse_message_type(message_type).unwrap();
-        let thrift_schema = to_thrift(&expected_schema).unwrap();
-        let result_schema = from_thrift(&thrift_schema).unwrap();
+        let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
         assert_eq!(result_schema, Arc::new(expected_schema));
     }
 
@@ -2276,8 +2298,7 @@ mod tests {
     }
     ";
         let expected_schema = parse_message_type(message_type).unwrap();
-        let thrift_schema = to_thrift(&expected_schema).unwrap();
-        let result_schema = from_thrift(&thrift_schema).unwrap();
+        let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap();
         assert_eq!(result_schema, Arc::new(expected_schema));
     }
 
@@ -2297,8 +2318,10 @@ mod tests {
     }
     ";
 
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
+        let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
+        let mut buf = schema_to_buf(&expected_schema).unwrap();
+        let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
+
         // Change all of None to Some(0)
         for elem in &mut thrift_schema[..] {
             if elem.num_children.is_none() {
@@ -2306,8 +2329,8 @@ mod tests {
             }
         }
 
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
+        let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
+        assert_eq!(result_schema, expected_schema);
     }
 
     // Sometimes parquet-cpp sets repetition level for the root node, which is against
@@ -2322,23 +2345,25 @@ mod tests {
     }
     ";
 
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
-        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into());
+        let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
+        let mut buf = schema_to_buf(&expected_schema).unwrap();
+        let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
+        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
 
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
+        let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
+        assert_eq!(result_schema, expected_schema);
     }
 
     #[test]
     fn test_schema_from_thrift_group_has_no_child() {
         let message_type = "message schema {}";
 
-        let expected_schema = parse_message_type(message_type).unwrap();
-        let mut thrift_schema = to_thrift(&expected_schema).unwrap();
-        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into());
+        let expected_schema = Arc::new(parse_message_type(message_type).unwrap());
+        let mut buf = schema_to_buf(&expected_schema).unwrap();
+        let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap();
+        thrift_schema[0].repetition_type = Some(Repetition::REQUIRED);
 
-        let result_schema = from_thrift(&thrift_schema).unwrap();
-        assert_eq!(result_schema, Arc::new(expected_schema));
+        let result_schema = parquet_schema_from_array(thrift_schema).unwrap();
+        assert_eq!(result_schema, expected_schema);
     }
 }
diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs
index bf8a2926aae0..2eb91162ac38 100644
--- a/parquet/src/thrift.rs
+++ b/parquet/src/thrift.rs
@@ -18,10 +18,7 @@
 //! Custom thrift definitions
 
 pub use thrift::protocol::TCompactOutputProtocol;
-use thrift::protocol::{
-    TFieldIdentifier, TInputProtocol, TListIdentifier, TMapIdentifier, TMessageIdentifier,
-    TOutputProtocol, TSetIdentifier, TStructIdentifier, TType,
-};
+use thrift::protocol::{TInputProtocol, TOutputProtocol};
 
 /// Reads and writes the struct to Thrift protocols.
 ///
@@ -33,333 +30,57 @@ pub trait TSerializable: Sized {
     fn write_to_out_protocol<T: TOutputProtocol>(&self, o_prot: &mut T) -> thrift::Result<()>;
 }
 
-/// A more performant implementation of [`TCompactInputProtocol`] that reads a slice
-///
-/// [`TCompactInputProtocol`]: thrift::protocol::TCompactInputProtocol
-pub(crate) struct TCompactSliceInputProtocol<'a> {
-    buf: &'a [u8],
-    // Identifier of the last field deserialized for a struct.
-    last_read_field_id: i16,
-    // Stack of the last read field ids (a new entry is added each time a nested struct is read).
-    read_field_id_stack: Vec<i16>,
-    // Boolean value for a field.
-    // Saved because boolean fields and their value are encoded in a single byte,
-    // and reading the field only occurs after the field id is read.
-    pending_read_bool_value: Option<bool>,
-}
-
-impl<'a> TCompactSliceInputProtocol<'a> {
-    pub fn new(buf: &'a [u8]) -> Self {
-        Self {
-            buf,
-            last_read_field_id: 0,
-            read_field_id_stack: Vec::with_capacity(16),
-            pending_read_bool_value: None,
-        }
-    }
-
-    pub fn as_slice(&self) -> &'a [u8] {
-        self.buf
-    }
-
-    fn read_vlq(&mut self) -> thrift::Result<u64> {
-        let mut in_progress = 0;
-        let mut shift = 0;
-        loop {
-            let byte = self.read_byte()?;
-            in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift);
-            shift += 7;
-            if byte & 0x80 == 0 {
-                return Ok(in_progress);
-            }
-        }
-    }
-
-    fn read_zig_zag(&mut self) -> thrift::Result<i64> {
-        let val = self.read_vlq()?;
-        Ok((val >> 1) as i64 ^ -((val & 1) as i64))
-    }
-
-    fn read_list_set_begin(&mut self) -> thrift::Result<(TType, i32)> {
-        let header = self.read_byte()?;
-        let element_type = collection_u8_to_type(header & 0x0F)?;
-
-        let possible_element_count = (header & 0xF0) >> 4;
-        let element_count = if possible_element_count != 15 {
-            // high bits set high if count and type encoded separately
-            possible_element_count as i32
-        } else {
-            self.read_vlq()? as _
-        };
-
-        Ok((element_type, element_count))
-    }
-}
-
-macro_rules! thrift_unimplemented {
-    () => {
-        Err(thrift::Error::Protocol(thrift::ProtocolError {
-            kind: thrift::ProtocolErrorKind::NotImplemented,
-            message: "not implemented".to_string(),
-        }))
-    };
-}
-
-impl TInputProtocol for TCompactSliceInputProtocol<'_> {
-    fn read_message_begin(&mut self) -> thrift::Result<TMessageIdentifier> {
-        unimplemented!()
-    }
-
-    fn read_message_end(&mut self) -> thrift::Result<()> {
-        thrift_unimplemented!()
-    }
-
-    fn read_struct_begin(&mut self) -> thrift::Result<Option<TStructIdentifier>> {
-        self.read_field_id_stack.push(self.last_read_field_id);
-        self.last_read_field_id = 0;
-        Ok(None)
-    }
-
-    fn read_struct_end(&mut self) -> thrift::Result<()> {
-        self.last_read_field_id = self
-            .read_field_id_stack
-            .pop()
-            .expect("should have previous field ids");
-        Ok(())
-    }
-
-    fn read_field_begin(&mut self) -> thrift::Result<TFieldIdentifier> {
-        // we can read at least one byte, which is:
-        // - the type
-        // - the field delta and the type
-        let field_type = self.read_byte()?;
-        let field_delta = (field_type & 0xF0) >> 4;
-        let field_type = match field_type & 0x0F {
-            0x01 => {
-                self.pending_read_bool_value = Some(true);
-                Ok(TType::Bool)
-            }
-            0x02 => {
-                self.pending_read_bool_value = Some(false);
-                Ok(TType::Bool)
-            }
-            ttu8 => u8_to_type(ttu8),
-        }?;
-
-        match field_type {
-            TType::Stop => Ok(
-                TFieldIdentifier::new::<Option<String>, String, Option<i16>>(
-                    None,
-                    TType::Stop,
-                    None,
-                ),
-            ),
-            _ => {
-                if field_delta != 0 {
-                    self.last_read_field_id = self
-                        .last_read_field_id
-                        .checked_add(field_delta as i16)
-                        .map_or_else(
-                            || {
-                                Err(thrift::Error::Protocol(thrift::ProtocolError {
-                                    kind: thrift::ProtocolErrorKind::InvalidData,
-                                    message: format!(
-                                        "cannot add {} to {}",
-                                        field_delta, self.last_read_field_id
-                                    ),
-                                }))
-                            },
-                            Ok,
-                        )?;
-                } else {
-                    self.last_read_field_id = self.read_i16()?;
-                };
-
-                Ok(TFieldIdentifier {
-                    name: None,
-                    field_type,
-                    id: Some(self.last_read_field_id),
-                })
-            }
-        }
-    }
-
-    fn read_field_end(&mut self) -> thrift::Result<()> {
-        Ok(())
-    }
-
-    fn read_bool(&mut self) -> thrift::Result<bool> {
-        match self.pending_read_bool_value.take() {
-            Some(b) => Ok(b),
-            None => {
-                let b = self.read_byte()?;
-                // Previous versions of the thrift specification said to use 0 and 1 inside collections,
-                // but that differed from existing implementations.
-                // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8.
-                // At least the go implementation seems to have followed the previously documented values.
-                match b {
-                    0x01 => Ok(true),
-                    0x00 | 0x02 => Ok(false),
-                    unkn => Err(thrift::Error::Protocol(thrift::ProtocolError {
-                        kind: thrift::ProtocolErrorKind::InvalidData,
-                        message: format!("cannot convert {} into bool", unkn),
-                    })),
-                }
-            }
-        }
-    }
-
-    fn read_bytes(&mut self) -> thrift::Result<Vec<u8>> {
-        let len = self.read_vlq()? as usize;
-        let ret = self.buf.get(..len).ok_or_else(eof_error)?.to_vec();
-        self.buf = &self.buf[len..];
-        Ok(ret)
-    }
-
-    fn read_i8(&mut self) -> thrift::Result<i8> {
-        Ok(self.read_byte()? as _)
-    }
-
-    fn read_i16(&mut self) -> thrift::Result<i16> {
-        Ok(self.read_zig_zag()? as _)
-    }
-
-    fn read_i32(&mut self) -> thrift::Result<i32> {
-        Ok(self.read_zig_zag()? as _)
-    }
-
-    fn read_i64(&mut self) -> thrift::Result<i64> {
-        self.read_zig_zag()
-    }
-
-    fn read_double(&mut self) -> thrift::Result<f64> {
-        let slice = (self.buf[..8]).try_into().unwrap();
-        self.buf = &self.buf[8..];
-        Ok(f64::from_le_bytes(slice))
-    }
-
-    fn read_string(&mut self) -> thrift::Result<String> {
-        let bytes = self.read_bytes()?;
-        String::from_utf8(bytes).map_err(From::from)
-    }
-
-    fn read_list_begin(&mut self) -> thrift::Result<TListIdentifier> {
-        let (element_type, element_count) = self.read_list_set_begin()?;
-        Ok(TListIdentifier::new(element_type, element_count))
-    }
-
-    fn read_list_end(&mut self) -> thrift::Result<()> {
-        Ok(())
-    }
-
-    fn read_set_begin(&mut self) -> thrift::Result<TSetIdentifier> {
-        thrift_unimplemented!()
-    }
-
-    fn read_set_end(&mut self) -> thrift::Result<()> {
-        thrift_unimplemented!()
-    }
-
-    fn read_map_begin(&mut self) -> thrift::Result<TMapIdentifier> {
-        thrift_unimplemented!()
-    }
-
-    fn read_map_end(&mut self) -> thrift::Result<()> {
-        Ok(())
-    }
-
-    #[inline]
-    fn read_byte(&mut self) -> thrift::Result<u8> {
-        let ret = *self.buf.first().ok_or_else(eof_error)?;
-        self.buf = &self.buf[1..];
-        Ok(ret)
-    }
-}
-
-fn collection_u8_to_type(b: u8) -> thrift::Result<TType> {
-    match b {
-        // For historical and compatibility reasons, a reader should be capable to deal with both cases.
-        // The only valid value in the original spec was 2, but due to an widespread implementation bug
-        // the defacto standard across large parts of the library became 1 instead.
-        // As a result, both values are now allowed.
-        // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
-        0x01 | 0x02 => Ok(TType::Bool),
-        o => u8_to_type(o),
-    }
-}
-
-fn u8_to_type(b: u8) -> thrift::Result<TType> {
-    match b {
-        0x00 => Ok(TType::Stop),
-        0x03 => Ok(TType::I08), // equivalent to TType::Byte
-        0x04 => Ok(TType::I16),
-        0x05 => Ok(TType::I32),
-        0x06 => Ok(TType::I64),
-        0x07 => Ok(TType::Double),
-        0x08 => Ok(TType::String),
-        0x09 => Ok(TType::List),
-        0x0A => Ok(TType::Set),
-        0x0B => Ok(TType::Map),
-        0x0C => Ok(TType::Struct),
-        unkn => Err(thrift::Error::Protocol(thrift::ProtocolError {
-            kind: thrift::ProtocolErrorKind::InvalidData,
-            message: format!("cannot convert {} into TType", unkn),
-        })),
-    }
-}
-
-fn eof_error() -> thrift::Error {
-    thrift::Error::Transport(thrift::TransportError {
-        kind: thrift::TransportErrorKind::EndOfFile,
-        message: "Unexpected EOF".to_string(),
-    })
-}
-
 #[cfg(test)]
 mod tests {
-    use crate::format::{BoundaryOrder, ColumnIndex};
-    use crate::thrift::{TCompactSliceInputProtocol, TSerializable};
+    use crate::{
+        basic::Type,
+        file::page_index::{column_index::ColumnIndexMetaData, index_reader::decode_column_index},
+    };
 
     #[test]
     pub fn read_boolean_list_field_type() {
         // Boolean collection type encoded as 0x01, as used by this crate when writing.
         // Values encoded as 1 (true) or 2 (false) as in the current version of the thrift
         // documentation.
-        let bytes = vec![0x19, 0x21, 2, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0];
-
-        let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice());
-        let index = ColumnIndex::read_from_in_protocol(&mut protocol).unwrap();
-        let expected = ColumnIndex {
-            null_pages: vec![false, true],
-            min_values: vec![],
-            max_values: vec![],
-            boundary_order: BoundaryOrder::UNORDERED,
-            null_counts: None,
-            repetition_level_histograms: None,
-            definition_level_histograms: None,
+        let bytes = vec![
+            0x19, 0x21, 2, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0,
+        ];
+        let index = decode_column_index(&bytes, Type::BOOLEAN).unwrap();
+
+        let index = match index {
+            ColumnIndexMetaData::BOOLEAN(index) => index,
+            _ => panic!("expected boolean column index"),
         };
 
-        assert_eq!(&index, &expected);
+        // should be false, true
+        assert!(!index.is_null_page(0));
+        assert!(index.is_null_page(1));
+        assert!(!index.min_value(0).unwrap()); // min is false
+        assert!(index.max_value(0).unwrap()); // max is true
+        assert!(index.min_value(1).is_none());
+        assert!(index.max_value(1).is_none());
     }
 
     #[test]
     pub fn read_boolean_list_alternative_encoding() {
         // Boolean collection type encoded as 0x02, as allowed by the spec.
         // Values encoded as 1 (true) or 0 (false) as before the thrift documentation change on 2024-12-13.
-        let bytes = vec![0x19, 0x22, 0, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0];
-
-        let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice());
-        let index = ColumnIndex::read_from_in_protocol(&mut protocol).unwrap();
-        let expected = ColumnIndex {
-            null_pages: vec![false, true],
-            min_values: vec![],
-            max_values: vec![],
-            boundary_order: BoundaryOrder::UNORDERED,
-            null_counts: None,
-            repetition_level_histograms: None,
-            definition_level_histograms: None,
+        let bytes = vec![
+            0x19, 0x22, 0, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0,
+        ];
+        let index = decode_column_index(&bytes, Type::BOOLEAN).unwrap();
+
+        let index = match index {
+            ColumnIndexMetaData::BOOLEAN(index) => index,
+            _ => panic!("expected boolean column index"),
         };
 
-        assert_eq!(&index, &expected);
+        // should be false, true
+        assert!(!index.is_null_page(0));
+        assert!(index.is_null_page(1));
+        assert!(!index.min_value(0).unwrap()); // min is false
+        assert!(index.max_value(0).unwrap()); // max is true
+        assert!(index.min_value(1).is_none());
+        assert!(index.max_value(1).is_none());
     }
 }
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index 8f6c2d8f8184..3a26603fabc4 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -21,7 +21,7 @@ use bytes::Bytes;
 
 use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96};
 use crate::errors::{ParquetError, Result};
-use crate::util::bit_pack::{unpack16, unpack32, unpack64, unpack8};
+use crate::util::bit_pack::{unpack8, unpack16, unpack32, unpack64};
 
 #[inline]
 fn array_from_slice<const N: usize>(bs: &[u8]) -> Result<[u8; N]> {
@@ -150,8 +150,8 @@ where
 /// This function should be removed after
 /// [`int_roundings`](https://github.com/rust-lang/rust/issues/88581) is stable.
 #[inline]
-pub fn ceil<T: num::Integer>(value: T, divisor: T) -> T {
-    num::Integer::div_ceil(&value, &divisor)
+pub fn ceil<T: num_integer::Integer>(value: T, divisor: T) -> T {
+    num_integer::Integer::div_ceil(&value, &divisor)
 }
 
 /// Returns the `num_bits` least-significant bits of `v`
@@ -245,7 +245,7 @@ impl BitWriter {
     pub fn skip(&mut self, num_bytes: usize) -> usize {
         self.flush();
         let result = self.buffer.len();
-        self.buffer.extend(std::iter::repeat(0).take(num_bytes));
+        self.buffer.extend(std::iter::repeat_n(0, num_bytes));
         result
     }
 
@@ -283,9 +283,9 @@ impl BitWriter {
     /// The `num_bits` must not be greater than 64. This is bit packed.
     #[inline]
     pub fn put_value(&mut self, v: u64, num_bits: usize) {
-        assert!(num_bits <= 64);
+        debug_assert!(num_bits <= 64);
         let num_bits = num_bits as u8;
-        assert_eq!(v.checked_shr(num_bits as u32).unwrap_or(0), 0); // covers case v >> 64
+        debug_assert_eq!(v.checked_shr(num_bits as u32).unwrap_or(0), 0); // covers case v >> 64
 
         // Add value to buffered_values
         self.buffered_values |= v << self.bit_offset;
diff --git a/parquet/src/util/mod.rs b/parquet/src/util/mod.rs
index 1431132473e9..145cdd693e59 100644
--- a/parquet/src/util/mod.rs
+++ b/parquet/src/util/mod.rs
@@ -20,6 +20,7 @@ pub mod bit_util;
 mod bit_pack;
 pub(crate) mod interner;
 
+pub mod push_buffers;
 #[cfg(any(test, feature = "test_common"))]
 pub(crate) mod test_common;
 pub mod utf8;
diff --git a/parquet/src/util/push_buffers.rs b/parquet/src/util/push_buffers.rs
new file mode 100644
index 000000000000..0c00cf9bd57f
--- /dev/null
+++ b/parquet/src/util/push_buffers.rs
@@ -0,0 +1,215 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::errors::ParquetError;
+use crate::file::reader::{ChunkReader, Length};
+use bytes::Bytes;
+use std::fmt::Display;
+use std::ops::Range;
+
+/// Holds multiple buffers of data, each tagged with the file range it covers
+///
+/// This is the in-memory buffer for the ParquetDecoder and ParquetMetadataDecoders
+///
+/// Features:
+/// 1. Zero copy
+/// 2. Non-contiguous ranges of bytes
+///
+/// # Non Coalescing
+///
+/// This buffer does not coalesce (merge adjacent ranges of bytes into a
+/// single range). Coalescing at this level would require copying the data but
+/// the caller may already have the needed data in a single buffer which would
+/// require no copying.
+///
+/// Thus, the implementation defers to the caller to coalesce subsequent requests
+/// if desired.
+#[derive(Debug, Clone)]
+pub(crate) struct PushBuffers {
+    /// The virtual "offset" of these buffers: the file position of the next `Read::read`
+    offset: u64,
+    /// The total length of the file being decoded
+    file_len: u64,
+    /// File ranges available for decoding (not adjusted for offset); parallel to `buffers`
+    ranges: Vec<Range<u64>>,
+    /// The data for each entry of `ranges`, in the same order
+    buffers: Vec<Bytes>,
+}
+
+/// Multi-line, human-readable summary of the buffers
+///
+/// Prints a header line (offset and file length) followed by one line for
+/// every buffered range
+impl Display for PushBuffers {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let (offset, file_len) = (self.offset, self.file_len);
+        writeln!(f, "Buffers (offset: {offset}, file_len: {file_len})")?;
+        writeln!(f, "Available Ranges (w/ offset):")?;
+
+        // For each range: its raw bounds, its bounds shifted by `offset`, and
+        // its length in bytes
+        self.ranges.iter().try_for_each(|Range { start, end }| {
+            let shifted_start = start + offset;
+            let shifted_end = end + offset;
+            let num_bytes = end - start;
+            writeln!(
+                f,
+                "  {start}..{end} ({shifted_start}..{shifted_end}): {num_bytes} bytes"
+            )
+        })
+    }
+}
+
+impl PushBuffers {
+    /// Create a new, empty Buffers instance for a file of `file_len` bytes
+    pub fn new(file_len: u64) -> Self {
+        Self {
+            offset: 0,
+            file_len,
+            ranges: Vec::new(),
+            buffers: Vec::new(),
+        }
+    }
+
+    /// Push all the ranges and buffers, pairing `ranges[i]` with `buffers[i]`
+    ///
+    /// Panics if the lengths differ or a pair is rejected by [`Self::push_range`]
+    pub fn push_ranges(&mut self, ranges: Vec<Range<u64>>, buffers: Vec<Bytes>) {
+        assert_eq!(
+            ranges.len(),
+            buffers.len(),
+            "Number of ranges must match number of buffers"
+        );
+        for (range, buffer) in ranges.into_iter().zip(buffers) {
+            self.push_range(range, buffer);
+        }
+    }
+
+    /// Push a new range and its associated buffer
+    ///
+    /// Panics if `buffer` is not exactly as long as `range` (always so for a reversed range)
+    pub fn push_range(&mut self, range: Range<u64>, buffer: Bytes) {
+        // Compare as u64 via checked_sub: no underflow, no truncation to usize
+        assert_eq!(
+            range.end.checked_sub(range.start),
+            Some(buffer.len() as u64),
+            "Range length must match buffer length"
+        );
+        self.ranges.push(range);
+        self.buffers.push(buffer);
+    }
+
+    /// Returns true if a single buffered range fully covers the given range
+    pub fn has_range(&self, range: &Range<u64>) -> bool {
+        self.ranges
+            .iter()
+            .any(|r| r.start <= range.start && r.end >= range.end)
+    }
+
+    /// Iterate over each buffered range paired with its data
+    fn iter(&self) -> impl Iterator<Item = (&Range<u64>, &Bytes)> {
+        self.ranges.iter().zip(self.buffers.iter())
+    }
+
+    /// Return the file length of the Parquet file being read
+    pub fn file_len(&self) -> u64 {
+        self.file_len
+    }
+
+    /// Consume `self` and return it with `offset` as its new offset
+    pub fn with_offset(mut self, offset: u64) -> Self {
+        self.offset = offset;
+        self
+    }
+
+    /// Return the total number of bytes across all buffered ranges
+    #[cfg(feature = "arrow")]
+    pub fn buffered_bytes(&self) -> u64 {
+        self.ranges.iter().map(|r| r.end - r.start).sum()
+    }
+
+    /// Clear any range and corresponding buffer that is exactly in the ranges_to_clear
+    #[cfg(feature = "arrow")]
+    pub fn clear_ranges(&mut self, ranges_to_clear: &[Range<u64>]) {
+        // Move (rather than clone) every entry that is not listed for removal
+        let (ranges, buffers): (Vec<_>, Vec<_>) = std::mem::take(&mut self.ranges)
+            .into_iter()
+            .zip(std::mem::take(&mut self.buffers))
+            .filter(|(range, _)| !ranges_to_clear.contains(range))
+            .unzip();
+        self.ranges = ranges;
+        self.buffers = buffers;
+    }
+}
+
+impl Length for PushBuffers {
+    fn len(&self) -> u64 {
+        self.file_len // the length of the whole file, not the number of bytes buffered
+    }
+}
+
+/// `Read` for Buffers; less efficient than `get_bytes` as it copies into `buf`
+///
+/// May return fewer bytes than requested (ranges are not coalesced). Fails with
+/// `UnexpectedEof` if nothing is buffered at the current offset before the file end.
+impl std::io::Read for PushBuffers {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let offset = self.offset;
+        // Of the ranges containing `offset`, use the one reaching furthest so any
+        // request that a single range can satisfy is still filled completely
+        let best = self
+            .iter()
+            .filter(|(range, _)| range.contains(&offset))
+            .max_by_key(|(range, _)| range.end);
+        let Some((range, data)) = best else {
+            if buf.is_empty() || offset >= self.file_len {
+                return Ok(0); // nothing requested, or a clean end of file
+            }
+            return Err(std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "No data available in Buffers",
+            ));
+        };
+        let start = (offset - range.start) as usize;
+        let len = buf.len().min(data.len() - start);
+        buf[..len].copy_from_slice(&data[start..start + len]);
+        self.offset += len as u64;
+        Ok(len)
+    }
+}
+
+impl ChunkReader for PushBuffers {
+    type T = Self;
+
+    /// Returns a clone of these buffers whose read offset is advanced by `start`
+    fn get_read(&self, start: u64) -> Result<Self::T, ParquetError> {
+        Ok(self.clone().with_offset(self.offset + start))
+    }
+
+    /// Slice `start..start + length` out of the one buffered range holding all of
+    /// it; otherwise return `NeedMoreDataRange` to signal that more data is needed
+    fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes, ParquetError> {
+        // Checked: a wrapped end could match a buffered range and panic in `slice`
+        let end = start.checked_add(length as u64).ok_or_else(|| {
+            ParquetError::General(format!("Range {start}+{length} overflows u64"))
+        })?;
+        self.iter()
+            .find(|(r, _)| r.start <= start && r.end >= end)
+            .map(|(r, data)| data.slice((start - r.start) as usize..(end - r.start) as usize))
+            .ok_or(ParquetError::NeedMoreDataRange(start..end))
+    }
+}
diff --git a/parquet/src/util/test_common/page_util.rs b/parquet/src/util/test_common/page_util.rs
index a1709efa92b3..5b64eb54133c 100644
--- a/parquet/src/util/test_common/page_util.rs
+++ b/parquet/src/util/test_common/page_util.rs
@@ -21,7 +21,7 @@ use crate::basic::Encoding;
 use crate::column::page::{Page, PageIterator};
 use crate::column::page::{PageMetadata, PageReader};
 use crate::data_type::DataType;
-use crate::encodings::encoding::{get_encoder, Encoder};
+use crate::encodings::encoding::{Encoder, get_encoder};
 use crate::encodings::levels::LevelEncoder;
 use crate::errors::Result;
 use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType};
diff --git a/parquet/src/util/test_common/rand_gen.rs b/parquet/src/util/test_common/rand_gen.rs
index 36a7e4b52a1c..e4ab2f7a0fb9 100644
--- a/parquet/src/util/test_common/rand_gen.rs
+++ b/parquet/src/util/test_common/rand_gen.rs
@@ -19,8 +19,9 @@ use crate::basic::Encoding;
 use crate::column::page::Page;
 use bytes::Bytes;
 use rand::{
-    distr::{uniform::SampleUniform, Distribution, StandardUniform},
-    rng, Rng,
+    Rng,
+    distr::{Distribution, StandardUniform, uniform::SampleUniform},
+    rng,
 };
 use std::collections::VecDeque;
 
@@ -31,37 +32,37 @@ use crate::util::{DataPageBuilder, DataPageBuilderImpl};
 
 /// Random generator of data type `T` values and sequences.
 pub trait RandGen<T: DataType> {
-    fn gen(len: i32) -> T::T;
+    fn r#gen(len: i32) -> T::T;
 
     fn gen_vec(len: i32, total: usize) -> Vec<T::T> {
         let mut result = vec![];
         for _ in 0..total {
-            result.push(Self::gen(len))
+            result.push(Self::r#gen(len))
         }
         result
     }
 }
 
 impl RandGen<BoolType> for BoolType {
-    fn gen(_: i32) -> bool {
+    fn r#gen(_: i32) -> bool {
         rng().random::<bool>()
     }
 }
 
 impl RandGen<Int32Type> for Int32Type {
-    fn gen(_: i32) -> i32 {
+    fn r#gen(_: i32) -> i32 {
         rng().random::<i32>()
     }
 }
 
 impl RandGen<Int64Type> for Int64Type {
-    fn gen(_: i32) -> i64 {
+    fn r#gen(_: i32) -> i64 {
         rng().random::<i64>()
     }
 }
 
 impl RandGen<Int96Type> for Int96Type {
-    fn gen(_: i32) -> Int96 {
+    fn r#gen(_: i32) -> Int96 {
         let mut rng = rng();
         let mut result = Int96::new();
         result.set_data(
@@ -74,19 +75,19 @@ impl RandGen<Int96Type> for Int96Type {
 }
 
 impl RandGen<FloatType> for FloatType {
-    fn gen(_: i32) -> f32 {
+    fn r#gen(_: i32) -> f32 {
         rng().random::<f32>()
     }
 }
 
 impl RandGen<DoubleType> for DoubleType {
-    fn gen(_: i32) -> f64 {
+    fn r#gen(_: i32) -> f64 {
         rng().random::<f64>()
     }
 }
 
 impl RandGen<ByteArrayType> for ByteArrayType {
-    fn gen(_: i32) -> ByteArray {
+    fn r#gen(_: i32) -> ByteArray {
         let mut rng = rng();
         let mut result = ByteArray::new();
         let mut value = vec![];
@@ -100,7 +101,7 @@ impl RandGen<ByteArrayType> for ByteArrayType {
 }
 
 impl RandGen<FixedLenByteArrayType> for FixedLenByteArrayType {
-    fn gen(len: i32) -> FixedLenByteArray {
+    fn r#gen(len: i32) -> FixedLenByteArray {
         assert!(len >= 0);
         let value = random_bytes(len as usize);
         ByteArray::from(value).into()
diff --git a/parquet/src/variant.rs b/parquet/src/variant.rs
new file mode 100644
index 000000000000..b7dde6b3c8a4
--- /dev/null
+++ b/parquet/src/variant.rs
@@ -0,0 +1,303 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! ⚠️ Experimental Support for reading and writing [`Variant`]s to / from Parquet files ⚠️
+//!
+//! This is a 🚧 Work In Progress
+//!
+//! Note: Requires the `variant_experimental` feature of the `parquet` crate to be enabled.
+//!
+//! # Features
+//! * Representation of [`Variant`], and [`VariantArray`] for working with
+//!   Variant values (see [`parquet_variant`] for more details)
+//! * Kernels for working with arrays of Variant values
+//!   such as conversion between `Variant` and JSON, and shredding/unshredding
+//!   (see [`parquet_variant_compute`] for more details)
+//!
+//! # Example: Writing a Parquet file with Variant column
+//! ```rust
+//! # use parquet::variant::{VariantArray, VariantType, VariantArrayBuilder, VariantBuilderExt};
+//! # use std::sync::Arc;
+//! # use arrow_array::{Array, ArrayRef, RecordBatch};
+//! # use arrow_schema::{DataType, Field, Schema};
+//! # use parquet::arrow::ArrowWriter;
+//! # fn main() -> Result<(), parquet::errors::ParquetError> {
+//!  // Use the VariantArrayBuilder to build a VariantArray
+//!  let mut builder = VariantArrayBuilder::new(3);
+//!  builder.new_object().with_field("name", "Alice").finish(); // row 1: {"name": "Alice"}
+//!  builder.append_value("such wow"); // row 2: "such wow" (a string)
+//!  let array = builder.build();
+//!
+//!  // Since VariantArray is an ExtensionType, it needs to be converted
+//!  // to an ArrayRef and Field with the appropriate metadata
+//!  // before it can be written to a Parquet file
+//!  let field = array.field("data");
+//!  let array = ArrayRef::from(array);
+//!  // create a RecordBatch with the VariantArray
+//!  let schema = Schema::new(vec![field]);
+//!  let batch = RecordBatch::try_new(Arc::new(schema), vec![array])?;
+//!
+//!  // Now you can write the RecordBatch to the Parquet file, as normal
+//!  let file = std::fs::File::create("variant.parquet")?;
+//!  let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
+//!  writer.write(&batch)?;
+//!  writer.close()?;
+//!
+//! # std::fs::remove_file("variant.parquet")?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! # Example: Writing JSON into a Parquet file with Variant column
+//! ```rust
+//! # use std::sync::Arc;
+//! # use arrow_array::{ArrayRef, RecordBatch, StringArray};
+//! # use arrow_schema::Schema;
+//! # use parquet::variant::{json_to_variant, VariantArray};
+//! # use parquet::arrow::ArrowWriter;
+//! # fn main() -> Result<(), parquet::errors::ParquetError> {
+//!  // Create an array of JSON strings, simulating a column of JSON data
+//!  let input_array: ArrayRef = Arc::new(StringArray::from(vec![
+//!   Some(r#"{"name": "Alice", "age": 30}"#),
+//!   Some(r#"{"name": "Bob", "age": 25, "address": {"city": "New York"}}"#),
+//!   None,
+//!   Some("{}"),
+//!  ]));
+//!
+//!  // Convert the JSON strings to a VariantArray
+//!  let array: VariantArray = json_to_variant(&input_array)?;
+//!  // create a RecordBatch with the VariantArray
+//!  let schema = Schema::new(vec![array.field("data")]);
+//!  let batch = RecordBatch::try_new(Arc::new(schema), vec![ArrayRef::from(array)])?;
+//!
+//!  // write the RecordBatch to a Parquet file as normal
+//!  let file = std::fs::File::create("variant-json.parquet")?;
+//!  let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
+//!  writer.write(&batch)?;
+//!  writer.close()?;
+//! # std::fs::remove_file("variant-json.parquet")?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! # Example: Reading a Parquet file with Variant column
+//!
+//! Use the [`VariantType`] extension type to find the Variant column:
+//!
+//! ```
+//! # use std::sync::Arc;
+//! # use std::path::PathBuf;
+//! # use arrow_array::{ArrayRef, RecordBatch, RecordBatchReader};
+//! # use parquet::variant::{Variant, VariantArray, VariantType};
+//! # use parquet::arrow::arrow_reader::ArrowReaderBuilder;
+//! # fn main() -> Result<(), parquet::errors::ParquetError> {
+//! # use arrow_array::StructArray;
+//! # fn file_path() -> PathBuf { // return a testing file path
+//! #    PathBuf::from(arrow::util::test_util::parquet_test_data())
+//! #   .join("..")
+//! #   .join("shredded_variant")
+//! #   .join("case-075.parquet")
+//! # }
+//! // Read the Parquet file using standard Arrow Parquet reader.
+//! // Note this file has 2 columns: "id", "var", and the "var" column
+//! // contains a variant that looks like this:
+//! // "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=STRING, value=iceberg))"
+//! let file = std::fs::File::open(file_path())?;
+//! let mut reader = ArrowReaderBuilder::try_new(file)?.build()?;
+//!
+//! // You can check if a column contains a Variant using
+//! // the VariantType extension type
+//! let schema = reader.schema();
+//! let field = schema.field_with_name("var")?;
+//! assert!(field.try_extension_type::<VariantType>().is_ok());
+//!
+//! // The reader will yield RecordBatches with a StructArray
+//! // to convert them to VariantArray, use VariantArray::try_new
+//! let batch = reader.next().unwrap().unwrap();
+//!
+//! let col = batch.column_by_name("var").unwrap();
+//! let var_array = VariantArray::try_new(col)?;
+//! assert_eq!(var_array.len(), 1);
+//! let var_value: Variant = var_array.value(0);
+//! assert_eq!(var_value, Variant::from("iceberg")); // the value in case-075.parquet
+//! # Ok(())
+//! # }
+//! ```
+pub use parquet_variant::*;
+pub use parquet_variant_compute::*;
+
+#[cfg(test)]
+mod tests {
+    use crate::arrow::ArrowWriter;
+    use crate::arrow::arrow_reader::ArrowReaderBuilder;
+    use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+    use crate::file::reader::ChunkReader;
+    use arrow::util::test_util::parquet_test_data;
+    use arrow_array::{ArrayRef, RecordBatch};
+    use arrow_schema::Schema;
+    use bytes::Bytes;
+    use parquet_variant::{Variant, VariantBuilderExt};
+    use parquet_variant_compute::{VariantArray, VariantArrayBuilder, VariantType};
+    use std::path::PathBuf;
+    use std::sync::Arc;
+
+    #[test]
+    fn roundtrip_basic() {
+        roundtrip(variant_array());
+    }
+
+    /// Ensure a file with Variant LogicalType, written by another writer in
+    /// parquet-testing, can be read as a VariantArray
+    #[test]
+    fn read_logical_type() {
+        // Note: case-075 has 2 columns ("id", "var")
+        // The variant in the "var" column looks like this:
+        // "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=STRING, value=iceberg))"
+        let batch = read_shredded_variant_test_case("case-075.parquet");
+
+        assert_variant_metadata(&batch, "var");
+        let var_column = batch.column_by_name("var").expect("expected var column");
+        let var_array =
+            VariantArray::try_new(&var_column).expect("expected var column to be a VariantArray");
+
+        // verify the single row is valid and holds the string "iceberg"
+        assert_eq!(var_array.len(), 1);
+        assert!(var_array.is_valid(0));
+        let var_value = var_array.value(0);
+        assert_eq!(var_value, Variant::from("iceberg"));
+    }
+
+    /// Writes a variant to a parquet file and ensures the parquet logical type
+    /// annotation is correct
+    #[test]
+    fn write_logical_type() {
+        let array = variant_array();
+        let batch = variant_array_to_batch(array);
+        let buffer = write_to_buffer(&batch);
+
+        // read the parquet file's metadata and verify the logical type
+        let metadata = read_metadata(&Bytes::from(buffer));
+        let schema = metadata.file_metadata().schema_descr();
+        let fields = schema.root_schema().get_fields();
+        assert_eq!(fields.len(), 1);
+        let field = &fields[0];
+        assert_eq!(field.name(), "data");
+        // data should have been written with the Variant logical type
+        assert_eq!(
+            field.get_basic_info().logical_type_ref(),
+            Some(&crate::basic::LogicalType::Variant {
+                specification_version: None
+            })
+        );
+    }
+
+    /// Return a VariantArray with 3 rows:
+    ///
+    /// 1. `{"name": "Alice"}`
+    /// 2. `"such wow"` (a string)
+    /// 3. `null`
+    fn variant_array() -> VariantArray {
+        let mut builder = VariantArrayBuilder::new(3);
+        // row 1: {"name": "Alice"}
+        builder.new_object().with_field("name", "Alice").finish();
+        // row 2: "such wow" (a string)
+        builder.append_value("such wow");
+        // row 3: null
+        builder.append_null();
+        builder.build()
+    }
+
+    /// Writes a VariantArray to a parquet file and reads it back, verifying that
+    /// the data is the same
+    fn roundtrip(array: VariantArray) {
+        let source_batch = variant_array_to_batch(array);
+        assert_variant_metadata(&source_batch, "data");
+
+        let buffer = write_to_buffer(&source_batch);
+        let result_batch = read_to_batch(Bytes::from(buffer));
+        assert_variant_metadata(&result_batch, "data");
+        assert_eq!(result_batch, source_batch); // NB this also checks the schemas
+    }
+
+    /// Creates a RecordBatch with a single column named "data" from a VariantArray
+    fn variant_array_to_batch(array: VariantArray) -> RecordBatch {
+        let field = array.field("data");
+        let schema = Schema::new(vec![field]);
+        RecordBatch::try_new(Arc::new(schema), vec![ArrayRef::from(array)]).unwrap()
+    }
+
+    /// writes a RecordBatch to memory buffer and returns the buffer
+    fn write_to_buffer(batch: &RecordBatch) -> Vec<u8> {
+        let mut buffer = vec![];
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(batch).unwrap();
+        writer.close().unwrap();
+        buffer
+    }
+
+    /// Reads the Parquet metadata
+    fn read_metadata<T: ChunkReader + 'static>(input: &T) -> ParquetMetaData {
+        let mut reader = ParquetMetaDataReader::new();
+        reader.try_parse(input).unwrap();
+        reader.finish().unwrap()
+    }
+
+    /// Reads exactly one RecordBatch from a reader (e.g. `Bytes` or `File`)
+    fn read_to_batch<T: ChunkReader + 'static>(reader: T) -> RecordBatch {
+        let reader = ArrowReaderBuilder::try_new(reader)
+            .unwrap()
+            .build()
+            .unwrap();
+        let mut batches: Vec<RecordBatch> = reader.collect::<Result<Vec<_>, _>>().unwrap();
+        assert_eq!(batches.len(), 1);
+        batches.swap_remove(0)
+    }
+
+    /// Verifies the variant metadata is present in the schema for the specified
+    /// field name.
+    fn assert_variant_metadata(batch: &RecordBatch, field_name: &str) {
+        let schema = batch.schema();
+        let field = schema
+            .field_with_name(field_name)
+            .expect("could not find expected field");
+
+        // explicitly check the metadata so it is clear in the tests what the
+        // names are
+        let metadata_value = field
+            .metadata()
+            .get("ARROW:extension:name")
+            .expect("metadata does not exist");
+
+        assert_eq!(metadata_value, "arrow.parquet.variant");
+
+        // verify that `VariantType` also correctly finds the metadata
+        field
+            .try_extension_type::<VariantType>()
+            .expect("VariantExtensionType should be readable");
+    }
+
+    /// Read the specified test case filename from parquet-testing
+    /// See parquet-testing/shredded_variant/cases.json for more details
+    fn read_shredded_variant_test_case(name: &str) -> RecordBatch {
+        let case_file = PathBuf::from(parquet_test_data())
+            .join("..") // go up from data/ to parquet-testing/
+            .join("shredded_variant")
+            .join(name);
+        let case_file = std::fs::File::open(case_file).unwrap();
+        read_to_batch(case_file)
+    }
+}
diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs
index b427bd4302e2..54c92976e41c 100644
--- a/parquet/tests/arrow_reader/bad_data.rs
+++ b/parquet/tests/arrow_reader/bad_data.rs
@@ -80,17 +80,16 @@ fn test_invalid_files() {
 #[test]
 fn test_parquet_1481() {
     let err = read_file("PARQUET-1481.parquet").unwrap_err();
-    assert_eq!(
-        err.to_string(),
-        "Parquet error: unexpected parquet type: -7"
-    );
+    assert_eq!(err.to_string(), "Parquet error: Unexpected Type -7");
 }
 
 #[test]
-#[should_panic(expected = "assertion failed: self.current_value.is_some()")]
 fn test_arrow_gh_41321() {
     let err = read_file("ARROW-GH-41321.parquet").unwrap_err();
-    assert_eq!(err.to_string(), "TBD (currently panics)");
+    assert_eq!(
+        err.to_string(),
+        "External: Parquet argument error: Parquet error: Invalid or corrupted RLE bit width 254. Max allowed is 32"
+    );
 }
 
 #[test]
@@ -98,7 +97,7 @@ fn test_arrow_gh_41317() {
     let err = read_file("ARROW-GH-41317.parquet").unwrap_err();
     assert_eq!(
         err.to_string(),
-        "External: Parquet argument error: External: bad data"
+        "External: Parquet argument error: Parquet error: StructArrayReader out of sync in read_records, expected 5 read, got 2"
     );
 }
 
@@ -135,7 +134,7 @@ fn test_arrow_rs_gh_45185_dict_levels() {
 /// Returns an error if the file is invalid
 fn read_file(name: &str) -> Result<usize, ParquetError> {
     let path = bad_data_dir().join(name);
-    println!("Reading file: {:?}", path);
+    println!("Reading file: {path:?}");
 
     let file = std::fs::File::open(&path).unwrap();
     let reader = ArrowReaderBuilder::try_new(file)?.build()?;
@@ -150,6 +149,7 @@ fn read_file(name: &str) -> Result<usize, ParquetError> {
 
 #[cfg(feature = "async")]
 #[tokio::test]
+#[allow(deprecated)]
 async fn bad_metadata_err() {
     use bytes::Bytes;
     use parquet::file::metadata::ParquetMetaDataReader;
diff --git a/parquet/tests/arrow_reader/checksum.rs b/parquet/tests/arrow_reader/checksum.rs
index b500b7cb1df8..1a3728992556 100644
--- a/parquet/tests/arrow_reader/checksum.rs
+++ b/parquet/tests/arrow_reader/checksum.rs
@@ -63,7 +63,7 @@ fn test_rle_dict_snappy_checksum() {
 /// The record batch data is replaced with () and errors are stringified.
 fn read_file_batch_errors(name: &str) -> Vec<Result<(), String>> {
     let path = PathBuf::from(parquet_test_data()).join(name);
-    println!("Reading file: {:?}", path);
+    println!("Reading file: {path:?}");
     let file = std::fs::File::open(&path).unwrap();
     let reader = ArrowReaderBuilder::try_new(file).unwrap().build().unwrap();
     reader
diff --git a/parquet/tests/arrow_reader/int96_stats_roundtrip.rs b/parquet/tests/arrow_reader/int96_stats_roundtrip.rs
new file mode 100644
index 000000000000..d6ba8d419e3e
--- /dev/null
+++ b/parquet/tests/arrow_reader/int96_stats_roundtrip.rs
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use chrono::{DateTime, NaiveDateTime, Utc};
+use parquet::basic::Type;
+use parquet::data_type::{Int96, Int96Type};
+use parquet::file::properties::{EnabledStatistics, WriterProperties};
+use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::file::statistics::Statistics;
+use parquet::file::writer::SerializedFileWriter;
+use parquet::schema::parser::parse_message_type;
+use rand::seq::SliceRandom;
+use std::fs::File;
+use std::sync::Arc;
+use tempfile::Builder;
+
+/// Parses a `%Y-%m-%d %H:%M:%S%.f` timestamp (interpreted as UTC) and packs it into an
+/// [`Int96`]: nanoseconds within the day as two 32-bit words, then the Julian day number.
+fn datetime_to_int96(dt: &str) -> Int96 {
+    let naive = NaiveDateTime::parse_from_str(dt, "%Y-%m-%d %H:%M:%S%.f").unwrap();
+    let datetime: DateTime<Utc> = DateTime::from_naive_utc_and_offset(naive, Utc);
+    let nanos = datetime.timestamp_nanos_opt().unwrap();
+    const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588;
+    const NANOSECONDS_IN_DAY: i64 = 86_400_000_000_000;
+    // Euclidean division keeps nanos-of-day in 0..NANOSECONDS_IN_DAY for pre-1970 inputs
+    let days = nanos.div_euclid(NANOSECONDS_IN_DAY);
+    let nanos_of_day = nanos.rem_euclid(NANOSECONDS_IN_DAY);
+    let julian_day = (days + JULIAN_DAY_OF_EPOCH) as i32 as u32;
+    let mut int96 = Int96::new();
+    int96.set_data(nanos_of_day as u32, (nanos_of_day >> 32) as u32, julian_day);
+    int96
+}
+
+fn verify_ordering(data: Vec<Int96>) {
+    // Create a temporary file
+    let tmp = Builder::new()
+        .prefix("test_int96_stats")
+        .tempfile()
+        .unwrap();
+    let file_path = tmp.path().to_owned();
+
+    // Create schema with INT96 field
+    let message_type = "
+        message test {
+            REQUIRED INT96 timestamp;
+        }
+    ";
+    let schema = parse_message_type(message_type).unwrap();
+
+    // Configure writer properties to enable statistics
+    let props = WriterProperties::builder()
+        .set_statistics_enabled(EnabledStatistics::Page)
+        .build();
+
+    let expected_min = data[0];
+    let expected_max = data[data.len() - 1];
+
+    {
+        let file = File::create(&file_path).unwrap();
+        let mut writer = SerializedFileWriter::new(file, schema.into(), Arc::new(props)).unwrap();
+        let mut row_group = writer.next_row_group().unwrap();
+        let mut col_writer = row_group.next_column().unwrap().unwrap();
+
+        {
+            let writer = col_writer.typed::<Int96Type>();
+            let mut shuffled_data = data.clone();
+            shuffled_data.shuffle(&mut rand::rng());
+            writer.write_batch(&shuffled_data, None, None).unwrap();
+        }
+        col_writer.close().unwrap();
+        row_group.close().unwrap();
+        writer.close().unwrap();
+    }
+
+    let file = File::open(&file_path).unwrap();
+    let reader = SerializedFileReader::new(file).unwrap();
+    let metadata = reader.metadata();
+    let row_group = metadata.row_group(0);
+    let column = row_group.column(0);
+
+    let stats = column.statistics().unwrap();
+    assert_eq!(stats.physical_type(), Type::INT96);
+
+    if let Statistics::Int96(stats) = stats {
+        let min = stats.min_opt().unwrap();
+        let max = stats.max_opt().unwrap();
+
+        assert_eq!(
+            *min, expected_min,
+            "Min value should be {expected_min} but was {min}"
+        );
+        assert_eq!(
+            *max, expected_max,
+            "Max value should be {expected_max} but was {max}"
+        );
+        assert_eq!(stats.null_count_opt(), Some(0));
+    } else {
+        panic!("Expected Int96 statistics");
+    }
+}
+
+#[test]
+fn test_multiple_dates() {
+    let data = vec![
+        datetime_to_int96("2020-01-01 00:00:00.000"),
+        datetime_to_int96("2020-02-29 23:59:59.000"),
+        datetime_to_int96("2020-12-31 23:59:59.000"),
+        datetime_to_int96("2021-01-01 00:00:00.000"),
+        datetime_to_int96("2023-06-15 12:30:45.000"),
+        datetime_to_int96("2024-02-29 15:45:30.000"),
+        datetime_to_int96("2024-12-25 07:00:00.000"),
+        datetime_to_int96("2025-01-01 00:00:00.000"),
+        datetime_to_int96("2025-07-04 20:00:00.000"),
+        datetime_to_int96("2025-12-31 23:59:59.000"),
+    ];
+    verify_ordering(data);
+}
+
+#[test]
+fn test_same_day_different_time() {
+    let data = vec![
+        datetime_to_int96("2020-01-01 00:01:00.000"),
+        datetime_to_int96("2020-01-01 00:02:00.000"),
+        datetime_to_int96("2020-01-01 00:03:00.000"),
+    ];
+    verify_ordering(data);
+}
+
+#[test]
+fn test_increasing_day_decreasing_time() {
+    let data = vec![
+        datetime_to_int96("2020-01-01 12:00:00.000"),
+        datetime_to_int96("2020-02-01 11:00:00.000"),
+        datetime_to_int96("2020-03-01 10:00:00.000"),
+    ];
+    verify_ordering(data);
+}
diff --git a/parquet/tests/arrow_reader/io/async_reader.rs b/parquet/tests/arrow_reader/io/async_reader.rs
new file mode 100644
index 000000000000..2f49de8a38b2
--- /dev/null
+++ b/parquet/tests/arrow_reader/io/async_reader.rs
@@ -0,0 +1,430 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tests for the async reader ([`ParquetRecordBatchStreamBuilder`])
+
+use crate::io::{
+    LogEntry, OperationLog, TestParquetFile, filter_a_175_b_625, filter_b_575_625, filter_b_false,
+    test_file, test_options,
+};
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use futures::{FutureExt, StreamExt};
+use parquet::arrow::arrow_reader::{ArrowReaderOptions, RowSelection, RowSelector};
+use parquet::arrow::async_reader::AsyncFileReader;
+use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::errors::Result;
+use parquet::file::metadata::ParquetMetaData;
+use std::ops::Range;
+use std::sync::Arc;
+
+#[tokio::test]
+async fn test_read_entire_file() {
+    // read entire file without any filtering or projection
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+    [
+        "Get Provided Metadata",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Read Multi:",
+        "  Row Group 0, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        "  Row Group 0, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        "  Row Group 0, column 'c': MultiPage(dictionary_page: true, data_pages: [0, 1])  (7346 bytes, 1 requests) [data]",
+        "Read Multi:",
+        "  Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        "  Row Group 1, column 'c': MultiPage(dictionary_page: true, data_pages: [0, 1])  (7456 bytes, 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[tokio::test]
+async fn test_read_single_group() {
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options())
+        .await
+        // read only second row group
+        .with_row_groups(vec![1]);
+
+    // Expect to see only IO for Row Group 1. Should see no IO for Row Group 0.
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "  Row Group 1, column 'c': MultiPage(dictionary_page: true, data_pages: [0, 1])  (7456 bytes, 1 requests) [data]",
+        ]
+    "#);
+}
+
+#[tokio::test]
+async fn test_read_single_column() {
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder.with_projection(ProjectionMask::columns(&schema_descr, ["b"]));
+    // Expect to see only IO for column "b". Should see no IO for columns "a" or "c".
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 0, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        ]
+    "#);
+}
+
+#[tokio::test]
+async fn test_read_row_selection() {
+    // There are 400 total rows spread across 4 data pages (100 rows each)
+    // select rows 175..225 (i.e. DataPage(1) of row group 0 and DataPage(0) of row group 1)
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+        .with_row_selection(RowSelection::from(vec![
+            RowSelector::skip(175),
+            RowSelector::select(50),
+        ]));
+
+    // Expect to see only data IO for one page for each column for each row group
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+    [
+        "Get Provided Metadata",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Read Multi:",
+        "  Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "  Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "  Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "  Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Read Multi:",
+        "  Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "  Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "  Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "  Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[tokio::test]
+async fn test_read_limit() {
+    // There are 400 total rows spread across 4 data pages (100 rows each)
+    // a limit of 125 rows should only fetch the first two data pages (DataPage(0) and DataPage(1)) from row group 0
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a"]))
+        .with_limit(125);
+
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+    [
+        "Get Provided Metadata",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Read Multi:",
+        "  Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "  Row Group 0, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "  Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[tokio::test]
+async fn test_read_single_row_filter() {
+    // Values from column "b" range 400..799
+    // filter "b" > 575 and "b" < 625
+    // (last data page in Row Group 0 and first DataPage in Row Group 1)
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+        .with_row_filter(filter_b_575_625(&schema_descr));
+
+    // Expect to see I/O for column b in both row groups to evaluate the filter,
+    // then a single page for the "a" column in each row group
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 0, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+            "  Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+            "  Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        ]
+    "#);
+}
+
+/// Same query as `test_read_single_row_filter`, but with the page index
+/// disabled in the reader options; snapshots the resulting IO log.
+#[tokio::test]
+async fn test_read_single_row_filter_no_page_index() {
+    // Values from column "b" range 400..799
+    // Apply a filter: "b" > 575 AND "b" < 625
+    // (last data page in Row Group 0 and first DataPage in Row Group 1)
+    let test_file = test_file();
+    let options = test_options().with_page_index(false);
+    let builder = async_builder(&test_file, options).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+        .with_row_filter(filter_b_575_625(&schema_descr));
+
+    // Since we don't have the page index, expect to see:
+    // 1. IO for all pages of column b to evaluate the filter
+    // 2. IO for all pages of column a, as the reader doesn't know where the
+    //    page boundaries are and so needs to scan them.
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 0, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 0, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        ]
+    "#);
+}
+
+/// Reads column "c" with two chained row filters (on "a", then on "b") and
+/// snapshots the resulting IO log.
+#[tokio::test]
+async fn test_read_multiple_row_filter() {
+    // Values in column "a" range 0..399
+    // Values in column "b" range 400..799
+    // First filter: "a" > 175  (last data page in Row Group 0 and all of Row Group 1)
+    // Second filter: "b" < 625 (last data page in Row Group 0 and first DataPage in RowGroup 1)
+    // Read column "c"
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["c"]))
+        .with_row_filter(filter_a_175_b_625(&schema_descr));
+
+    // Expect that we will see
+    // 1. IO for all pages of column a (to evaluate the first filter)
+    // 2. IO for pages of column b that passed the first filter (to evaluate the second filter)
+    // 3. IO for column c only for the rows that passed both filters
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 0, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+            "  Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 0, column 'c': DictionaryPage   (7107 bytes, 1 requests) [data]",
+            "  Row Group 0, column 'c': DataPage(1)      (126 bytes , 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+            "  Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+            "  Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'c': DictionaryPage   (7217 bytes, 1 requests) [data]",
+            "  Row Group 1, column 'c': DataPage(0)      (113 bytes , 1 requests) [data]",
+        ]
+    "#);
+}
+
+/// Reads columns "a" and "b" with a row filter that rejects every row and
+/// snapshots the resulting IO log.
+#[tokio::test]
+async fn test_read_single_row_filter_all() {
+    // Apply a filter that filters out all rows
+
+    let test_file = test_file();
+    let builder = async_builder(&test_file, test_options()).await;
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+        .with_row_filter(filter_b_false(&schema_descr));
+
+    // Expect to see reads for column "b" to evaluate the filter, but no reads
+    // for column "a" as no rows pass the filter
+    insta::assert_debug_snapshot!(run(
+        &test_file,
+        builder).await, @r#"
+        [
+            "Get Provided Metadata",
+            "Event: Builder Configured",
+            "Event: Reader Built",
+            "Read Multi:",
+            "  Row Group 0, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+            "Read Multi:",
+            "  Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1])  (1856 bytes, 1 requests) [data]",
+        ]
+    "#);
+}
+
+/// Build a [`ParquetRecordBatchStreamBuilder`] over `test_file`.
+///
+/// The reader handed to the builder records its IO in the file's operation
+/// log. When `options` has the page index disabled, the metadata served by the
+/// reader has its column index and offset index removed.
+async fn async_builder(
+    test_file: &TestParquetFile,
+    options: ArrowReaderOptions,
+) -> ParquetRecordBatchStreamBuilder<RecordingAsyncFileReader> {
+    let full_metadata = test_file.parquet_metadata();
+
+    let parquet_meta_data = if options.page_index() {
+        // page index requested: hand out the pre-parsed metadata unchanged
+        Arc::clone(full_metadata)
+    } else {
+        // page index not requested: serve a copy without the index structures
+        let stripped = ParquetMetaData::clone(full_metadata)
+            .into_builder()
+            .set_column_index(None)
+            .set_offset_index(None)
+            .build();
+        Arc::new(stripped)
+    };
+
+    let bytes = test_file.bytes().clone();
+    let ops = Arc::clone(test_file.ops());
+    let reader = RecordingAsyncFileReader {
+        bytes,
+        ops,
+        parquet_meta_data,
+    };
+
+    let builder = ParquetRecordBatchStreamBuilder::new_with_options(reader, options).await;
+    builder.unwrap()
+}
+
+/// Builds the stream from `builder`, drains every batch from it, and returns
+/// a snapshot of the operation log of `test_file`.
+///
+/// Two marker events ("Builder Configured" and "Reader Built") are added to
+/// the log around the call to `build`. Panics if building the stream or
+/// reading any batch fails.
+async fn run(
+    test_file: &TestParquetFile,
+    builder: ParquetRecordBatchStreamBuilder<RecordingAsyncFileReader>,
+) -> Vec<String> {
+    let log = test_file.ops();
+
+    log.add_entry(LogEntry::event("Builder Configured"));
+    let mut stream = builder.build().unwrap();
+    log.add_entry(LogEntry::event("Reader Built"));
+
+    // the batch contents are irrelevant here; only the IO they trigger matters
+    while let Some(batch) = stream.next().await {
+        if let Err(e) = batch {
+            panic!("Error reading batch: {e}");
+        }
+    }
+
+    log.snapshot()
+}
+
+/// An in-memory file reader used by the async tests that records every
+/// request it serves in an [`OperationLog`].
+struct RecordingAsyncFileReader {
+    /// The contents of the parquet file; read requests are served as slices of this
+    bytes: Bytes,
+    /// The log that each request is recorded in
+    ops: Arc<OperationLog>,
+    /// The metadata returned when the reader is asked for the file's metadata
+    parquet_meta_data: Arc<ParquetMetaData>,
+}
+
+impl AsyncFileReader for RecordingAsyncFileReader {
+    /// Serve `range` from the in-memory file.
+    ///
+    /// The data is sliced when this method is called; the read is recorded in
+    /// the operation log when the returned future is polled.
+    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
+        let ops = Arc::clone(&self.ops);
+
+        // translate to usize from u64 (once, for both slicing and logging)
+        let logged_range = range.start as usize..range.end as usize;
+
+        // `Bytes::slice` already returns an owned handle, no extra clone needed
+        let data = self.bytes.slice(logged_range.clone());
+
+        async move {
+            ops.add_entry_for_range(&logged_range);
+            Ok(data)
+        }
+        .boxed()
+    }
+
+    /// Serve all of `ranges` from the in-memory file in a single request.
+    ///
+    /// The whole request is recorded as one log entry when the returned future
+    /// is polled.
+    fn get_byte_ranges(&mut self, ranges: Vec<Range<u64>>) -> BoxFuture<'_, Result<Vec<Bytes>>> {
+        let ops = Arc::clone(&self.ops);
+
+        // translate to usize from u64 (once, for both slicing and logging)
+        let logged_ranges = ranges
+            .into_iter()
+            .map(|r| r.start as usize..r.end as usize)
+            .collect::<Vec<_>>();
+
+        let datas = logged_ranges
+            .iter()
+            .map(|range| self.bytes.slice(range.clone()))
+            .collect::<Vec<_>>();
+
+        async move {
+            ops.add_entry_for_ranges(&logged_ranges);
+            Ok(datas)
+        }
+        .boxed()
+    }
+
+    /// Return the metadata this reader was created with, recording the access
+    /// in the operation log. `_options` is ignored.
+    fn get_metadata<'a>(
+        &'a mut self,
+        _options: Option<&'a ArrowReaderOptions>,
+    ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
+        let ops = Arc::clone(&self.ops);
+        let parquet_meta_data = Arc::clone(&self.parquet_meta_data);
+        async move {
+            ops.add_entry(LogEntry::GetProvidedMetadata);
+            Ok(parquet_meta_data)
+        }
+        .boxed()
+    }
+}
diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs
new file mode 100644
index 000000000000..86b7674121b5
--- /dev/null
+++ b/parquet/tests/arrow_reader/io/mod.rs
@@ -0,0 +1,702 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tests for IO read patterns in the Parquet Reader
+//!
+//! Each test:
+//! 1. Creates a temporary Parquet file with a known row group structure
+//! 2. Reads data from that file using the Arrow Parquet Reader, recording the IO operations
+//! 3. Asserts the expected IO patterns based on the read operations
+//!
+//! Note this module contains test infrastructure only. The actual tests are in the
+//! sub-modules [`sync_reader`] and [`async_reader`].
+//!
+//! Key components:
+//! - [`TestParquetFile`] - Represents a Parquet file and its layout
+//! - [`OperationLog`] - Records IO operations performed on the file
+//! - [`LogEntry`] - Represents a single IO operation in the log
+
+mod sync_reader;
+
+#[cfg(feature = "async")]
+mod async_reader;
+
+use arrow::compute::and;
+use arrow::compute::kernels::cmp::{gt, lt};
+use arrow_array::cast::AsArray;
+use arrow_array::types::Int64Type;
+use arrow_array::{ArrayRef, BooleanArray, Int64Array, RecordBatch, StringViewArray};
+use bytes::Bytes;
+use parquet::arrow::arrow_reader::{
+    ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowFilter,
+};
+use parquet::arrow::{ArrowWriter, ProjectionMask};
+use parquet::data_type::AsBytes;
+use parquet::file::FOOTER_SIZE;
+use parquet::file::metadata::{FooterTail, ParquetMetaData, ParquetOffsetIndex};
+use parquet::file::page_index::offset_index::PageLocation;
+use parquet::file::properties::WriterProperties;
+use parquet::schema::types::SchemaDescriptor;
+use std::collections::BTreeMap;
+use std::fmt::Display;
+use std::ops::Range;
+use std::sync::{Arc, LazyLock, Mutex};
+
+/// Returns a fresh [`TestParquetFile`] (with its own empty operation log)
+/// backed by the shared [`TEST_FILE_DATA`] bytes.
+///
+/// The file contains:
+/// * 3 columns: "a", "b", "c"
+/// * 2 row groups of 200 rows each
+/// * data pages of 100 rows each
+///
+/// Column contents:
+/// * "a": the values 0..399
+/// * "b": the values 400..799
+/// * "c": strings alternating between a short form and one longer than 12 bytes
+fn test_file() -> TestParquetFile {
+    let bytes = Bytes::clone(&TEST_FILE_DATA);
+    TestParquetFile::new(bytes)
+}
+
+/// The reader options the tests start from.
+///
+/// The page index is enabled, which the tests rely on to reduce IO.
+fn test_options() -> ArrowReaderOptions {
+    let options = ArrowReaderOptions::default();
+    options.with_page_index(true)
+}
+
+/// Builds a [`RowFilter`] with a single predicate: "b > 575" AND "b < 625".
+///
+/// The matching rows are in the last data page of Row Group 0 and the first
+/// data page of Row Group 1.
+fn filter_b_575_625(schema_descr: &SchemaDescriptor) -> RowFilter {
+    // the predicate only needs column "b"
+    let mask = ProjectionMask::columns(schema_descr, ["b"]);
+
+    let predicate = ArrowPredicateFn::new(mask, |batch: RecordBatch| {
+        let values = batch.column(0).as_primitive::<Int64Type>();
+        let above_575 = gt(values, &Int64Array::new_scalar(575))?;
+        let below_625 = lt(values, &Int64Array::new_scalar(625))?;
+        and(&above_575, &below_625)
+    });
+
+    RowFilter::new(vec![Box::new(predicate)])
+}
+
+/// Builds a [`RowFilter`] with two predicates, in this order:
+/// 1. "a" > 175
+/// 2. "b" < 625
+///
+/// Rows passing both are in the last data page of Row Group 0 and the first
+/// data page of Row Group 1.
+fn filter_a_175_b_625(schema_descr: &SchemaDescriptor) -> RowFilter {
+    // first predicate: reads only column "a"
+    let mask_a = ProjectionMask::columns(schema_descr, ["a"]);
+    let a_gt_175 = ArrowPredicateFn::new(mask_a, |batch: RecordBatch| {
+        let values = batch.column(0).as_primitive::<Int64Type>();
+        let threshold = Int64Array::new_scalar(175);
+        gt(values, &threshold)
+    });
+
+    // second predicate: reads only column "b"
+    let mask_b = ProjectionMask::columns(schema_descr, ["b"]);
+    let b_lt_625 = ArrowPredicateFn::new(mask_b, |batch: RecordBatch| {
+        let values = batch.column(0).as_primitive::<Int64Type>();
+        let threshold = Int64Array::new_scalar(625);
+        lt(values, &threshold)
+    });
+
+    RowFilter::new(vec![Box::new(a_gt_175), Box::new(b_lt_625)])
+}
+
+/// Builds a [`RowFilter`] whose single predicate is `false` for every row, so
+/// both row groups are filtered out entirely.
+///
+/// Note the predicate still selects column "b" as its input.
+fn filter_b_false(schema_descr: &SchemaDescriptor) -> RowFilter {
+    let mask = ProjectionMask::columns(schema_descr, ["b"]);
+
+    let predicate = ArrowPredicateFn::new(mask, |batch: RecordBatch| {
+        // one `false` per input row
+        let all_false = (0..batch.num_rows()).map(|_| Some(false));
+        Ok(BooleanArray::from_iter(all_false))
+    });
+
+    RowFilter::new(vec![Box::new(predicate)])
+}
+
+/// The bytes of an in-memory parquet file shared by the tests, written once on
+/// first use. See [`test_file`] for a description of its contents.
+static TEST_FILE_DATA: LazyLock<Bytes> = LazyLock::new(|| {
+    // 400 input rows in 3 columns: "a", "b", "c".
+    // Note "c" has a different type (so its data page sizes will be different)
+    let a: ArrayRef = Arc::new(Int64Array::from_iter_values(0..400));
+    let b: ArrayRef = Arc::new(Int64Array::from_iter_values(400..800));
+    let strings = (0..400).map(|i| match i % 2 {
+        0 => format!("string_{i}"),
+        _ => format!("A string larger than 12 bytes and thus not inlined {i}"),
+    });
+    let c: ArrayRef = Arc::new(StringViewArray::from_iter_values(strings));
+
+    let input_batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap();
+
+    let props = WriterProperties::builder()
+        .set_max_row_group_size(200)
+        .set_data_page_row_count_limit(100)
+        .build();
+
+    let mut output = Vec::new();
+    let mut writer =
+        ArrowWriter::try_new(&mut output, input_batch.schema(), Some(props)).unwrap();
+
+    // the limits are only enforced on batch boundaries, so feed the writer
+    // slices of at most 50 rows
+    let total_rows = input_batch.num_rows();
+    for offset in (0..total_rows).step_by(50) {
+        let len = (total_rows - offset).min(50);
+        writer.write(&input_batch.slice(offset, len)).unwrap();
+    }
+    writer.close().unwrap();
+
+    Bytes::from(output)
+});
+
+/// A test parquet file and its layout.
+struct TestParquetFile {
+    /// The raw bytes of the parquet file
+    bytes: Bytes,
+    /// The operation log for IO operations performed on this file
+    ops: Arc<OperationLog>,
+    /// The (pre-parsed) parquet metadata for this file
+    parquet_metadata: Arc<ParquetMetaData>,
+}
+
+impl TestParquetFile {
+    /// Wrap the in-memory parquet file `bytes`.
+    ///
+    /// Parses the metadata (with the page index), derives the row group
+    /// layout and the footer / metadata byte ranges from it, and creates an
+    /// empty [`OperationLog`] that knows about that layout.
+    ///
+    /// Panics if the file cannot be parsed or has no offset index.
+    fn new(bytes: Bytes) -> Self {
+        // Parse the file once to learn its layout
+        let options = ArrowReaderOptions::default().with_page_index(true);
+        let builder =
+            ParquetRecordBatchReaderBuilder::try_new_with_options(bytes.clone(), options).unwrap();
+        let parquet_metadata = Arc::clone(builder.metadata());
+
+        let offset_index = parquet_metadata
+            .offset_index()
+            .expect("Parquet metadata should have a page index");
+        let row_groups = TestRowGroups::new(&parquet_metadata, offset_index);
+
+        // the footer is the last FOOTER_SIZE bytes of the file
+        let file_len = bytes.len();
+        let footer_location = file_len - FOOTER_SIZE..file_len;
+        let footer_bytes = bytes.slice(footer_location.clone());
+        // convert to a fixed size array
+        let footer_array: &[u8; FOOTER_SIZE] = footer_bytes.as_bytes().try_into().unwrap();
+        let footer_tail = FooterTail::try_new(footer_array).unwrap();
+
+        // the metadata sits immediately before the footer
+        let metadata_end = footer_location.start;
+        let metadata_location = metadata_end - footer_tail.metadata_length()..metadata_end;
+
+        let log = OperationLog::new(footer_location, metadata_location, row_groups);
+
+        Self {
+            bytes,
+            ops: Arc::new(log),
+            parquet_metadata,
+        }
+    }
+
+    /// The raw bytes of the parquet file
+    fn bytes(&self) -> &Bytes {
+        &self.bytes
+    }
+
+    /// The log of IO operations performed on this file
+    fn ops(&self) -> &Arc<OperationLog> {
+        &self.ops
+    }
+
+    /// The pre-parsed parquet metadata for this file
+    fn parquet_metadata(&self) -> &Arc<ParquetMetaData> {
+        &self.parquet_metadata
+    }
+}
+
+/// Information about a column chunk
+#[derive(Debug)]
+struct TestColumnChunk {
+    /// The name of the column
+    name: String,
+
+    /// The location of the entire column chunk in the file including dictionary pages
+    /// and data pages.
+    location: Range<usize>,
+
+    /// The offset of the start of the dictionary page, if any
+    dictionary_page_location: Option<i64>,
+
+    /// The location of the data pages in the file
+    page_locations: Vec<PageLocation>,
+}
+
+/// Information about the column chunks (and their pages) in a single row group
+#[derive(Debug)]
+struct TestRowGroup {
+    /// Maps column_name -> Information about the column chunk
+    columns: BTreeMap<String, TestColumnChunk>,
+}
+
+/// Information about all the row groups in a Parquet file, extracted from its
+/// metadata and offset index
+#[derive(Debug)]
+struct TestRowGroups {
+    /// List of row groups, each containing information about its columns and page locations
+    row_groups: Vec<TestRowGroup>,
+}
+
+impl TestRowGroups {
+    /// Collect, for every column chunk of every row group, its name, byte
+    /// range, dictionary page offset and data page locations.
+    ///
+    /// `offset_index` is indexed by row group index and then by column index.
+    fn new(parquet_metadata: &ParquetMetaData, offset_index: &ParquetOffsetIndex) -> Self {
+        let mut row_groups = Vec::new();
+
+        for (rg_index, rg_meta) in parquet_metadata.row_groups().iter().enumerate() {
+            let mut columns = BTreeMap::new();
+
+            for (col_idx, col_meta) in rg_meta.columns().iter().enumerate() {
+                let name = col_meta.column_descr().name().to_string();
+
+                // byte range of the entire column chunk
+                let (chunk_start, chunk_len) = col_meta.byte_range();
+                let chunk_start = chunk_start as usize;
+                let chunk_end = chunk_start + chunk_len as usize;
+
+                let chunk = TestColumnChunk {
+                    name: name.clone(),
+                    location: chunk_start..chunk_end,
+                    dictionary_page_location: col_meta.dictionary_page_offset(),
+                    page_locations: offset_index[rg_index][col_idx].page_locations().clone(),
+                };
+
+                // keyed by column name
+                columns.insert(name, chunk);
+            }
+
+            row_groups.push(TestRowGroup { columns });
+        }
+
+        Self { row_groups }
+    }
+
+    /// Iterate over the row groups in file order
+    fn iter(&self) -> impl Iterator<Item = &TestRowGroup> {
+        self.row_groups.iter()
+    }
+}
+
+/// Type of data read
+#[derive(Debug, PartialEq)]
+enum PageType {
+    /// The data page with the specified index
+    Data {
+        /// Index of the data page within its column chunk
+        data_page_index: usize,
+    },
+    /// The dictionary page
+    Dictionary,
+    /// Multiple pages read together
+    Multi {
+        /// Was the dictionary page included?
+        dictionary_page: bool,
+        /// The data pages included
+        data_page_indices: Vec<usize>,
+    },
+}
+
+impl Display for PageType {
+    /// Renders as `DataPage(<index>)`, `DictionaryPage` or
+    /// `MultiPage([dictionary_page: true, ]data_pages: [..])`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Dictionary => f.write_str("DictionaryPage"),
+            Self::Data { data_page_index } => write!(f, "DataPage({data_page_index})"),
+            Self::Multi {
+                dictionary_page,
+                data_page_indices,
+            } => {
+                f.write_str("MultiPage(")?;
+                // the dictionary marker is only printed when one was read
+                if *dictionary_page {
+                    f.write_str("dictionary_page: true, ")?;
+                }
+                write!(f, "data_pages: {data_page_indices:?})")
+            }
+        }
+    }
+}
+
+/// A read of a single logical data object (data page or dictionary page)
+/// in one or more requests
+#[derive(Debug)]
+struct ReadInfo {
+    /// Index of the row group that was read from
+    row_group_index: usize,
+    /// Name of the column that was read from
+    column_name: String,
+    /// The byte range that was read
+    range: Range<usize>,
+    /// What kind of page(s) the read covered
+    read_type: PageType,
+    /// Number of distinct requests (function calls) that were used
+    num_requests: usize,
+}
+
+impl Display for ReadInfo {
+    /// Renders one aligned log line: row group, column, page type, byte count,
+    /// request count and a `[header]` / `[data]` annotation.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self {
+            row_group_index,
+            column_name,
+            range,
+            read_type,
+            num_requests,
+        } = self;
+
+        // If the average read size is less than 10 bytes, assume it is the thrift
+        // decoder reading the page headers and add an annotation.
+        //
+        // `checked_div` avoids a divide-by-zero panic when no requests were
+        // recorded; such an entry is annotated as data.
+        let annotation = match range.len().checked_div(*num_requests) {
+            Some(average_read_size) if average_read_size < 10 => " [header]",
+            _ => " [data]",
+        };
+
+        // Pad the read type, byte count and request count to 15, 10 and 8
+        // characters for better readability. The values are converted to
+        // `String`s first because a width such as `{:15}` is only applied by
+        // `Display` impls that honor the formatter's padding (as `str` does);
+        // the `Display` impl of `PageType` writes its output directly and so
+        // would ignore the width.
+        write!(
+            f,
+            "Row Group {row_group_index}, column '{column_name}': {:15}  ({:10}, {:8}){annotation}",
+            // convert to strings so alignment works
+            format!("{read_type}"),
+            format!("{} bytes", range.len()),
+            format!("{num_requests} requests"),
+        )
+    }
+}
+
+/// Store structured entries in the log to make it easier to combine multiple entries
+#[derive(Debug)]
+enum LogEntry {
+    /// Read the footer (last 8 bytes) of the parquet file
+    ReadFooter(Range<usize>),
+    /// Read the metadata of the parquet file
+    ReadMetadata(Range<usize>),
+    /// Access previously parsed metadata
+    GetProvidedMetadata,
+    /// Read a single logical data object
+    ReadData(ReadInfo),
+    /// Read one or more logical data objects in a single operation
+    /// (rendered as a `Read Multi:` line followed by the indented entries)
+    ReadMultipleData(Vec<LogEntry>),
+    /// Not known where the read came from
+    Unknown(Range<usize>),
+    /// A user defined event
+    Event(String),
+}
+
+impl LogEntry {
+    /// Create a user defined [`LogEntry::Event`] with the given description
+    fn event(event: impl Into<String>) -> Self {
+        LogEntry::Event(event.into())
+    }
+
+    /// Appends a string representation of this log entry to the output vector
+    ///
+    /// Every line pushed is prefixed with `indent` spaces. The entries nested
+    /// in a [`LogEntry::ReadMultipleData`] are appended after its header line,
+    /// indented by two additional spaces.
+    fn append_string(&self, output: &mut Vec<String>, indent: usize) {
+        let indent_str = " ".repeat(indent);
+        match self {
+            LogEntry::ReadFooter(range) => {
+                output.push(format!("{indent_str}Footer: {} bytes", range.len()))
+            }
+            LogEntry::ReadMetadata(range) => {
+                output.push(format!("{indent_str}Metadata: {}", range.len()))
+            }
+            LogEntry::GetProvidedMetadata => {
+                output.push(format!("{indent_str}Get Provided Metadata"))
+            }
+            LogEntry::ReadData(read_info) => output.push(format!("{indent_str}{read_info}")),
+            LogEntry::ReadMultipleData(read_infos) => {
+                output.push(format!("{indent_str}Read Multi:"));
+                // nested entries are indented relative to the header line
+                let new_indent = indent + 2;
+                for read_info in read_infos {
+                    read_info.append_string(output, new_indent);
+                }
+            }
+            LogEntry::Unknown(range) => {
+                output.push(format!("{indent_str}UNKNOWN: {range:?} (maybe Page Index)"))
+            }
+            // honor the indent like every other entry (identical output at indent 0)
+            LogEntry::Event(event) => output.push(format!("{indent_str}Event: {event}")),
+        }
+    }
+}
+
+/// A log of the IO operations performed on a test parquet file, together with
+/// the file layout needed to describe each read.
+#[derive(Debug)]
+struct OperationLog {
+    /// The operations performed on the file
+    ops: Mutex<Vec<LogEntry>>,
+
+    /// Footer location in the parquet file
+    footer_location: Range<usize>,
+
+    /// Metadata location in the parquet file
+    metadata_location: Range<usize>,
+
+    /// Information about the row group layout in the parquet file, used to
+    /// translate read operations into human understandable IO operations
+    row_groups: TestRowGroups,
+}
+
+impl OperationLog {
+    /// Create an empty log for a file with the given footer location,
+    /// metadata location and row group layout
+    fn new(
+        footer_location: Range<usize>,
+        metadata_location: Range<usize>,
+        row_groups: TestRowGroups,
+    ) -> Self {
+        let ops = Mutex::new(vec![]);
+        Self {
+            ops,
+            footer_location,
+            metadata_location,
+            row_groups,
+        }
+    }
+
+    /// Append `entry` to the end of the log
+    fn add_entry(&self, entry: LogEntry) {
+        self.ops.lock().unwrap().push(entry);
+    }
+
+    /// Records a read of `range` in the operation log.
+    ///
+    /// The range is translated into a log entry by `entry_for_range`, which
+    /// checks it against the known locations in the file in order and uses the
+    /// first match found.
+    fn add_entry_for_range(&self, range: &Range<usize>) {
+        let entry = self.entry_for_range(range);
+        self.add_entry(entry);
+    }
+
+    /// Records a single read operation that covered all of `ranges`.
+    ///
+    /// Each range is translated as in `add_entry_for_range`, and the results
+    /// are grouped into one [`LogEntry::ReadMultipleData`] entry.
+    fn add_entry_for_ranges<'a>(&self, ranges: impl IntoIterator<Item = &'a Range<usize>>) {
+        let mut entries = Vec::new();
+        for range in ranges {
+            entries.push(self.entry_for_range(range));
+        }
+        self.add_entry(LogEntry::ReadMultipleData(entries));
+    }
+
+    /// Create an appropriate LogEntry for the specified range
+    fn entry_for_range(&self, range: &Range<usize>) -> LogEntry {
+        let start = range.start as i64;
+        let end = range.end as i64;
+
+        // figure out what logical part of the file this range corresponds to
+        if self.metadata_location.contains(&range.start)
+            || self.metadata_location.contains(&(range.end - 1))
+        {
+            return LogEntry::ReadMetadata(range.clone());
+        }
+
+        if self.footer_location.contains(&range.start)
+            || self.footer_location.contains(&(range.end - 1))
+        {
+            return LogEntry::ReadFooter(range.clone());
+        }
+
+        // Search for the location in each column chunk.
+        //
+        // The actual parquet reader must in general decode the page headers
+        // and determine the byte ranges of the pages. However, for this test
+        // we assume the following layout:
+        //
+        // ```text
+        // (Dictionary Page)
+        // (Data Page)
+        // ...
+        // (Data Page)
+        // ```
+        //
+        // We also assume that `self.page_locations` holds the location of all
+        // data pages, so any read operation that overlaps with a data page
+        // location is considered a read of that page, and any other read must
+        // be a dictionary page read.
+        for (row_group_index, row_group) in self.row_groups.iter().enumerate() {
+            for (column_name, test_column_chunk) in &row_group.columns {
+                // Check if the range overlaps with any data page locations
+                let page_locations = test_column_chunk.page_locations.iter();
+
+                // What data pages does this range overlap with?
+                let mut data_page_indices = vec![];
+
+                for (data_page_index, page_location) in page_locations.enumerate() {
+                    let page_offset = page_location.offset;
+                    let page_end = page_offset + page_location.compressed_page_size as i64;
+
+                    // if the range fully contains the page, consider it a read of that page
+                    if start >= page_offset && end <= page_end {
+                        let read_info = ReadInfo {
+                            row_group_index,
+                            column_name: column_name.clone(),
+                            range: range.clone(),
+                            read_type: PageType::Data { data_page_index },
+                            num_requests: 1,
+                        };
+                        return LogEntry::ReadData(read_info);
+                    }
+
+                    // if the range overlaps with the page, add it to the list of overlapping pages
+                    if start < page_end && end > page_offset {
+                        data_page_indices.push(data_page_index);
+                    }
+                }
+
+                // was the dictionary page read?
+                let mut dictionary_page = false;
+
+                // Check if the range overlaps with the dictionary page location
+                if let Some(dict_page_offset) = test_column_chunk.dictionary_page_location {
+                    let dict_page_end = dict_page_offset + test_column_chunk.location.len() as i64;
+                    if start >= dict_page_offset && end < dict_page_end {
+                        let read_info = ReadInfo {
+                            row_group_index,
+                            column_name: column_name.clone(),
+                            range: range.clone(),
+                            read_type: PageType::Dictionary,
+                            num_requests: 1,
+                        };
+
+                        return LogEntry::ReadData(read_info);
+                    }
+
+                    // if the range overlaps with the dictionary page, add it to the list of overlapping pages
+                    if start < dict_page_end && end > dict_page_offset {
+                        dictionary_page = true;
+                    }
+                }
+
+                // If we can't find a page, but the range overlaps with the
+                // column chunk location, use the column chunk location
+                let column_byte_range = &test_column_chunk.location;
+                if column_byte_range.contains(&range.start)
+                    && column_byte_range.contains(&(range.end - 1))
+                {
+                    let read_data_entry = ReadInfo {
+                        row_group_index,
+                        column_name: column_name.clone(),
+                        range: range.clone(),
+                        read_type: PageType::Multi {
+                            data_page_indices,
+                            dictionary_page,
+                        },
+                        num_requests: 1,
+                    };
+
+                    return LogEntry::ReadData(read_data_entry);
+                }
+            }
+        }
+
+        // If we reach here, the range does not match any known logical part of the file
+        LogEntry::Unknown(range.clone())
+    }
+
+    /// Combine each log entry with the previously kept entry, when similar, to reduce noise.
+    fn coalesce_entries(&self) {
+        let mut ops = self.ops.lock().unwrap();
+
+        // Take the existing entries and rebuild the log, merging as we go
+        let prev_ops = std::mem::take(&mut *ops);
+        for entry in prev_ops {
+            let Some(last) = ops.last_mut() else {
+                ops.push(entry);
+                continue;
+            };
+
+            let LogEntry::ReadData(ReadInfo {
+                row_group_index: last_rg_index,
+                column_name: last_column_name,
+                range: last_range,
+                read_type: last_read_type,
+                num_requests: last_num_reads,
+            }) = last
+            else {
+                // If the last kept entry is not a ReadData, just push the new entry
+                ops.push(entry);
+                continue;
+            };
+
+            // If the new entry is not a ReadData, just push it
+            let LogEntry::ReadData(ReadInfo {
+                row_group_index,
+                column_name,
+                range,
+                read_type,
+                num_requests: num_reads,
+            }) = &entry
+            else {
+                ops.push(entry);
+                continue;
+            };
+
+            // Combine the entries if they have the same row group, column and read
+            // type, their ranges touch or overlap, and this read is at most 10 bytes.
+            //
+            // This heuristic combines the small reads (typically 1-2 bytes) made by
+            // the thrift decoder when reading the data/dictionary page headers.
+            if *row_group_index != *last_rg_index
+                || column_name != last_column_name
+                || read_type != last_read_type
+                || (range.start > last_range.end)
+                || (range.end < last_range.start)
+                || range.len() > 10
+            {
+                ops.push(entry);
+                continue;
+            }
+            // combine: widen the kept range to cover both reads and sum the request counts
+            *last_range = last_range.start.min(range.start)..last_range.end.max(range.end);
+            *last_num_reads += num_reads;
+        }
+    }
+
+    /// Coalesce similar entries, then render every operation in the log as text lines.
+    fn snapshot(&self) -> Vec<String> {
+        self.coalesce_entries();
+        let mut lines = vec![];
+        let guard = self.ops.lock().unwrap();
+        for op in guard.iter() {
+            op.append_string(&mut lines, 0);
+        }
+        lines
+    }
+}
diff --git a/parquet/tests/arrow_reader/io/sync_reader.rs b/parquet/tests/arrow_reader/io/sync_reader.rs
new file mode 100644
index 000000000000..77c200fa8641
--- /dev/null
+++ b/parquet/tests/arrow_reader/io/sync_reader.rs
@@ -0,0 +1,443 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tests for the sync reader - [`ParquetRecordBatchReaderBuilder`]
+
+use crate::io::{
+    LogEntry, OperationLog, TestParquetFile, filter_a_175_b_625, filter_b_575_625, filter_b_false,
+    test_file, test_options,
+};
+
+use bytes::Bytes;
+use parquet::arrow::ProjectionMask;
+use parquet::arrow::arrow_reader::{
+    ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelection, RowSelector,
+};
+use parquet::file::reader::{ChunkReader, Length};
+use std::io::Read;
+use std::sync::Arc;
+
+#[test]
+fn test_read_entire_file() {
+    // read entire file without any filtering or projection
+    let test_file = test_file();
+    // Expect to see IO for the dictionary page and all data pages of each row group and column
+    let builder = sync_builder(&test_file, test_options());
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 0, column 'c': DictionaryPage   (7107 bytes, 1 requests) [data]",
+        "Row Group 0, column 'c': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'c': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'c': DictionaryPage   (7217 bytes, 1 requests) [data]",
+        "Row Group 1, column 'c': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'c': DataPage(1)      (126 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_single_group() {
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options()).with_row_groups(vec![1]); // read only second row group
+
+    // Expect to see only IO for Row Group 1. Should see no IO for Row Group 0.
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'c': DictionaryPage   (7217 bytes, 1 requests) [data]",
+        "Row Group 1, column 'c': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'c': DataPage(1)      (126 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_single_column() {
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder.with_projection(ProjectionMask::columns(&schema_descr, ["b"]));
+    // Expect to see only IO for column "b". Should see no IO for columns "a" or "c".
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_single_column_no_page_index() {
+    let test_file = test_file();
+    let options = test_options().with_page_index(false);
+    let builder = sync_builder(&test_file, options);
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder.with_projection(ProjectionMask::columns(&schema_descr, ["b"]));
+    // Expect to see only IO for column "b", should see no IO for columns "a" or "c".
+    //
+    // Note that we need to read all data page headers to find the pages for column b
+    // so there are many more small reads than in the test_read_single_column test above
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 0, column 'b': DictionaryPage   (17 bytes  , 17 requests) [header]",
+        "Row Group 0, column 'b': DictionaryPage   (1600 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(0)      (20 bytes  , 20 requests) [header]",
+        "Row Group 0, column 'b': DataPage(0)      (93 bytes  , 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (20 bytes  , 20 requests) [header]",
+        "Row Group 0, column 'b': DataPage(1)      (106 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (17 bytes  , 17 requests) [header]",
+        "Row Group 1, column 'b': DictionaryPage   (1600 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (20 bytes  , 20 requests) [header]",
+        "Row Group 1, column 'b': DataPage(0)      (93 bytes  , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (20 bytes  , 20 requests) [header]",
+        "Row Group 1, column 'b': DataPage(1)      (106 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_row_selection() {
+    // There are 400 total rows spread across 4 data pages (100 rows each)
+    // select rows 175..225 (i.e. DataPage(1) of row group 0 and DataPage(0) of row group 1)
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder
+        .with_projection(
+            // read both "a" and "b"
+            ProjectionMask::columns(&schema_descr, ["a", "b"]),
+        )
+        .with_row_selection(RowSelection::from(vec![
+            RowSelector::skip(175),
+            RowSelector::select(50),
+        ]));
+
+    // Expect to see only data IO for one page for each column for each row group
+    // Note the dictionary page of each selected column chunk is read as well
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_limit() {
+    // There are 400 total rows spread across 4 data pages (100 rows each)
+    // a limit of 125 rows should only fetch the first two data pages (DataPage(0) and DataPage(1)) from row group 0
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a"]))
+        .with_limit(125);
+
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Event: Reader Built",
+        "Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_single_row_filter() {
+    // Values from column "b" range 400..799
+    // filter  "b" > 575 and < 625
+    // (last data page in Row Group 0 and first DataPage in Row Group 1)
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(
+            // read both "a" and "b"
+            ProjectionMask::columns(&schema_descr, ["a", "b"]),
+        )
+        // "b" > 575 and "b" < 625
+        .with_row_filter(filter_b_575_625(&schema_descr));
+
+    // Expect to see I/O for column b in both row groups and then reading just a
+    // single data page for each of a and b in each row group
+    //
+    // Note there is significant IO that happens during the construction of the
+    // reader (between "Builder Configured" and "Reader Built")
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Event: Reader Built",
+        "Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_multiple_row_filter() {
+    // Values in column "a" range 0..399
+    // Values in column "b" range 400..799
+    // First filter: "a" > 175  (last data page in Row Group 0 and all of Row Group 1)
+    // Second filter: "b" < 625 (last data page in Row Group 0 and first DataPage in RowGroup 1)
+    // Read column "c"
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(
+            ProjectionMask::columns(&schema_descr, ["c"]), // read "c"
+        )
+        // a > 175 and b < 625
+        .with_row_filter(filter_a_175_b_625(&schema_descr));
+
+    // Expect that we will see
+    // 1. IO for all pages of column a
+    // 2. IO for pages of column b that passed 1.
+    // 3. IO after reader is built only for column c
+    //
+    // Note there is significant IO that happens during the construction of the
+    // reader (between "Builder Configured" and "Reader Built")
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Row Group 0, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'a': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Event: Reader Built",
+        "Row Group 0, column 'c': DictionaryPage   (7107 bytes, 1 requests) [data]",
+        "Row Group 0, column 'c': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'c': DictionaryPage   (7217 bytes, 1 requests) [data]",
+        "Row Group 1, column 'c': DataPage(0)      (113 bytes , 1 requests) [data]",
+    ]
+    "#);
+}
+
+#[test]
+fn test_read_single_row_filter_all() {
+    // Apply a filter that entirely filters out rows based on a predicate from one column
+    // should not read any data pages for any other column
+
+    let test_file = test_file();
+    let builder = sync_builder(&test_file, test_options());
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    let builder = builder
+        .with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+        .with_row_filter(filter_b_false(&schema_descr));
+
+    // Expect to see the Footer and Metadata, then I/O for column b
+    // in both row groups but then nothing for column "a"
+    // since the row filter entirely filters out all rows.
+    //
+    // Note that all of the data IO happens during the construction of the reader
+    // (between "Builder Configured" and "Reader Built")
+    insta::assert_debug_snapshot!(run(&test_file, builder),
+        @r#"
+    [
+        "Footer: 8 bytes",
+        "Metadata: 1162",
+        "UNKNOWN: 22230..22877 (maybe Page Index)",
+        "Event: Builder Configured",
+        "Row Group 0, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 0, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DictionaryPage   (1617 bytes, 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(0)      (113 bytes , 1 requests) [data]",
+        "Row Group 1, column 'b': DataPage(1)      (126 bytes , 1 requests) [data]",
+        "Event: Reader Built",
+    ]
+    "#);
+}
+
+/// Open a [`ParquetRecordBatchReaderBuilder`] on this file via a [`RecordingChunkReader`]
+fn sync_builder(
+    test_file: &TestParquetFile,
+    options: ArrowReaderOptions,
+) -> ParquetRecordBatchReaderBuilder<RecordingChunkReader> {
+    let inner = test_file.bytes().clone();
+    let ops = Arc::clone(test_file.ops());
+    let recorder = RecordingChunkReader { inner, ops };
+    // opening the builder already performs IO, which is recorded in the shared log
+    let opened = ParquetRecordBatchReaderBuilder::try_new_with_options(recorder, options);
+    opened.expect("ParquetRecordBatchReaderBuilder")
+}
+
+/// Build the reader and drain it, returning the IO operations recorded along the way
+fn run(
+    test_file: &TestParquetFile,
+    builder: ParquetRecordBatchReaderBuilder<RecordingChunkReader>,
+) -> Vec<String> {
+    let log = test_file.ops();
+    // bracket reader construction with events so IO done while building stands out
+    log.add_entry(LogEntry::event("Builder Configured"));
+    let batches = builder.build().unwrap();
+    log.add_entry(LogEntry::event("Reader Built"));
+    for result in batches {
+        if let Err(e) = result {
+            panic!("Error reading batch: {e}");
+        }
+    }
+    log.snapshot()
+}
+
+/// In-memory chunk reader over `inner` that records the IO performed on it into `ops`
+struct RecordingChunkReader {
+    inner: Bytes,
+    ops: Arc<OperationLog>,
+}
+
+impl Length for RecordingChunkReader {
+    fn len(&self) -> u64 {
+        self.inner.len() as u64
+    }
+}
+
+impl ChunkReader for RecordingChunkReader {
+    type T = RecordingStdIoReader;
+
+    fn get_read(&self, start: u64) -> parquet::errors::Result<Self::T> {
+        Ok(RecordingStdIoReader {
+            start: start as usize,
+            inner: self.inner.clone(),
+            ops: Arc::clone(&self.ops),
+        })
+    }
+
+    /// Log the requested byte range, then return those bytes from `inner`
+    fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
+        let begin = start as usize;
+        let requested = begin..begin + length;
+        self.ops.add_entry_for_range(&requested);
+        Ok(self.inner.slice(requested))
+    }
+}
+
+/// Wrapper around a `Bytes` object that implements `Read`, logging each read to `ops`
+struct RecordingStdIoReader {
+    /// offset into `inner` at which the next call to `read` starts
+    start: usize,
+    inner: Bytes,
+    ops: Arc<OperationLog>,
+}
+
+impl Read for RecordingStdIoReader {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        let remain = self.inner.len() - self.start;
+        let start = self.start;
+        let read_length = buf.len().min(remain);
+        let read_range = start..start + read_length;
+        // record the bytes actually served, which may be fewer than `buf.len()`
+        self.ops.add_entry_for_range(&read_range);
+        // `copy_from_slice` panics on a length mismatch, so fill only the prefix that was read
+        buf[..read_length].copy_from_slice(self.inner.slice(read_range).as_ref());
+        // Update the inner position
+        self.start += read_length;
+        Ok(read_length)
+    }
+}
diff --git a/parquet/tests/arrow_reader/mod.rs b/parquet/tests/arrow_reader/mod.rs
index 21aa1c3f26f0..3d566306a907 100644
--- a/parquet/tests/arrow_reader/mod.rs
+++ b/parquet/tests/arrow_reader/mod.rs
@@ -15,15 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_array::types::{Int32Type, Int8Type};
+use arrow_array::types::{Int8Type, Int32Type};
 use arrow_array::{
     Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array,
-    Decimal128Array, Decimal256Array, DictionaryArray, FixedSizeBinaryArray, Float16Array,
-    Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
-    LargeStringArray, RecordBatch, StringArray, StringViewArray, StructArray,
-    Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
-    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-    TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+    Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, DictionaryArray,
+    FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, Int8Array, Int16Array,
+    Int32Array, Int64Array, LargeBinaryArray, LargeStringArray, RecordBatch, StringArray,
+    StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray,
+    Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
+    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt8Array,
+    UInt16Array, UInt32Array, UInt64Array,
 };
 use arrow_buffer::i256;
 use arrow_schema::{DataType, Field, Schema, TimeUnit};
@@ -32,7 +33,7 @@ use chrono::{Duration, TimeDelta};
 use half::f16;
 use parquet::arrow::ArrowWriter;
 use parquet::file::properties::{
-    EnabledStatistics, WriterProperties, DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
+    DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, EnabledStatistics, WriterProperties,
 };
 use std::sync::Arc;
 use tempfile::NamedTempFile;
@@ -40,6 +41,10 @@ use tempfile::NamedTempFile;
 mod bad_data;
 #[cfg(feature = "crc")]
 mod checksum;
+mod int96_stats_roundtrip;
+mod io;
+#[cfg(feature = "async")]
+mod predicate_cache;
 mod statistics;
 
 // returns a struct array with columns "int32_col", "float32_col" and "float64_col" with the specified values
@@ -86,7 +91,9 @@ enum Scenario {
     Float16,
     Float32,
     Float64,
-    Decimal,
+    Decimal32,
+    Decimal64,
+    Decimal128,
     Decimal256,
     ByteArray,
     Dictionary,
@@ -330,9 +337,9 @@ fn make_uint_batches(start: u8, end: u8) -> RecordBatch {
         Field::new("u64", DataType::UInt64, true),
     ]));
     let v8: Vec<u8> = (start..end).collect();
-    let v16: Vec<u16> = (start as _..end as _).collect();
-    let v32: Vec<u32> = (start as _..end as _).collect();
-    let v64: Vec<u64> = (start as _..end as _).collect();
+    let v16: Vec<u16> = (start as _..end as u16).collect();
+    let v32: Vec<u32> = (start as _..end as u32).collect();
+    let v64: Vec<u64> = (start as _..end as u64).collect();
     RecordBatch::try_new(
         schema,
         vec![
@@ -381,13 +388,49 @@ fn make_f16_batch(v: Vec<f16>) -> RecordBatch {
     RecordBatch::try_new(schema, vec![array.clone()]).unwrap()
 }
 
-/// Return record batch with decimal vector
+/// Return record batch with decimal32 vector
 ///
 /// Columns are named
-/// "decimal_col" -> DecimalArray
-fn make_decimal_batch(v: Vec<i128>, precision: u8, scale: i8) -> RecordBatch {
+/// "decimal32_col" -> Decimal32Array
+fn make_decimal32_batch(v: Vec<i32>, precision: u8, scale: i8) -> RecordBatch {
     let schema = Arc::new(Schema::new(vec![Field::new(
-        "decimal_col",
+        "decimal32_col",
+        DataType::Decimal32(precision, scale),
+        true,
+    )]));
+    let array = Arc::new(
+        Decimal32Array::from(v)
+            .with_precision_and_scale(precision, scale)
+            .unwrap(),
+    ) as ArrayRef;
+    RecordBatch::try_new(schema, vec![array.clone()]).unwrap()
+}
+
+/// Return record batch with decimal64 vector
+///
+/// Columns are named
+/// "decimal64_col" -> Decimal64Array
+fn make_decimal64_batch(v: Vec<i64>, precision: u8, scale: i8) -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "decimal64_col",
+        DataType::Decimal64(precision, scale),
+        true,
+    )]));
+    let array = Arc::new(
+        Decimal64Array::from(v)
+            .with_precision_and_scale(precision, scale)
+            .unwrap(),
+    ) as ArrayRef;
+    RecordBatch::try_new(schema, vec![array.clone()]).unwrap()
+}
+
+/// Return record batch with decimal128 vector
+///
+/// Columns are named
+/// "decimal128_col" -> Decimal128Array
+fn make_decimal128_batch(v: Vec<i128>, precision: u8, scale: i8) -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "decimal128_col",
         DataType::Decimal128(precision, scale),
         true,
     )]));
@@ -505,7 +548,7 @@ fn make_bytearray_batch(
     large_binary_values: Vec<&[u8]>,
 ) -> RecordBatch {
     let num_rows = string_values.len();
-    let name: StringArray = std::iter::repeat(Some(name)).take(num_rows).collect();
+    let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect();
     let service_string: StringArray = string_values.iter().map(Some).collect();
     let service_binary: BinaryArray = binary_values.iter().map(Some).collect();
     let service_fixedsize: FixedSizeBinaryArray = fixedsize_values
@@ -552,7 +595,7 @@ fn make_bytearray_batch(
 /// name | service.name
 fn make_names_batch(name: &str, service_name_values: Vec<&str>) -> RecordBatch {
     let num_rows = service_name_values.len();
-    let name: StringArray = std::iter::repeat(Some(name)).take(num_rows).collect();
+    let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect();
     let service_name: StringArray = service_name_values.iter().map(Some).collect();
 
     let schema = Schema::new(vec![
@@ -744,12 +787,28 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
                 make_f64_batch(vec![5.0, 6.0, 7.0, 8.0, 9.0]),
             ]
         }
-        Scenario::Decimal => {
+        Scenario::Decimal32 => {
+            // decimal record batch
+            vec![
+                make_decimal32_batch(vec![100, 200, 300, 400, 600], 9, 2),
+                make_decimal32_batch(vec![-500, 100, 300, 400, 600], 9, 2),
+                make_decimal32_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2),
+            ]
+        }
+        Scenario::Decimal64 => {
+            // decimal record batch
+            vec![
+                make_decimal64_batch(vec![100, 200, 300, 400, 600], 9, 2),
+                make_decimal64_batch(vec![-500, 100, 300, 400, 600], 9, 2),
+                make_decimal64_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2),
+            ]
+        }
+        Scenario::Decimal128 => {
             // decimal record batch
             vec![
-                make_decimal_batch(vec![100, 200, 300, 400, 600], 9, 2),
-                make_decimal_batch(vec![-500, 100, 300, 400, 600], 9, 2),
-                make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2),
+                make_decimal128_batch(vec![100, 200, 300, 400, 600], 9, 2),
+                make_decimal128_batch(vec![-500, 100, 300, 400, 600], 9, 2),
+                make_decimal128_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2),
             ]
         }
         Scenario::Decimal256 => {
diff --git a/parquet/tests/arrow_reader/predicate_cache.rs b/parquet/tests/arrow_reader/predicate_cache.rs
new file mode 100644
index 000000000000..b419c37158dc
--- /dev/null
+++ b/parquet/tests/arrow_reader/predicate_cache.rs
@@ -0,0 +1,367 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Test for predicate cache in Parquet Arrow reader
+
+use arrow::array::ArrayRef;
+use arrow::array::Int64Array;
+use arrow::compute::and;
+use arrow::compute::kernels::cmp::{gt, lt};
+use arrow_array::cast::AsArray;
+use arrow_array::types::Int64Type;
+use arrow_array::{RecordBatch, StringArray, StringViewArray, StructArray};
+use arrow_schema::{DataType, Field};
+use bytes::Bytes;
+use futures::future::BoxFuture;
+use futures::{FutureExt, StreamExt};
+use parquet::arrow::arrow_reader::metrics::ArrowReaderMetrics;
+use parquet::arrow::arrow_reader::{ArrowPredicateFn, ArrowReaderOptions, RowFilter};
+use parquet::arrow::arrow_reader::{ArrowReaderBuilder, ParquetRecordBatchReaderBuilder};
+use parquet::arrow::async_reader::AsyncFileReader;
+use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
+use parquet::file::properties::WriterProperties;
+use std::ops::Range;
+use std::sync::Arc;
+use std::sync::LazyLock;
+
+#[tokio::test]
+async fn test_default_read() {
+    // The cache is not used without predicates, so we expect 0 records read from cache
+    let test = ParquetPredicateCacheTest::new().with_expected_records_read_from_cache(0);
+    let sync_builder = test.sync_builder();
+    test.run_sync(sync_builder);
+    let async_builder = test.async_builder().await;
+    test.run_async(async_builder).await;
+}
+
+#[tokio::test]
+async fn test_async_cache_with_filters() {
+    let test = ParquetPredicateCacheTest::new().with_expected_records_read_from_cache(49);
+    let async_builder = test.async_builder().await.add_project_ab_and_filter_b();
+    test.run_async(async_builder).await;
+}
+
+#[tokio::test]
+async fn test_sync_cache_with_filters() {
+    let test = ParquetPredicateCacheTest::new()
+        // The sync reader does not use the cache. See https://github.com/apache/arrow-rs/issues/8000
+        .with_expected_records_read_from_cache(0);
+
+    let sync_builder = test.sync_builder().add_project_ab_and_filter_b();
+    test.run_sync(sync_builder);
+}
+
+#[tokio::test]
+async fn test_cache_disabled_with_filters() {
+    // expect no records to be read from cache, because the cache is disabled
+    let test = ParquetPredicateCacheTest::new().with_expected_records_read_from_cache(0);
+    let sync_builder = test
+        .sync_builder()
+        .with_max_predicate_cache_size(0)
+        .add_project_ab_and_filter_b();
+    test.run_sync(sync_builder);
+
+    let async_builder = test
+        .async_builder()
+        .await
+        .with_max_predicate_cache_size(0)
+        .add_project_ab_and_filter_b();
+    test.run_async(async_builder).await;
+}
+
+#[tokio::test]
+async fn test_cache_projection_excludes_nested_columns() {
+    let test = ParquetPredicateCacheTest::new_nested().with_expected_records_read_from_cache(0);
+
+    let sync_builder = test.sync_builder().add_nested_filter();
+    test.run_sync(sync_builder);
+
+    let async_builder = test.async_builder().await.add_nested_filter();
+    test.run_async(async_builder).await;
+}
+
+// --  Begin test infrastructure --
+
+/// A test parquet file
+struct ParquetPredicateCacheTest {
+    bytes: Bytes,
+    expected_records_read_from_cache: usize,
+}
+impl ParquetPredicateCacheTest {
+    /// Create a new `ParquetPredicateCacheTest` with:
+    /// 3 columns: "a", "b", "c"
+    ///
+    /// 2 row groups, each with 200 rows
+    /// each data page has 100 rows
+    ///
+    /// Values of column "a" are 0..399
+    /// Values of column "b" are 400..799
+    /// Values of column "c" alternate between short strings and strings longer than 12 bytes
+    fn new() -> Self {
+        Self {
+            bytes: TEST_FILE_DATA.clone(),
+            expected_records_read_from_cache: 0,
+        }
+    }
+
+    /// Create a new `ParquetPredicateCacheTest` with
+    /// 2 columns:
+    ///
+    /// * string column `a`
+    /// * nested struct column `b { aa, bb }`
+    fn new_nested() -> Self {
+        Self {
+            bytes: NESTED_TEST_FILE_DATA.clone(),
+            expected_records_read_from_cache: 0,
+        }
+    }
+
+    /// Set the expected number of records read from the cache
+    fn with_expected_records_read_from_cache(
+        mut self,
+        expected_records_read_from_cache: usize,
+    ) -> Self {
+        self.expected_records_read_from_cache = expected_records_read_from_cache;
+        self
+    }
+
+    /// Return a [`ParquetRecordBatchReaderBuilder`] for reading this file
+    fn sync_builder(&self) -> ParquetRecordBatchReaderBuilder<Bytes> {
+        let reader = self.bytes.clone();
+        ParquetRecordBatchReaderBuilder::try_new_with_options(reader, ArrowReaderOptions::default())
+            .expect("ParquetRecordBatchReaderBuilder")
+    }
+
+    /// Return a [`ParquetRecordBatchStreamBuilder`] for reading this file
+    async fn async_builder(&self) -> ParquetRecordBatchStreamBuilder<TestReader> {
+        let reader = TestReader::new(self.bytes.clone());
+        ParquetRecordBatchStreamBuilder::new_with_options(reader, ArrowReaderOptions::default())
+            .await
+            .unwrap()
+    }
+
+    /// Build the reader from the specified builder, read all batches from it,
+    /// and assert that the number of records read from the cache matches the expected value
+    fn run_sync(&self, builder: ParquetRecordBatchReaderBuilder<Bytes>) {
+        let metrics = ArrowReaderMetrics::enabled();
+
+        let reader = builder.with_metrics(metrics.clone()).build().unwrap();
+        for batch in reader {
+            match batch {
+                Ok(_) => {}
+                Err(e) => panic!("Error reading batch: {e}"),
+            }
+        }
+        self.verify_metrics(metrics)
+    }
+
+    /// Build the reader from the specified builder, read all batches from it,
+    /// and assert that the number of records read from the cache matches the expected value
+    async fn run_async(&self, builder: ParquetRecordBatchStreamBuilder<TestReader>) {
+        let metrics = ArrowReaderMetrics::enabled();
+
+        let mut stream = builder.with_metrics(metrics.clone()).build().unwrap();
+        while let Some(batch) = stream.next().await {
+            match batch {
+                Ok(_) => {}
+                Err(e) => panic!("Error reading batch: {e}"),
+            }
+        }
+        self.verify_metrics(metrics)
+    }
+
+    fn verify_metrics(&self, metrics: ArrowReaderMetrics) {
+        let Self {
+            bytes: _,
+            expected_records_read_from_cache,
+        } = self;
+
+        let read_from_cache = metrics
+            .records_read_from_cache()
+            .expect("Metrics enabled, so should have metrics");
+
+        assert_eq!(
+            &read_from_cache, expected_records_read_from_cache,
+            "Expected {expected_records_read_from_cache} records read from cache, but got {read_from_cache}"
+        );
+    }
+}
+
+/// Create a parquet file in memory for testing. See [`ParquetPredicateCacheTest::new`] for details.
+static TEST_FILE_DATA: LazyLock<Bytes> = LazyLock::new(|| {
+    // Input batch has 400 rows, with 3 columns: "a", "b", "c"
+    // Note c has a different type (so the data page sizes will be different)
+    let a: ArrayRef = Arc::new(Int64Array::from_iter_values(0..400));
+    let b: ArrayRef = Arc::new(Int64Array::from_iter_values(400..800));
+    let c: ArrayRef = Arc::new(StringViewArray::from_iter_values((0..400).map(|i| {
+        if i % 2 == 0 {
+            format!("string_{i}")
+        } else {
+            format!("A string larger than 12 bytes and thus not inlined {i}")
+        }
+    })));
+
+    let input_batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap();
+
+    let mut output = Vec::new();
+
+    let writer_options = WriterProperties::builder()
+        .set_max_row_group_size(200)
+        .set_data_page_row_count_limit(100)
+        .build();
+    let mut writer =
+        ArrowWriter::try_new(&mut output, input_batch.schema(), Some(writer_options)).unwrap();
+
+    // since the limits are only enforced on batch boundaries, write the input
+    // batch in chunks of 50
+    let mut row_remain = input_batch.num_rows();
+    while row_remain > 0 {
+        let chunk_size = row_remain.min(50);
+        let chunk = input_batch.slice(input_batch.num_rows() - row_remain, chunk_size);
+        writer.write(&chunk).unwrap();
+        row_remain -= chunk_size;
+    }
+    writer.close().unwrap();
+    Bytes::from(output)
+});
+
+/// Build an in-memory Parquet file with:
+///
+/// * string column `a`
+/// * nested struct column `b { aa, bb }`
+static NESTED_TEST_FILE_DATA: LazyLock<Bytes> = LazyLock::new(|| {
+    const NUM_ROWS: usize = 100;
+    let a: StringArray = (0..NUM_ROWS).map(|i| Some(format!("r{i}"))).collect();
+
+    let aa: StringArray = (0..NUM_ROWS).map(|i| Some(format!("v{i}"))).collect();
+    let bb: StringArray = (0..NUM_ROWS).map(|i| Some(format!("w{i}"))).collect();
+    let b = StructArray::from(vec![
+        (
+            Arc::new(Field::new("aa", DataType::Utf8, true)),
+            Arc::new(aa) as ArrayRef,
+        ),
+        (
+            Arc::new(Field::new("bb", DataType::Utf8, true)),
+            Arc::new(bb) as ArrayRef,
+        ),
+    ]);
+
+    let input_batch = RecordBatch::try_from_iter([
+        ("a", Arc::new(a) as ArrayRef),
+        ("b", Arc::new(b) as ArrayRef),
+    ])
+    .unwrap();
+
+    let mut output = Vec::new();
+    let writer_options = None;
+    let mut writer =
+        ArrowWriter::try_new(&mut output, input_batch.schema(), writer_options).unwrap();
+    writer.write(&input_batch).unwrap();
+    writer.close().unwrap();
+    Bytes::from(output)
+});
+
+trait ArrowReaderBuilderExt {
+    /// Applies the following:
+    /// 1. a projection selecting the "a" and "b" columns
+    /// 2. a row_filter applied to "b": 575 < "b" < 625 (select 1 data page from each row group)
+    fn add_project_ab_and_filter_b(self) -> Self;
+
+    /// Adds a row filter that projects the nested leaf column "b.aa" and
+    /// returns true for all rows.
+    fn add_nested_filter(self) -> Self;
+}
+
+impl<T> ArrowReaderBuilderExt for ArrowReaderBuilder<T> {
+    fn add_project_ab_and_filter_b(self) -> Self {
+        let schema_descr = self.metadata().file_metadata().schema_descr_ptr();
+
+        // "b" > 575 and "b" < 625
+        let row_filter = ArrowPredicateFn::new(
+            ProjectionMask::columns(&schema_descr, ["b"]),
+            |batch: RecordBatch| {
+                let scalar_575 = Int64Array::new_scalar(575);
+                let scalar_625 = Int64Array::new_scalar(625);
+                let column = batch.column(0).as_primitive::<Int64Type>();
+                and(&gt(column, &scalar_575)?, &lt(column, &scalar_625)?)
+            },
+        );
+
+        self.with_projection(ProjectionMask::columns(&schema_descr, ["a", "b"]))
+            .with_row_filter(RowFilter::new(vec![Box::new(row_filter)]))
+    }
+
+    fn add_nested_filter(self) -> Self {
+        let schema_descr = self.metadata().file_metadata().schema_descr_ptr();
+
+        // Build a RowFilter whose predicate projects a leaf under the nested root `b`
+        // Leaf indices are depth-first; with schema [a, b.aa, b.bb] we pick index 1 (b.aa)
+        let nested_leaf_mask = ProjectionMask::leaves(&schema_descr, vec![1]);
+
+        let always_true = ArrowPredicateFn::new(nested_leaf_mask.clone(), |batch: RecordBatch| {
+            Ok(arrow_array::BooleanArray::from(vec![
+                true;
+                batch.num_rows()
+            ]))
+        });
+        let row_filter = RowFilter::new(vec![Box::new(always_true)]);
+
+        self.with_projection(nested_leaf_mask)
+            .with_row_filter(row_filter)
+    }
+}
+
+/// Copy paste version of the `AsyncFileReader` trait for testing purposes 🤮
+/// TODO put this in a common place
+#[derive(Clone)]
+struct TestReader {
+    data: Bytes,
+    metadata: Option<Arc<ParquetMetaData>>,
+}
+
+impl TestReader {
+    fn new(data: Bytes) -> Self {
+        Self {
+            data,
+            metadata: Default::default(),
+        }
+    }
+}
+
+impl AsyncFileReader for TestReader {
+    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
+        let range = range.clone();
+        futures::future::ready(Ok(self
+            .data
+            .slice(range.start as usize..range.end as usize)))
+        .boxed()
+    }
+
+    fn get_metadata<'a>(
+        &'a mut self,
+        options: Option<&'a ArrowReaderOptions>,
+    ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
+        let metadata_reader = ParquetMetaDataReader::new().with_page_index_policy(
+            PageIndexPolicy::from(options.is_some_and(|o| o.page_index())),
+        );
+        self.metadata = Some(Arc::new(
+            metadata_reader.parse_and_finish(&self.data).unwrap(),
+        ));
+        futures::future::ready(Ok(self.metadata.clone().unwrap().clone())).boxed()
+    }
+}
diff --git a/parquet/tests/arrow_reader/statistics.rs b/parquet/tests/arrow_reader/statistics.rs
index 7a389fb5eb9a..aef473fa849a 100644
--- a/parquet/tests/arrow_reader/statistics.rs
+++ b/parquet/tests/arrow_reader/statistics.rs
@@ -23,28 +23,29 @@ use std::fs::File;
 use std::sync::Arc;
 
 use super::make_test_file_rg;
-use super::{struct_array, Scenario};
+use super::{Scenario, struct_array};
 use arrow::compute::kernels::cast_utils::Parser;
 use arrow::datatypes::{
-    i256, Date32Type, Date64Type, TimestampMicrosecondType, TimestampMillisecondType,
-    TimestampNanosecondType, TimestampSecondType,
+    Date32Type, Date64Type, TimestampMicrosecondType, TimestampMillisecondType,
+    TimestampNanosecondType, TimestampSecondType, i256,
 };
 use arrow_array::{
-    make_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray,
-    Date32Array, Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array,
-    Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
-    LargeStringArray, RecordBatch, StringArray, StringViewArray, Time32MillisecondArray,
-    Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
-    TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array,
-    UInt32Array, UInt64Array, UInt8Array,
+    Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array,
+    Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray,
+    Float16Array, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
+    LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, StringViewArray,
+    Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
+    TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
+    TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, make_array,
+    new_null_array,
 };
 use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
 use half::f16;
+use parquet::arrow::ArrowWriter;
 use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
 use parquet::arrow::arrow_reader::{
     ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
 };
-use parquet::arrow::ArrowWriter;
 use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
 use parquet::file::properties::{EnabledStatistics, WriterProperties};
 use parquet::file::statistics::{Statistics, ValueStatistics};
@@ -82,7 +83,7 @@ impl Int64Case {
                 Int64Array::from_iter(
                     v64.into_iter()
                         .map(Some)
-                        .chain(std::iter::repeat(None).take(self.null_values)),
+                        .chain(std::iter::repeat_n(None, self.null_values)),
                 )
                 .to_data(),
             )],
@@ -603,6 +604,9 @@ async fn test_data_page_stats_with_all_null_page() {
         DataType::Utf8,
         DataType::LargeUtf8,
         DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+        DataType::Decimal32(8, 2),   // as INT32
+        DataType::Decimal64(8, 2),   // as INT32
+        DataType::Decimal64(10, 2),  // as INT64
         DataType::Decimal128(8, 2),  // as INT32
         DataType::Decimal128(10, 2), // as INT64
         DataType::Decimal128(20, 2), // as FIXED_LEN_BYTE_ARRAY
@@ -1944,11 +1948,77 @@ async fn test_float16() {
 }
 
 #[tokio::test]
-async fn test_decimal() {
-    // This creates a parquet file of 1 column "decimal_col" with decimal data type and precicion 9, scale 2
+async fn test_decimal32() {
+    // This creates a parquet file of 1 column "decimal32_col" with decimal data type and precision 9, scale 2
+    // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
+    let reader = TestReader {
+        scenario: Scenario::Decimal32,
+        row_per_group: 5,
+    }
+    .build()
+    .await;
+
+    Test {
+        reader: &reader,
+        expected_min: Arc::new(
+            Decimal32Array::from(vec![100, -500, 2000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_max: Arc::new(
+            Decimal32Array::from(vec![600, 600, 6000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
+        expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
+        column_name: "decimal32_col",
+        check: Check::Both,
+    }
+    .run();
+}
+#[tokio::test]
+async fn test_decimal64() {
+    // This creates a parquet file of 1 column "decimal64_col" with decimal data type and precision 9, scale 2
+    // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
+    let reader = TestReader {
+        scenario: Scenario::Decimal64,
+        row_per_group: 5,
+    }
+    .build()
+    .await;
+
+    Test {
+        reader: &reader,
+        expected_min: Arc::new(
+            Decimal64Array::from(vec![100, -500, 2000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_max: Arc::new(
+            Decimal64Array::from(vec![600, 600, 6000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
+        expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        // stats are exact
+        expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
+        expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
+        column_name: "decimal64_col",
+        check: Check::Both,
+    }
+    .run();
+}
+#[tokio::test]
+async fn test_decimal128() {
+    // This creates a parquet file of 1 column "decimal128_col" with decimal data type and precision 9, scale 2
     // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
     let reader = TestReader {
-        scenario: Scenario::Decimal,
+        scenario: Scenario::Decimal128,
         row_per_group: 5,
     }
     .build()
@@ -1971,7 +2041,7 @@ async fn test_decimal() {
         // stats are exact
         expected_max_value_exact: BooleanArray::from(vec![true, true, true]),
         expected_min_value_exact: BooleanArray::from(vec![true, true, true]),
-        column_name: "decimal_col",
+        column_name: "decimal128_col",
         check: Check::Both,
     }
     .run();
@@ -2556,9 +2626,9 @@ mod test {
     use super::*;
     use arrow::util::test_util::parquet_test_data;
     use arrow_array::{
-        new_empty_array, ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array,
-        Int16Array, Int32Array, Int64Array, Int8Array, RecordBatch, StringArray,
-        TimestampNanosecondArray,
+        ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int8Array, Int16Array,
+        Int32Array, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray,
+        new_empty_array,
     };
     use arrow_schema::{DataType, SchemaRef, TimeUnit};
     use bytes::Bytes;
@@ -2607,6 +2677,8 @@ mod test {
             // DataType::Struct(Fields),
             // DataType::Union(UnionFields, UnionMode),
             // DataType::Dictionary(Box<DataType>, Box<DataType>),
+            // DataType::Decimal32(u8, i8),
+            // DataType::Decimal64(u8, i8),
             // DataType::Decimal128(u8, i8),
             // DataType::Decimal256(u8, i8),
             // DataType::Map(FieldRef, bool),
diff --git a/parquet/tests/arrow_writer_layout.rs b/parquet/tests/arrow_writer_layout.rs
index 9a66d13f84d7..f78370ca8d4b 100644
--- a/parquet/tests/arrow_writer_layout.rs
+++ b/parquet/tests/arrow_writer_layout.rs
@@ -21,8 +21,8 @@ use arrow::array::{Int32Array, StringArray};
 use arrow::record_batch::RecordBatch;
 use arrow_array::builder::{Int32Builder, ListBuilder};
 use bytes::Bytes;
-use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
 use parquet::arrow::ArrowWriter;
+use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
 use parquet::basic::{Encoding, PageType};
 use parquet::file::metadata::ParquetMetaData;
 use parquet::file::properties::{ReaderProperties, WriterProperties};
@@ -177,6 +177,7 @@ fn test_primitive() {
         .set_dictionary_enabled(false)
         .set_data_page_size_limit(1000)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     // Test spill plain encoding pages
@@ -207,6 +208,7 @@ fn test_primitive() {
         .set_dictionary_page_size_limit(1000)
         .set_data_page_size_limit(10000)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     do_test(LayoutTest {
@@ -249,6 +251,7 @@ fn test_primitive() {
         .set_dictionary_page_size_limit(10000)
         .set_data_page_size_limit(500)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     do_test(LayoutTest {
@@ -318,6 +321,7 @@ fn test_primitive() {
         .set_dictionary_enabled(false)
         .set_data_page_row_count_limit(100)
         .set_write_batch_size(100)
+        .set_write_page_header_statistics(true)
         .build();
 
     do_test(LayoutTest {
@@ -352,6 +356,7 @@ fn test_string() {
         .set_dictionary_enabled(false)
         .set_data_page_size_limit(1000)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     // Test spill plain encoding pages
@@ -389,6 +394,7 @@ fn test_string() {
         .set_dictionary_page_size_limit(1000)
         .set_data_page_size_limit(10000)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     do_test(LayoutTest {
@@ -438,6 +444,7 @@ fn test_string() {
         .set_dictionary_page_size_limit(20000)
         .set_data_page_size_limit(500)
         .set_write_batch_size(10)
+        .set_write_page_header_statistics(true)
         .build();
 
     do_test(LayoutTest {
@@ -520,6 +527,7 @@ fn test_list() {
         .set_dictionary_enabled(false)
         .set_data_page_row_count_limit(20)
         .set_write_batch_size(3)
+        .set_write_page_header_statistics(true)
         .build();
 
     // Test rows not split across pages
diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs
index 7079e91d1209..f999abab95de 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -18,17 +18,18 @@
 //! This module contains tests for reading encrypted Parquet files with the Arrow API
 
 use crate::encryption_util::{
-    verify_column_indexes, verify_encryption_test_data, TestKeyRetriever,
+    TestKeyRetriever, read_and_roundtrip_to_encrypted_file, verify_column_indexes,
+    verify_encryption_test_file_read,
 };
 use arrow::array::*;
 use arrow::error::Result as ArrowResult;
 use arrow_array::{Int32Array, RecordBatch};
 use arrow_schema::{DataType as ArrowDataType, DataType, Field, Schema};
+use parquet::arrow::ArrowWriter;
 use parquet::arrow::arrow_reader::{
     ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelection,
     RowSelector,
 };
-use parquet::arrow::ArrowWriter;
 use parquet::data_type::{ByteArray, ByteArrayType};
 use parquet::encryption::decrypt::FileDecryptionProperties;
 use parquet::encryption::encrypt::FileEncryptionProperties;
@@ -88,14 +89,16 @@ fn test_plaintext_footer_signature_verification() {
         .build()
         .unwrap();
 
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
     let result = ArrowReaderMetadata::load(&file, options.clone());
     assert!(result.is_err());
-    assert!(result
-        .unwrap_err()
-        .to_string()
-        .starts_with("Parquet error: Footer signature verification failed. Computed: ["));
+    assert!(
+        result
+            .unwrap_err()
+            .to_string()
+            .starts_with("Parquet error: Footer signature verification failed. Computed: [")
+    );
 }
 
 #[test]
@@ -145,8 +148,8 @@ fn test_non_uniform_encryption_disabled_aad_storage() {
         .unwrap();
 
     let file = File::open(path).unwrap();
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
     let result = ArrowReaderMetadata::load(&file, options.clone());
     assert!(result.is_err());
     assert_eq!(
@@ -276,8 +279,8 @@ fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
         .build()
         .unwrap();
 
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
     let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
 
     // Write data into temporary file with plaintext footer and footer key metadata
@@ -317,8 +320,8 @@ fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
         .build()
         .unwrap();
 
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
     let _ = ArrowReaderMetadata::load(&temp_file, options.clone()).unwrap();
 
     // Read temporary file with plaintext metadata using key retriever with invalid key
@@ -331,14 +334,16 @@ fn test_uniform_encryption_plaintext_footer_with_key_retriever() {
     let decryption_properties = FileDecryptionProperties::with_key_retriever(key_retriever)
         .build()
         .unwrap();
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
     let result = ArrowReaderMetadata::load(&temp_file, options.clone());
     assert!(result.is_err());
-    assert!(result
-        .unwrap_err()
-        .to_string()
-        .starts_with("Parquet error: Footer signature verification failed. Computed: ["));
+    assert!(
+        result
+            .unwrap_err()
+            .to_string()
+            .starts_with("Parquet error: Footer signature verification failed. Computed: [")
+    );
 }
 
 #[test]
@@ -377,21 +382,6 @@ fn test_uniform_encryption_with_key_retriever() {
     verify_encryption_test_file_read(file, decryption_properties);
 }
 
-fn verify_encryption_test_file_read(file: File, decryption_properties: FileDecryptionProperties) {
-    let options =
-        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
-    let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
-    let metadata = reader_metadata.metadata();
-
-    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
-    let record_reader = builder.build().unwrap();
-    let record_batches = record_reader
-        .map(|x| x.unwrap())
-        .collect::<Vec<RecordBatch>>();
-
-    verify_encryption_test_data(record_batches, metadata);
-}
-
 fn row_group_sizes(metadata: &ParquetMetaData) -> Vec<i64> {
     metadata.row_groups().iter().map(|x| x.num_rows()).collect()
 }
@@ -630,6 +620,7 @@ fn uniform_encryption_page_skipping(page_index: bool) -> parquet::errors::Result
 fn test_write_non_uniform_encryption() {
     let testdata = arrow::util::test_util::parquet_test_data();
     let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    let file = File::open(path).unwrap();
 
     let footer_key = b"0123456789012345".to_vec(); // 128bit/16
     let column_names = vec!["double_field", "float_field"];
@@ -647,13 +638,14 @@ fn test_write_non_uniform_encryption() {
         .build()
         .unwrap();
 
-    read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties);
+    read_and_roundtrip_to_encrypted_file(&file, decryption_properties, file_encryption_properties);
 }
 
 #[test]
 fn test_write_uniform_encryption_plaintext_footer() {
     let testdata = arrow::util::test_util::parquet_test_data();
     let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
+    let file = File::open(path).unwrap();
 
     let footer_key = b"0123456789012345".to_vec(); // 128bit/16
     let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16
@@ -679,8 +671,8 @@ fn test_write_uniform_encryption_plaintext_footer() {
 
     // Try writing plaintext footer and then reading it with the correct footer key
     read_and_roundtrip_to_encrypted_file(
-        &path,
-        decryption_properties.clone(),
+        &file,
+        Arc::clone(&decryption_properties),
         file_encryption_properties.clone(),
     );
 
@@ -688,7 +680,6 @@ fn test_write_uniform_encryption_plaintext_footer() {
     let temp_file = tempfile::tempfile().unwrap();
 
     // read example data
-    let file = File::open(path).unwrap();
     let options = ArrowReaderOptions::default()
         .with_file_decryption_properties(decryption_properties.clone());
     let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
@@ -720,16 +711,19 @@ fn test_write_uniform_encryption_plaintext_footer() {
         ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties);
     let result = ArrowReaderMetadata::load(&temp_file, options.clone());
     assert!(result.is_err());
-    assert!(result
-        .unwrap_err()
-        .to_string()
-        .starts_with("Parquet error: Footer signature verification failed. Computed: ["));
+    assert!(
+        result
+            .unwrap_err()
+            .to_string()
+            .starts_with("Parquet error: Footer signature verification failed. Computed: [")
+    );
 }
 
 #[test]
 fn test_write_uniform_encryption() {
     let testdata = arrow::util::test_util::parquet_test_data();
     let path = format!("{testdata}/uniform_encryption.parquet.encrypted");
+    let file = File::open(path).unwrap();
 
     let footer_key = b"0123456789012345".to_vec(); // 128bit/16
 
@@ -741,7 +735,7 @@ fn test_write_uniform_encryption() {
         .build()
         .unwrap();
 
-    read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties);
+    read_and_roundtrip_to_encrypted_file(&file, decryption_properties, file_encryption_properties);
 }
 
 #[test]
@@ -934,8 +928,8 @@ fn test_write_encrypted_struct_field() {
         .with_column_key("struct_col.float64_col", column_key_2)
         .build()
         .unwrap();
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
 
     let builder =
         ParquetRecordBatchReaderBuilder::try_new_with_options(temp_file, options).unwrap();
@@ -994,23 +988,17 @@ pub fn test_retrieve_row_group_statistics_after_encrypted_write() {
     }
     let file_metadata = writer.close().unwrap();
 
-    assert_eq!(file_metadata.row_groups.len(), 1);
-    let row_group = &file_metadata.row_groups[0];
-    assert_eq!(row_group.columns.len(), 1);
-    let column = &row_group.columns[0];
-    let column_stats = column
-        .meta_data
-        .as_ref()
-        .unwrap()
-        .statistics
-        .as_ref()
-        .unwrap();
+    assert_eq!(file_metadata.num_row_groups(), 1);
+    let row_group = file_metadata.row_group(0);
+    assert_eq!(row_group.num_columns(), 1);
+    let column = row_group.column(0);
+    let column_stats = column.statistics().unwrap();
     assert_eq!(
-        column_stats.min_value.as_deref(),
+        column_stats.min_bytes_opt(),
         Some(3i32.to_le_bytes().as_slice())
     );
     assert_eq!(
-        column_stats.max_value.as_deref(),
+        column_stats.max_bytes_opt(),
         Some(19i32.to_le_bytes().as_slice())
     );
 }
@@ -1048,7 +1036,7 @@ fn test_decrypt_page_index_non_uniform() {
 
 fn test_decrypt_page_index(
     path: &str,
-    decryption_properties: FileDecryptionProperties,
+    decryption_properties: Arc<FileDecryptionProperties>,
 ) -> Result<(), ParquetError> {
     let file = File::open(path)?;
     let options = ArrowReaderOptions::default()
@@ -1061,43 +1049,3 @@ fn test_decrypt_page_index(
 
     Ok(())
 }
-
-fn read_and_roundtrip_to_encrypted_file(
-    path: &str,
-    decryption_properties: FileDecryptionProperties,
-    encryption_properties: FileEncryptionProperties,
-) {
-    let temp_file = tempfile::tempfile().unwrap();
-
-    // read example data
-    let file = File::open(path).unwrap();
-    let options = ArrowReaderOptions::default()
-        .with_file_decryption_properties(decryption_properties.clone());
-    let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
-
-    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
-    let batch_reader = builder.build().unwrap();
-    let batches = batch_reader
-        .collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
-        .unwrap();
-
-    // write example data
-    let props = WriterProperties::builder()
-        .with_file_encryption_properties(encryption_properties)
-        .build();
-
-    let mut writer = ArrowWriter::try_new(
-        temp_file.try_clone().unwrap(),
-        metadata.schema().clone(),
-        Some(props),
-    )
-    .unwrap();
-    for batch in batches {
-        writer.write(&batch).unwrap();
-    }
-
-    writer.close().unwrap();
-
-    // check re-written example data
-    verify_encryption_test_file_read(temp_file, decryption_properties);
-}
diff --git a/parquet/tests/encryption/encryption_agnostic.rs b/parquet/tests/encryption/encryption_agnostic.rs
index e071471712f4..604155c81a83 100644
--- a/parquet/tests/encryption/encryption_agnostic.rs
+++ b/parquet/tests/encryption/encryption_agnostic.rs
@@ -20,8 +20,8 @@
 use arrow_array::cast::AsArray;
 use arrow_array::types;
 use arrow_schema::ArrowError;
-use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder};
 use parquet::arrow::ProjectionMask;
+use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder};
 use std::fs::File;
 
 pub fn read_plaintext_footer_file_without_decryption_properties() {
@@ -72,7 +72,7 @@ pub fn read_plaintext_footer_file_without_decryption_properties() {
 
     match record_reader.next() {
         Some(Err(ArrowError::ParquetError(s))) => {
-            assert!(s.contains("protocol error"));
+            assert!(s.contains("Parquet error"));
         }
         _ => {
             panic!("Expected ArrowError::ParquetError");
@@ -137,10 +137,10 @@ pub async fn read_plaintext_footer_file_without_decryption_properties_async() {
 
     match record_reader.next().await {
         Some(Err(ParquetError::ArrowError(s))) => {
-            assert!(s.contains("protocol error"));
+            assert!(s.contains("Parquet error"));
         }
-        _ => {
-            panic!("Expected ArrowError::ParquetError");
+        err => {
+            panic!("Expected ArrowError::ParquetError, got {err:?}");
         }
     };
 }
diff --git a/parquet/tests/encryption/encryption_async.rs b/parquet/tests/encryption/encryption_async.rs
index e0fbbcdfafe3..51acd7374879 100644
--- a/parquet/tests/encryption/encryption_async.rs
+++ b/parquet/tests/encryption/encryption_async.rs
@@ -18,19 +18,31 @@
 //! This module contains tests for reading encrypted Parquet files with the async Arrow API
 
 use crate::encryption_util::{
-    verify_column_indexes, verify_encryption_test_data, TestKeyRetriever,
+    TestKeyRetriever, read_encrypted_file, verify_column_indexes,
+    verify_encryption_double_test_data, verify_encryption_test_data,
 };
+use arrow_array::RecordBatch;
+use arrow_schema::Schema;
 use futures::TryStreamExt;
 use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
-use parquet::arrow::arrow_writer::ArrowWriterOptions;
-use parquet::arrow::AsyncArrowWriter;
-use parquet::arrow::ParquetRecordBatchStreamBuilder;
+use parquet::arrow::arrow_writer::{
+    ArrowColumnChunk, ArrowColumnWriter, ArrowLeafColumn, ArrowRowGroupWriterFactory,
+    ArrowWriterOptions, compute_leaves,
+};
+use parquet::arrow::{
+    ArrowSchemaConverter, ArrowWriter, AsyncArrowWriter, ParquetRecordBatchStreamBuilder,
+};
 use parquet::encryption::decrypt::FileDecryptionProperties;
 use parquet::encryption::encrypt::FileEncryptionProperties;
 use parquet::errors::ParquetError;
-use parquet::file::properties::WriterProperties;
+use parquet::file::metadata::ParquetMetaData;
+use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
+use parquet::file::writer::SerializedFileWriter;
+use std::io::Write;
 use std::sync::Arc;
 use tokio::fs::File;
+use tokio::sync::mpsc::{Receiver, Sender};
+use tokio::task::JoinHandle;
 
 #[tokio::test]
 async fn test_non_uniform_encryption_plaintext_footer() {
@@ -284,9 +296,9 @@ async fn get_encrypted_meta_store() -> (
     object_store::ObjectMeta,
     std::sync::Arc<dyn object_store::ObjectStore>,
 ) {
+    use object_store::ObjectStore;
     use object_store::local::LocalFileSystem;
     use object_store::path::Path;
-    use object_store::ObjectStore;
 
     use std::sync::Arc;
     let test_data = arrow::util::test_util::parquet_test_data();
@@ -421,7 +433,7 @@ async fn test_decrypt_page_index_non_uniform() {
 
 async fn test_decrypt_page_index(
     path: &str,
-    decryption_properties: FileDecryptionProperties,
+    decryption_properties: Arc<FileDecryptionProperties>,
 ) -> Result<(), ParquetError> {
     let mut file = File::open(&path).await?;
 
@@ -438,7 +450,7 @@ async fn test_decrypt_page_index(
 
 async fn verify_encryption_test_file_read_async(
     file: &mut tokio::fs::File,
-    decryption_properties: FileDecryptionProperties,
+    decryption_properties: Arc<FileDecryptionProperties>,
 ) -> Result<(), ParquetError> {
     let options = ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties);
 
@@ -458,14 +470,14 @@ async fn verify_encryption_test_file_read_async(
 
 async fn read_and_roundtrip_to_encrypted_file_async(
     path: &str,
-    decryption_properties: FileDecryptionProperties,
-    encryption_properties: FileEncryptionProperties,
+    decryption_properties: Arc<FileDecryptionProperties>,
+    encryption_properties: Arc<FileEncryptionProperties>,
 ) -> Result<(), ParquetError> {
     let temp_file = tempfile::tempfile().unwrap();
     let mut file = File::open(&path).await.unwrap();
 
-    let options =
-        ArrowReaderOptions::new().with_file_decryption_properties(decryption_properties.clone());
+    let options = ArrowReaderOptions::new()
+        .with_file_decryption_properties(Arc::clone(&decryption_properties));
     let arrow_metadata = ArrowReaderMetadata::load_async(&mut file, options).await?;
     let record_reader = ParquetRecordBatchStreamBuilder::new_with_metadata(
         file.try_clone().await?,
@@ -491,3 +503,430 @@ async fn read_and_roundtrip_to_encrypted_file_async(
     let mut file = tokio::fs::File::from_std(temp_file.try_clone().unwrap());
     verify_encryption_test_file_read_async(&mut file, decryption_properties).await
 }
+
+// Aliases for the multi-threaded write tests: leaf-column sender, column writer task, row group result
+type ColSender = Sender<ArrowLeafColumn>;
+type ColumnWriterTask = JoinHandle<Result<ArrowColumnWriter, ParquetError>>;
+type RBStreamSerializeResult = Result<(Vec<ArrowColumnChunk>, usize), ParquetError>;
+
+async fn send_arrays_to_column_writers(
+    col_array_channels: &[ColSender],
+    rb: &RecordBatch,
+    schema: &Arc<Schema>,
+) -> Result<(), ParquetError> {
+    // One channel per leaf column, taken in order; a closed channel ends the send early with Ok(()).
+    let mut next_channel = 0;
+    for (array, field) in rb.columns().iter().zip(schema.fields()) {
+        for c in compute_leaves(field, array)? {
+            if col_array_channels[next_channel].send(c).await.is_err() {
+                return Ok(());
+            }
+            next_channel += 1;
+        }
+    }
+    Ok(())
+}
+
+/// Spawns a tokio task that awaits every column writer task in order, closes each writer,
+/// and yields the resulting column chunks together with `rg_rows` (passed through unchanged).
+fn spawn_rg_join_and_finalize_task(
+    column_writer_tasks: Vec<ColumnWriterTask>,
+    rg_rows: usize,
+) -> JoinHandle<RBStreamSerializeResult> {
+    tokio::task::spawn(async move {
+        let num_cols = column_writer_tasks.len();
+        let mut finalized_rg = Vec::with_capacity(num_cols);
+        for task in column_writer_tasks.into_iter() {
+            let writer = task
+                .await
+                .map_err(|e| ParquetError::General(e.to_string()))??;
+            finalized_rg.push(writer.close()?);
+        }
+        Ok((finalized_rg, rg_rows))
+    })
+}
+
+/// Spawns a task that splits incoming batches into row groups of at most 10 rows, writes each
+/// row group's columns in parallel, and sends one finalize task per row group to `serialize_tx`.
+fn spawn_parquet_parallel_serialization_task(
+    writer_factory: ArrowRowGroupWriterFactory,
+    mut data: Receiver<RecordBatch>,
+    serialize_tx: Sender<JoinHandle<RBStreamSerializeResult>>,
+    schema: Arc<Schema>,
+) -> JoinHandle<Result<(), ParquetError>> {
+    tokio::spawn(async move {
+        let max_buffer_rb = 10;
+        let max_row_group_rows = 10;
+        let mut row_group_index = 0;
+
+        let column_writers = writer_factory.create_column_writers(row_group_index)?;
+        let (mut col_writer_tasks, mut col_array_channels) =
+            spawn_column_parallel_row_group_writer(column_writers, max_buffer_rb)?;
+
+        let mut current_rg_rows = 0;
+
+        while let Some(mut rb) = data.recv().await {
+            // Loop so the "else" branch can keep splitting a RecordBatch that holds more rows than
+            // fit in the current row group (an alternative to a recursive async function).
+            loop {
+                if current_rg_rows + rb.num_rows() < max_row_group_rows {
+                    send_arrays_to_column_writers(&col_array_channels, &rb, &schema).await?;
+                    current_rg_rows += rb.num_rows();
+                    break;
+                } else {
+                    let rows_left = max_row_group_rows - current_rg_rows;
+                    let rb_split = rb.slice(0, rows_left);
+                    send_arrays_to_column_writers(&col_array_channels, &rb_split, &schema).await?;
+
+                    // Signal the parallel column writers that the RowGroup is done, join and finalize RowGroup
+                    // on a separate task, so that we can immediately start on the next RG before waiting
+                    // for the current one to finish.
+                    drop(col_array_channels);
+
+                    let finalize_rg_task =
+                        spawn_rg_join_and_finalize_task(col_writer_tasks, max_row_group_rows);
+
+                    // Do not surface an error from a closed channel (it means something
+                    // else hit an error, and the consumer has shut down).
+                    if serialize_tx.send(finalize_rg_task).await.is_err() {
+                        return Ok(());
+                    }
+
+                    current_rg_rows = 0;
+                    rb = rb.slice(rows_left, rb.num_rows() - rows_left);
+
+                    row_group_index += 1;
+                    let column_writers = writer_factory.create_column_writers(row_group_index)?;
+                    (col_writer_tasks, col_array_channels) =
+                        spawn_column_parallel_row_group_writer(column_writers, max_buffer_rb)?;
+                }
+            }
+        }
+
+        drop(col_array_channels);
+        // Handle leftover rows as final rowgroup, which may be smaller than max_row_group_rows
+        if current_rg_rows > 0 {
+            let finalize_rg_task =
+                spawn_rg_join_and_finalize_task(col_writer_tasks, current_rg_rows);
+
+            // Do not surface an error from a closed channel (it means something
+            // else hit an error, and the consumer has shut down).
+            if serialize_tx.send(finalize_rg_task).await.is_err() {
+                return Ok(());
+            }
+        }
+
+        Ok(())
+    })
+}
+
+fn spawn_column_parallel_row_group_writer(
+    col_writers: Vec<ArrowColumnWriter>,
+    max_buffer_size: usize,
+) -> Result<(Vec<ColumnWriterTask>, Vec<ColSender>), ParquetError> {
+    let num_columns = col_writers.len();
+    // For each column writer: a bounded channel plus a spawned task that writes what it receives.
+    let mut col_writer_tasks = Vec::with_capacity(num_columns);
+    let mut col_array_channels = Vec::with_capacity(num_columns);
+    for mut col_writer in col_writers.into_iter() {
+        let (send_array, mut receive_array) =
+            tokio::sync::mpsc::channel::<ArrowLeafColumn>(max_buffer_size);
+        col_array_channels.push(send_array);
+        let handle = tokio::spawn(async move {
+            while let Some(col) = receive_array.recv().await {
+                col_writer.write(&col)?;
+            }
+            Ok(col_writer)
+        });
+        col_writer_tasks.push(handle);
+    }
+    Ok((col_writer_tasks, col_array_channels))
+}
+
+/// Awaits the serialized row groups in the order they arrive on `serialize_rx`, appends each to
+/// `parquet_writer` as its own row group, then closes the writer and returns the file metadata.
+async fn concatenate_parallel_row_groups<W: Write + Send>(
+    mut parquet_writer: SerializedFileWriter<W>,
+    mut serialize_rx: Receiver<JoinHandle<RBStreamSerializeResult>>,
+) -> Result<ParquetMetaData, ParquetError> {
+    while let Some(task) = serialize_rx.recv().await {
+        let result = task.await;
+        let mut rg_out = parquet_writer.next_row_group()?;
+        let (serialized_columns, _cnt) =
+            result.map_err(|e| ParquetError::General(e.to_string()))??;
+
+        for column_chunk in serialized_columns {
+            column_chunk.append_to_row_group(&mut rg_out)?;
+        }
+        rg_out.close()?;
+    }
+
+    let file_metadata = parquet_writer.close()?;
+    Ok(file_metadata)
+}
+
+// This test is based on DataFusion's ParquetSink. The motivation is to test
+// concurrent writing of encrypted data over multiple row groups using the low-level API.
+#[tokio::test]
+async fn test_concurrent_encrypted_writing_over_multiple_row_groups() {
+    // Read example data and set up encryption/decryption properties
+    let testdata = arrow::util::test_util::parquet_test_data();
+    let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    let file = std::fs::File::open(path).unwrap();
+
+    let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+    let decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+
+    let (record_batches, metadata) =
+        read_encrypted_file(&file, decryption_properties.clone()).unwrap();
+    let schema = metadata.schema();
+
+    // Create a channel and spawn a producer task that feeds the RecordBatches to the writer tasks
+    let (record_batch_tx, data) = tokio::sync::mpsc::channel::<RecordBatch>(100);
+    let data_generator = tokio::spawn(async move {
+        for record_batch in record_batches {
+            record_batch_tx.send(record_batch).await.unwrap();
+        }
+    });
+
+    let props = Arc::new(
+        WriterPropertiesBuilder::default()
+            .with_file_encryption_properties(file_encryption_properties)
+            .build(),
+    );
+    let parquet_schema = ArrowSchemaConverter::new()
+        .with_coerce_types(props.coerce_types())
+        .convert(schema)
+        .unwrap();
+
+    // Create a temporary file to write the encrypted data
+    let temp_file = tempfile::tempfile().unwrap();
+
+    let writer =
+        SerializedFileWriter::new(&temp_file, parquet_schema.root_schema_ptr(), props).unwrap();
+    let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&writer, Arc::clone(schema));
+    let max_row_groups = 1;
+
+    let (serialize_tx, serialize_rx) =
+        tokio::sync::mpsc::channel::<JoinHandle<RBStreamSerializeResult>>(max_row_groups);
+
+    let launch_serialization_task = spawn_parquet_parallel_serialization_task(
+        row_group_writer_factory,
+        data,
+        serialize_tx,
+        schema.clone(),
+    );
+
+    let _file_metadata = concatenate_parallel_row_groups(writer, serialize_rx)
+        .await
+        .unwrap();
+
+    data_generator.await.unwrap();
+    launch_serialization_task.await.unwrap().unwrap();
+
+    // Read the written file back with the decryption properties and verify its contents
+    let (read_record_batches, read_metadata) =
+        read_encrypted_file(&temp_file, decryption_properties.clone()).unwrap();
+
+    assert_eq!(read_metadata.metadata().file_metadata().num_rows(), 50);
+    verify_encryption_test_data(read_record_batches, read_metadata.metadata());
+}
+
+#[tokio::test]
+async fn test_multi_threaded_encrypted_writing() {
+    // Read example data and set up encryption/decryption properties
+    let testdata = arrow::util::test_util::parquet_test_data();
+    let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    let file = std::fs::File::open(path).unwrap();
+
+    let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+    let decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+
+    let (record_batches, metadata) =
+        read_encrypted_file(&file, Arc::clone(&decryption_properties)).unwrap();
+    let schema = metadata.schema().clone();
+
+    let props = Arc::new(
+        WriterPropertiesBuilder::default()
+            .with_file_encryption_properties(file_encryption_properties)
+            .build(),
+    );
+
+    let parquet_schema = ArrowSchemaConverter::new()
+        .with_coerce_types(props.coerce_types())
+        .convert(&schema)
+        .unwrap();
+
+    // Create a temporary file to write the encrypted data
+    let temp_file = tempfile::tempfile().unwrap();
+    let mut writer =
+        SerializedFileWriter::new(&temp_file, parquet_schema.root_schema_ptr(), props).unwrap();
+    let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&writer, Arc::clone(&schema));
+
+    let (serialize_tx, mut serialize_rx) =
+        tokio::sync::mpsc::channel::<JoinHandle<RBStreamSerializeResult>>(1);
+
+    // Create a channel and spawn a producer task that sends the RecordBatches to the writer
+    let (record_batch_tx, mut data) = tokio::sync::mpsc::channel::<RecordBatch>(100);
+    let data_generator = tokio::spawn(async move {
+        for record_batch in record_batches {
+            record_batch_tx.send(record_batch).await.unwrap();
+        }
+    });
+
+    // Get column writers
+    let col_writers = row_group_writer_factory.create_column_writers(0).unwrap();
+
+    let (col_writer_tasks, col_array_channels) =
+        spawn_column_parallel_row_group_writer(col_writers, 10).unwrap();
+
+    // Spawn a task that serializes the first incoming RecordBatch as a single row group
+    let launch_serialization_task = tokio::spawn(async move {
+        let Some(rb) = data.recv().await else {
+            panic!()
+        };
+        send_arrays_to_column_writers(&col_array_channels, &rb, &schema)
+            .await
+            .unwrap();
+        let finalize_rg_task = spawn_rg_join_and_finalize_task(col_writer_tasks, 10);
+
+        serialize_tx.send(finalize_rg_task).await.unwrap();
+        drop(col_array_channels);
+    });
+
+    // Append the finalized row groups to the SerializedFileWriter
+    while let Some(task) = serialize_rx.recv().await {
+        let (arrow_column_chunks, _) = task.await.unwrap().unwrap();
+        let mut row_group_writer = writer.next_row_group().unwrap();
+        for chunk in arrow_column_chunks {
+            chunk.append_to_row_group(&mut row_group_writer).unwrap();
+        }
+        row_group_writer.close().unwrap();
+    }
+
+    // Wait for data generator and serialization task to finish, then close the file writer
+    data_generator.await.unwrap();
+    launch_serialization_task.await.unwrap();
+    let metadata = writer.close().unwrap();
+
+    // Closing the writer wrote the footer; its metadata must report all 50 rows
+    assert_eq!(metadata.file_metadata().num_rows(), 50);
+
+    // Check that the file was written correctly
+    let (read_record_batches, read_metadata) =
+        read_encrypted_file(&temp_file, decryption_properties).unwrap();
+    verify_encryption_test_data(read_record_batches, read_metadata.metadata());
+
+    // Check that file was encrypted
+    let result = ArrowReaderMetadata::load(&temp_file, ArrowReaderOptions::default());
+    assert_eq!(
+        result.unwrap_err().to_string(),
+        "Parquet error: Parquet file has an encrypted footer but decryption properties were not provided"
+    );
+}
+
+#[tokio::test]
+async fn test_multi_threaded_encrypted_writing_deprecated() {
+    // Read example data and set up encryption/decryption properties
+    let testdata = arrow::util::test_util::parquet_test_data();
+    let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
+    let file = std::fs::File::open(path).unwrap();
+
+    let file_encryption_properties = FileEncryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+    let decryption_properties = FileDecryptionProperties::builder(b"0123456789012345".into())
+        .with_column_key("double_field", b"1234567890123450".into())
+        .with_column_key("float_field", b"1234567890123451".into())
+        .build()
+        .unwrap();
+
+    let (record_batches, metadata) =
+        read_encrypted_file(&file, Arc::clone(&decryption_properties)).unwrap();
+    let to_write: Vec<_> = record_batches
+        .iter()
+        .flat_map(|rb| rb.columns().to_vec())
+        .collect();
+    let schema = metadata.schema().clone();
+
+    let props = Some(
+        WriterPropertiesBuilder::default()
+            .with_file_encryption_properties(file_encryption_properties)
+            .build(),
+    );
+
+    // Create a temporary file to write the encrypted data
+    let temp_file = tempfile::tempfile().unwrap();
+    let mut writer = ArrowWriter::try_new(&temp_file, schema.clone(), props).unwrap();
+
+    // LOW-LEVEL API: Use low level API to write into a file using multiple threads
+
+    // Get column writers
+    #[allow(deprecated)]
+    let col_writers = writer.get_column_writers().unwrap();
+    let num_columns = col_writers.len();
+
+    let (col_writer_tasks, mut col_array_channels) =
+        spawn_column_parallel_row_group_writer(col_writers, 100).unwrap();
+
+    // Send the ArrowLeafColumn data to the respective column writer channels
+    let mut worker_iter = col_array_channels.iter_mut();
+    for (array, field) in to_write.iter().zip(schema.fields()) {
+        for leaves in compute_leaves(field, array).unwrap() {
+            worker_iter.next().unwrap().send(leaves).await.unwrap();
+        }
+    }
+    drop(col_array_channels);
+
+    // Wait for all column writers to finish writing
+    let mut finalized_rg = Vec::with_capacity(num_columns);
+    for task in col_writer_tasks.into_iter() {
+        finalized_rg.push(task.await.unwrap().unwrap().close().unwrap());
+    }
+
+    // Append the finalized row group through the (deprecated) ArrowWriter API
+    #[allow(deprecated)]
+    writer.append_row_group(finalized_rg).unwrap();
+
+    // HIGH-LEVEL API: Write RecordBatches into the file using ArrowWriter
+
+    // Write individual RecordBatches into the file
+    for rb in record_batches {
+        writer.write(&rb).unwrap()
+    }
+    assert!(writer.flush().is_ok());
+
+    // Close the file writer which writes the footer; the data was written twice, hence 100 rows
+    let metadata = writer.finish().unwrap();
+    assert_eq!(metadata.file_metadata().num_rows(), 100);
+
+    // Check that the file was written correctly
+    let (read_record_batches, read_metadata) =
+        read_encrypted_file(&temp_file, decryption_properties).unwrap();
+    verify_encryption_double_test_data(read_record_batches, read_metadata.metadata());
+
+    // Check that file was encrypted
+    let result = ArrowReaderMetadata::load(&temp_file, ArrowReaderOptions::default());
+    assert_eq!(
+        result.unwrap_err().to_string(),
+        "Parquet error: Parquet file has an encrypted footer but decryption properties were not provided"
+    );
+}
diff --git a/parquet/tests/encryption/encryption_util.rs b/parquet/tests/encryption/encryption_util.rs
index 382193d25811..7f4cc5a9da45 100644
--- a/parquet/tests/encryption/encryption_util.rs
+++ b/parquet/tests/encryption/encryption_util.rs
@@ -16,17 +16,26 @@
 // under the License.
 
 use arrow_array::cast::AsArray;
-use arrow_array::{types, RecordBatch};
-use parquet::encryption::decrypt::KeyRetriever;
+use arrow_array::{RecordBatch, types};
+use parquet::arrow::ArrowWriter;
+use parquet::arrow::arrow_reader::{
+    ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
+};
+use parquet::encryption::decrypt::{FileDecryptionProperties, KeyRetriever};
+use parquet::encryption::encrypt::FileEncryptionProperties;
 use parquet::errors::{ParquetError, Result};
 use parquet::file::metadata::ParquetMetaData;
+use parquet::file::properties::WriterProperties;
 use std::collections::HashMap;
-use std::sync::Mutex;
+use std::fs::File;
+use std::sync::{Arc, Mutex};
 
-/// Verifies data read from an encrypted file from the parquet-testing repository
-pub fn verify_encryption_test_data(record_batches: Vec<RecordBatch>, metadata: &ParquetMetaData) {
+pub(crate) fn verify_encryption_double_test_data(
+    record_batches: Vec<RecordBatch>,
+    metadata: &ParquetMetaData,
+) {
     let file_metadata = metadata.file_metadata();
-    assert_eq!(file_metadata.num_rows(), 50);
+    assert_eq!(file_metadata.num_rows(), 100);
     assert_eq!(file_metadata.schema_descr().num_columns(), 8);
 
     metadata.row_groups().iter().for_each(|rg| {
@@ -35,6 +44,7 @@ pub fn verify_encryption_test_data(record_batches: Vec<RecordBatch>, metadata: &
     });
 
     let mut row_count = 0;
+    let wrap_at = 50;
     for batch in record_batches {
         let batch = batch;
         row_count += batch.num_rows();
@@ -56,23 +66,29 @@ pub fn verify_encryption_test_data(record_batches: Vec<RecordBatch>, metadata: &
             assert_eq!(x.unwrap(), i % 2 == 0);
         }
         for (i, x) in time_col.iter().enumerate() {
-            assert_eq!(x.unwrap(), i as i32);
+            assert_eq!(x.unwrap(), (i % wrap_at) as i32);
         }
         for (i, list_item) in list_col.iter().enumerate() {
             let list_item = list_item.unwrap();
             let list_item = list_item.as_primitive::<types::Int64Type>();
             assert_eq!(list_item.len(), 2);
-            assert_eq!(list_item.value(0), ((i * 2) * 1000000000000) as i64);
-            assert_eq!(list_item.value(1), ((i * 2 + 1) * 1000000000000) as i64);
+            assert_eq!(
+                list_item.value(0),
+                (((i % wrap_at) * 2) * 1000000000000) as i64
+            );
+            assert_eq!(
+                list_item.value(1),
+                (((i % wrap_at) * 2 + 1) * 1000000000000) as i64
+            );
         }
         for x in timestamp_col.iter() {
             assert!(x.is_some());
         }
         for (i, x) in f32_col.iter().enumerate() {
-            assert_eq!(x.unwrap(), i as f32 * 1.1f32);
+            assert_eq!(x.unwrap(), (i % wrap_at) as f32 * 1.1f32);
         }
         for (i, x) in f64_col.iter().enumerate() {
-            assert_eq!(x.unwrap(), i as f64 * 1.1111111f64);
+            assert_eq!(x.unwrap(), (i % wrap_at) as f64 * 1.1111111f64);
         }
         for (i, x) in binary_col.iter().enumerate() {
             assert_eq!(x.is_some(), i % 2 == 0);
@@ -81,16 +97,95 @@ pub fn verify_encryption_test_data(record_batches: Vec<RecordBatch>, metadata: &
             }
         }
         for (i, x) in fixed_size_binary_col.iter().enumerate() {
-            assert_eq!(x.unwrap(), &[i as u8; 10]);
+            assert_eq!(x.unwrap(), &[(i % wrap_at) as u8; 10]);
         }
     }
 
     assert_eq!(row_count, file_metadata.num_rows() as usize);
 }
 
+/// Verifies data read from an encrypted file from the parquet-testing repository
+pub(crate) fn verify_encryption_test_data(
+    record_batches: Vec<RecordBatch>,
+    metadata: &ParquetMetaData,
+) {
+    let file_metadata = metadata.file_metadata();
+    assert_eq!(file_metadata.num_rows(), 50);
+    assert_eq!(file_metadata.schema_descr().num_columns(), 8);
+
+    let mut total_rows = 0;
+    metadata.row_groups().iter().for_each(|rg| {
+        assert_eq!(rg.num_columns(), 8);
+        total_rows += rg.num_rows();
+    });
+    assert_eq!(total_rows, 50);
+
+    let mut row_count = 0;
+    for batch in record_batches {
+        let batch = batch;
+
+        let row_index = |index_in_batch: usize| row_count + index_in_batch;
+
+        let bool_col = batch.column(0).as_boolean();
+        let time_col = batch
+            .column(1)
+            .as_primitive::<types::Time32MillisecondType>();
+        let list_col = batch.column(2).as_list::<i32>();
+        let timestamp_col = batch
+            .column(3)
+            .as_primitive::<types::TimestampNanosecondType>();
+        let f32_col = batch.column(4).as_primitive::<types::Float32Type>();
+        let f64_col = batch.column(5).as_primitive::<types::Float64Type>();
+        let binary_col = batch.column(6).as_binary::<i32>();
+        let fixed_size_binary_col = batch.column(7).as_fixed_size_binary();
+
+        for (i, x) in bool_col.iter().enumerate() {
+            assert_eq!(x.unwrap(), row_index(i) % 2 == 0);
+        }
+        for (i, x) in time_col.iter().enumerate() {
+            assert_eq!(x.unwrap(), row_index(i) as i32);
+        }
+        for (i, list_item) in list_col.iter().enumerate() {
+            let list_item = list_item.unwrap();
+            let list_item = list_item.as_primitive::<types::Int64Type>();
+            assert_eq!(list_item.len(), 2);
+            assert_eq!(
+                list_item.value(0),
+                ((row_index(i) * 2) * 1000000000000) as i64
+            );
+            assert_eq!(
+                list_item.value(1),
+                ((row_index(i) * 2 + 1) * 1000000000000) as i64
+            );
+        }
+        for x in timestamp_col.iter() {
+            assert!(x.is_some());
+        }
+        for (i, x) in f32_col.iter().enumerate() {
+            assert_eq!(x.unwrap(), row_index(i) as f32 * 1.1f32);
+        }
+        for (i, x) in f64_col.iter().enumerate() {
+            assert_eq!(x.unwrap(), row_index(i) as f64 * 1.1111111f64);
+        }
+        for (i, x) in binary_col.iter().enumerate() {
+            assert_eq!(x.is_some(), row_index(i) % 2 == 0);
+            if let Some(x) = x {
+                assert_eq!(&x[0..7], b"parquet");
+            }
+        }
+        for (i, x) in fixed_size_binary_col.iter().enumerate() {
+            assert_eq!(x.unwrap(), &[row_index(i) as u8; 10]);
+        }
+
+        row_count += batch.num_rows();
+    }
+
+    assert_eq!(row_count, file_metadata.num_rows() as usize);
+}
+
 /// Verifies that the column and offset indexes were successfully read from an
 /// encrypted test file.
-pub fn verify_column_indexes(metadata: &ParquetMetaData) {
+pub(crate) fn verify_column_indexes(metadata: &ParquetMetaData) {
     let offset_index = metadata.offset_index().unwrap();
     // 1 row group, 8 columns
     assert_eq!(offset_index.len(), 1);
@@ -107,19 +202,85 @@ pub fn verify_column_indexes(metadata: &ParquetMetaData) {
     let column_index = &column_index[0][float_col_idx];
 
     match column_index {
-        parquet::file::page_index::index::Index::FLOAT(float_index) => {
-            assert_eq!(float_index.indexes.len(), 1);
-            assert_eq!(float_index.indexes[0].min, Some(0.0f32));
-            assert!(float_index.indexes[0]
-                .max
-                .is_some_and(|max| (max - 53.9).abs() < 1e-6));
+        parquet::file::page_index::column_index::ColumnIndexMetaData::FLOAT(float_index) => {
+            assert_eq!(float_index.num_pages(), 1);
+            assert_eq!(float_index.min_value(0), Some(&0.0f32));
+            assert!(
+                float_index
+                    .max_value(0)
+                    .is_some_and(|max| (max - 53.9).abs() < 1e-6)
+            );
         }
         _ => {
-            panic!("Expected a float column index for column {}", float_col_idx);
+            panic!("Expected a float column index for column {float_col_idx}");
         }
     };
 }
 
+pub(crate) fn read_encrypted_file(
+    file: &File,
+    decryption_properties: Arc<FileDecryptionProperties>,
+) -> std::result::Result<(Vec<RecordBatch>, ArrowReaderMetadata), ParquetError> {
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
+    let metadata = ArrowReaderMetadata::load(file, options.clone())?;
+
+    let builder =
+        ParquetRecordBatchReaderBuilder::try_new_with_options(file.try_clone().unwrap(), options)?;
+    let batch_reader = builder.build()?;
+    let batches = batch_reader.collect::<Result<Vec<RecordBatch>, _>>()?;
+    Ok((batches, metadata))
+}
+
+pub(crate) fn read_and_roundtrip_to_encrypted_file(
+    file: &File,
+    decryption_properties: Arc<FileDecryptionProperties>,
+    encryption_properties: Arc<FileEncryptionProperties>,
+) {
+    // read example data
+    let (batches, metadata) =
+        read_encrypted_file(file, Arc::clone(&decryption_properties)).unwrap();
+
+    // write example data to a temporary file
+    let temp_file = tempfile::tempfile().unwrap();
+    let props = WriterProperties::builder()
+        .with_file_encryption_properties(encryption_properties)
+        .build();
+
+    let mut writer = ArrowWriter::try_new(
+        temp_file.try_clone().unwrap(),
+        metadata.schema().clone(),
+        Some(props),
+    )
+    .unwrap();
+    for batch in batches {
+        writer.write(&batch).unwrap();
+    }
+
+    writer.close().unwrap();
+
+    // check re-written example data
+    verify_encryption_test_file_read(temp_file, decryption_properties);
+}
+
+pub(crate) fn verify_encryption_test_file_read(
+    file: File,
+    decryption_properties: Arc<FileDecryptionProperties>,
+) {
+    let options =
+        ArrowReaderOptions::default().with_file_decryption_properties(decryption_properties);
+    let reader_metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
+    let metadata = reader_metadata.metadata();
+
+    let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
+    let record_reader = builder.build().unwrap();
+    let record_batches = record_reader
+        .map(|x| x.unwrap())
+        .collect::<Vec<RecordBatch>>();
+
+    verify_encryption_test_data(record_batches, metadata);
+}
+
 /// A KeyRetriever to use in Parquet encryption tests,
 /// which stores a map from key names/metadata to encryption key bytes.
 pub struct TestKeyRetriever {
@@ -145,14 +306,13 @@ impl TestKeyRetriever {
 impl KeyRetriever for TestKeyRetriever {
     fn retrieve_key(&self, key_metadata: &[u8]) -> Result<Vec<u8>> {
         let key_metadata = std::str::from_utf8(key_metadata).map_err(|e| {
-            ParquetError::General(format!("Could not convert key metadata to string: {}", e))
+            ParquetError::General(format!("Could not convert key metadata to string: {e}"))
         })?;
         let keys = self.keys.lock().unwrap();
         match keys.get(key_metadata) {
             Some(key) => Ok(key.clone()),
             None => Err(ParquetError::General(format!(
-                "Could not retrieve key for metadata {:?}",
-                key_metadata
+                "Could not retrieve key for metadata {key_metadata:?}"
             ))),
         }
     }
diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs
new file mode 100644
index 000000000000..4f449df920e8
--- /dev/null
+++ b/parquet/tests/geospatial.rs
@@ -0,0 +1,435 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[cfg(all(feature = "arrow", feature = "geospatial"))]
+mod test {
+    //! Tests for Geometry and Geography logical types that require both the
+    //! arrow and geospatial features enabled
+
+    use std::{fs::File, iter::zip, sync::Arc};
+
+    use arrow_array::{ArrayRef, BinaryArray, RecordBatch, create_array};
+    use arrow_schema::{DataType, Field, Schema, SchemaRef, extension::ExtensionType as _};
+    use bytes::Bytes;
+    use parquet::{
+        arrow::{
+            ArrowSchemaConverter, ArrowWriter, arrow_reader::ParquetRecordBatchReaderBuilder,
+            arrow_writer::ArrowWriterOptions,
+        },
+        basic::{EdgeInterpolationAlgorithm, LogicalType},
+        column::reader::ColumnReader,
+        data_type::{ByteArray, ByteArrayType},
+        file::{
+            metadata::{ParquetMetaData, RowGroupMetaData},
+            properties::{EnabledStatistics, WriterProperties},
+            reader::{FileReader, SerializedFileReader},
+            writer::SerializedFileWriter,
+        },
+        geospatial::{bounding_box::BoundingBox, statistics::GeospatialStatistics},
+        schema::types::SchemaDescriptor,
+    };
+    use parquet_geospatial::{WkbEdges, WkbMetadata, WkbType, testing::wkb_point_xy};
+    use serde_json::Value;
+
+    fn read_metadata(geospatial_test_file: &str) -> (Arc<ParquetMetaData>, SchemaRef) {
+        let path = format!(
+            "{}/geospatial/{geospatial_test_file}",
+            arrow::util::test_util::parquet_test_data(),
+        );
+        let file = File::open(path).unwrap();
+        let reader = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+
+        (reader.metadata().clone(), reader.schema().clone())
+    }
+
+    #[test]
+    fn test_read_logical_type() {
+        // Some crs values are short strings
+        let expected_metadata = [
+            (
+                "crs-default.parquet",
+                LogicalType::Geometry { crs: None },
+                WkbMetadata::new(None, None),
+            ),
+            (
+                "crs-srid.parquet",
+                LogicalType::Geometry {
+                    crs: Some("srid:5070".to_string()),
+                },
+                WkbMetadata::new(Some("srid:5070"), None),
+            ),
+            (
+                "crs-projjson.parquet",
+                LogicalType::Geometry {
+                    crs: Some("projjson:projjson_epsg_5070".to_string()),
+                },
+                WkbMetadata::new(Some("projjson:projjson_epsg_5070"), None),
+            ),
+            (
+                "crs-geography.parquet",
+                LogicalType::Geography {
+                    crs: None,
+                    algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
+                },
+                WkbMetadata::new(None, Some(WkbEdges::Spherical)),
+            ),
+        ];
+
+        for (geospatial_file, expected_type, expected_field_meta) in expected_metadata {
+            let (metadata, schema) = read_metadata(geospatial_file);
+            let column_descr = metadata.file_metadata().schema_descr().column(1);
+            let logical_type = column_descr.logical_type_ref().unwrap();
+
+            assert_eq!(logical_type, &expected_type);
+
+            let field = schema.field(1);
+            let wkb_type = field.try_extension_type::<WkbType>().unwrap();
+
+            assert_eq!(wkb_type.metadata().crs, expected_field_meta.crs);
+            assert_eq!(wkb_type.metadata().algorithm, expected_field_meta.algorithm);
+        }
+
+        // The crs value may also contain arbitrary values (in this case some JSON
+        // a bit too lengthy to type out)
+        let (metadata, schema) = read_metadata("crs-arbitrary-value.parquet");
+        let column_descr = metadata.file_metadata().schema_descr().column(1);
+        let logical_type = column_descr.logical_type_ref().unwrap();
+
+        if let LogicalType::Geometry { crs } = logical_type {
+            let crs = crs.as_ref();
+            let crs_parsed: Value = serde_json::from_str(crs.unwrap()).unwrap();
+            assert_eq!(crs_parsed.get("id").unwrap().get("code").unwrap(), 5070);
+        } else {
+            panic!("Expected geometry type but got {logical_type:?}");
+        }
+
+        let field = schema.field(1);
+        let wkb_type = field.try_extension_type::<WkbType>().unwrap();
+        assert_eq!(
+            wkb_type.metadata().crs.as_ref().unwrap()["id"]["code"],
+            5070
+        );
+        assert_eq!(wkb_type.metadata().algorithm, None);
+    }
+
+    #[test]
+    fn test_read_geospatial_statistics() {
+        let (metadata, _) = read_metadata("geospatial.parquet");
+
+        // geospatial.parquet schema:
+        //    optional binary field_id=-1 group (String);
+        //    optional binary field_id=-1 wkt (String);
+        //    optional binary field_id=-1 geometry (Geometry(crs=));
+        let fields = metadata.file_metadata().schema().get_fields();
+        let logical_type = fields[2].get_basic_info().logical_type_ref().unwrap();
+        assert_eq!(logical_type, &LogicalType::Geometry { crs: None });
+
+        let geo_statistics = metadata.row_group(0).column(2).geo_statistics();
+        assert!(geo_statistics.is_some());
+
+        let expected_bbox = BoundingBox::new(10.0, 40.0, 10.0, 40.0)
+            .with_zrange(30.0, 80.0)
+            .with_mrange(200.0, 1600.0);
+        let expected_geospatial_types = vec![
+            1, 2, 3, 4, 5, 6, 7, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 2001, 2002, 2003, 2004,
+            2005, 2006, 2007, 3001, 3002, 3003, 3004, 3005, 3006, 3007,
+        ];
+        assert_eq!(
+            geo_statistics.unwrap().geospatial_types(),
+            Some(&expected_geospatial_types)
+        );
+        assert_eq!(geo_statistics.unwrap().bounding_box(), Some(&expected_bbox));
+    }
+
+    fn read_row_group_metadata(b: Bytes) -> Vec<RowGroupMetaData> {
+        let reader = SerializedFileReader::new(b).unwrap();
+        reader.metadata().row_groups().to_vec()
+    }
+
+    fn read_geo_statistics(b: Bytes, column: usize) -> Vec<Option<GeospatialStatistics>> {
+        read_row_group_metadata(b)
+            .iter()
+            .map(|row_group| row_group.column(column).geo_statistics().cloned())
+            .collect()
+    }
+
+    #[test]
+    fn test_write_statistics_not_arrow() {
+        // Four row groups: one all non-null, one with invalid WKB, one with a null,
+        // and one with all nulls
+        let column_values = vec![
+            [wkb_point_xy(1.0, 2.0), wkb_point_xy(11.0, 12.0)].map(ByteArray::from),
+            ["this is not valid wkb".into(), wkb_point_xy(31.0, 32.0)].map(ByteArray::from),
+            [wkb_point_xy(21.0, 22.0), vec![]].map(ByteArray::from),
+            [ByteArray::new(), ByteArray::new()],
+        ];
+        let def_levels = [[1, 1], [1, 1], [1, 0], [0, 0]];
+
+        // Ensure that nulls are omitted, that completely empty stats are omitted,
+        // and that invalid WKB results in empty stats
+        let expected_geometry_types = [Some(vec![1]), None, Some(vec![1]), None];
+        let expected_bounding_box = [
+            Some(BoundingBox::new(1.0, 11.0, 2.0, 12.0)),
+            None,
+            Some(BoundingBox::new(21.0, 21.0, 22.0, 22.0)),
+            None,
+        ];
+
+        let schema = parquet_schema_geometry();
+        let props = WriterProperties::builder()
+            .set_statistics_enabled(EnabledStatistics::Chunk)
+            .build();
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer =
+            SerializedFileWriter::new(&mut buf, schema.root_schema_ptr(), Arc::new(props)).unwrap();
+
+        for (def_levels, values) in zip(&def_levels, &column_values) {
+            let mut rg = writer.next_row_group().unwrap();
+            let mut col = rg.next_column().unwrap().unwrap();
+            col.typed::<ByteArrayType>()
+                .write_batch(values, Some(def_levels), None)
+                .unwrap();
+            col.close().unwrap();
+            rg.close().unwrap();
+        }
+
+        writer.close().unwrap();
+
+        // Check geospatial statistics on file read
+        let buf_bytes = Bytes::from(buf);
+        let all_geo_stats = read_geo_statistics(buf_bytes.clone(), 0);
+        assert_eq!(all_geo_stats.len(), column_values.len());
+        assert_eq!(expected_geometry_types.len(), column_values.len());
+        assert_eq!(expected_bounding_box.len(), column_values.len());
+
+        for i in 0..column_values.len() {
+            if let Some(geo_stats) = all_geo_stats[i].as_ref() {
+                assert_eq!(
+                    geo_stats.geospatial_types(),
+                    expected_geometry_types[i].as_ref()
+                );
+                assert_eq!(geo_stats.bounding_box(), expected_bounding_box[i].as_ref());
+            } else {
+                assert!(expected_geometry_types[i].is_none());
+                assert!(expected_bounding_box[i].is_none());
+            }
+        }
+
+        for (i, rg) in read_row_group_metadata(buf_bytes).iter().enumerate() {
+            // We should have written Statistics with a null_count
+            let stats = rg.column(0).statistics().unwrap();
+            let expected_null_count: u64 = def_levels[i].iter().map(|l| (*l == 0) as u64).sum();
+            assert_eq!(stats.null_count_opt(), Some(expected_null_count));
+
+            // ...but there should be no min or max value
+            assert!(stats.min_bytes_opt().is_none());
+            assert!(stats.max_bytes_opt().is_none());
+
+            // There should be no index for this column
+            assert!(rg.column(0).column_index_length().is_none());
+            assert!(rg.column(0).column_index_offset().is_none());
+        }
+    }
+
+    #[test]
+    fn test_write_statistics_arrow() {
+        let arrow_schema = Arc::new(Schema::new(vec![Field::new(
+            "geom",
+            DataType::Binary,
+            true,
+        )]));
+
+        // Check the same cases as for the non-arrow writer. These need checking again because
+        // the arrow writer uses a different encoder where the code path for skipping nulls
+        // is independent.
+        let column_values = [
+            wkb_array_xy([Some((1.0, 2.0)), Some((11.0, 12.0))]),
+            create_array!(
+                Binary,
+                [
+                    "this is not valid wkb".as_bytes(),
+                    &wkb_point_xy(31.0, 32.0)
+                ]
+            ),
+            wkb_array_xy([Some((21.0, 22.0)), None]),
+            wkb_array_xy([None, None]),
+        ];
+
+        let expected_geometry_types = [Some(vec![1]), None, Some(vec![1]), None];
+        let expected_bounding_box = [
+            Some(BoundingBox::new(1.0, 11.0, 2.0, 12.0)),
+            None,
+            Some(BoundingBox::new(21.0, 21.0, 22.0, 22.0)),
+            None,
+        ];
+
+        let schema = parquet_schema_geometry();
+        let props = WriterProperties::builder()
+            .set_statistics_enabled(EnabledStatistics::Chunk)
+            .build();
+        let options = ArrowWriterOptions::new()
+            .with_parquet_schema(schema)
+            .with_properties(props);
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut file_writer =
+            ArrowWriter::try_new_with_options(&mut buf, arrow_schema.clone(), options).unwrap();
+
+        for values in &column_values {
+            let batch = RecordBatch::try_new(arrow_schema.clone(), vec![values.clone()]).unwrap();
+            file_writer.write(&batch).unwrap();
+            file_writer.flush().unwrap();
+        }
+
+        file_writer.close().unwrap();
+
+        // Check statistics on file read
+        let buf_bytes = Bytes::from(buf);
+        let all_geo_stats = read_geo_statistics(buf_bytes.clone(), 0);
+        assert_eq!(all_geo_stats.len(), column_values.len());
+
+        for i in 0..column_values.len() {
+            if let Some(geo_stats) = all_geo_stats[i].as_ref() {
+                assert_eq!(
+                    geo_stats.geospatial_types(),
+                    expected_geometry_types[i].as_ref()
+                );
+                assert_eq!(geo_stats.bounding_box(), expected_bounding_box[i].as_ref());
+            } else {
+                assert!(expected_geometry_types[i].is_none());
+                assert!(expected_bounding_box[i].is_none());
+            }
+        }
+
+        for (i, rg) in read_row_group_metadata(buf_bytes).iter().enumerate() {
+            // We should have written Statistics with a null_count
+            let stats = rg.column(0).statistics().unwrap();
+            let expected_null_count = column_values[i].null_count();
+            assert_eq!(stats.null_count_opt(), Some(expected_null_count as u64));
+
+            // ...but there should be no min or max value
+            assert!(stats.min_bytes_opt().is_none());
+            assert!(stats.max_bytes_opt().is_none());
+
+            // There should be no index for this column
+            assert!(rg.column(0).column_index_length().is_none());
+            assert!(rg.column(0).column_index_offset().is_none());
+        }
+    }
+
+    #[test]
+    fn test_roundtrip_statistics_geospatial() {
+        let path = format!(
+            "{}/geospatial/geospatial.parquet",
+            arrow::util::test_util::parquet_test_data(),
+        );
+
+        test_roundtrip_statistics(&path, 2);
+    }
+
+    #[test]
+    fn test_roundtrip_geospatial_with_nan() {
+        let path = format!(
+            "{}/geospatial/geospatial-with-nan.parquet",
+            arrow::util::test_util::parquet_test_data(),
+        );
+
+        test_roundtrip_statistics(&path, 0);
+    }
+
+    #[test]
+    fn test_roundtrip_statistics_crs() {
+        let path = format!(
+            "{}/geospatial/crs-default.parquet",
+            arrow::util::test_util::parquet_test_data(),
+        );
+
+        test_roundtrip_statistics(&path, 0);
+    }
+
+    fn test_roundtrip_statistics(path: &str, column: usize) {
+        let file_bytes = Bytes::from(std::fs::read(path).unwrap());
+
+        let reader = SerializedFileReader::new(file_bytes.clone()).unwrap();
+        let mut values = Vec::new();
+        let mut def_levels = Vec::new();
+
+        let schema = parquet_schema_geometry();
+        let props = WriterProperties::builder()
+            .set_statistics_enabled(EnabledStatistics::Chunk)
+            .build();
+
+        let mut buf = Vec::with_capacity(1024);
+        let mut writer =
+            SerializedFileWriter::new(&mut buf, schema.root_schema_ptr(), Arc::new(props)).unwrap();
+
+        for i in 0..reader.num_row_groups() {
+            let row_group = reader.get_row_group(i).unwrap();
+            values.truncate(0);
+            def_levels.truncate(0);
+
+            let mut row_group_out = writer.next_row_group().unwrap();
+
+            if let ColumnReader::ByteArrayColumnReader(mut reader) =
+                row_group.get_column_reader(column).unwrap()
+            {
+                reader
+                    .read_records(1000000, Some(&mut def_levels), None, &mut values)
+                    .unwrap();
+
+                let mut col = row_group_out.next_column().unwrap().unwrap();
+                col.typed::<ByteArrayType>()
+                    .write_batch(&values, Some(&def_levels), None)
+                    .unwrap();
+                col.close().unwrap();
+                row_group_out.close().unwrap();
+            } else {
+                panic!("Unexpected geometry column type");
+            }
+        }
+
+        writer.close().unwrap();
+
+        let actual_stats = read_geo_statistics(buf.into(), 0);
+        let expected_stats = read_geo_statistics(file_bytes.clone(), column);
+
+        assert_eq!(actual_stats.len(), expected_stats.len());
+        for i in 0..expected_stats.len() {
+            assert_eq!(actual_stats[i], expected_stats[i], "Row group {i}");
+        }
+    }
+
+    fn parquet_schema_geometry() -> SchemaDescriptor {
+        let wkb_meta = WkbMetadata::new(None, None);
+        let wkb_type = WkbType::new(Some(wkb_meta));
+
+        let field = Field::new("geo", DataType::Binary, true).with_extension_type(wkb_type);
+        let schema = Schema::new(vec![field]);
+
+        ArrowSchemaConverter::new().convert(&schema).unwrap()
+    }
+
+    fn wkb_array_xy(coords: impl IntoIterator<Item = Option<(f64, f64)>>) -> ArrayRef {
+        let array = BinaryArray::from_iter(
+            coords
+                .into_iter()
+                .map(|maybe_xy| maybe_xy.map(|(x, y)| wkb_point_xy(x, y))),
+        );
+        Arc::new(array)
+    }
+}
diff --git a/parquet/tests/variant_integration.rs b/parquet/tests/variant_integration.rs
new file mode 100644
index 000000000000..b97b3f225485
--- /dev/null
+++ b/parquet/tests/variant_integration.rs
@@ -0,0 +1,416 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Comprehensive integration tests for Parquet files with Variant columns
+//!
+//! This test harness reads test case definitions from cases.json, loads expected
+//! Variant values from .variant.bin files, reads Parquet files, converts StructArray
+//! to VariantArray, and verifies that extracted values match expected results.
+//!
+//! Inspired by the arrow-go implementation: <https://github.com/apache/arrow-go/pull/455/files>
+
+use arrow::util::test_util::parquet_test_data;
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use parquet_variant::{Variant, VariantMetadata};
+use parquet_variant_compute::{VariantArray, unshred_variant};
+use serde::Deserialize;
+use std::path::Path;
+use std::sync::LazyLock;
+use std::{fs, path::PathBuf};
+
+type Result<T> = std::result::Result<T, String>;
+
+/// Creates a test function for a given case number.
+///
+/// If an error message is provided, generate an error test case that expects it.
+///
+/// Note the index is zero-based, while the case number is one-based
+macro_rules! variant_test_case {
+    ($case_num:literal $(, $expected_error:literal )? ) => {
+        paste::paste! {
+            #[test]
+            $( #[should_panic(expected = $expected_error)] )?
+            fn [<test_variant_integration_case_ $case_num>]() {
+                all_cases()[$case_num - 1].run()
+            }
+        }
+    };
+}
+
+// Generate test functions for each case
+// Notes
+// - case 3 is empty in cases.json for some reason
+// - cases 40, 42, 43, 87, 125, 127, 128 and 137 are expected to fail always (they include invalid variants)
+// - the remaining cases are expected to (eventually) pass
+
+variant_test_case!(1);
+variant_test_case!(2);
+// case 3 is empty in cases.json 🤷
+// ```json
+// {
+//   "case_number" : 3
+// },
+// ```
+variant_test_case!(3, "parquet_file must be set");
+variant_test_case!(4);
+variant_test_case!(5);
+variant_test_case!(6);
+variant_test_case!(7);
+variant_test_case!(8);
+variant_test_case!(9);
+variant_test_case!(10);
+variant_test_case!(11);
+variant_test_case!(12);
+variant_test_case!(13);
+variant_test_case!(14);
+variant_test_case!(15);
+variant_test_case!(16);
+variant_test_case!(17);
+variant_test_case!(18);
+variant_test_case!(19);
+variant_test_case!(20);
+variant_test_case!(21);
+variant_test_case!(22);
+variant_test_case!(23);
+variant_test_case!(24);
+variant_test_case!(25);
+variant_test_case!(26);
+variant_test_case!(27);
+variant_test_case!(28);
+variant_test_case!(29);
+variant_test_case!(30);
+variant_test_case!(31);
+variant_test_case!(32);
+variant_test_case!(33);
+variant_test_case!(34);
+variant_test_case!(35);
+variant_test_case!(36);
+variant_test_case!(37);
+variant_test_case!(38);
+variant_test_case!(39);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(40, "both value and typed_value are non-null");
+variant_test_case!(41);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(42, "both value and typed_value are non-null");
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(43, "Field 'b' appears in both typed_value and value");
+variant_test_case!(44);
+variant_test_case!(45);
+variant_test_case!(46);
+variant_test_case!(47);
+variant_test_case!(48);
+variant_test_case!(49);
+variant_test_case!(50);
+variant_test_case!(51);
+variant_test_case!(52);
+variant_test_case!(53);
+variant_test_case!(54);
+variant_test_case!(55);
+variant_test_case!(56);
+variant_test_case!(57);
+variant_test_case!(58);
+variant_test_case!(59);
+variant_test_case!(60);
+variant_test_case!(61);
+variant_test_case!(62);
+variant_test_case!(63);
+variant_test_case!(64);
+variant_test_case!(65);
+variant_test_case!(66);
+variant_test_case!(67);
+variant_test_case!(68);
+variant_test_case!(69);
+variant_test_case!(70);
+variant_test_case!(71);
+variant_test_case!(72);
+variant_test_case!(73);
+variant_test_case!(74);
+variant_test_case!(75);
+variant_test_case!(76);
+variant_test_case!(77);
+variant_test_case!(78);
+variant_test_case!(79);
+variant_test_case!(80);
+variant_test_case!(81);
+variant_test_case!(82);
+variant_test_case!(83);
+// Invalid case, implementations can choose to read the shredded value or error out
+variant_test_case!(84);
+variant_test_case!(85);
+variant_test_case!(86);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(87, "Expected object in value field");
+variant_test_case!(88);
+variant_test_case!(89);
+variant_test_case!(90);
+variant_test_case!(91);
+variant_test_case!(92);
+variant_test_case!(93);
+variant_test_case!(94);
+variant_test_case!(95);
+variant_test_case!(96);
+variant_test_case!(97);
+variant_test_case!(98);
+variant_test_case!(99);
+variant_test_case!(100);
+variant_test_case!(101);
+variant_test_case!(102);
+variant_test_case!(103);
+variant_test_case!(104);
+variant_test_case!(105);
+variant_test_case!(106);
+variant_test_case!(107);
+variant_test_case!(108);
+variant_test_case!(109);
+variant_test_case!(110);
+variant_test_case!(111);
+variant_test_case!(112);
+variant_test_case!(113);
+variant_test_case!(114);
+variant_test_case!(115);
+variant_test_case!(116);
+variant_test_case!(117);
+variant_test_case!(118);
+variant_test_case!(119);
+variant_test_case!(120);
+variant_test_case!(121);
+variant_test_case!(122);
+variant_test_case!(123);
+variant_test_case!(124);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(125, "Field 'b' appears in both typed_value and value");
+variant_test_case!(126);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(127, "Illegal shredded value type: UInt32");
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(128, "Expected object in value field");
+variant_test_case!(129);
+variant_test_case!(130);
+variant_test_case!(131);
+variant_test_case!(132);
+variant_test_case!(133);
+variant_test_case!(134);
+variant_test_case!(135);
+variant_test_case!(136);
+// Is an error case (should be failing as the expected error message indicates)
+variant_test_case!(137, "Illegal shredded value type: FixedSizeBinary(4)");
+variant_test_case!(138);
+
+/// Test case definition structure matching the format from
+/// `parquet-testing/shredded_variant/cases.json`
+///
+/// See [README] for details.
+///
+/// [README]: https://github.com/apache/parquet-testing/blob/master/shredded_variant/README.md
+///
+/// Example JSON
+/// ```json
+/// {
+///   "case_number" : 5,
+///   "test" : "testShreddedVariantPrimitives",
+///   "parquet_file" : "case-005.parquet",
+///   "variant_file" : "case-005_row-0.variant.bin",
+///   "variant" : "Variant(metadata=VariantMetadata(dict={}), value=Variant(type=BOOLEAN_FALSE, value=false))"
+/// },
+/// ```
+#[allow(dead_code)] // some fields are not used except when printing the struct
+#[derive(Debug, Clone, Deserialize)]
+struct VariantTestCase {
+    /// Case number (e.g., 1, 2, 4, etc. - note: case 3 is missing any data)
+    pub case_number: u32,
+    /// Test method name (e.g., "testSimpleArray")
+    pub test: Option<String>,
+    /// Name of the parquet file (e.g., "case-001.parquet")
+    pub parquet_file: Option<String>,
+    /// Expected variant binary file (e.g., "case-001_row-0.variant.bin") - None for error cases
+    pub variant_file: Option<String>,
+    /// Multiple expected variant binary files, for multi row inputs. If there
+    /// is no variant, there is no file
+    pub variant_files: Option<Vec<Option<String>>>,
+    /// Expected error message for negative test cases
+    ///
+    /// (this is the message from the cases.json file, which is from the Iceberg
+    /// implementation, so it is not guaranteed to match the actual Rust error message)
+    pub error_message: Option<String>,
+    /// Description of the variant value (for debugging); stored under the JSON key `variant`
+    #[serde(alias = "variant")]
+    pub variant_description: Option<String>,
+}
+
+/// Test execution and data loading for a single [`VariantTestCase`]
+impl VariantTestCase {
+    /// Runs this test case, comparing the unshredded parquet data to the expected variants.
+    ///
+    /// Panics if the data cannot be loaded or unshredded, if an expected error does not
+    /// occur, or if the actual and expected variants differ.
+    fn run(&self) {
+        println!("{self:#?}");
+
+        let variant_data = self.load_variants();
+        let variant_array = self.load_parquet();
+
+        // `load_parquet` returns shredded variant values, but the test expectations are provided as
+        // unshredded variant values. Unshred (failing for invalid input) so we can compare them.
+        let variant_array = unshred_variant(&variant_array).unwrap();
+
+        // if this is an error case, the expected error message should be set
+        if let Some(expected_error) = &self.error_message {
+            // Unshredding variant array should have already triggered the error
+            panic!("Expected an error '{expected_error}', but got no error");
+        }
+
+        assert_eq!(
+            variant_array.len(),
+            variant_data.len(),
+            "Number of variants in parquet file does not match expected number"
+        );
+        for (i, expected) in variant_data.iter().enumerate() {
+            if variant_array.is_null(i) {
+                // a null row in the parquet data must not have an expected variant
+                assert!(
+                    expected.is_none(),
+                    "Expected non-null variant at index {i}, but got null"
+                );
+                continue;
+            }
+            let actual = variant_array.value(i);
+            let expected = expected
+                .as_ref()
+                .expect("Expected non-null variant data")
+                .as_variant();
+
+            assert_eq!(
+                actual, expected,
+                "Variant data mismatch at index {i}\n\nactual\n{actual:#?}\n\nexpected\n{expected:#?}"
+            );
+        }
+    }
+
+    /// Parses the expected variant files, returning a vector of `ExpectedVariant` or None
+    /// if the corresponding entry in `variant_files` is null. Panics if a file fails to load
+    fn load_variants(&self) -> Vec<Option<ExpectedVariant>> {
+        let variant_files: Box<dyn Iterator<Item = Option<&String>>> =
+            match (&self.variant_files, &self.variant_file) {
+                (Some(files), None) => Box::new(files.iter().map(|f| f.as_ref())),
+                (None, Some(file)) => Box::new(std::iter::once(Some(file))),
+                // error cases may not have any variant files (setting both also yields none)
+                _ => Box::new(std::iter::empty()),
+            };
+
+        // load each file; `f?` passes a null entry through as `None`
+        variant_files
+            .map(|f| {
+                let v = ExpectedVariant::try_load(&TEST_CASE_DIR.join(f?))
+                    .expect("Failed to load expected variant");
+                Some(v)
+            })
+            .collect()
+    }
+
+    /// Loads the single-batch parquet file and returns its `var` column as a VariantArray
+    fn load_parquet(&self) -> VariantArray {
+        let parquet_file = self
+            .parquet_file
+            .as_ref()
+            .expect("parquet_file must be set");
+        let path = TEST_CASE_DIR.join(parquet_file);
+        let file = fs::File::open(&path)
+            .unwrap_or_else(|e| panic!("cannot open parquet file {path:?}: {e}"));
+
+        let reader = ParquetRecordBatchReaderBuilder::try_new(file)
+            .and_then(|b| b.build())
+            .unwrap_or_else(|e| panic!("Error reading parquet reader for {path:?}: {e}"));
+
+        let mut batches: Vec<_> = reader
+            .collect::<std::result::Result<_, _>>()
+            .unwrap_or_else(|e| panic!("Error reading parquet batches for {path:?}: {e}"));
+
+        if batches.is_empty() {
+            panic!("No parquet batches were found in file {path:?}");
+        }
+        if batches.len() > 1 {
+            panic!(
+                "Multiple parquet batches were found in file {path:?}, only single batch supported"
+            );
+        }
+        let batch = batches.swap_remove(0);
+
+        // The schema is "id", "var" for the id and variant columns
+        // TODO: support the actual parquet logical type annotation somehow
+        let var = batch
+            .column_by_name("var")
+            .unwrap_or_else(|| panic!("No 'var' column found in parquet file {path:?}"));
+
+        VariantArray::try_new(var).unwrap_or_else(|e| {
+            panic!("Error converting StructArray to VariantArray for {path:?}: {e}")
+        })
+    }
+}
+
+/// Expected variant loaded from a `.variant.bin` file
+#[derive(Debug, Clone)]
+struct ExpectedVariant {
+    data: Vec<u8>, // entire file: metadata bytes followed by value bytes
+    data_offset: usize, // size of the metadata, i.e. where the value bytes start
+}
+
+impl ExpectedVariant {
+    /// Reads a `*.variant.bin` file: serialized variant metadata followed by the variant value
+    fn try_load(path: &Path) -> Result<Self> {
+        let data = match fs::read(path) {
+            Ok(bytes) => bytes,
+            Err(e) => return Err(format!("cannot read variant file {path:?}: {e}")),
+        };
+        let data_offset = VariantMetadata::try_new(&data)
+            .map(|metadata| metadata.size())
+            .map_err(|e| format!("cannot parse variant metadata from {path:?}: {e}"))?;
+        Ok(Self { data, data_offset })
+    }
+
+    /// Splits the bytes at `data_offset` and builds a `Variant`; panics on invalid data
+    fn as_variant(&self) -> Variant<'_, '_> {
+        let (metadata, value) = self.data.split_at(self.data_offset);
+        Variant::try_new(metadata, value).expect("Invalid variant data")
+    }
+}
+
+/// Directory holding `cases.json` and the other shredded variant test files
+static TEST_CASE_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
+    let parquet_data_dir = PathBuf::from(parquet_test_data());
+    parquet_data_dir.join("..").join("shredded_variant")
+});
+
+/// All tests
+static ALL_CASES: LazyLock<Result<Vec<VariantTestCase>>> = LazyLock::new(|| {
+    let cases_file = TEST_CASE_DIR.join("cases.json");
+
+    if !cases_file.exists() {
+        return Err(format!("cases.json not found at {}", cases_file.display()));
+    }
+
+    let content = fs::read_to_string(&cases_file)
+        .map_err(|e| format!("cannot read cases file {cases_file:?}: {e}"))?;
+
+    serde_json::from_str::<Vec<VariantTestCase>>(content.as_str())
+        .map_err(|e| format!("cannot parse json from {cases_file:?}: {e}"))
+});
+
+/// Returns the test cases held in the static `ALL_CASES`, panicking if loading failed
+fn all_cases() -> &'static [VariantTestCase] {
+    ALL_CASES.as_deref().unwrap()
+}
diff --git a/parquet_derive/README.md b/parquet_derive/README.md
index c267a92430e0..783c71abd599 100644
--- a/parquet_derive/README.md
+++ b/parquet_derive/README.md
@@ -28,6 +28,8 @@ Derive also has some support for the chrono time library. You must must enable t
 
 ## Usage
 
+See the example in [ParquetRecordWriter](<https://docs.rs/parquet_derive/latest/parquet_derive/derive.ParquetRecordWriter.html>) for reading from and writing to a parquet file.
+
 Add this to your Cargo.toml:
 
 ```toml
@@ -135,6 +137,8 @@ chunks.read_from_row_group(&mut *row_group, 1).unwrap();
 Testing a `*_derive` crate requires an intermediate crate. Go to `parquet_derive_test` and run `cargo test` for
 unit tests.
 
+To compile and test doctests, run `cargo test --doc -- --show-output`
+
 ## Docs
 
 To build documentation, run `cargo doc --no-deps`.
diff --git a/parquet_derive/src/lib.rs b/parquet_derive/src/lib.rs
index 6a5a158155ba..1aaa1abfd2a3 100644
--- a/parquet_derive/src/lib.rs
+++ b/parquet_derive/src/lib.rs
@@ -22,7 +22,7 @@
     html_logo_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg",
     html_favicon_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![warn(missing_docs)]
 #![recursion_limit = "128"]
 
@@ -34,7 +34,7 @@ extern crate quote;
 
 extern crate parquet;
 
-use ::syn::{parse_macro_input, Data, DataStruct, DeriveInput};
+use ::syn::{Data, DataStruct, DeriveInput, parse_macro_input};
 
 mod parquet_field;
 
@@ -49,43 +49,79 @@ mod parquet_field;
 ///
 /// Example:
 ///
-/// ```no_run
-/// use parquet_derive::ParquetRecordWriter;
-/// use std::io::{self, Write};
+/// ```rust
 /// use parquet::file::properties::WriterProperties;
 /// use parquet::file::writer::SerializedFileWriter;
 /// use parquet::record::RecordWriter;
+/// use parquet_derive::ParquetRecordWriter;
 /// use std::fs::File;
-///
 /// use std::sync::Arc;
 ///
-/// #[derive(ParquetRecordWriter)]
-/// struct ACompleteRecord<'a> {
-///   pub a_bool: bool,
-///   pub a_str: &'a str,
+/// // For reader
+/// use parquet::file::reader::{FileReader, SerializedFileReader};
+/// use parquet::record::RecordReader;
+/// use parquet_derive::ParquetRecordReader;
+///
+/// #[derive(Debug, ParquetRecordWriter, ParquetRecordReader)]
+/// struct ACompleteRecord {
+///     pub a_bool: bool,
+///     pub a_string: String,
+/// }
+///
+/// fn write_some_records() {
+///     let samples = vec![
+///         ACompleteRecord {
+///             a_bool: true,
+///             a_string: "I'm true".into(),
+///         },
+///         ACompleteRecord {
+///             a_bool: false,
+///             a_string: "I'm false".into(),
+///         },
+///     ];
+///
+///     let schema = samples.as_slice().schema().unwrap();
+///
+///     let props = Arc::new(WriterProperties::builder().build());
+///
+///     let file = File::create("example.parquet").unwrap();
+///
+///     let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
+///
+///     let mut row_group = writer.next_row_group().unwrap();
+///
+///     samples
+///         .as_slice()
+///         .write_to_row_group(&mut row_group)
+///         .unwrap();
+///
+///     row_group.close().unwrap();
+///
+///     writer.close().unwrap();
+/// }
+///
+/// fn read_some_records() -> Vec<ACompleteRecord> {
+///     let mut samples: Vec<ACompleteRecord> = Vec::new();
+///     let file = File::open("example.parquet").unwrap();
+///
+///     let reader = SerializedFileReader::new(file).unwrap();
+///     let mut row_group = reader.get_row_group(0).unwrap();
+///     samples.read_from_row_group(&mut *row_group, 2).unwrap();
+///
+///     samples
 /// }
 ///
-/// pub fn write_some_records() {
-///   let samples = vec![
-///     ACompleteRecord {
-///       a_bool: true,
-///       a_str: "I'm true"
-///     },
-///     ACompleteRecord {
-///       a_bool: false,
-///       a_str: "I'm false"
-///     }
-///   ];
-///  let file = File::open("some_file.parquet").unwrap();
+/// pub fn main() {
+///     write_some_records();
 ///
-///  let schema = samples.as_slice().schema().unwrap();
+///     let records = read_some_records();
 ///
-///  let mut writer = SerializedFileWriter::new(file, schema, Default::default()).unwrap();
+///     std::fs::remove_file("example.parquet").unwrap();
 ///
-///  let mut row_group = writer.next_row_group().unwrap();
-///  samples.as_slice().write_to_row_group(&mut row_group).unwrap();
-///  row_group.close().unwrap();
-///  writer.close().unwrap();
+///     assert_eq!(
+///         format!("{:?}", records),
+///         "[ACompleteRecord { a_bool: true, a_string: \"I'm true\" }, ACompleteRecord { a_bool: false, a_string: \"I'm false\" }]"
+///     );
 /// }
 /// ```
 ///
@@ -164,7 +200,7 @@ pub fn parquet_record_writer(input: proc_macro::TokenStream) -> proc_macro::Toke
 ///
 /// Example:
 ///
-/// ```no_run
+/// ```rust
 /// use parquet::record::RecordReader;
 /// use parquet::file::{serialized_reader::SerializedFileReader, reader::FileReader};
 /// use parquet_derive::{ParquetRecordReader};
@@ -241,7 +277,7 @@ pub fn parquet_record_reader(input: proc_macro::TokenStream) -> proc_macro::Toke
                   return Err(::parquet::errors::ParquetError::General(error_msg));
                 }
               };
-              if let Ok(mut column_reader) = row_group_reader.get_column_reader(idx) {
+              if let Ok(column_reader) = row_group_reader.get_column_reader(idx) {
                   #reader_snippets
               } else {
                   return Err(::parquet::errors::ParquetError::General("Failed to get next column".into()))
diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs
index f99ea3e0356c..7473f2305517 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -86,7 +86,7 @@ impl Field {
 
         let vals_builder = match &self.ty {
             Type::TypePath(_) => self.copied_direct_vals(),
-            Type::Option(ref first_type) => match **first_type {
+            Type::Option(first_type) => match **first_type {
                 Type::TypePath(_) => self.option_into_vals(),
                 Type::Reference(_, ref second_type) => match **second_type {
                     Type::TypePath(_) => self.option_into_vals(),
@@ -98,7 +98,7 @@ impl Field {
                 },
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
-            Type::Reference(_, ref first_type) => match **first_type {
+            Type::Reference(_, first_type) => match **first_type {
                 Type::TypePath(_) => self.copied_direct_vals(),
                 Type::Option(ref second_type) => match **second_type {
                     Type::TypePath(_) => self.option_into_vals(),
@@ -122,7 +122,7 @@ impl Field {
                 },
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
-            Type::Vec(ref first_type) => match **first_type {
+            Type::Vec(first_type) => match **first_type {
                 Type::TypePath(_) => self.copied_direct_vals(),
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
@@ -131,7 +131,7 @@ impl Field {
 
         let definition_levels = match &self.ty {
             Type::TypePath(_) => None,
-            Type::Option(ref first_type) => match **first_type {
+            Type::Option(first_type) => match **first_type {
                 Type::TypePath(_) => Some(self.optional_definition_levels()),
                 Type::Option(_) => unimplemented!("Unsupported nesting encountered"),
                 Type::Reference(_, ref second_type)
@@ -142,10 +142,10 @@ impl Field {
                     _ => unimplemented!("Unsupported nesting encountered"),
                 },
             },
-            Type::Reference(_, ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type, _)
-            | Type::Slice(ref first_type) => match **first_type {
+            Type::Reference(_, first_type)
+            | Type::Vec(first_type)
+            | Type::Array(first_type, _)
+            | Type::Slice(first_type) => match **first_type {
                 Type::TypePath(_) => None,
                 Type::Vec(ref second_type)
                 | Type::Array(ref second_type, _)
@@ -192,7 +192,7 @@ impl Field {
         // this expression just switches between non-nullable and nullable write statements
         let write_batch_expr = if definition_levels.is_some() {
             quote! {
-                if let #column_writer(ref mut typed) = column_writer.untyped() {
+                if let #column_writer(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], Some(&definition_levels[..]), None)?;
                 } else {
                     panic!("Schema and struct disagree on type for {}", stringify!{#ident})
@@ -200,7 +200,7 @@ impl Field {
             }
         } else {
             quote! {
-                if let #column_writer(ref mut typed) = column_writer.untyped() {
+                if let #column_writer(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], None, None)?;
                 } else {
                     panic!("Schema and struct disagree on type for {}", stringify!{#ident})
@@ -265,7 +265,7 @@ impl Field {
         // it to its field in the corresponding struct
         let vals_writer = match &self.ty {
             Type::TypePath(_) => self.copied_direct_fields(),
-            Type::Reference(_, ref first_type) => match **first_type {
+            Type::Reference(_, first_type) => match **first_type {
                 Type::TypePath(_) => self.copied_direct_fields(),
                 Type::Slice(ref second_type) => match **second_type {
                     Type::TypePath(_) => self.copied_direct_fields(),
@@ -273,7 +273,7 @@ impl Field {
                 },
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
-            Type::Vec(ref first_type) => match **first_type {
+            Type::Vec(first_type) => match **first_type {
                 Type::TypePath(_) => self.copied_direct_fields(),
                 ref f => unimplemented!("Unsupported: {:#?}", f),
             },
@@ -356,7 +356,7 @@ impl Field {
         let binding = if copy_to_vec {
             quote! { let Some(inner) = rec.#field_name }
         } else {
-            quote! { let Some(ref inner) = rec.#field_name }
+            quote! { let Some(inner) = &rec.#field_name }
         };
 
         let some = if is_a_timestamp {
@@ -545,11 +545,11 @@ impl Type {
     fn leaf_type_recursive_helper<'a>(ty: &'a Type, parent_ty: Option<&'a Type>) -> &'a Type {
         match ty {
             Type::TypePath(_) => parent_ty.unwrap_or(ty),
-            Type::Option(ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type, _)
-            | Type::Slice(ref first_type)
-            | Type::Reference(_, ref first_type) => {
+            Type::Option(first_type)
+            | Type::Vec(first_type)
+            | Type::Array(first_type, _)
+            | Type::Slice(first_type)
+            | Type::Reference(_, first_type) => {
                 Type::leaf_type_recursive_helper(first_type, Some(ty))
             }
         }
@@ -562,12 +562,12 @@ impl Type {
         let leaf_type = self.leaf_type_recursive();
 
         match leaf_type {
-            Type::TypePath(ref type_) => type_,
-            Type::Option(ref first_type)
-            | Type::Vec(ref first_type)
-            | Type::Array(ref first_type, _)
-            | Type::Slice(ref first_type)
-            | Type::Reference(_, ref first_type) => match **first_type {
+            Type::TypePath(type_) => type_,
+            Type::Option(first_type)
+            | Type::Vec(first_type)
+            | Type::Array(first_type, _)
+            | Type::Slice(first_type)
+            | Type::Reference(_, first_type) => match **first_type {
                 Type::TypePath(ref type_) => type_,
                 _ => unimplemented!("leaf_type() should only return shallow types"),
             },
@@ -612,14 +612,14 @@ impl Type {
         let leaf_type = self.leaf_type_recursive();
 
         match leaf_type {
-            Type::Array(ref first_type, _length) => {
+            Type::Array(first_type, _length) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return BasicType::FIXED_LEN_BYTE_ARRAY;
                     }
                 }
             }
-            Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
+            Type::Vec(first_type) | Type::Slice(first_type) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return BasicType::BYTE_ARRAY;
@@ -643,7 +643,7 @@ impl Type {
             }
             "f32" => BasicType::FLOAT,
             "f64" => BasicType::DOUBLE,
-            "String" | "str" => BasicType::BYTE_ARRAY,
+            "String" | "str" | "Arc < str >" => BasicType::BYTE_ARRAY,
             "Uuid" => BasicType::FIXED_LEN_BYTE_ARRAY,
             f => unimplemented!("{} currently is not supported", f),
         }
@@ -654,7 +654,7 @@ impl Type {
         let leaf_type = self.leaf_type_recursive();
 
         // `[u8; N]` => Some(N)
-        if let Type::Array(ref first_type, length) = leaf_type {
+        if let Type::Array(first_type, length) = leaf_type {
             if let Type::TypePath(_) = **first_type {
                 if last_part == "u8" {
                     return Some(length.clone());
@@ -674,14 +674,14 @@ impl Type {
         let leaf_type = self.leaf_type_recursive();
 
         match leaf_type {
-            Type::Array(ref first_type, _length) => {
+            Type::Array(first_type, _length) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return quote! { None };
                     }
                 }
             }
-            Type::Vec(ref first_type) | Type::Slice(ref first_type) => {
+            Type::Vec(first_type) | Type::Slice(first_type) => {
                 if let Type::TypePath(_) = **first_type {
                     if last_part == "u8" {
                         return quote! { None };
@@ -733,7 +733,7 @@ impl Type {
             "NaiveDate" => quote! { Some(LogicalType::Date) },
             "NaiveDateTime" => quote! { None },
             "f32" | "f64" => quote! { None },
-            "String" | "str" => quote! { Some(LogicalType::String) },
+            "String" | "str" | "Arc < str >" => quote! { Some(LogicalType::String) },
             "Uuid" => quote! { Some(LogicalType::Uuid) },
             f => unimplemented!("{} currently is not supported", f),
         }
@@ -764,10 +764,10 @@ impl Type {
 
     fn from_type(f: &syn::Field, ty: &syn::Type) -> Self {
         match ty {
-            syn::Type::Path(ref p) => Type::from_type_path(f, p),
-            syn::Type::Reference(ref tr) => Type::from_type_reference(f, tr),
-            syn::Type::Array(ref ta) => Type::from_type_array(f, ta),
-            syn::Type::Slice(ref ts) => Type::from_type_slice(f, ts),
+            syn::Type::Path(p) => Type::from_type_path(f, p),
+            syn::Type::Reference(tr) => Type::from_type_reference(f, tr),
+            syn::Type::Array(ta) => Type::from_type_array(f, ta),
+            syn::Type::Slice(ts) => Type::from_type_slice(f, ts),
             other => unimplemented!(
                 "Unable to derive {:?} - it is currently an unsupported type\n{:#?}",
                 f.ident.as_ref().unwrap(),
@@ -790,7 +790,7 @@ impl Type {
                     let first_arg = &angle_args.args[0];
 
                     match first_arg {
-                        syn::GenericArgument::Type(ref typath) => typath.clone(),
+                        syn::GenericArgument::Type(typath) => typath.clone(),
                         other => unimplemented!("Unsupported: {:#?}", other),
                     }
                 }
@@ -857,7 +857,7 @@ mod test {
                         {
                             let vals : Vec < _ > = records . iter ( ) . map ( | rec | rec . counter as i64 ) . collect ( );
 
-                            if let ColumnWriter::Int64ColumnWriter ( ref mut typed ) = column_writer.untyped() {
+                            if let ColumnWriter::Int64ColumnWriter ( typed ) = column_writer.untyped() {
                                 typed . write_batch ( & vals [ .. ] , None , None ) ?;
                             }  else {
                                 panic!("Schema and struct disagree on type for {}" , stringify!{ counter } )
@@ -924,14 +924,14 @@ mod test {
                 let definition_levels : Vec < i16 > = self . iter ( ) . map ( | rec | if rec . optional_str . is_some ( ) { 1 } else { 0 } ) . collect ( ) ;
 
                 let vals: Vec <_> = records.iter().filter_map( |rec| {
-                    if let Some ( ref inner ) = rec . optional_str {
+                    if let Some ( inner ) = &rec . optional_str {
                         Some ( (&inner[..]).into() )
                     } else {
                         None
                     }
                 }).collect();
 
-                if let ColumnWriter::ByteArrayColumnWriter ( ref mut typed ) = column_writer.untyped() {
+                if let ColumnWriter::ByteArrayColumnWriter ( typed ) = column_writer.untyped() {
                     typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify ! { optional_str } )
@@ -948,14 +948,14 @@ mod test {
                         let definition_levels : Vec < i16 > = self . iter ( ) . map ( | rec | if rec . optional_string . is_some ( ) { 1 } else { 0 } ) . collect ( ) ;
 
                         let vals: Vec <_> = records.iter().filter_map( |rec| {
-                            if let Some ( ref inner ) = rec . optional_string {
+                            if let Some ( inner ) = &rec . optional_string {
                                 Some ( (&inner[..]).into() )
                             } else {
                                 None
                             }
                         }).collect();
 
-                        if let ColumnWriter::ByteArrayColumnWriter ( ref mut typed ) = column_writer.untyped() {
+                        if let ColumnWriter::ByteArrayColumnWriter ( typed ) = column_writer.untyped() {
                             typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
                         } else {
                             panic!("Schema and struct disagree on type for {}" , stringify ! { optional_string } )
@@ -978,7 +978,7 @@ mod test {
                             }
                         }).collect();
 
-                        if let ColumnWriter::Int32ColumnWriter ( ref mut typed ) = column_writer.untyped() {
+                        if let ColumnWriter::Int32ColumnWriter ( typed ) = column_writer.untyped() {
                             typed . write_batch ( & vals [ .. ] , Some(&definition_levels[..]) , None ) ? ;
                         }  else {
                             panic!("Schema and struct disagree on type for {}" , stringify ! { optional_dumb_int } )
@@ -1261,7 +1261,7 @@ mod test {
         assert_eq!(when.writer_snippet().to_string(),(quote!{
             {
                 let vals : Vec<_> = records.iter().map(|rec| rec.henceforth.timestamp_millis() ).collect();
-                if let ColumnWriter::Int64ColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::Int64ColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], None, None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ henceforth })
@@ -1281,7 +1281,7 @@ mod test {
                     }
                 }).collect();
 
-                if let ColumnWriter::Int64ColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::Int64ColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_happened })
@@ -1335,7 +1335,7 @@ mod test {
         assert_eq!(when.writer_snippet().to_string(),(quote!{
             {
                 let vals : Vec<_> = records.iter().map(|rec| rec.henceforth.signed_duration_since(::chrono::NaiveDate::from_ymd(1970, 1, 1)).num_days() as i32).collect();
-                if let ColumnWriter::Int32ColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::Int32ColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], None, None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ henceforth })
@@ -1355,7 +1355,7 @@ mod test {
                     }
                 }).collect();
 
-                if let ColumnWriter::Int32ColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::Int32ColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_happened })
@@ -1409,7 +1409,7 @@ mod test {
         assert_eq!(when.writer_snippet().to_string(),(quote!{
             {
                 let vals : Vec<_> = records.iter().map(|rec| rec.unique_id.as_bytes().to_vec().into() ).collect();
-                if let ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::FixedLenByteArrayColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], None, None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ unique_id })
@@ -1422,14 +1422,14 @@ mod test {
             {
                 let definition_levels : Vec<i16> = self.iter().map(|rec| if rec.maybe_unique_id.is_some() { 1 } else { 0 }).collect();
                 let vals : Vec<_> = records.iter().filter_map(|rec| {
-                    if let Some(ref inner) = rec.maybe_unique_id {
+                    if let Some(inner) = &rec.maybe_unique_id {
                         Some((&inner.to_string()[..]).into())
                     } else {
                         None
                     }
                 }).collect();
 
-                if let ColumnWriter::FixedLenByteArrayColumnWriter(ref mut typed) = column_writer.untyped() {
+                if let ColumnWriter::FixedLenByteArrayColumnWriter(typed) = column_writer.untyped() {
                     typed.write_batch(&vals[..], Some(&definition_levels[..]), None) ?;
                 } else {
                     panic!("Schema and struct disagree on type for {}" , stringify!{ maybe_unique_id })
diff --git a/parquet_derive_test/src/lib.rs b/parquet_derive_test/src/lib.rs
index 8375d0eaf9ee..fe96fa0e6122 100644
--- a/parquet_derive_test/src/lib.rs
+++ b/parquet_derive_test/src/lib.rs
@@ -19,10 +19,11 @@
     html_logo_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg",
     html_favicon_url = "https://raw.githubusercontent.com/apache/parquet-format/25f05e73d8cd7f5c83532ce51cb4f4de8ba5f2a2/logo/parquet-logos_1.svg"
 )]
-#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![allow(clippy::approx_constant)]
 
 use parquet_derive::{ParquetRecordReader, ParquetRecordWriter};
+use std::sync::Arc;
 
 #[derive(ParquetRecordWriter)]
 struct ACompleteRecord<'a> {
@@ -30,8 +31,10 @@ struct ACompleteRecord<'a> {
     pub a_str: &'a str,
     pub a_string: String,
     pub a_borrowed_string: &'a String,
+    pub a_arc_str: Arc<str>,
     pub maybe_a_str: Option<&'a str>,
     pub maybe_a_string: Option<String>,
+    pub maybe_a_arc_str: Option<Arc<str>>,
     pub i16: i16,
     pub i32: i32,
     pub u64: u64,
@@ -130,8 +133,10 @@ mod tests {
             REQUIRED BINARY          a_str (STRING);
             REQUIRED BINARY          a_string (STRING);
             REQUIRED BINARY          a_borrowed_string (STRING);
+            REQUIRED BINARY          a_arc_str (STRING);
             OPTIONAL BINARY          maybe_a_str (STRING);
             OPTIONAL BINARY          maybe_a_string (STRING);
+            OPTIONAL BINARY          maybe_a_arc_str (STRING);
             REQUIRED INT32           i16 (INTEGER(16,true));
             REQUIRED INT32           i32;
             REQUIRED INT64           u64 (INTEGER(64,false));
@@ -159,8 +164,10 @@ mod tests {
 
         let a_str = "hello mother".to_owned();
         let a_borrowed_string = "cool news".to_owned();
+        let a_arc_str: Arc<str> = "hello arc".into();
         let maybe_a_string = Some("it's true, I'm a string".to_owned());
         let maybe_a_str = Some(&a_str[..]);
+        let maybe_a_arc_str = Some(a_arc_str.clone());
         let borrowed_byte_vec = vec![0x68, 0x69, 0x70];
         let borrowed_maybe_byte_vec = Some(vec![0x71, 0x72]);
         let borrowed_maybe_borrowed_byte_vec = Some(&borrowed_byte_vec[..]);
@@ -170,8 +177,10 @@ mod tests {
             a_str: &a_str[..],
             a_string: "hello father".into(),
             a_borrowed_string: &a_borrowed_string,
+            a_arc_str,
             maybe_a_str: Some(&a_str[..]),
             maybe_a_string: Some(a_str.clone()),
+            maybe_a_arc_str,
             i16: -45,
             i32: 456,
             u64: 4563424,
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 000000000000..6d6e1bfb100f
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[toolchain]
+channel = "1.91"
+components = ["rustfmt", "clippy"]
diff --git a/rustfmt.toml b/rustfmt.toml
index 585c1b612978..bc9377059f7d 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -15,4 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-edition = "2021"
+style_edition = "2024"