From 21d850b8c067db08b0066938b3be0445865e392f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Mar 2026 15:19:58 -0600 Subject: [PATCH 1/5] feat: enable debug assertions in CI profile and RUST_BACKTRACE in workflows Enable debug-assertions in the CI cargo profile to catch potential issues with unsafe code. Enable RUST_BACKTRACE=1 in all CI workflows to get full stack traces when panics occur. --- .github/workflows/iceberg_spark_test.yml | 1 + .github/workflows/pr_benchmark_check.yml | 1 + .github/workflows/pr_build_linux.yml | 1 + .github/workflows/pr_build_macos.yml | 1 + .github/workflows/spark_sql_test.yml | 1 + .github/workflows/spark_sql_test_native_datafusion.yml | 1 + .github/workflows/spark_sql_test_native_iceberg_compat.yml | 1 + native/Cargo.toml | 1 + 8 files changed, 8 insertions(+) diff --git a/.github/workflows/iceberg_spark_test.yml b/.github/workflows/iceberg_spark_test.yml index eae8281070..ec764279a6 100644 --- a/.github/workflows/iceberg_spark_test.yml +++ b/.github/workflows/iceberg_spark_test.yml @@ -48,6 +48,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: # Build native library once and share with all test jobs diff --git a/.github/workflows/pr_benchmark_check.yml b/.github/workflows/pr_benchmark_check.yml index f3af4d7817..6376a3548f 100644 --- a/.github/workflows/pr_benchmark_check.yml +++ b/.github/workflows/pr_benchmark_check.yml @@ -41,6 +41,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: benchmark-check: diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 0c7d3fc3e0..dfb63a8800 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -48,6 +48,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: diff --git a/.github/workflows/pr_build_macos.yml b/.github/workflows/pr_build_macos.yml index fb4dfa9754..7a57b6334c 100644 --- a/.github/workflows/pr_build_macos.yml +++ b/.github/workflows/pr_build_macos.yml @@ -48,6 +48,7 
@@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 91632d04d4..4d777cda87 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -54,6 +54,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: diff --git a/.github/workflows/spark_sql_test_native_datafusion.yml b/.github/workflows/spark_sql_test_native_datafusion.yml index d5165a3786..8df9d4801e 100644 --- a/.github/workflows/spark_sql_test_native_datafusion.yml +++ b/.github/workflows/spark_sql_test_native_datafusion.yml @@ -28,6 +28,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: spark-sql-catalyst-native-datafusion: diff --git a/.github/workflows/spark_sql_test_native_iceberg_compat.yml b/.github/workflows/spark_sql_test_native_iceberg_compat.yml index 100460385f..af84e8556d 100644 --- a/.github/workflows/spark_sql_test_native_iceberg_compat.yml +++ b/.github/workflows/spark_sql_test_native_iceberg_compat.yml @@ -28,6 +28,7 @@ on: env: RUST_VERSION: stable + RUST_BACKTRACE: 1 jobs: spark-sql-catalyst-native-iceberg-compat: diff --git a/native/Cargo.toml b/native/Cargo.toml index d5a6aeabc9..d6bbddd21d 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -70,4 +70,5 @@ strip = "debuginfo" inherits = "release" lto = false # Skip LTO for faster linking codegen-units = 16 # Parallel codegen (faster compile, slightly larger binary) +debug-assertions = true # overflow-checks inherited as false from release From d3c31b5f21f45f8db39c343e4c5258a25a0bb1cf Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Mar 2026 16:44:54 -0600 Subject: [PATCH 2/5] feat: set panic=unwind in CI profile to capture debug assertion messages --- native/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/native/Cargo.toml b/native/Cargo.toml index d6bbddd21d..95504b97bf 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -71,4 +71,5 @@ inherits = "release" lto 
= false # Skip LTO for faster linking codegen-units = 16 # Parallel codegen (faster compile, slightly larger binary) debug-assertions = true +panic = "unwind" # Allow panics to be caught and logged across FFI boundary # overflow-checks inherited as false from release From 4f4caeda1a0912c8da58ddb8aa31c41e5101b018 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Mar 2026 17:38:32 -0600 Subject: [PATCH 3/5] feat: catch panics across FFI boundary and log to stderr Log panic messages (including debug assertion text and location) to stderr in the panic hook so they are visible in CI logs. Wrap the spawned tokio execution task with catch_unwind so panics propagate as DataFusionError instead of silently dropping the channel. --- native/core/src/errors.rs | 5 ++++- native/core/src/execution/jni_api.rs | 28 ++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/native/core/src/errors.rs b/native/core/src/errors.rs index ecac7af94e..a33f5d932c 100644 --- a/native/core/src/errors.rs +++ b/native/core/src/errors.rs @@ -145,7 +145,10 @@ pub enum CometError { } pub fn init() { - std::panic::set_hook(Box::new(|_panic_info| { + std::panic::set_hook(Box::new(|panic_info| { + // Log the panic message and location to stderr so it is visible in CI logs + // even if the exception message is lost crossing the FFI boundary + eprintln!("Comet native panic: {panic_info}"); // Capture the backtrace for a panic *PANIC_BACKTRACE.lock().unwrap() = Some(std::backtrace::Backtrace::force_capture().to_string()); diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 1030e30aaf..f5931b7dc7 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -29,7 +29,7 @@ use crate::{ use arrow::array::{Array, RecordBatch, UInt32Array}; use arrow::compute::{take, TakeOptions}; use arrow::datatypes::DataType as ArrowDataType; -use datafusion::common::{Result as DataFusionResult, ScalarValue}; 
+use datafusion::common::{DataFusionError, Result as DataFusionResult, ScalarValue}; use datafusion::execution::disk_manager::DiskManagerMode; use datafusion::execution::memory_pool::MemoryPool; use datafusion::execution::runtime_env::RuntimeEnvBuilder; @@ -59,6 +59,7 @@ use datafusion_spark::function::string::concat::SparkConcat; use datafusion_spark::function::string::space::SparkSpace; use futures::poll; use futures::stream::StreamExt; +use futures::FutureExt; use jni::objects::JByteBuffer; use jni::sys::{jlongArray, JNI_FALSE}; use jni::{ @@ -570,10 +571,29 @@ pub unsafe extern "system" fn Java_org_apache_comet_Native_executePlan( let (tx, rx) = mpsc::channel(2); let mut stream = stream; get_runtime().spawn(async move { - while let Some(batch) = stream.next().await { - if tx.send(batch).await.is_err() { - break; + let result = std::panic::AssertUnwindSafe(async { + while let Some(batch) = stream.next().await { + if tx.send(batch).await.is_err() { + break; + } } + }) + .catch_unwind() + .await; + + if let Err(panic) = result { + let msg = match panic.downcast_ref::<&str>() { + Some(s) => s.to_string(), + None => match panic.downcast_ref::<String>() { + Some(s) => s.clone(), + None => "unknown panic".to_string(), + }, + }; + let _ = tx + .send(Err(DataFusionError::Execution(format!( + "native panic: {msg}" + )))) + .await; } }); exec_context.batch_receiver = Some(rx); From 8399c5a84acab2e8014c24e8331500b1492404a7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Mar 2026 17:49:25 -0600 Subject: [PATCH 4/5] fix: use unaligned reads for null bitset in SparkUnsafeRow/Array Spark's UnsafeRow data from JVM may not be 8-byte aligned. The null bitset accessors were casting to *const i64 and dereferencing directly, which triggers a panic with debug assertions enabled. Use read_unaligned/write_unaligned instead.
--- native/core/src/execution/shuffle/spark_unsafe/list.rs | 2 +- native/core/src/execution/shuffle/spark_unsafe/row.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/native/core/src/execution/shuffle/spark_unsafe/list.rs b/native/core/src/execution/shuffle/spark_unsafe/list.rs index cc72813946..259ff29a79 100644 --- a/native/core/src/execution/shuffle/spark_unsafe/list.rs +++ b/native/core/src/execution/shuffle/spark_unsafe/list.rs @@ -90,7 +90,7 @@ impl SparkUnsafeArray { unsafe { let mask: i64 = 1i64 << (index & 0x3f); let word_offset = (self.row_addr + 8 + (((index >> 6) as i64) << 3)) as *const i64; - let word: i64 = *word_offset; + let word: i64 = word_offset.read_unaligned(); (word & mask) != 0 } } diff --git a/native/core/src/execution/shuffle/spark_unsafe/row.rs b/native/core/src/execution/shuffle/spark_unsafe/row.rs index 02daf6a34f..ffecfba49d 100644 --- a/native/core/src/execution/shuffle/spark_unsafe/row.rs +++ b/native/core/src/execution/shuffle/spark_unsafe/row.rs @@ -290,7 +290,7 @@ impl SparkUnsafeRow { unsafe { let mask: i64 = 1i64 << (index & 0x3f); let word_offset = (self.row_addr + (((index >> 6) as i64) << 3)) as *const i64; - let word: i64 = *word_offset; + let word: i64 = word_offset.read_unaligned(); (word & mask) != 0 } } @@ -303,8 +303,8 @@ impl SparkUnsafeRow { unsafe { let mask: i64 = 1i64 << (index & 0x3f); let word_offset = (self.row_addr + (((index >> 6) as i64) << 3)) as *mut i64; - let word: i64 = *word_offset; - *word_offset = word & !mask; + let word: i64 = word_offset.read_unaligned(); + word_offset.write_unaligned(word & !mask); } } } From 343ec1c01a663e887f6a05f94e1f6f1bf801851d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 9 Mar 2026 17:49:44 -0600 Subject: [PATCH 5/5] fix: remove miri ignore now that unaligned access is fixed --- native/core/src/execution/shuffle/spark_unsafe/row.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/native/core/src/execution/shuffle/spark_unsafe/row.rs 
b/native/core/src/execution/shuffle/spark_unsafe/row.rs index ffecfba49d..fe80d56d9a 100644 --- a/native/core/src/execution/shuffle/spark_unsafe/row.rs +++ b/native/core/src/execution/shuffle/spark_unsafe/row.rs @@ -968,7 +968,6 @@ mod test { } #[test] - #[cfg_attr(miri, ignore)] // Unaligned memory access in SparkUnsafeRow fn test_append_null_struct_field_to_struct_builder() { let data_type = DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Boolean, true),