From 1da46a62dec92fdb09c7bd86407c18dd2ca57309 Mon Sep 17 00:00:00 2001
From: shekharrajak
Date: Tue, 12 May 2026 16:27:39 +0530
Subject: [PATCH 1/2] fix: reject readBatch(0) in ArrowConstantColumnReader to prevent [N,0] row count mismatch (#4211)

A non-positive total passed to readBatch would silently yield a
zero-length vector and only surface later as a [N, 0] mismatch in
NativeUtil.exportBatch. Fail fast with IllegalArgumentException instead.

The regression test checks the tail of the exception message
("got 0" / "got -1") rather than contains("0"), because the fixed text
"requires total > 0" already contains a "0" and would make that check
pass for any rejected value.
---
 .../parquet/ArrowConstantColumnReader.java    |  7 ++
 .../comet/parquet/TestRowCountMismatch.java   | 76 +++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 common/src/test/java/org/apache/comet/parquet/TestRowCountMismatch.java

diff --git a/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
index 521eb4aa5b..e8fbea4ee6 100644
--- a/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
+++ b/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
@@ -80,6 +80,13 @@ public void setBatchSize(int batchSize) {
 
   @Override
   public void readBatch(int total) {
+    // Fail fast: a non-positive total here would silently yield a zero-length
+    // vector and later surface as a [N, 0] mismatch in NativeUtil.exportBatch
+    // (apache/datafusion-comet#4211).
+    if (total <= 0) {
+      throw new IllegalArgumentException(
+          "ArrowConstantColumnReader.readBatch requires total > 0, got " + total);
+    }
     if (total != currentSize) {
       close();
       initVector(value, total);
diff --git a/common/src/test/java/org/apache/comet/parquet/TestRowCountMismatch.java b/common/src/test/java/org/apache/comet/parquet/TestRowCountMismatch.java
new file mode 100644
index 0000000000..bcdd851901
--- /dev/null
+++ b/common/src/test/java/org/apache/comet/parquet/TestRowCountMismatch.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet.parquet;
+
+import org.junit.Test;
+
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+
+import org.apache.comet.vector.CometVector;
+
+import static org.junit.Assert.*;
+
+/** Regression probes for apache/datafusion-comet#4211. */
+public class TestRowCountMismatch {
+
+  private static final StructField NULLABLE_INT =
+      new StructField("c", DataTypes.IntegerType, true, Metadata.empty());
+
+  @Test
+  public void constantNullReaderRespectsBatchSize() throws Exception {
+    int batchSize = 8192;
+    try (ArrowConstantColumnReader reader =
+        new ArrowConstantColumnReader(NULLABLE_INT, batchSize, false)) {
+      reader.readBatch(batchSize);
+      CometVector v = reader.currentBatch();
+      assertNotNull(v);
+      assertEquals(batchSize, v.getValueVector().getValueCount());
+    }
+  }
+
+  @Test
+  public void readBatchRejectsZeroToPreventRowCountMismatch() throws Exception {
+    int batchSize = 8192;
+    try (ArrowConstantColumnReader reader =
+        new ArrowConstantColumnReader(NULLABLE_INT, batchSize, false)) {
+      reader.readBatch(batchSize);
+      assertEquals(batchSize, reader.currentBatch().getValueVector().getValueCount());
+
+      try {
+        reader.readBatch(0);
+        fail("readBatch(0) should throw IllegalArgumentException");
+      } catch (IllegalArgumentException expected) {
+        assertTrue(expected.getMessage().endsWith("got 0"));
+      }
+
+      try {
+        reader.readBatch(-1);
+        fail("readBatch(-1) should throw IllegalArgumentException");
+      } catch (IllegalArgumentException expected) {
+        assertTrue(expected.getMessage().endsWith("got -1"));
+      }
+
+      // Rejected calls must leave the previously-prepared vector intact.
+      assertEquals(batchSize, reader.currentBatch().getValueVector().getValueCount());
+    }
+  }
+}
From 3f236c86c157cf3ae05fda19afbec59d4a66dd6f Mon Sep 17 00:00:00 2001
From: shekharrajak
Date: Tue, 12 May 2026 20:53:07 +0530
Subject: [PATCH 2/2] fix: drop redundant comment in ArrowConstantColumnReader

Removes the three-line explanatory comment above the `total <= 0` guard
in readBatch. The guard and its exception message are unchanged.
---
 .../org/apache/comet/parquet/ArrowConstantColumnReader.java | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
index e8fbea4ee6..961d9276f1 100644
--- a/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
+++ b/common/src/main/java/org/apache/comet/parquet/ArrowConstantColumnReader.java
@@ -80,9 +80,6 @@ public void setBatchSize(int batchSize) {
 
   @Override
   public void readBatch(int total) {
-    // Fail fast: a non-positive total here would silently yield a zero-length
-    // vector and later surface as a [N, 0] mismatch in NativeUtil.exportBatch
-    // (apache/datafusion-comet#4211).
     if (total <= 0) {
       throw new IllegalArgumentException(
           "ArrowConstantColumnReader.readBatch requires total > 0, got " + total);