From e56d8e5d038bb5571ed064f0b4dc7481a46576de Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 7 May 2026 08:26:38 -0600 Subject: [PATCH] test: remove "Comet (Scan)" cases from microbenchmarks The "Comet (Scan)" case set COMET_ENABLED=true with COMET_EXEC_ENABLED=false, intending to isolate scan performance. With spark.comet.scan.impl=auto (the default), CometScanRule.nativeDataFusionScan refuses to install when exec is disabled, so the case actually measured native_iceberg_compat scan plus Spark ColumnarToRow. Comparing it against the other Comet case (which uses native_datafusion plus CometNativeColumnarToRow) made the result a proxy for scan-impl choice rather than the intended isolation. Drop the "Comet (Scan)" cases everywhere and rename the combined case to "Comet". Update doc comments accordingly. --- .../sql/benchmark/CometBenchmarkBase.scala | 16 ++----- .../CometCsvExpressionBenchmark.scala | 2 +- .../sql/benchmark/CometExecBenchmark.scala | 45 +++---------------- .../CometJsonExpressionBenchmark.scala | 2 +- .../CometStringExpressionBenchmark.scala | 2 +- 5 files changed, 12 insertions(+), 55 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala index deade5e337..5e4ec734a8 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala @@ -97,8 +97,8 @@ trait CometBenchmarkBase } /** - * Runs an expression benchmark with standard cases: Spark, Comet (Scan), Comet (Scan + Exec). - * This provides a consistent benchmark structure for expression evaluation. + * Runs an expression benchmark with standard cases: Spark, Comet. This provides a consistent + * benchmark structure for expression evaluation. * * @param name * Benchmark name @@ -107,7 +107,7 @@ trait CometBenchmarkBase * @param query * SQL query to benchmark * @param extraCometConfigs - * Additional configurations to apply for Comet cases (optional) + * Additional configurations to apply for the Comet case (optional) */ final def runExpressionBenchmark( name: String, @@ -122,14 +122,6 @@ trait CometBenchmarkBase } } - benchmark.addCase("Comet (Scan)") { _ => - withSQLConf( - CometConf.COMET_ENABLED.key -> "true", - CometConf.COMET_EXEC_ENABLED.key -> "false") { - spark.sql(query).noop() - } - } - val cometExecConfigs = Map( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", @@ -158,7 +150,7 @@ trait CometBenchmarkBase } } - benchmark.addCase("Comet (Scan + Exec)") { _ => + benchmark.addCase("Comet") { _ => withSQLConf(cometExecConfigs.toSeq: _*) { spark.sql(query).noop() } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala index 94288eb9cb..1495b0320e 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala @@ -31,7 +31,7 @@ import org.apache.comet.CometConf * @param query * SQL query to benchmark * @param extraCometConfigs - * Additional Comet configurations for the scan+exec case + * Additional Comet configurations for the Comet case */ case class CsvExprConfig( name: String, diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala index 277cbdae62..740707aefd 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala @@ -84,13 +84,7 @@ object CometExecBenchmark extends CometBenchmarkBase { spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop() } - benchmark.addCase("SQL Parquet - Comet (Scan)") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "true") { - spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop() - } - } - - benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ => + benchmark.addCase("SQL Parquet - Comet") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true") { @@ -128,15 +122,7 @@ object CometExecBenchmark extends CometBenchmarkBase { "col2, col3 FROM parquetV1Table") } - benchmark.addCase("SQL Parquet - Comet (Scan)") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "true") { - spark.sql( - "SELECT (SELECT max(col1) AS parquetV1Table FROM parquetV1Table) AS a, " + - "col2, col3 FROM parquetV1Table") - } - } - - benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ => + benchmark.addCase("SQL Parquet - Comet") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", @@ -164,13 +150,7 @@ object CometExecBenchmark extends CometBenchmarkBase { spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop() } - benchmark.addCase("SQL Parquet - Comet (Scan)") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "true") { - spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop() - } - } - - benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ => + benchmark.addCase("SQL Parquet - Comet") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true") { @@ -199,16 +179,7 @@ object CometExecBenchmark extends CometBenchmarkBase { .noop() } - benchmark.addCase("SQL Parquet - Comet (Scan)") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "true") { - spark - .sql("SELECT col1, col2, SUM(col3) FROM parquetV1Table " + - "GROUP BY col1, col2 GROUPING SETS ((col1), (col2))") - .noop() - } - } - - benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ => + benchmark.addCase("SQL Parquet - Comet") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true") { @@ -251,13 +222,7 @@ object CometExecBenchmark extends CometBenchmarkBase { spark.sql(query).noop() } - benchmark.addCase("SQL Parquet - Comet (Scan) (BloomFilterAgg)") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "true") { - spark.sql(query).noop() - } - } - - benchmark.addCase("SQL Parquet - Comet (Scan, Exec) (BloomFilterAgg)") { _ => + benchmark.addCase("SQL Parquet - Comet (BloomFilterAgg)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true") { diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala index 5f1365bd76..82aae3d7b9 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala @@ -32,7 +32,7 @@ import org.apache.comet.CometConf * @param query * SQL query to benchmark * @param extraCometConfigs - * Additional Comet configurations for the scan+exec case + * Additional Comet configurations for the Comet case */ case class JsonExprConfig( name: String, diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index c7c750aed6..d7be505161 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -28,7 +28,7 @@ import org.apache.comet.CometConf * @param query * SQL query to benchmark * @param extraCometConfigs - * Additional Comet configurations for the scan+exec case + * Additional Comet configurations for the Comet case */ case class StringExprConfig( name: String,