From e56d8e5d038bb5571ed064f0b4dc7481a46576de Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Thu, 7 May 2026 08:26:38 -0600
Subject: [PATCH] test: remove "Comet (Scan)" cases from microbenchmarks

The "Comet (Scan)" case set COMET_ENABLED=true with COMET_EXEC_ENABLED=false,
intending to isolate scan performance. With spark.comet.scan.impl=auto (the
default), CometScanRule.nativeDataFusionScan refuses to install when exec is
disabled, so the case actually measured native_iceberg_compat scan plus
Spark ColumnarToRow. Comparing it against the other Comet case (which uses
native_datafusion plus CometNativeColumnarToRow) made the result a proxy for
scan-impl choice rather than the intended isolation.

Drop the "Comet (Scan)" cases everywhere and rename the combined case to
"Comet". Update doc comments accordingly.
---
 .../sql/benchmark/CometBenchmarkBase.scala    | 16 ++-----
 .../CometCsvExpressionBenchmark.scala         |  2 +-
 .../sql/benchmark/CometExecBenchmark.scala    | 45 +++----------------
 .../CometJsonExpressionBenchmark.scala        |  2 +-
 .../CometStringExpressionBenchmark.scala      |  2 +-
 5 files changed, 12 insertions(+), 55 deletions(-)

diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala
index deade5e337..5e4ec734a8 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala
@@ -97,8 +97,8 @@ trait CometBenchmarkBase
   }
 
   /**
-   * Runs an expression benchmark with standard cases: Spark, Comet (Scan), Comet (Scan + Exec).
-   * This provides a consistent benchmark structure for expression evaluation.
+   * Runs an expression benchmark with standard cases: Spark, Comet. This provides a consistent
+   * benchmark structure for expression evaluation.
    *
    * @param name
    *   Benchmark name
@@ -107,7 +107,7 @@ trait CometBenchmarkBase
    * @param query
    *   SQL query to benchmark
    * @param extraCometConfigs
-   *   Additional configurations to apply for Comet cases (optional)
+   *   Additional configurations to apply for the Comet case (optional)
    */
   final def runExpressionBenchmark(
       name: String,
@@ -122,14 +122,6 @@ trait CometBenchmarkBase
       }
     }
 
-    benchmark.addCase("Comet (Scan)") { _ =>
-      withSQLConf(
-        CometConf.COMET_ENABLED.key -> "true",
-        CometConf.COMET_EXEC_ENABLED.key -> "false") {
-        spark.sql(query).noop()
-      }
-    }
-
     val cometExecConfigs = Map(
       CometConf.COMET_ENABLED.key -> "true",
       CometConf.COMET_EXEC_ENABLED.key -> "true",
@@ -158,7 +150,7 @@ trait CometBenchmarkBase
       }
     }
 
-    benchmark.addCase("Comet (Scan + Exec)") { _ =>
+    benchmark.addCase("Comet") { _ =>
       withSQLConf(cometExecConfigs.toSeq: _*) {
         spark.sql(query).noop()
       }
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala
index 94288eb9cb..1495b0320e 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCsvExpressionBenchmark.scala
@@ -31,7 +31,7 @@ import org.apache.comet.CometConf
  * @param query
  *   SQL query to benchmark
  * @param extraCometConfigs
- *   Additional Comet configurations for the scan+exec case
+ *   Additional Comet configurations for the Comet case
  */
 case class CsvExprConfig(
     name: String,
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala
index 277cbdae62..740707aefd 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometExecBenchmark.scala
@@ -84,13 +84,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
           spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop()
         }
 
-        benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
-          withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
-            spark.sql("select c2 + 1, c1 + 2 from parquetV1Table where c1 + 1 > 0").noop()
-          }
-        }
-
-        benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
+        benchmark.addCase("SQL Parquet - Comet") { _ =>
           withSQLConf(
             CometConf.COMET_ENABLED.key -> "true",
             CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -128,15 +122,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
               "col2, col3 FROM parquetV1Table")
         }
 
-        benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
-          withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
-            spark.sql(
-              "SELECT (SELECT max(col1) AS parquetV1Table FROM parquetV1Table) AS a, " +
-                "col2, col3 FROM parquetV1Table")
-          }
-        }
-
-        benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
+        benchmark.addCase("SQL Parquet - Comet") { _ =>
           withSQLConf(
             CometConf.COMET_ENABLED.key -> "true",
             CometConf.COMET_EXEC_ENABLED.key -> "true",
@@ -164,13 +150,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
           spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop()
         }
 
-        benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
-          withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
-            spark.sql("select * from parquetV1Table").sortWithinPartitions("value").noop()
-          }
-        }
-
-        benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
+        benchmark.addCase("SQL Parquet - Comet") { _ =>
           withSQLConf(
             CometConf.COMET_ENABLED.key -> "true",
             CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -199,16 +179,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
             .noop()
         }
 
-        benchmark.addCase("SQL Parquet - Comet (Scan)") { _ =>
-          withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
-            spark
-              .sql("SELECT col1, col2, SUM(col3) FROM parquetV1Table " +
-                "GROUP BY col1, col2 GROUPING SETS ((col1), (col2))")
-              .noop()
-          }
-        }
-
-        benchmark.addCase("SQL Parquet - Comet (Scan, Exec)") { _ =>
+        benchmark.addCase("SQL Parquet - Comet") { _ =>
           withSQLConf(
             CometConf.COMET_ENABLED.key -> "true",
             CometConf.COMET_EXEC_ENABLED.key -> "true") {
@@ -251,13 +222,7 @@ object CometExecBenchmark extends CometBenchmarkBase {
           spark.sql(query).noop()
         }
 
-        benchmark.addCase("SQL Parquet - Comet (Scan) (BloomFilterAgg)") { _ =>
-          withSQLConf(CometConf.COMET_ENABLED.key -> "true") {
-            spark.sql(query).noop()
-          }
-        }
-
-        benchmark.addCase("SQL Parquet - Comet (Scan, Exec) (BloomFilterAgg)") { _ =>
+        benchmark.addCase("SQL Parquet - Comet (BloomFilterAgg)") { _ =>
           withSQLConf(
             CometConf.COMET_ENABLED.key -> "true",
             CometConf.COMET_EXEC_ENABLED.key -> "true") {
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala
index 5f1365bd76..82aae3d7b9 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala
@@ -32,7 +32,7 @@ import org.apache.comet.CometConf
  * @param query
  *   SQL query to benchmark
  * @param extraCometConfigs
- *   Additional Comet configurations for the scan+exec case
+ *   Additional Comet configurations for the Comet case
  */
 case class JsonExprConfig(
     name: String,
diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala
index c7c750aed6..d7be505161 100644
--- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala
@@ -28,7 +28,7 @@ import org.apache.comet.CometConf
  * @param query
  *   SQL query to benchmark
  * @param extraCometConfigs
- *   Additional Comet configurations for the scan+exec case
+ *   Additional Comet configurations for the Comet case
  */
 case class StringExprConfig(
     name: String,