diff --git a/.github/workflows/spark_sql_test.yml b/.github/workflows/spark_sql_test.yml index 980629174f..5b678598fb 100644 --- a/.github/workflows/spark_sql_test.yml +++ b/.github/workflows/spark_sql_test.yml @@ -143,7 +143,7 @@ jobs: - {spark-short: '4.0', spark-full: '4.0.2', java: 21, scan-impl: 'auto'} - {spark-short: '4.1', spark-full: '4.1.1', java: 17, scan-impl: 'auto'} fail-fast: false - name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }} + name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }} # Hive tests stay on the standard GitHub-hosted runner: HiveSparkSubmitSuite # relies on an Ivy 'local-m2-cache' resolver that the runs-on.com # ubuntu24-full-x64 image does not provide, so spark-submit fails there. @@ -192,7 +192,7 @@ jobs: if: ${{ github.event.inputs.collect-fallback-logs == 'true' }} uses: actions/upload-artifact@v7 with: - name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }} + name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}-jdk${{ matrix.config.java }} path: "**/fallback.log" merge-fallback-logs: diff --git a/dev/diffs/4.0.2.diff b/dev/diffs/4.0.2.diff index 5948292d7b..436491f9c8 100644 --- a/dev/diffs/4.0.2.diff +++ b/dev/diffs/4.0.2.diff @@ -3147,6 +3147,47 @@ index 7838e62013d..8fa09652921 100644 import testImplicits._ +diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +index 89f22186f7e..425233f00b2 100644 +--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala ++++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +@@ -32,7 +32,8 @@ import org.apache.hadoop.conf.Configuration + import org.apache.hadoop.fs._ + import org.json4s.DefaultFormats + import org.json4s.jackson.JsonMethods +-import org.scalatest.{BeforeAndAfter, PrivateMethodTester} ++import org.scalactic.source.Position ++import org.scalatest.{BeforeAndAfter, PrivateMethodTester, Tag} + import org.scalatest.concurrent.Eventually._ + import org.scalatest.time.SpanSugar._ + +@@ -40,6 +41,7 @@ import org.apache.spark._ + import org.apache.spark.LocalSparkContext._ + import org.apache.spark.sql.SparkSession + import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} ++import org.apache.spark.sql.classic.{SparkSession => ClassicSparkSession} + import org.apache.spark.sql.catalyst.util.quietly + import org.apache.spark.sql.execution.streaming._ + import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorSuite.withCoordinatorRef +@@ -128,6 +130,18 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] + import StateStoreTestsHelper._ + import StateStoreCoordinatorSuite._ + ++ // Comet: skip this suite under Comet. The tests target streaming state-store internals ++ // (StateStore.get/put/commit), not SQL execution paths, and the `maintenance` test is ++ // flaky in CI. See https://github.com/apache/datafusion-comet/issues/4221 ++ override protected def test(testName: String, testTags: Tag*)(testFun: => Any) ++ (implicit pos: Position): Unit = { ++ if (ClassicSparkSession.isCometEnabled) { ++ ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) ++ } else { ++ super.test(testName, testTags: _*)(testFun) ++ } ++ } ++ + before { + StateStore.stop() + require(!StateStore.isMaintenanceRunning) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index c4b09c4b289..75c3437788e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala diff --git a/dev/diffs/4.1.1.diff b/dev/diffs/4.1.1.diff index ca5341f7f6..bc662dec7d 100644 --- a/dev/diffs/4.1.1.diff +++ b/dev/diffs/4.1.1.diff @@ -3459,6 +3459,47 @@ index 38e5b15465b..ca3e8fef27a 100644 import testImplicits._ testWithColumnFamilies("RocksDBStateStore", +diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +index e839ccd35ec..d182aa07b44 100644 +--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala ++++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +@@ -32,7 +32,8 @@ import org.apache.hadoop.conf.Configuration + import org.apache.hadoop.fs._ + import org.json4s.DefaultFormats + import org.json4s.jackson.JsonMethods +-import org.scalatest.{BeforeAndAfter, PrivateMethodTester} ++import org.scalactic.source.Position ++import org.scalatest.{BeforeAndAfter, PrivateMethodTester, Tag} + import org.scalatest.concurrent.Eventually._ + import org.scalatest.time.SpanSugar._ + +@@ -41,6 +42,7 @@ import org.apache.spark.LocalSparkContext._ + import org.apache.spark.internal.Logging + import org.apache.spark.sql.SparkSession + import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} ++import org.apache.spark.sql.classic.{SparkSession => ClassicSparkSession} + import org.apache.spark.sql.catalyst.util.quietly + import org.apache.spark.sql.execution.streaming._ + import org.apache.spark.sql.execution.streaming.checkpointing.{CheckpointFileManager, ChecksumCheckpointFileManager, ChecksumFile} +@@ -259,6 +261,18 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] + import StateStoreTestsHelper._ + import StateStoreCoordinatorSuite._ + ++ // Comet: skip this suite under Comet. The tests target streaming state-store internals ++ // (StateStore.get/put/commit), not SQL execution paths, and the `maintenance` test is ++ // flaky in CI. See https://github.com/apache/datafusion-comet/issues/4221 ++ override protected def test(testName: String, testTags: Tag*)(testFun: => Any) ++ (implicit pos: Position): Unit = { ++ if (ClassicSparkSession.isCometEnabled) { ++ ignore(testName + " (disabled when Comet is on)", testTags: _*)(testFun) ++ } else { ++ super.test(testName, testTags: _*)(testFun) ++ } ++ } ++ + before { + StateStore.stop() + require(!StateStore.isMaintenanceRunning) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 83ebd24384c..32511091bb2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala