9 changes: 3 additions & 6 deletions common/src/main/scala/org/apache/comet/CometConf.scala
@@ -94,12 +94,9 @@ object CometConf extends ShimCometConf {
.createWithEnvVarOrDefault("ENABLE_COMET", true)

val COMET_NATIVE_SCAN_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.enabled")
-.category(CATEGORY_SCAN)
-.doc(
-  "Whether to enable native scans. When this is turned on, Spark will use Comet to " +
-  "read supported data sources (currently only Parquet is supported natively). Note " +
-  "that to enable native vectorized execution, both this config and " +
-  "`spark.comet.exec.enabled` need to be enabled.")
+.category(CATEGORY_TESTING)
+.doc("Whether to enable native scans. Intended for use in Comet's own test suites to " +
+  "selectively disable native scans; not intended for production use.")
> **Member Author** commented on lines +97 to +99:
>
> this is unrelated to 0.16 / spark 4 changes, but we don't want users using this config

.booleanConf
.createWithDefault(true)
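The builder chain above resolves a config value either from a default or, for entries like `ENABLE_COMET`, from an environment variable via `createWithEnvVarOrDefault`. A minimal Python sketch of that env-var-or-default resolution (illustrative only; the function name and accepted truthy values are assumptions, Comet's real `ConfigEntry` builder is Scala):

```python
import os

def bool_conf_with_env_or_default(env_var: str, default: bool) -> bool:
    """Resolve a boolean config from an environment variable, else a default."""
    raw = os.environ.get(env_var)
    if raw is None:
        return default
    # Treat common truthy spellings as true; everything else as false.
    return raw.strip().lower() in ("true", "1")

os.environ.pop("ENABLE_COMET", None)
print(bool_conf_with_env_or_default("ENABLE_COMET", True))   # True (default wins)

os.environ["ENABLE_COMET"] = "false"
print(bool_conf_with_env_or_default("ENABLE_COMET", True))   # False (env var wins)
```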

5 changes: 4 additions & 1 deletion dev/release/build-release-comet.sh
@@ -202,7 +202,10 @@ LOCAL_REPO=$(mktemp -d /tmp/comet-staging-repo-XXXXX)
./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-3.4 -P scala-2.13 -DskipTests install
./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-3.5 -P scala-2.12 -DskipTests install
./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-3.5 -P scala-2.13 -DskipTests install
-./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-4.0 -P scala-2.13 -DskipTests install
+# The spark-4.x profiles pin their own Scala 2.13.x patch versions to match the
+# corresponding Spark release, so the scala-2.13 profile is not used here.
+./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-4.0 -DskipTests install
+./mvnw "-Dmaven.repo.local=${LOCAL_REPO}" -P spark-4.1 -DskipTests install
> **Member Author** commented on lines +206 to +208:
>
> Ship some new jars

echo "Installed to local repo: ${LOCAL_REPO}"
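The profile rule in this hunk (Spark 3.x builds pair an explicit `scala-2.1x` profile with the `spark-3.x` profile, while Spark 4.x profiles carry their own pinned Scala version) can be sketched as a small helper. This is a hypothetical illustration, not part of the actual release script:

```python
from typing import List, Optional

def maven_profiles(spark: str, scala: Optional[str] = None) -> List[str]:
    """Return the -P profile flags for a (Spark, Scala) build combination.

    Spark 4.x profiles pin their own Scala 2.13 patch version, so no
    separate scala profile is passed for them.
    """
    profiles = [f"-P spark-{spark}"]
    if spark.startswith("3.") and scala is not None:
        profiles.append(f"-P scala-{scala}")
    return profiles

print(maven_profiles("3.5", "2.13"))  # ['-P spark-3.5', '-P scala-2.13']
print(maven_profiles("4.1"))          # ['-P spark-4.1']
```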

4 changes: 2 additions & 2 deletions docs/source/about/gluten_comparison.md
@@ -62,8 +62,8 @@ code, then we suggest benchmarking with both solutions and choosing the fastest

Both projects target a similar set of Spark releases.

-Comet supports Spark 3.4, 3.5, and 4.0 in production builds, with experimental builds also published for
-Spark 4.1 and the Spark 4.2 preview. See the [Spark version compatibility guide] for the exact patch versions and
+Comet supports Spark 3.4, 3.5, 4.0, and 4.1 in production builds, with an experimental build also published for
+the Spark 4.2 preview. See the [Spark version compatibility guide] for the exact patch versions and
JDK/Scala combinations.

[Spark version compatibility guide]: /user-guide/latest/compatibility/spark-versions.md
73 changes: 0 additions & 73 deletions docs/source/about/index.md

This file was deleted.

5 changes: 5 additions & 0 deletions docs/source/asf/index.md
@@ -19,9 +19,14 @@ under the License.

# ASF Links

+Apache DataFusion Comet is part of the Apache Software Foundation. The links below point to ASF
+resources covering licensing, donations, security reporting, and the Foundation's code of conduct.
+Select a link from the navigation menu.
+
```{toctree}
:maxdepth: 1
:caption: ASF Links
+:hidden:
Apache Software Foundation <https://apache.org>
License <https://www.apache.org/licenses/>
9 changes: 5 additions & 4 deletions docs/source/conf.py
@@ -134,16 +134,17 @@
"**": ["docs-sidebar.html"],
}

-# tell myst_parser to auto-generate anchor links for headers h1, h2, h3
-myst_heading_anchors = 3
+# tell myst_parser to auto-generate anchor links for headers h1, h2, h3, h4
+myst_heading_anchors = 4

# enable nice rendering of checkboxes for the task lists
myst_enable_extensions = ["colon_fence", "deflist", "tasklist"]

redirects = {
-"overview.html": "about/index.html",
+"overview.html": "index.html",
+"about/index.html": "../index.html",
"gluten_comparison.html": "about/gluten_comparison.html",
-"user-guide/overview.html": "../about/overview.html",
+"user-guide/overview.html": "../index.html",
"user-guide/gluten_comparison.html": "../about/gluten_comparison.html",
"user-guide/compatibility.html": "latest/compatibility.html",
"user-guide/configs.html": "latest/configs.html",
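Assuming the usual Sphinx redirect semantics (each key is a page relative to the docs root, each value a target relative to that page's directory), the updated mapping can be checked with a small resolver. The `resolve` helper is illustrative, not part of `conf.py`:

```python
import posixpath

# Subset of the redirects dict from this hunk.
redirects = {
    "overview.html": "index.html",
    "about/index.html": "../index.html",
    "user-guide/overview.html": "../index.html",
}

def resolve(page: str) -> str:
    """Return the docs-root-relative target a redirected page lands on."""
    target = redirects[page]
    base = posixpath.dirname(page)          # directory the page lives in
    return posixpath.normpath(posixpath.join(base, target))

print(resolve("about/index.html"))          # index.html
print(resolve("user-guide/overview.html"))  # index.html
```

Both the old `overview.html` page and the deleted `about/index.html` page now resolve to the top-level `index.html`, matching the removal of `docs/source/about/index.md` in this PR.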
10 changes: 10 additions & 0 deletions docs/source/contributor-guide/benchmarking.md
@@ -39,3 +39,13 @@ Available benchmarking guides:
- [TPC-DS Benchmarking with spark-sql-perf](benchmarking_spark_sql_perf.md)

We also have many micro benchmarks that can be run from an IDE located [here](https://github.com/apache/datafusion-comet/tree/main/spark/src/test/scala/org/apache/spark/sql/benchmark).

+```{toctree}
+:hidden:
+benchmark-results/tpc-h
+benchmark-results/tpc-ds
+benchmarking_macos
+benchmarking_aws_ec2
+benchmarking_spark_sql_perf
+```
13 changes: 13 additions & 0 deletions docs/source/contributor-guide/index.md
@@ -19,9 +19,21 @@ under the License.

# Comet Contributor Guide

+The Comet contributor guide is for developers working on Comet itself. It covers the project
+architecture, the JVM and native code layout, the Arrow FFI bridge, JVM and native shuffle, and
+how data and plans flow between Spark and the DataFusion execution engine.
+
+It also documents day-to-day workflows including building and testing locally, debugging,
+benchmarking, profiling, tracing, running the SQL test suites, adding new operators and
+expressions, triaging bugs, and the Comet release process.
+
+New contributors should start with the Getting Started page. Select a topic from the navigation
+menu to read more.

```{toctree}
:maxdepth: 2
:caption: Contributor Guide
+:hidden:

Getting Started <contributing>
Comet Plugin Overview <plugin_overview>
@@ -30,6 +42,7 @@ JVM Shuffle <jvm_shuffle>
Native Shuffle <native_shuffle>
Development Guide <development>
Debugging Guide <debugging>
+ANSI Error Propagation <sql_error_propagation>
Benchmarking Guide <benchmarking>
Adding a New Operator <adding_a_new_operator>
Adding a New Expression <adding_a_new_expression>
10 changes: 6 additions & 4 deletions docs/source/contributor-guide/roadmap.md
@@ -43,14 +43,16 @@ significant family of Spark expressions in one effort.

## Dynamic Partition Pruning

-Both Iceberg table scans and Parquet V1 native scans (`CometNativeScanExec`) support non-AQE Dynamic Partition Pruning
-(DPP) filters generated by Spark's `PlanDynamicPruningFilters` optimizer rule ([#3349], [#3511]). However, Spark's
-`PlanAdaptiveDynamicPruningFilters` optimizer rule runs after Comet's rules, so DPP with Adaptive Query Execution
-requires a redesign of Comet's plan translation. This effort can be tracked at [#3510].
+Native Parquet scans (`CometNativeScanExec`) support Dynamic Partition Pruning (DPP) both with and without
+Adaptive Query Execution. Non-AQE DPP landed in [#4011] and AQE DPP with broadcast reuse landed in [#4112].
+Iceberg native scans currently support non-AQE DPP only ([#3349], [#3511]); extending broadcast reuse to AQE
+DPP for Iceberg is tracked at [#3510].

[#3349]: https://github.com/apache/datafusion-comet/pull/3349
[#3510]: https://github.com/apache/datafusion-comet/issues/3510
[#3511]: https://github.com/apache/datafusion-comet/pull/3511
+[#4011]: https://github.com/apache/datafusion-comet/pull/4011
+[#4112]: https://github.com/apache/datafusion-comet/pull/4112

## TPC-H and TPC-DS Performance

14 changes: 7 additions & 7 deletions docs/source/contributor-guide/spark_expressions_support.md
@@ -356,23 +356,23 @@
- [x] `/`
- [x] abs
- [x] acos
-- [ ] acosh
+- [x] acosh
- [x] asin
-- [ ] asinh
+- [x] asinh
- [x] atan
- [x] atan2
-- [ ] atanh
+- [x] atanh
- [x] bin
- [ ] bround
-- [ ] cbrt
+- [x] cbrt
- [x] ceil
- [x] ceiling
- [ ] conv
- [x] cos
- [x] cosh
- [x] cot
- [ ] csc
-- [ ] degrees
+- [x] degrees
- [ ] div
- [ ] e
- [x] exp
@@ -390,12 +390,12 @@
- [x] log2
- [x] mod
- [x] negative
-- [ ] pi
+- [x] pi
- [ ] pmod
- [x] positive
- [x] pow
- [x] power
-- [ ] radians
+- [x] radians
- [x] rand
- [x] randn
- [ ] random
5 changes: 3 additions & 2 deletions docs/source/contributor-guide/sql_error_propagation.md
@@ -398,8 +398,9 @@ def convertToSparkException(e: CometQueryExecutionException): Throwable = {

### `ShimSparkErrorConverter` calls the real Spark API

-Because Spark's `QueryExecutionErrors` API changes between Spark versions (3.4, 3.5, 4.0),
-there is a separate implementation per version (in `spark-3.4/`, `spark-3.5/`, `spark-4.0/`).
+Because Spark's `QueryExecutionErrors` API changes between Spark versions (3.4, 3.5, and the 4.x line),
+there is a separate implementation per branch (in `spark-3.4/`, `spark-3.5/`, and `spark-4.x/`, which is
+shared by Spark 4.0, 4.1, and 4.2).

![Shim pattern for per-version Spark API bridging](./shim_pattern.svg)
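The shim pattern described above — one interface, one implementation per supported Spark branch, selected at runtime — can be sketched as follows. All names here are hypothetical stand-ins, not Comet's actual classes:

```python
class SparkErrorConverter:
    """Common interface all per-branch shims implement."""
    def convert(self, message: str) -> Exception:
        raise NotImplementedError

class Spark34Converter(SparkErrorConverter):
    def convert(self, message: str) -> Exception:
        return RuntimeError(f"[3.4] {message}")

class Spark4xConverter(SparkErrorConverter):
    # A single implementation shared by Spark 4.0, 4.1, and 4.2.
    def convert(self, message: str) -> Exception:
        return RuntimeError(f"[4.x] {message}")

def shim_for(spark_version: str) -> SparkErrorConverter:
    """Pick the shim implementation matching the running Spark version."""
    major = int(spark_version.split(".")[0])
    if major >= 4:
        return Spark4xConverter()
    return Spark34Converter()  # 3.5 branch elided for brevity

print(type(shim_for("4.1")).__name__)  # Spark4xConverter
```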

Expand Down
33 changes: 19 additions & 14 deletions docs/source/index.md
@@ -40,21 +40,14 @@ Comet also accelerates Apache Iceberg, when performing Parquet scans from Spark.

Comet delivers a performance speedup for many queries, enabling faster data processing and shorter time-to-insights.

-The following chart shows the time it takes to run the 22 TPC-H queries against 100 GB of data in Parquet format
-using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
-for details of the environment used for these benchmarks.
+The following charts demonstrate Comet accelerating TPC-H @ 1 TB. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
+for details.

-When using Comet, the overall run time is reduced from 687 seconds to 302 seconds, a 2.2x speedup.
-
-![](_static/images/benchmark-results/0.11.0/tpch_allqueries.png)
+![](_static/images/benchmark-results/0.15.0/tpch_allqueries.png)

Here is a breakdown showing relative performance of Spark and Comet for each TPC-H query.

-![](_static/images/benchmark-results/0.11.0/tpch_queries_compare.png)
-
-These benchmarks can be reproduced in any environment using the documentation in the
-[Comet Benchmarking Guide](/contributor-guide/benchmarking.md). We encourage
-you to run your own benchmarks.
+![](_static/images/benchmark-results/0.15.0/tpch_queries_compare.png)

## Use Commodity Hardware

@@ -68,12 +68,26 @@ Comet aims for 100% compatibility with all supported versions of Apache Spark, a
your existing Spark deployments and workflows seamlessly. With no code changes required, you can immediately harness
the benefits of Comet's acceleration capabilities without disrupting your Spark applications.

+The project strives to keep feature parity with Apache Spark, that is, users should expect the same behavior (w.r.t
+features, configurations, query results, etc) with Comet turned on or turned off in their Spark jobs. In addition,
+the Comet extension automatically detects unsupported features and falls back to the Spark engine.
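The automatic fallback behavior in the added paragraph can be sketched as a per-operator placement rule. This is an illustrative toy, not Comet's planner; the operator names and the `SUPPORTED` set are assumptions:

```python
# Operators this sketch pretends Comet supports natively.
SUPPORTED = {"Project", "Filter", "HashAggregate", "SortMergeJoin"}

def place_operator(op: str) -> str:
    """Return which engine an operator runs on under the fallback rule."""
    return "comet-native" if op in SUPPORTED else "spark-fallback"

plan = ["Project", "Filter", "Generate"]  # Generate is unsupported in this toy
print([place_operator(op) for op in plan])
# ['comet-native', 'comet-native', 'spark-fallback']
```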

## Tight Integration with Apache DataFusion

Comet tightly integrates with the core Apache DataFusion project, leveraging its powerful execution engine. With
seamless interoperability between Comet and DataFusion, you can achieve optimal performance and efficiency in your
Spark workloads.

+## Architecture
+
+The following diagram provides an overview of Comet's architecture.
+
+![Comet Overview](_static/images/comet-overview.png)
+
+The following diagram shows how Comet integrates with Apache Spark.
+
+![Comet System Diagram](_static/images/comet-system-diagram.png)

## Active Community

Comet boasts a vibrant and active community of developers, contributors, and users dedicated to advancing the
@@ -86,8 +93,6 @@ To get started with Apache DataFusion Comet, follow the
[DataFusion Slack and Discord channels](https://datafusion.apache.org/contributor-guide/communication.html) to connect
with other users, ask questions, and share your experiences with Comet.

-Follow [Apache DataFusion Comet Overview](https://datafusion.apache.org/comet/about/index.html) to get more detailed information

## Contributing

We welcome contributions from the community to help improve and enhance Apache DataFusion Comet. Whether it's fixing
@@ -100,8 +105,8 @@ shaping the future of Comet. Check out our
:caption: Index
:hidden:

-Comet Overview <about/index>
User Guide <user-guide/index>
Contributor Guide <contributor-guide/index>
+Comparison with Gluten <about/gluten_comparison>
ASF Links <asf/index>
```
8 changes: 8 additions & 0 deletions docs/source/user-guide/index.md
@@ -19,9 +19,17 @@ under the License.

# Comet User Guide

+The Comet user guide covers installation, configuration, supported data sources, supported operators
+and expressions, and tuning advice for running Apache Spark with Comet acceleration.
+
+User guides are published for each release. The development snapshot tracks the upcoming release and
+may include features and fixes that are not yet generally available. Select a version from the
+navigation menu to view its guide.

```{toctree}
:maxdepth: 2
:caption: User Guides
+:hidden:
0.16.0-SNAPSHOT <latest/index>
0.15.x <0.15/index>
@@ -27,6 +27,7 @@ Compatibility notes are grouped by expression category:

```{toctree}
:maxdepth: 1
+:hidden:
aggregate
array
1 change: 1 addition & 0 deletions docs/source/user-guide/latest/compatibility/index.md
@@ -32,6 +32,7 @@ This guide documents areas where Comet's behavior is known to differ from Spark.

```{toctree}
:maxdepth: 1
+:hidden:
scans
floating-point