111 changes: 100 additions & 11 deletions benchmarks/tpc/README.md
@@ -38,17 +38,21 @@ All benchmarks are run via `run.py`:
python3 run.py --engine <engine> --benchmark <tpch|tpcds> [options]
```

| Option | Description |
| -------------- | -------------------------------------------------------- |
| `--engine` | Engine name (matches a TOML file in `engines/`) |
| `--benchmark` | `tpch` or `tpcds` |
| `--iterations` | Number of iterations (default: 1) |
| `--output` | Output directory (default: `.`) |
| `--query` | Run a single query number |
| `--no-restart` | Skip Spark master/worker restart |
| `--dry-run` | Print the spark-submit command without executing |
| `--jfr` | Enable Java Flight Recorder profiling |
| `--jfr-dir` | Directory for JFR output files (default: `/results/jfr`) |
| Option | Description |
| ------------------------- | ------------------------------------------------------------------------------- |
| `--engine` | Engine name (matches a TOML file in `engines/`) |
| `--benchmark` | `tpch` or `tpcds` |
| `--iterations` | Number of iterations (default: 1) |
| `--output` | Output directory (default: `.`) |
| `--query` | Run a single query number |
| `--no-restart` | Skip Spark master/worker restart |
| `--dry-run` | Print the spark-submit command without executing |
| `--jfr` | Enable Java Flight Recorder profiling |
| `--jfr-dir` | Directory for JFR output files (default: `/results/jfr`) |
| `--async-profiler` | Enable async-profiler (profiles Java + native code) |
| `--async-profiler-dir` | Directory for async-profiler output (default: `/results/async-profiler`) |
| `--async-profiler-event` | Event type: `cpu`, `wall`, `alloc`, `lock`, etc. (default: `cpu`) |
| `--async-profiler-format` | Output format: `flamegraph`, `jfr`, `collapsed`, `text` (default: `flamegraph`) |

Available engines: `spark`, `comet`, `comet-iceberg`, `gluten`
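
For example, a profiled single-query run might look like this (a sketch; the
engine, query number, and output path are illustrative):

```shell
python3 run.py --engine comet --benchmark tpch \
    --query 5 --iterations 3 --output /results \
    --async-profiler --async-profiler-event wall
```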

@@ -392,3 +396,88 @@ docker compose -f benchmarks/tpc/infra/docker/docker-compose.yml \

Open the `.jfr` files with [JDK Mission Control](https://jdk.java.net/jmc/),
IntelliJ IDEA's profiler, or `jfr` CLI tool (`jfr summary driver.jfr`).
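
For a quick look from the command line, the JDK's `jfr` tool can also print
individual events (a sketch; the file name matches the default driver output):

```shell
jfr summary driver.jfr
jfr print --events jdk.ExecutionSample driver.jfr | head
```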

## async-profiler Profiling

Use the `--async-profiler` flag to capture profiles with
[async-profiler](https://github.com/async-profiler/async-profiler). Unlike JFR,
async-profiler can profile **both Java and native (Rust/C++) code** in the same
flame graph, making it especially useful for profiling Comet workloads.

### Prerequisites

async-profiler must be installed on every node where the driver or executors run.
Set `ASYNC_PROFILER_HOME` to the installation directory:

```shell
# Download and extract (Linux x64 example)
wget https://github.com/async-profiler/async-profiler/releases/download/v3.0/async-profiler-3.0-linux-x64.tar.gz
sudo mkdir -p /opt/async-profiler
sudo tar xzf async-profiler-3.0-linux-x64.tar.gz -C /opt/async-profiler --strip-components=1
export ASYNC_PROFILER_HOME=/opt/async-profiler
```

On Linux, the kernel's `perf_event_paranoid` setting must be lowered so that
async-profiler can use perf events:

```shell
sudo sysctl kernel.perf_event_paranoid=1 # or 0 / -1 for full access
sudo sysctl kernel.kptr_restrict=0 # optional: enable kernel symbols
```
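
The current values can be checked without root privileges:

```shell
cat /proc/sys/kernel/perf_event_paranoid
cat /proc/sys/kernel/kptr_restrict
```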

### Basic usage

```shell
python3 run.py --engine comet --benchmark tpch --async-profiler
```

This produces HTML flame graphs in `/results/async-profiler/` by default
(`driver.html` and `executor.html`).

### Choosing events and output format

```shell
# Wall-clock profiling (includes time spent waiting/sleeping)
python3 run.py --engine comet --benchmark tpch \
--async-profiler --async-profiler-event wall

# Allocation profiling with JFR output
python3 run.py --engine comet --benchmark tpch \
--async-profiler --async-profiler-event alloc --async-profiler-format jfr

# Lock contention profiling
python3 run.py --engine comet --benchmark tpch \
--async-profiler --async-profiler-event lock
```

| Event | Description |
| ------- | --------------------------------------------------- |
| `cpu` | On-CPU time (default). Shows where CPU cycles go. |
| `wall` | Wall-clock time. Includes threads that are blocked. |
| `alloc` | Heap allocation profiling. |
| `lock` | Lock contention profiling. |

| Format | Extension | Description |
| ------------ | --------- | ---------------------------------------- |
| `flamegraph` | `.html` | Interactive HTML flame graph (default). |
| `jfr` | `.jfr` | JFR format, viewable in JMC or IntelliJ. |
| `collapsed` | `.txt` | Collapsed stacks for FlameGraph scripts. |
| `text` | `.txt` | Flat text summary of hot methods. |
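
The `collapsed` output is compatible with Brendan Gregg's
[FlameGraph](https://github.com/brendangregg/FlameGraph) scripts, for example
(a sketch; file names are illustrative):

```shell
git clone https://github.com/brendangregg/FlameGraph
./FlameGraph/flamegraph.pl /results/async-profiler/driver.txt > driver.svg
```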

### Docker usage

The Docker image includes async-profiler pre-installed at
`/opt/async-profiler`. The `ASYNC_PROFILER_HOME` environment variable is
already set in the compose files, so no extra configuration is needed:

```shell
docker compose -f benchmarks/tpc/infra/docker/docker-compose.yml \
run --rm bench \
python3 /opt/benchmarks/run.py \
--engine comet --benchmark tpch --output /results --no-restart --async-profiler
```

Output files are collected in `$RESULTS_DIR/async-profiler/` on the host.

**Note:** On Linux, the Docker container needs `--privileged` or the
`SYS_PTRACE` capability, plus `perf_event_paranoid <= 1` on the host, for
`cpu`/`wall` events. Allocation (`alloc`) and lock (`lock`) events work
without special privileges.
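
One way to grant the capability without editing the compose file is a compose
override (a sketch, assuming the service is named `bench` as in the example
above):

```shell
cat > /tmp/ptrace-override.yml <<'EOF'
services:
  bench:
    cap_add:
      - SYS_PTRACE
EOF

docker compose -f benchmarks/tpc/infra/docker/docker-compose.yml \
  -f /tmp/ptrace-override.yml \
  run --rm bench \
  python3 /opt/benchmarks/run.py \
  --engine comet --benchmark tpch --output /results --no-restart --async-profiler
```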
15 changes: 14 additions & 1 deletion benchmarks/tpc/infra/docker/Dockerfile
@@ -29,10 +29,23 @@ RUN apt-get update \
&& apt-get install -y --no-install-recommends \
openjdk-8-jdk-headless \
openjdk-17-jdk-headless \
python3 python3-pip procps \
python3 python3-pip procps wget \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Install async-profiler for profiling Java + native (Rust/C++) code.
ARG ASYNC_PROFILER_VERSION=3.0
RUN ARCH=$(uname -m) && \
if [ "$ARCH" = "x86_64" ]; then AP_ARCH="linux-x64"; \
elif [ "$ARCH" = "aarch64" ]; then AP_ARCH="linux-aarch64"; \
else echo "Unsupported architecture: $ARCH" && exit 1; fi && \
wget -q "https://github.com/async-profiler/async-profiler/releases/download/v${ASYNC_PROFILER_VERSION}/async-profiler-${ASYNC_PROFILER_VERSION}-${AP_ARCH}.tar.gz" \
-O /tmp/async-profiler.tar.gz && \
mkdir -p /opt/async-profiler && \
tar xzf /tmp/async-profiler.tar.gz -C /opt/async-profiler --strip-components=1 && \
rm /tmp/async-profiler.tar.gz
ENV ASYNC_PROFILER_HOME=/opt/async-profiler

# Default to Java 17 (override with JAVA_HOME at runtime for Gluten).
# Detect architecture (amd64 or arm64) so the image works on both Linux and macOS.
ARG TARGETARCH
2 changes: 2 additions & 0 deletions benchmarks/tpc/infra/docker/docker-compose-laptop.yml
@@ -30,6 +30,7 @@
# ICEBERG_JAR - Host path to Iceberg Spark runtime JAR
# BENCH_JAVA_HOME - Java home inside container (default: /usr/lib/jvm/java-17-openjdk)
# Set to /usr/lib/jvm/java-8-openjdk for Gluten
# ASYNC_PROFILER_HOME - async-profiler install path (default: /opt/async-profiler)

x-volumes: &volumes
- ${DATA_DIR:-/tmp/tpc-data}:/data:ro
@@ -95,5 +96,6 @@ services:
- TPCH_DATA=/data
- TPCDS_DATA=/data
- SPARK_EVENT_LOG_DIR=/results/spark-events
- ASYNC_PROFILER_HOME=/opt/async-profiler
mem_limit: 4g
memswap_limit: 4g
2 changes: 2 additions & 0 deletions benchmarks/tpc/infra/docker/docker-compose.yml
@@ -33,6 +33,7 @@
# BENCH_MEM_LIMIT - Hard memory limit for the bench runner (default: 10g)
# BENCH_JAVA_HOME - Java home inside container (default: /usr/lib/jvm/java-17-openjdk)
# Set to /usr/lib/jvm/java-8-openjdk for Gluten
# ASYNC_PROFILER_HOME - async-profiler install path (default: /opt/async-profiler)

x-volumes: &volumes
- ${DATA_DIR:-/tmp/tpc-data}:/data:ro
@@ -109,6 +110,7 @@ services:
- TPCH_DATA=/data
- TPCDS_DATA=/data
- SPARK_EVENT_LOG_DIR=/results/spark-events
- ASYNC_PROFILER_HOME=/opt/async-profiler
mem_limit: ${BENCH_MEM_LIMIT:-10g}
memswap_limit: ${BENCH_MEM_LIMIT:-10g}

62 changes: 59 additions & 3 deletions benchmarks/tpc/run.py
@@ -279,6 +279,38 @@ def build_spark_submit_cmd(config, benchmark, args):
        existing = conf.get(spark_key, "")
        conf[spark_key] = f"{existing} {jfr_opts}".strip()

    # async-profiler: attach as a Java agent via -agentpath
    if args.async_profiler:
        ap_home = os.environ.get("ASYNC_PROFILER_HOME", "")
        if not ap_home:
            print(
                "Error: ASYNC_PROFILER_HOME is not set. "
                "Set it to the async-profiler installation directory.",
                file=sys.stderr,
            )
            sys.exit(1)
        lib_ext = "dylib" if sys.platform == "darwin" else "so"
        ap_lib = os.path.join(ap_home, "lib", f"libasyncProfiler.{lib_ext}")
        ap_dir = args.async_profiler_dir
        ap_event = args.async_profiler_event
        ap_fmt = args.async_profiler_format
        ext = {"flamegraph": "html", "jfr": "jfr", "collapsed": "txt", "text": "txt"}[ap_fmt]

        driver_ap = (
            f"-agentpath:{ap_lib}=start,event={ap_event},"
            f"{ap_fmt},file={ap_dir}/driver.{ext}"
        )
        executor_ap = (
            f"-agentpath:{ap_lib}=start,event={ap_event},"
            f"{ap_fmt},file={ap_dir}/executor.{ext}"
        )
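        # Example of a generated agent option with the defaults (cpu, flamegraph):
        #   -agentpath:.../libasyncProfiler.so=start,event=cpu,flamegraph,file=/results/async-profiler/driver.html
        # Note: executors on the same node all write to the same file name;
        # async-profiler's %p (PID) placeholder could be used to disambiguate.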
        for spark_key, ap_opts in [
            ("spark.driver.extraJavaOptions", driver_ap),
            ("spark.executor.extraJavaOptions", executor_ap),
        ]:
            existing = conf.get(spark_key, "")
            conf[spark_key] = f"{existing} {ap_opts}".strip()

    for key, val in sorted(conf.items()):
        cmd += ["--conf", f"{key}={val}"]

@@ -385,6 +417,27 @@ def main():
default="/results/jfr",
help="Directory for JFR output files (default: /results/jfr)",
)
parser.add_argument(
"--async-profiler",
action="store_true",
help="Enable async-profiler for driver and executors (profiles Java + native code)",
)
parser.add_argument(
"--async-profiler-dir",
default="/results/async-profiler",
help="Directory for async-profiler output files (default: /results/async-profiler)",
)
parser.add_argument(
"--async-profiler-event",
default="cpu",
help="async-profiler event type: cpu, wall, alloc, lock, etc. (default: cpu)",
)
parser.add_argument(
"--async-profiler-format",
default="flamegraph",
choices=["flamegraph", "jfr", "collapsed", "text"],
help="async-profiler output format (default: flamegraph)",
)
args = parser.parse_args()

config = load_engine_config(args.engine)
@@ -401,9 +454,12 @@ def main():
    if not args.no_restart and not args.dry_run:
        restart_spark()

    # Create JFR output directory if profiling is enabled
    if args.jfr:
        os.makedirs(args.jfr_dir, exist_ok=True)
    # Create profiling output directories (skip for dry-run)
    if not args.dry_run:
        if args.jfr:
            os.makedirs(args.jfr_dir, exist_ok=True)
        if args.async_profiler:
            os.makedirs(args.async_profiler_dir, exist_ok=True)

    cmd = build_spark_submit_cmd(config, args.benchmark, args)
