From 503de2886dbb67fd20002116b3b5ea3bf2397433 Mon Sep 17 00:00:00 2001 From: Aviv Dozorets Date: Fri, 29 May 2026 19:22:00 +0300 Subject: [PATCH 1/2] support native (no jvm) setup --- .github/workflows/build-and-push.yml | 24 ++++- .github/workflows/release.yml | 99 +++++++++++++++++++ CLAUDE.md | 21 ++++ Dockerfile.native | 34 +++++++ README.md | 17 ++++ build.gradle.kts | 32 +++++- scripts/benchmark-startup.sh | 59 +++++++++++ .../klag/resource-config.json | 11 +++ 8 files changed, 295 insertions(+), 2 deletions(-) create mode 100644 Dockerfile.native create mode 100755 scripts/benchmark-startup.sh create mode 100644 src/main/resources/META-INF/native-image/io.github.themoah/klag/resource-config.json diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml index a4fcdd3..85ce5d8 100644 --- a/.github/workflows/build-and-push.yml +++ b/.github/workflows/build-and-push.yml @@ -65,4 +65,26 @@ jobs: run: gradle test - name: Build fat JAR - run: gradle shadowJar \ No newline at end of file + run: gradle shadowJar + + # Validates the GraalVM native build (metadata hints stay correct). + native-build: + runs-on: ubuntu-latest + needs: [changes, test-helm-chart] + if: always() && (needs.test-helm-chart.result == 'success' || needs.test-helm-chart.result == 'skipped') + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up GraalVM (JDK 21) + uses: graalvm/setup-graalvm@v1 + with: + java-version: '21' + distribution: 'graalvm-community' + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v4 + + - name: Compile native image + run: gradle nativeCompile --no-daemon \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 277813b..b7c7d2e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,6 +18,8 @@ permissions: jobs: release: runs-on: ubuntu-latest + outputs: + version: ${{ steps.version.outputs.version }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -90,3 +92,100 @@ jobs: tag_name: ${{ github.event_name == 'push' && github.ref_name || format('v{0}', steps.version.outputs.version) }} generate_release_notes: true files: build/libs/klag-${{ steps.version.outputs.version }}-fat.jar + + # GraalVM native image. native-image cannot cross-compile, so each arch is + # built on a matching native runner and pushed by digest, then merged into a + # single multi-arch manifest tagged `:-native` and `:native`. + release-native: + needs: release + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm + runs-on: ${{ matrix.runner }} + outputs: + version: ${{ needs.release.outputs.version }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push native image by digest + id: build + uses: docker/build-push-action@v6 + with: + context: . + file: Dockerfile.native + platforms: ${{ matrix.platform }} + outputs: type=image,name=themoah/klag,push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digest-${{ strategy.job-index }} + path: /tmp/digests/* + retention-days: 1 + + release-native-manifest: + needs: release-native + runs-on: ubuntu-latest + env: + VERSION: ${{ needs.release-native.outputs.version }} + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digest-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create and push manifests + working-directory: /tmp/digests + run: | + for repo in themoah/klag ghcr.io/themoah/klag; do + docker buildx imagetools create \ + -t "${repo}:${VERSION}-native" \ + -t "${repo}:native" \ + $(printf "${repo}@sha256:%s " *) + done diff --git a/CLAUDE.md b/CLAUDE.md index 4659287..37bffbe 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,27 @@ Klag is a Kafka Lag Exporter built with Vert.x 4.5.22. Monitors consumer lag and ./gradlew run # Run with hot-reload ``` +### GraalVM Native Image (startup/memory optimized) + +Requires a GraalVM JDK 21 (LTS) with `native-image` (e.g. `sdk install java 21.0.2-graalce`). +Run Gradle with that JDK as `JAVA_HOME`/`GRAALVM_HOME`. + +```bash +gradle nativeCompile # -> build/native/nativeCompile/klag (standalone binary) +docker build -f Dockerfile.native -t klag:native . # distroless runtime image +scripts/benchmark-startup.sh native - build/native/nativeCompile/klag # startup/RSS bench +``` + +Native config lives in `build.gradle.kts` (`graalvmNative` block) plus reachability +hints in `src/main/resources/META-INF/native-image/`. Reflection metadata for Netty, +kafka-clients, logback and micrometer comes from the GraalVM Reachability Metadata +Repository (auto-enabled). Entry point is `KlagLauncher` (direct `new MainVerticle()`, +no reflective Vert.x launcher). + +**Measured (macOS arm64, prometheus reporter, Kafka up):** native ≈ 70-100 ms startup / +44 MB RSS vs JVM 21 ≈ 470-520 ms / 119 MB. JVM 25 (LTS) showed no startup/memory gain +over 21 for this workload (slightly higher RSS), so the runtime stays on JDK 21. + ## Architecture Vert.x reactive framework with `Future`-based async API. diff --git a/Dockerfile.native b/Dockerfile.native new file mode 100644 index 0000000..9cf21e8 --- /dev/null +++ b/Dockerfile.native @@ -0,0 +1,34 @@ +# syntax=docker/dockerfile:1 +# ---- Native build stage ---- +# GraalVM CE community image with native-image, JDK 21 (LTS). +FROM ghcr.io/graalvm/native-image-community:21-ol9 AS builder + +ARG GRADLE_VERSION=8.14.3 +WORKDIR /app + +# Install Gradle (GraalVM image ships no build tool) +# Image already ships the C toolchain; native-image needs xargs (findutils) and +# zlib-static for the mostly-static link (-PnativeStatic). +RUN microdnf install -y unzip findutils zlib-static && \ + curl -fsSL "https://services.gradle.org/distributions/gradle-${GRADLE_VERSION}-bin.zip" -o /tmp/gradle.zip && \ + unzip -d /opt/gradle /tmp/gradle.zip && \ + ln -s "/opt/gradle/gradle-${GRADLE_VERSION}/bin/gradle" /usr/bin/gradle && \ + rm -f /tmp/gradle.zip + +# Cache dependencies +COPY build.gradle.kts settings.gradle.kts ./ +RUN gradle dependencies --no-daemon || true + +# Build the native binary (mostly-static: only libc dynamic -> runs on distroless/base) +COPY src src +RUN gradle nativeCompile --no-daemon -PnativeStatic + +# ---- Runtime stage ---- +# distroless/base provides glibc + libz, no JVM, minimal attack surface. +FROM gcr.io/distroless/base-debian12 + +WORKDIR /app +COPY --from=builder /app/build/native/nativeCompile/klag /app/klag + +EXPOSE 8888 +ENTRYPOINT ["/app/klag"] diff --git a/README.md b/README.md index 63547be..432bba8 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,23 @@ docker run -e KAFKA_BOOTSTRAP_SERVERS=kafka:9092 \ Metrics available at `http://localhost:8888/metrics` +### Native image (faster startup, lower memory) + +A GraalVM native build is published alongside the JVM image, tagged `:native` and +`:-native`: + +```bash +docker run -e KAFKA_BOOTSTRAP_SERVERS=kafka:9092 \ + -e METRICS_REPORTER=prometheus \ + -p 8888:8888 \ + themoah/klag:native +``` + +The native binary starts in ~70-100 ms using ~44 MB RSS, versus ~500 ms / ~119 MB for +the JVM image — ideal for fast scaling and low-footprint deployments. Same config, +endpoints, and metrics. Build locally with `gradle nativeCompile` (needs a GraalVM +JDK 21) or `docker build -f Dockerfile.native -t klag:native .`. + ## Metrics Exposed | Metric | Description | diff --git a/build.gradle.kts b/build.gradle.kts index d3604e3..ef2a4b2 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -5,10 +5,11 @@ plugins { java application id("com.gradleup.shadow") version "9.2.2" + id("org.graalvm.buildtools.native") version "0.10.6" } group = "io.github.themoah" -version = "0.1.11" +version = "0.2.0" repositories { mavenCentral() @@ -95,3 +96,32 @@ tasks.withType { ) } } + +// GraalVM native image configuration. +// Entry point is KlagLauncher (direct `new MainVerticle()` - no reflective Vert.x launcher). +// Reachability metadata for Netty, kafka-clients, logback, micrometer is pulled +// from the GraalVM Reachability Metadata Repository; project-specific hints live +// in src/main/resources/META-INF/native-image/. +graalvmNative { + binaries { + named("main") { + imageName.set("klag") + mainClass.set(launcherClassName) + buildArgs.add("--no-fallback") + buildArgs.add("-H:+ReportExceptionStackTraces") + buildArgs.add("--enable-url-protocols=http,https") + // Vert.x/Netty/logback are not safe to initialize at build time. + buildArgs.add("--initialize-at-run-time=io.netty") + // -PnativeStatic (Linux/CI): statically link everything except libc so the + // binary runs on a distroless/base image with no libz.so.1 etc. Not used on + // macOS where static linking is unsupported. + if (project.hasProperty("nativeStatic")) { + buildArgs.add("-H:+StaticExecutableWithDynamicLibC") + } + } + } + metadataRepository { + enabled.set(true) + } + toolchainDetection.set(false) +} diff --git a/scripts/benchmark-startup.sh b/scripts/benchmark-startup.sh new file mode 100755 index 0000000..5646686 --- /dev/null +++ b/scripts/benchmark-startup.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Benchmark startup time and memory footprint for klag variants. +# +# - startup time : launch -> "Klag started successfully" log line +# - RSS : steady state, ~3s after ready +# +# Requires a reachable Kafka (startup blocks on describeCluster): +# docker compose up -d kafka +# +# Usage: +# scripts/benchmark-startup.sh