diff --git a/CHANGELOG.md b/CHANGELOG.md index 65cc57f..e3c05e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,11 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) once a (see `internal/vm/ignition.go`). Fedora CoreOS is now runnable and proven — FCOS stable boots and the validator load/attaches inside the guest (verified on kernel `7.0.11-200.fc44`); fetch the image with `make vm-image-fcos`. RHEL - CoreOS (`rhcos`) shares this boot path but its image is pull-secret-gated via - the OpenShift release payload, so it stays non-runnable until an operator - supplies the image. + CoreOS (`rhcos`) shares this boot path; because its image ships with an + OpenShift release rather than a public URL, the operator stages it with + `make rhcos-image RHCOS_IMAGE=...` (or `RHCOS_IMAGE_URL=...`) and opts in with + `BPFCOMPAT_ENABLE_RHCOS=1`. Left off, `rhcos` stays unsupported so it is never + claimed runnable without a real image. - Embeddable library mode (`pkg/bpfcompat`). `ValidateBeforeLoad` / `ValidateBytes` do a real load of a compiled eBPF object against the local running kernel — no VM, no network — for use as a pre-load gate (e.g.
diff --git a/Makefile b/Makefile index 6272435..aeb0b5f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ LDFLAGS ?= -X $(VERSION_PKG).Version=$(VERSION) \ -X $(VERSION_PKG).BuildDate=$(BUILD_DATE) GO_BUILD_FLAGS ?= -trimpath -ldflags '$(LDFLAGS)' -.PHONY: all deps vendor doctor doctor-virtme doctor-firecracker doctor-arm64-kvm firecracker-install firecracker-kernel-install firecracker-runnable firecracker-preflight arm64-kvm-preflight build test test-vendor tidy validator validator-dynamic validator-static pkg-embed-validator lib-hostload examples examples-arm64 oss-examples oss-evidence compatibility-site clean vm-ubuntu-22 vm-ubuntu-22-arm64 vm-image-fcos vm-images vm-images-tier1 vm-images-extended vm-images-expanded-2026 vm-images-expanded-2026-dry-run vm-images-latest-kernel matrix-runnable matrix-runnable-strict matrix-runnable-keep-manual latest-kernel-runnable upstream-kernel-runnable manual-image-check manual-image-check-strict profile-catalog-audit matrix-readiness runtime-selector-proof runtime-delivery-proof production-runtime-drill beta-tech-check tech-stability production-tech-check acceptance-dev-one acceptance-functional-dev-one acceptance-suite-dev-one acceptance-arm64-smoke acceptance-latest-kernel acceptance-upstream-kernel acceptance-firecracker-dev-one acceptance acceptance-expanded-runnable acceptance-evidence serve azure-provision-vm azure-bootstrap-vm azure-provision-foundation azure-production-boundary-proof azure-configure-tls azure-rotate-registry-secret +.PHONY: all deps vendor doctor doctor-virtme doctor-firecracker doctor-arm64-kvm firecracker-install firecracker-kernel-install firecracker-runnable firecracker-preflight arm64-kvm-preflight build test test-vendor tidy validator validator-dynamic validator-static pkg-embed-validator lib-hostload examples examples-arm64 oss-examples oss-evidence compatibility-site clean vm-ubuntu-22 vm-ubuntu-22-arm64 vm-image-fcos rhcos-image vm-images vm-images-tier1 vm-images-extended 
vm-images-expanded-2026 vm-images-expanded-2026-dry-run vm-images-latest-kernel matrix-runnable matrix-runnable-strict matrix-runnable-keep-manual latest-kernel-runnable upstream-kernel-runnable manual-image-check manual-image-check-strict profile-catalog-audit matrix-readiness runtime-selector-proof runtime-delivery-proof production-runtime-drill beta-tech-check tech-stability production-tech-check acceptance-dev-one acceptance-functional-dev-one acceptance-suite-dev-one acceptance-arm64-smoke acceptance-latest-kernel acceptance-upstream-kernel acceptance-firecracker-dev-one acceptance acceptance-expanded-runnable acceptance-evidence serve azure-provision-vm azure-bootstrap-vm azure-provision-foundation azure-production-boundary-proof azure-configure-tls azure-rotate-registry-secret all: build validator @@ -173,6 +173,16 @@ vm-ubuntu-22: vm-image-fcos: bash vm/scripts/fetch-fcos-image.sh vm/cache/fedora-coreos-stable.qcow2 +# Stage an operator-supplied RHEL CoreOS (OpenShift) image for the rhcos-4.16 +# profile. RHCOS ships with an OpenShift release, not a public cloud-image URL, +# so the operator provides it: +# make rhcos-image RHCOS_IMAGE=/path/to/rhcos-qemu.x86_64.qcow2 +# make rhcos-image RHCOS_IMAGE_URL=https://internal-mirror/rhcos.qcow2.gz +# Then run with BPFCOMPAT_ENABLE_RHCOS=1 to enable the profile. +rhcos-image: + RHCOS_IMAGE='$(RHCOS_IMAGE)' RHCOS_IMAGE_URL='$(RHCOS_IMAGE_URL)' \ + bash vm/scripts/fetch-rhcos-image.sh vm/cache/rhcos-4.16.qcow2 + vm-ubuntu-22-arm64: bash vm/scripts/fetch-cloud-image.sh \ "https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-arm64.img" \ diff --git a/docs/env-reference.md b/docs/env-reference.md index 6c8ca44..791ee22 100644 --- a/docs/env-reference.md +++ b/docs/env-reference.md @@ -119,6 +119,12 @@ Generated by `bpfcompat env --markdown`. Do not edit by hand. | `BPFCOMPAT_TRUSTED_SIGNING_KEYS_PATH` | _(unset)_ | Path to a keyring file (one trusted key per line). 
| | `BPFCOMPAT_TRUSTED_SIGNING_PUBLIC_KEYS` | _(unset)_ | Inline trusted public keys (kid:base64, comma-separated). | +## VM Runner + +| Variable | Default | Description | +|---|---|---| +| `BPFCOMPAT_ENABLE_RHCOS` | false | Enable the RHEL CoreOS (rhcos) profile. RHCOS boots via the same Ignition path as Fedora CoreOS, but its image ships with an OpenShift release rather than a public URL. Stage the image with `make rhcos-image` and set this to 1/true once it is present; left off, rhcos stays unsupported so it is never claimed runnable without a real image. | + ## Validator | Variable | Default | Description | diff --git a/docs/profile-catalog.md b/docs/profile-catalog.md index 337b8bc..dde2133 100644 --- a/docs/profile-catalog.md +++ b/docs/profile-catalog.md @@ -112,7 +112,12 @@ Optional licensed image source: - Current VM validator execution path is SSH-based. - `talos`, `bottlerocket`, `flatcar`, and `amazon-linux-2-4.14` are cataloged for planning/roadmap and are marked non-blocking in matrix definitions because the current executor cannot run validator payloads on them. - `fedora-coreos` boots via **Ignition**, not cloud-init: the executor writes a minimal Ignition config (SSH key for the `core` user) and passes it to QEMU via `-fw_cfg name=opt/com.coreos/config` (see `internal/vm/ignition.go`). This path is **runnable and proven** — FCOS stable boots and the validator load/attaches inside the guest (verified on kernel `7.0.11-200.fc44`). It needs the manual image staged at `vm/cache/fedora-coreos-stable.qcow2` (fetch + `xz -d` from the FCOS stable stream). -- `rhcos` (RHEL CoreOS / OpenShift) shares that exact Ignition boot path, so it is **mechanically supported** — but its image ships only through the pull-secret-gated OpenShift release payload, so it cannot be fetched or verified here. 
`ExecutionTransport()` therefore still reports it unsupported until an operator supplies the image; the matching RHEL/AlmaLinux 9 (5.14) profile approximates the RHCOS kernel in the meantime. +- `rhcos` (RHEL CoreOS / OpenShift) shares that exact Ignition boot path, so the boot is solved. The only difference from Fedora CoreOS is the image: RHCOS ships with an OpenShift release, not a public cloud-image URL, so the operator supplies it. To enable RHCOS: + 1. Obtain the RHCOS qcow2 for your OpenShift version — e.g. `openshift-install coreos print-stream-json | jq -r '.architectures.x86_64.artifacts.qemu.formats["qcow2.gz"].disk.location'` — or use an internal mirror. + 2. Stage it: `make rhcos-image RHCOS_IMAGE=/path/to/rhcos.qcow2` (or `RHCOS_IMAGE_URL=...`). + 3. Opt in: `BPFCOMPAT_ENABLE_RHCOS=1 bpfcompat test --runner vm ...`. + + Left unset, `ExecutionTransport()` keeps `rhcos` unsupported so it is never claimed runnable without a real image. The matching RHEL/AlmaLinux 9 (5.14) profile approximates the RHCOS kernel in the meantime. - `rhel-8-4.18` uses NoCloud config-drive bootstrap in the current SSH executor (prefers `cloud-localds` ISO; falls back to local `vvfat` seed). - `aarch64`/`arm64` profiles select `qemu-system-aarch64`; `x86_64`/`amd64` profiles select `qemu-system-x86_64`. - ARM64 validation requires a matching ARM64-capable self-hosted runner, KVM access, an ARM64 cloud image, and a validator binary built for the guest architecture. The default Azure demo VM is x86_64 and should not be presented as ARM64 validation proof. diff --git a/internal/envref/envref.go b/internal/envref/envref.go index 73da543..5d1713a 100644 --- a/internal/envref/envref.go +++ b/internal/envref/envref.go @@ -294,6 +294,11 @@ var catalog = []Var{ Category: "Validator", Description: "Expected SHA-256 of the validator binary. 
When set, mismatched binaries are refused before exec.", }, + { + Name: "BPFCOMPAT_ENABLE_RHCOS", Default: "false", + Category: "VM Runner", + Description: "Enable the RHEL CoreOS (rhcos) profile. RHCOS boots via the same Ignition path as Fedora CoreOS, but its image ships with an OpenShift release rather than a public URL. Stage the image with `make rhcos-image` and set this to 1/true once it is present; left off, rhcos stays unsupported so it is never claimed runnable without a real image.", + }, // ---------- HTTP server ---------- { diff --git a/internal/vm/qemu_test.go b/internal/vm/qemu_test.go index d28e2c8..09ae89a 100644 --- a/internal/vm/qemu_test.go +++ b/internal/vm/qemu_test.go @@ -218,10 +218,11 @@ func TestExecutionTransport(t *testing.T) { {name: "flatcar blocked", distro: "flatcar", wantTransport: ExecutionTransportUnsupported, wantSupported: false, wantInMsg: "ignition"}, {name: "fedora-coreos supported", distro: "fedora-coreos", wantTransport: ExecutionTransportSSH, wantSupported: true}, {name: "fcos alias supported", distro: "FCOS", wantTransport: ExecutionTransportSSH, wantSupported: true}, - {name: "rhcos blocked on image", distro: "rhcos", wantTransport: ExecutionTransportUnsupported, wantSupported: false, wantInMsg: "pull-secret"}, - {name: "rhel-coreos blocked on image", distro: "rhel-coreos", wantTransport: ExecutionTransportUnsupported, wantSupported: false, wantInMsg: "pull-secret"}, + {name: "rhcos blocked on image", distro: "rhcos", wantTransport: ExecutionTransportUnsupported, wantSupported: false, wantInMsg: "rhcos-image"}, + {name: "rhel-coreos blocked on image", distro: "rhel-coreos", wantTransport: ExecutionTransportUnsupported, wantSupported: false, wantInMsg: "rhcos-image"}, } + t.Setenv("BPFCOMPAT_ENABLE_RHCOS", "") // ensure the default-off path for the table for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { transport, supported, reason := ExecutionTransport(Profile{ID: tt.id, Distro: tt.distro}) @@ -238,6 +239,18 
@@ func TestExecutionTransport(t *testing.T) { } } +func TestExecutionTransportRHCOSOptIn(t *testing.T) { + for _, distro := range []string{"rhcos", "rhel-coreos"} { + t.Run(distro, func(t *testing.T) { + t.Setenv("BPFCOMPAT_ENABLE_RHCOS", "1") + transport, supported, reason := ExecutionTransport(Profile{Distro: distro}) + if !supported || transport != ExecutionTransportSSH { + t.Fatalf("with opt-in, want supported ssh; got transport=%q supported=%t reason=%q", transport, supported, reason) + } + }) + } +} + func TestBuildVirtmeNGArgs(t *testing.T) { profile := Profile{ ID: "kernelorg-mainline-7.1-rc6", diff --git a/internal/vm/transport.go b/internal/vm/transport.go index ca6ae85..5d3b346 100644 --- a/internal/vm/transport.go +++ b/internal/vm/transport.go @@ -1,6 +1,22 @@ package vm -import "strings" +import ( + "os" + "strings" +) + +// rhcosOptIn reports whether an operator has explicitly enabled RHCOS +// validation via BPFCOMPAT_ENABLE_RHCOS. RHCOS boots via the same Ignition path +// as Fedora CoreOS (supported), but its image ships through the OpenShift +// release payload and cannot be fetched here, so the operator must stage the +// image (see `make rhcos-image`) and set this flag to assert it is present. +func rhcosOptIn() bool { + switch strings.ToLower(strings.TrimSpace(os.Getenv("BPFCOMPAT_ENABLE_RHCOS"))) { + case "1", "true", "yes", "on": + return true + } + return false +} const ( ExecutionTransportSSH = "ssh" @@ -29,11 +45,15 @@ func ExecutionTransport(profile Profile) (transport string, supported bool, reas // the core user — implemented in ignition.go and proven on FCOS stable. return ExecutionTransportSSH, true, "" case "rhcos", "rhel-coreos": - // Shares Fedora CoreOS's Ignition+SSH boot path (now implemented), but - // the RHCOS image ships only through the pull-secret-gated OpenShift - // release payload, so it cannot be fetched/verified here. Supply the - // image to enable it; until then it stays non-runnable. 
- return ExecutionTransportUnsupported, false, "RHEL CoreOS shares the Fedora CoreOS Ignition boot path (now supported), but its image is only available via the pull-secret-gated OpenShift release payload; supply the image to enable it." + // Shares Fedora CoreOS's Ignition+SSH boot path (implemented + proven on + // FCOS). The only missing piece is the image, which ships through the + // OpenShift release payload rather than a public cloud-image URL. An + // operator stages it (see `make rhcos-image`) and opts in explicitly so + // we never claim RHCOS works without a real image present. + if rhcosOptIn() { + return ExecutionTransportSSH, true, "" + } + return ExecutionTransportUnsupported, false, "RHEL CoreOS shares the Fedora CoreOS Ignition boot path (supported), but its image ships via the OpenShift release payload, not a public URL. Stage it with `make rhcos-image` and set BPFCOMPAT_ENABLE_RHCOS=1 to enable." default: return ExecutionTransportSSH, true, "" } diff --git a/vm/profiles/rhcos-4.16-5.14.yaml b/vm/profiles/rhcos-4.16-5.14.yaml index 9019235..f8470b1 100644 --- a/vm/profiles/rhcos-4.16-5.14.yaml +++ b/vm/profiles/rhcos-4.16-5.14.yaml @@ -1,20 +1,21 @@ -# RHEL CoreOS (OpenShift 4.16) — cataloged / roadmap, NOT runnable today. +# RHEL CoreOS (OpenShift 4.16) — runnable with an operator-supplied image. # # RHCOS is the immutable node OS for OpenShift. Its kernel is the RHEL 9.4 # kernel (5.14, heavily backported), so for pure BPF-load questions a RHEL-9 / # AlmaLinux-9 profile already approximates it closely. RHCOS is the requested # "tricky target" because of how it boots and ships, not because of the kernel. # -# Why it's not runnable yet (two gaps): -# 1. Boot: RHCOS boots via Ignition, not cloud-init — same executor gap as -# Fedora CoreOS / Flatcar. ExecutionTransport() reports it unsupported. -# 2. 
Image: RHCOS qcow2 is distributed through the OpenShift release payload -# and is pull-secret gated; obtain it via the matching openshift-install / -# `oc adm release` for the 4.16 release, then stage at the local_path below. +# Boot: solved. RHCOS boots via Ignition, exactly like Fedora CoreOS, which is +# implemented and proven (see internal/vm/ignition.go). The only remaining gap +# is the image: +# - RHCOS qcow2 ships with an OpenShift release, not a public cloud-image URL. +# Obtain it for the 4.16 release (e.g. `openshift-install coreos +# print-stream-json`) and stage it with `make rhcos-image`, then opt in with +# BPFCOMPAT_ENABLE_RHCOS=1. Until then ExecutionTransport() keeps rhcos +# unsupported so it is never claimed runnable without a real image. # -# Pragmatic interim: validate against the matching RHEL/AlmaLinux 9 (5.14) -# profile, which shares the kernel + backports. A true RHCOS boot is the -# differentiated follow-up once the Ignition bootstrap path lands. +# Pragmatic interim (no image): validate against the matching RHEL/AlmaLinux 9 +# (5.14) profile, which shares the kernel + backports. id: rhcos-4.16-5.14 distro: rhcos version: "4.16"
diff --git a/vm/scripts/fetch-rhcos-image.sh b/vm/scripts/fetch-rhcos-image.sh
new file mode 100755
index 0000000..b4897eb
--- /dev/null
+++ b/vm/scripts/fetch-rhcos-image.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Stage an operator-supplied RHEL CoreOS (OpenShift) qemu image into vm/cache/.
+#
+# RHCOS boot images are not published at a public cloud-image URL like Ubuntu or
+# Fedora CoreOS; they ship with an OpenShift release. The operator obtains the
+# qcow2 for their OpenShift version and points this script at it. Two inputs are
+# supported (env vars); RHCOS_IMAGE wins when both are set:
+#
+#   RHCOS_IMAGE=/path/to/rhcos-qemu.x86_64.qcow2          # a local file
+#   RHCOS_IMAGE_URL=https://.../rhcos-...qcow2[.gz|.xz]   # an internal mirror
+#
+# How to obtain the URL for a given OpenShift version (openshift-install is the
+# version-pinned tool; the URLs it prints are on the public RHCOS mirror):
+#
+#   openshift-install coreos print-stream-json \
+#     | jq -r '.architectures.x86_64.artifacts.qemu.formats["qcow2.gz"].disk.location'
+#
+# Then either download it yourself and pass RHCOS_IMAGE, or pass RHCOS_IMAGE_URL.
+# A .gz/.xz image is decompressed automatically.
+#
+# Usage: fetch-rhcos-image.sh [OUT]   (OUT defaults to vm/cache/rhcos-4.16.qcow2)
+# Exit status: 0 = staged or already present, 2 = bad/missing input; any other
+# non-zero status comes from the failing tool (mkdir, curl, gzip, xz, cp, mv).
+#
+# The image is assembled in a scratch file next to OUT and renamed into place
+# only after the copy/decompression succeeded, so a failed or interrupted run
+# never leaves a truncated OUT that later runs would report as already present.
+set -euo pipefail
+
+OUT="${1:-vm/cache/rhcos-4.16.qcow2}"
+SRC="${RHCOS_IMAGE:-}"
+URL="${RHCOS_IMAGE_URL:-}"
+
+# -s rather than -f: a zero-byte OUT left behind by an earlier failed run is
+# not an image and must not short-circuit staging.
+if [[ -s "$OUT" ]]; then
+  echo "RHCOS image already present at $OUT (delete it to restage)"
+  exit 0
+fi
+if [[ -z "$SRC" && -z "$URL" ]]; then
+  echo "error: set RHCOS_IMAGE=/path/to/image or RHCOS_IMAGE_URL=https://..." >&2
+  echo " (RHCOS images ship with an OpenShift release; see the header of" >&2
+  echo " vm/scripts/fetch-rhcos-image.sh for how to obtain one.)" >&2
+  exit 2
+fi
+
+mkdir -p "$(dirname "$OUT")"
+
+# Scratch files live in OUT's directory so the final mv is a plain rename.
+# The EXIT trap removes whatever is left when the script stops early.
+PART="$OUT.partial"
+DOWNLOAD="$OUT.download"
+cleanup() { rm -f -- "$PART" "$DOWNLOAD"; }
+trap cleanup EXIT
+
+stage() {
+  # $1 = source file (possibly compressed).
+  # $2 = name whose extension selects the decompressor (defaults to $1).
+  # Writes the result to $PART, then renames it to $OUT.
+  local f="$1"
+  local name="${2:-$1}"
+  case "$name" in
+    *.gz) echo "Decompressing gzip ..."; gzip -dc -- "$f" > "$PART" ;;
+    *.xz) echo "Decompressing xz ..."; xz -dc -- "$f" > "$PART" ;;
+    *)    cp -- "$f" "$PART" ;;
+  esac
+  mv -f -- "$PART" "$OUT"
+}
+
+if [[ -n "$SRC" ]]; then
+  [[ -f "$SRC" ]] || { echo "error: RHCOS_IMAGE not found: $SRC" >&2; exit 2; }
+  echo "Staging local image $SRC -> $OUT"
+  stage "$SRC"
+else
+  echo "Downloading $URL ..."
+  curl -fSL "$URL" -o "$DOWNLOAD"
+  # Choose the decompressor from the URL path; a ?query or #fragment (e.g. a
+  # signed mirror link) must not hide the .gz/.xz suffix.
+  stage "$DOWNLOAD" "${URL%%[?#]*}"
+fi
+
+echo "Staged RHCOS image at $OUT"
+echo "Now run with: BPFCOMPAT_ENABLE_RHCOS=1 bpfcompat test --runner vm ..."