diff --git a/architecture/gateway-single-node.md b/architecture/gateway-single-node.md index 57aebd3a..26aff137 100644 --- a/architecture/gateway-single-node.md +++ b/architecture/gateway-single-node.md @@ -260,7 +260,7 @@ On Docker custom networks, `/etc/resolv.conf` contains `127.0.0.11` (Docker's in 2. Getting the container's `eth0` IP as a routable address. 3. Adding DNAT rules in PREROUTING to forward DNS from pod namespaces through to Docker's DNS. 4. Writing a custom resolv.conf pointing to the container IP. -5. Passing `--resolv-conf=/etc/rancher/k3s/resolv.conf` to k3s. +5. Passing `--kubelet-arg=resolv-conf=/etc/rancher/k3s/resolv.conf` to k3s. Falls back to `8.8.8.8` / `8.8.4.4` if iptables detection fails. diff --git a/deploy/docker/Dockerfile.images b/deploy/docker/Dockerfile.images index 9cc50085..afb8857a 100644 --- a/deploy/docker/Dockerfile.images +++ b/deploy/docker/Dockerfile.images @@ -12,7 +12,11 @@ # supervisor-builder Release openshell-sandbox binary # supervisor-output Minimal stage exporting only the supervisor binary +# Pin by tag AND manifest-list digest to prevent silent upstream republishes +# from breaking the build. Update both when bumping k3s versions. 
+# To refresh: docker buildx imagetools inspect rancher/k3s:<tag> | head -3 ARG K3S_VERSION=v1.35.2-k3s1 +ARG K3S_DIGEST=sha256:c3184157c3048112bab0c3e17405991da486cb3413511eba23f7650efd70776b ARG K9S_VERSION=v0.50.18 ARG HELM_VERSION=v3.17.3 ARG NVIDIA_CONTAINER_TOOLKIT_VERSION=1.18.2-1 @@ -181,7 +185,7 @@ CMD ["--port", "8080"] # --------------------------------------------------------------------------- # Cluster asset stages # --------------------------------------------------------------------------- -FROM rancher/k3s:${K3S_VERSION} AS k3s +FROM rancher/k3s:${K3S_VERSION}@${K3S_DIGEST} AS k3s FROM ubuntu:24.04 AS k9s ARG K9S_VERSION @@ -268,6 +272,11 @@ COPY deploy/kube/manifests/*.yaml /opt/openshell/manifests/ COPY deploy/kube/gpu-manifests/*.yaml /opt/openshell/gpu-manifests/ ENTRYPOINT ["/usr/local/bin/cluster-entrypoint.sh"] +# Default to "server" so bare `docker run <image>` works without requiring +# the caller to pass a subcommand. The openshell CLI already passes +# ["server", "--disable=traefik", ...] as CMD; this default only affects +# manual `docker run` invocations that omit a command. +CMD ["server"] HEALTHCHECK --interval=5s --timeout=5s --start-period=20s --retries=60 \ CMD ["/usr/local/bin/cluster-healthcheck.sh"] diff --git a/deploy/docker/cluster-entrypoint.sh b/deploy/docker/cluster-entrypoint.sh index 2fea6fa6..d4717d88 100644 --- a/deploy/docker/cluster-entrypoint.sh +++ b/deploy/docker/cluster-entrypoint.sh @@ -18,7 +18,7 @@ # embedded DNS resolver at 127.0.0.11. Docker's DNS listens on random high # ports (visible in the DOCKER_OUTPUT iptables chain), so we parse those ports # and set up DNAT rules to forward DNS traffic from k3s pods. We then point -# k3s's --resolv-conf at the container's routable eth0 IP. +# k3s's resolv-conf kubelet arg at the container's routable eth0 IP. # # Per k3s docs: "Manually specified resolver configuration files are not # subject to viability checks." @@ -562,6 +562,8 @@ fi # routing to settle first. 
wait_for_default_route -# Execute k3s with explicit resolv-conf. +# Execute k3s with explicit resolv-conf passed as a kubelet arg. +# k3s v1.35.2+ no longer accepts --resolv-conf as a top-level server flag; +# it must be passed via --kubelet-arg instead. # shellcheck disable=SC2086 -exec /bin/k3s "$@" --resolv-conf="$RESOLV_CONF" $EXTRA_KUBELET_ARGS +exec /bin/k3s "$@" --kubelet-arg=resolv-conf="$RESOLV_CONF" $EXTRA_KUBELET_ARGS