From 79b7f6ee5ef6c153aef7a042f94d6e8dea4f74db Mon Sep 17 00:00:00 2001 From: Mark Chmarny Date: Wed, 13 May 2026 04:29:06 -0700 Subject: [PATCH 1/2] fix(recipes): migrate GB200 kernel-module-params to preManifestFiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel-module-params ConfigMap on gb200-eks-{training,inference} overlays is consumed by the gpu-operator chart's ClusterPolicy reconcile, not after install — so emitting it under manifestFiles (post-install) deadlocked 'helm install --wait' for ~52 min before terminal failure on every fresh GB200/EKS cluster. Moves the entry to preManifestFiles, introduced by #856, which places the rendered ConfigMap in NNN-gpu-operator-pre/ ahead of the chart in NNN-gpu-operator/. Verified locally: 009-gpu-operator-pre/templates/kernel-module-params.yaml 010-gpu-operator/ (helm install --wait) 011-gpu-operator-post/ Audit of other manifestFiles call sites confirms the remaining entries ship chart-CRD-dependent CRs (Skyhook, NicClusterPolicy, Gateway, ClusterTrainingRuntime) or independent core-API resources, all of which are correctly post-install today. Fixes #859 --- recipes/overlays/gb200-eks-inference.yaml | 4 +++- recipes/overlays/gb200-eks-training.yaml | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/recipes/overlays/gb200-eks-inference.yaml b/recipes/overlays/gb200-eks-inference.yaml index 2e264a09c..31019666b 100644 --- a/recipes/overlays/gb200-eks-inference.yaml +++ b/recipes/overlays/gb200-eks-inference.yaml @@ -41,7 +41,9 @@ spec: # declared in the inheritance chain, so this lives per-leaf for now. - name: gpu-operator type: Helm - manifestFiles: + preManifestFiles: + # Pre-install: ClusterPolicy reconcile reads this ConfigMap during + # `helm install --wait`. See gb200-eks-training.yaml for full context (#859). - components/gpu-operator/manifests/kernel-module-params.yaml dependencyRefs: - nfd diff --git a/recipes/overlays/gb200-eks-training.yaml b/recipes/overlays/gb200-eks-training.yaml index 721518210..1a3aea023 100644 --- a/recipes/overlays/gb200-eks-training.yaml +++ b/recipes/overlays/gb200-eks-training.yaml @@ -39,12 +39,16 @@ spec: # eks-training / eks-inference), so this lives per-leaf for now. - name: gpu-operator type: Helm - manifestFiles: + preManifestFiles: # Ships a ConfigMap with options nvidia NVreg_GrdmaPciTopoCheckOverride=1. # driver.kernelModuleConfig.name (below) points the ClusterPolicy at it, # so the driver DaemonSet mounts nvidia.conf and the kernel loads with # the flag set — required on GB200+EFA for EFA dma-buf attach to the # Grace PCI topology. Without it, NCCL silently falls back to Socket. + # + # Must be preManifestFiles (applied before the chart): the chart's + # ClusterPolicy reconcile reads this ConfigMap during `helm install --wait`, + # so a post-install pass would deadlock until the chart times out (#859). - components/gpu-operator/manifests/kernel-module-params.yaml dependencyRefs: - nfd From 752d8df9e5a4da182f39eb9ebd1593399722a225 Mon Sep 17 00:00:00 2001 From: Mark Chmarny Date: Wed, 13 May 2026 05:34:14 -0700 Subject: [PATCH 2/2] fix(bundler): auto-detect Namespace in pre-folders for --create-namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KWOK e2e for the GB200 recipes (#868) surfaced a latent bug in PR #856: pre-injection install.sh unconditionally omits --create-namespace, which deadlocks any recipe that ships a bare-resource manifest (ConfigMap, ResourceQuota, etc.) in preManifestFiles. The target namespace doesn't exist yet — the primary chart's helm install --create-namespace hasn't run — so the pre-pass install fails with 'namespaces "" not found'. The original 'pre = no --create-namespace' rule was correct for the Talos case (chart ships a Namespace template; --create-namespace would collide with Helm 3 ownership annotations) but wrong as a blanket rule. Move the decision into writeLocalHelmFolder: scan rendered manifests for a top-level 'kind: Namespace'; if one is present, downgrade createNamespace to false; otherwise honor the caller's request. Callers (injectAuxiliaryFolder, the primary local-helm path) now uniformly ask for createNamespace=true and let detection decide. Both call shapes are exercised: - Talos: ships a Namespace → detection downgrades → install.sh omits --create-namespace (unchanged behavior; existing goldens pass) - GB200 kernel-module-params: ships only a ConfigMap → detection leaves it true → install.sh creates 'gpu-operator' namespace before the ConfigMap, ahead of the primary chart's own --create-namespace which becomes a no-op against the existing namespace Refs #859, follow-up to #856 --- .../deployer/localformat/local_helm.go | 46 +++++++++++++++---- pkg/bundler/deployer/localformat/writer.go | 15 +++--- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/pkg/bundler/deployer/localformat/local_helm.go b/pkg/bundler/deployer/localformat/local_helm.go index 11acf75dd..9dec41a9e 100644 --- a/pkg/bundler/deployer/localformat/local_helm.go +++ b/pkg/bundler/deployer/localformat/local_helm.go @@ -20,6 +20,7 @@ import ( "log/slog" "os" "path/filepath" + "regexp" "sort" "strings" "text/template" @@ -29,6 +30,13 @@ import ( "github.com/NVIDIA/aicr/pkg/manifest" ) +// kindNamespaceRE matches a top-level YAML "kind: Namespace" line. Used to +// detect whether a local-helm folder ships its own Namespace template, in +// which case install.sh must omit --create-namespace: Helm 3 refuses to +// import a namespace it created out-of-band via --create-namespace because +// that namespace lacks the release's ownership annotations. +var kindNamespaceRE = regexp.MustCompile(`(?m)^kind:\s+Namespace\s*$`) + // hasYAMLObjects returns true if content contains at least one YAML object // (a non-comment, non-blank, non-separator line). Used to skip writing // fully-conditional manifests that rendered to nothing once values were @@ -65,13 +73,23 @@ var ( // templates/ directory). // // createNamespace controls whether the rendered install.sh passes -// --create-namespace to helm. Set false when this folder's chart contains -// a Namespace template targeting c.Namespace — Helm 3 refuses to import -// an existing namespace lacking app.kubernetes.io/managed-by=Helm and -// meta.helm.sh/release-name annotations, so --create-namespace creating -// the namespace ahead of the chart's own Namespace template collides. -// Pre-injection folders carry the privileged-Namespace manifest by -// design and therefore pass false; primary and post folders pass true. +// --create-namespace to helm. Callers express intent ("yes, the chart +// expects to land in a namespace that may not exist yet"); the function +// downgrades the flag to false automatically when any rendered manifest +// defines its own Namespace resource — Helm 3 refuses to import a +// namespace it created out-of-band via --create-namespace because that +// namespace lacks the release's ownership annotations. +// +// The detection covers two real call shapes: +// - Talos pre-injection folder: ships a privileged Namespace template. +// Caller asks for createNamespace=true; detection downgrades to false +// so the chart's own Namespace template owns the resource. +// - Bare-resource pre-injection folder (e.g., a ConfigMap consumed by +// the primary chart's reconcile): no Namespace template. Caller asks +// for createNamespace=true; detection leaves it true so install.sh +// creates the target namespace before applying the ConfigMap. The +// primary chart's later `helm install --create-namespace` is a no-op +// against the existing namespace. // // Returns the Folder manifest with Files listed in deterministic order. func writeLocalHelmFolder( @@ -121,6 +139,10 @@ func writeLocalHelmFolder( seen := make(map[string]string, len(sortedPaths)) templateRelPaths := make([]string, 0, len(sortedPaths)) + // hasNamespaceTemplate is set when any rendered manifest declares a + // top-level Namespace resource. When true, install.sh below suppresses + // --create-namespace to avoid the Helm 3 ownership collision. + hasNamespaceTemplate := false for _, p := range sortedPaths { baseName := filepath.Base(p) if prev, ok := seen[baseName]; ok { @@ -155,6 +177,9 @@ func writeLocalHelmFolder( "component", c.Name, "manifest", baseName) continue } + if !hasNamespaceTemplate && kindNamespaceRE.Match(rendered) { + hasNamespaceTemplate = true + } outPath, jerr := deployer.SafeJoin(templatesDir, baseName) if jerr != nil { return Folder{}, errors.Wrap(errors.ErrCodeInvalidRequest, @@ -166,12 +191,15 @@ func writeLocalHelmFolder( templateRelPaths = append(templateRelPaths, filepath.Join(dir, "templates", baseName)) } - // install.sh + // install.sh — suppress --create-namespace if a chart template owns the + // target namespace (see createNamespace doc comment for the full + // rationale and call shapes). + effectiveCreateNamespace := createNamespace && !hasNamespaceTemplate installData := struct { Name string Namespace string CreateNamespace bool - }{name, c.Namespace, createNamespace} + }{name, c.Namespace, effectiveCreateNamespace} if err = renderTemplateToFile(localHelmInstallTmpl, installData, folderDir, "install.sh", 0o755); err != nil { return Folder{}, err } diff --git a/pkg/bundler/deployer/localformat/writer.go b/pkg/bundler/deployer/localformat/writer.go index 3e874f0c4..ea68b7677 100644 --- a/pkg/bundler/deployer/localformat/writer.go +++ b/pkg/bundler/deployer/localformat/writer.go @@ -164,17 +164,16 @@ func (opts *Options) injectAuxiliaryFolder(idx int, c Component, phase injection } auxName := c.Name + "-" + string(phase) auxDir := fmt.Sprintf("%03d-%s", idx, auxName) - // Pre-phase folders carry a Namespace manifest by design (the - // privileged-namespace template), so install.sh must not pass - // --create-namespace: Helm 3 refuses to import a pre-existing - // namespace into the release. Post-phase folders never contain a - // Namespace template; --create-namespace is a no-op there since the - // primary release has already created it. - createNamespace := phase != phasePre + // Both pre- and post-phase folders request createNamespace=true; + // writeLocalHelmFolder downgrades to false automatically when the + // rendered templates contain a Namespace resource (Talos + // privileged-namespace pattern). Otherwise the target namespace may + // not yet exist when the pre-phase runs — the primary chart hasn't + // executed --create-namespace yet — so install.sh must create it. f, err := writeLocalHelmFolder( opts.OutputDir, auxDir, idx, c, manifests, renderInputFor(c), - auxName, c.Name, createNamespace, + auxName, c.Name, true, ) if err != nil { return nil, err