diff --git a/pkg/bundler/deployer/localformat/local_helm.go b/pkg/bundler/deployer/localformat/local_helm.go index 11acf75dd..9dec41a9e 100644 --- a/pkg/bundler/deployer/localformat/local_helm.go +++ b/pkg/bundler/deployer/localformat/local_helm.go @@ -20,6 +20,7 @@ import ( "log/slog" "os" "path/filepath" + "regexp" "sort" "strings" "text/template" @@ -29,6 +30,13 @@ import ( "github.com/NVIDIA/aicr/pkg/manifest" ) +// kindNamespaceRE matches a top-level YAML "kind: Namespace" line. Used to +// detect whether a local-helm folder ships its own Namespace template, in +// which case install.sh must omit --create-namespace: Helm 3 refuses to +// import a namespace it created out-of-band via --create-namespace because +// that namespace lacks the release's ownership annotations. +var kindNamespaceRE = regexp.MustCompile(`(?m)^kind:\s+Namespace\s*$`) + // hasYAMLObjects returns true if content contains at least one YAML object // (a non-comment, non-blank, non-separator line). Used to skip writing // fully-conditional manifests that rendered to nothing once values were @@ -65,13 +73,23 @@ var ( // templates/ directory). // // createNamespace controls whether the rendered install.sh passes -// --create-namespace to helm. Set false when this folder's chart contains -// a Namespace template targeting c.Namespace — Helm 3 refuses to import -// an existing namespace lacking app.kubernetes.io/managed-by=Helm and -// meta.helm.sh/release-name annotations, so --create-namespace creating -// the namespace ahead of the chart's own Namespace template collides. -// Pre-injection folders carry the privileged-Namespace manifest by -// design and therefore pass false; primary and post folders pass true. +// --create-namespace to helm. Callers express intent ("yes, the chart +// expects to land in a namespace that may not exist yet"); the function +// downgrades the flag to false automatically when any rendered manifest +// defines its own Namespace resource — Helm 3 refuses to import a +// namespace it created out-of-band via --create-namespace because that +// namespace lacks the release's ownership annotations. +// +// The detection covers two real call shapes: +// - Talos pre-injection folder: ships a privileged Namespace template. +// Caller asks for createNamespace=true; detection downgrades to false +// so the chart's own Namespace template owns the resource. +// - Bare-resource pre-injection folder (e.g., a ConfigMap consumed by +// the primary chart's reconcile): no Namespace template. Caller asks +// for createNamespace=true; detection leaves it true so install.sh +// creates the target namespace before applying the ConfigMap. The +// primary chart's later `helm install --create-namespace` is a no-op +// against the existing namespace. // // Returns the Folder manifest with Files listed in deterministic order. func writeLocalHelmFolder( @@ -121,6 +139,10 @@ func writeLocalHelmFolder( seen := make(map[string]string, len(sortedPaths)) templateRelPaths := make([]string, 0, len(sortedPaths)) + // hasNamespaceTemplate is set when any rendered manifest declares a + // top-level Namespace resource. When true, install.sh below suppresses + // --create-namespace to avoid the Helm 3 ownership collision. + hasNamespaceTemplate := false for _, p := range sortedPaths { baseName := filepath.Base(p) if prev, ok := seen[baseName]; ok { @@ -155,6 +177,9 @@ func writeLocalHelmFolder( "component", c.Name, "manifest", baseName) continue } + if !hasNamespaceTemplate && kindNamespaceRE.Match(rendered) { + hasNamespaceTemplate = true + } outPath, jerr := deployer.SafeJoin(templatesDir, baseName) if jerr != nil { return Folder{}, errors.Wrap(errors.ErrCodeInvalidRequest, @@ -166,12 +191,15 @@ func writeLocalHelmFolder( templateRelPaths = append(templateRelPaths, filepath.Join(dir, "templates", baseName)) } - // install.sh + // install.sh — suppress --create-namespace if a chart template owns the + // target namespace (see createNamespace doc comment for the full + // rationale and call shapes). + effectiveCreateNamespace := createNamespace && !hasNamespaceTemplate installData := struct { Name string Namespace string CreateNamespace bool - }{name, c.Namespace, createNamespace} + }{name, c.Namespace, effectiveCreateNamespace} if err = renderTemplateToFile(localHelmInstallTmpl, installData, folderDir, "install.sh", 0o755); err != nil { return Folder{}, err } diff --git a/pkg/bundler/deployer/localformat/writer.go b/pkg/bundler/deployer/localformat/writer.go index 3e874f0c4..ea68b7677 100644 --- a/pkg/bundler/deployer/localformat/writer.go +++ b/pkg/bundler/deployer/localformat/writer.go @@ -164,17 +164,16 @@ func (opts *Options) injectAuxiliaryFolder(idx int, c Component, phase injection } auxName := c.Name + "-" + string(phase) auxDir := fmt.Sprintf("%03d-%s", idx, auxName) - // Pre-phase folders carry a Namespace manifest by design (the - // privileged-namespace template), so install.sh must not pass - // --create-namespace: Helm 3 refuses to import a pre-existing - // namespace into the release. Post-phase folders never contain a - // Namespace template; --create-namespace is a no-op there since the - // primary release has already created it. - createNamespace := phase != phasePre + // Both pre- and post-phase folders request createNamespace=true; + // writeLocalHelmFolder downgrades to false automatically when the + // rendered templates contain a Namespace resource (Talos + // privileged-namespace pattern). Otherwise the target namespace may + // not yet exist when the pre-phase runs — the primary chart hasn't + // executed --create-namespace yet — so install.sh must create it. f, err := writeLocalHelmFolder( opts.OutputDir, auxDir, idx, c, manifests, renderInputFor(c), - auxName, c.Name, createNamespace, + auxName, c.Name, true, ) if err != nil { return nil, err diff --git a/recipes/overlays/gb200-eks-inference.yaml b/recipes/overlays/gb200-eks-inference.yaml index 2e264a09c..31019666b 100644 --- a/recipes/overlays/gb200-eks-inference.yaml +++ b/recipes/overlays/gb200-eks-inference.yaml @@ -41,7 +41,9 @@ spec: # declared in the inheritance chain, so this lives per-leaf for now. - name: gpu-operator type: Helm - manifestFiles: + preManifestFiles: + # Pre-install: ClusterPolicy reconcile reads this ConfigMap during + # `helm install --wait`. See gb200-eks-training.yaml for full context (#859). - components/gpu-operator/manifests/kernel-module-params.yaml dependencyRefs: - nfd diff --git a/recipes/overlays/gb200-eks-training.yaml b/recipes/overlays/gb200-eks-training.yaml index 721518210..1a3aea023 100644 --- a/recipes/overlays/gb200-eks-training.yaml +++ b/recipes/overlays/gb200-eks-training.yaml @@ -39,12 +39,16 @@ spec: # eks-training / eks-inference), so this lives per-leaf for now. - name: gpu-operator type: Helm - manifestFiles: + preManifestFiles: # Ships a ConfigMap with options nvidia NVreg_GrdmaPciTopoCheckOverride=1. # driver.kernelModuleConfig.name (below) points the ClusterPolicy at it, # so the driver DaemonSet mounts nvidia.conf and the kernel loads with # the flag set — required on GB200+EFA for EFA dma-buf attach to the # Grace PCI topology. Without it, NCCL silently falls back to Socket. + # + # Must be preManifestFiles (applied before the chart): the chart's + # ClusterPolicy reconcile reads this ConfigMap during `helm install --wait`, + # so a post-install pass would deadlock until the chart times out (#859). - components/gpu-operator/manifests/kernel-module-params.yaml dependencyRefs: - nfd