From a7665fc02f3ac2c1e31a1ffd8f3d4adec1689191 Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Fri, 15 May 2026 10:14:00 -0700 Subject: [PATCH] fix(bundler): wire PreManifestFiles through flux deployer with terminal-aware dependsOn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flux deployer never consumed `ComponentPreManifests`, so the synthesized GKE critical-priority `ResourceQuota` from PR #921 was silently dropped on `--deployer flux` bundles and the original symptom from #915 reproduced. The same gap also blocked the os-talos mixin from working on flux. Add `ComponentPreManifests` to `flux.Generator`, wire it through `bundler.buildDeployer`, and emit a `-pre` HelmRelease ahead of the primary when pre-manifests exist. Rewire the primary's `dependsOn` to point at `-pre` so the chain is `previous → -pre → -post → next`. Also fix a pre-existing ordering bug: the next component used to depend on the previous component's primary name, not its terminal (`-post` for mixed components). New `terminalReleaseNameFor` helper resolves the correct tail; `buildPrimaryDependsOn` and the README renderer both use it. Without this fix, Flux could reconcile the next component in parallel with the previous component's post manifests. Add a `-pre` collision guard mirroring the rule in `pkg/bundler/deployer/localformat/writer.go`. Tests: pre-only, pre+post with terminal-aware next-component dependsOn (regression guard), collision rejection, and a refreshed `TestBuildPrimaryDependsOn` covering mixed, manifest-only, and chart-only previous components. Docs: `pkg/bundler/deployer/flux/doc.go` describes the chain and the example tree now includes `-pre/`; `docs/user/cli-reference.md` updates the Flux Deployment Order bullet. Fixes: #923 --- docs/user/cli-reference.md | 2 +- pkg/bundler/bundler.go | 34 ++- pkg/bundler/deployer/flux/doc.go | 15 +- pkg/bundler/deployer/flux/flux.go | 155 ++++++++-- pkg/bundler/deployer/flux/flux_test.go | 399 ++++++++++++++++++++++++- 5 files changed, 556 insertions(+), 49 deletions(-) diff --git a/docs/user/cli-reference.md b/docs/user/cli-reference.md index 3ae4202ab..dec73a5fb 100644 --- a/docs/user/cli-reference.md +++ b/docs/user/cli-reference.md @@ -1210,7 +1210,7 @@ All deployers respect the `deploymentOrder` field from the recipe, ensuring comp - **Helm**: Components listed in README in deployment order - **Argo CD**: Uses `argocd.argoproj.io/sync-wave` annotation (0 = first, 1 = second, etc.) -- **Flux**: Uses `dependsOn` references in HelmRelease/Kustomization CRs (each component depends on its predecessor) +- **Flux**: Uses `dependsOn` references in HelmRelease CRs (each component depends on the previous component's terminal release — its `-post` release when post-manifests are present, otherwise ``). Components with pre-manifests insert a `-pre` release that the primary HelmRelease depends on, so the chain becomes `previous → -pre → -post → next`. The bundle's root `kustomization.yaml` is a plain Kustomize file (not a Flux Kustomization CR). - **Helmfile**: Uses `needs:` references in each release (each component depends on its predecessor) #### Value Overrides diff --git a/pkg/bundler/bundler.go b/pkg/bundler/bundler.go index e58e32c7b..68170d3a8 100644 --- a/pkg/bundler/bundler.go +++ b/pkg/bundler/bundler.go @@ -387,26 +387,28 @@ func (b *DefaultBundler) buildDeployer(ctx context.Context, recipeResult *recipe }, nil case config.DeployerFlux: + componentPreManifests, preErr := b.collectComponentPreManifests(ctx, recipeResult) + if preErr != nil { + return nil, errors.PropagateOrWrap(preErr, errors.ErrCodeInternal, + "failed to collect component pre-manifests") + } componentManifests, manifestErr := b.collectComponentManifests(ctx, recipeResult) if manifestErr != nil { - var se *errors.StructuredError - if stderrors.As(manifestErr, &se) { - return nil, manifestErr - } - return nil, errors.Wrap(errors.ErrCodeInternal, - "failed to collect component manifests", manifestErr) + return nil, errors.PropagateOrWrap(manifestErr, errors.ErrCodeInternal, + "failed to collect component manifests") } return &flux.Generator{ - RecipeResult: recipeResult, - ComponentValues: componentValues, - Version: b.Config.Version(), - RepoURL: b.Config.RepoURL(), - TargetRevision: b.Config.TargetRevision(), - IncludeChecksums: b.Config.IncludeChecksums(), - DataFiles: dataFiles, - ComponentManifests: componentManifests, - DynamicValues: dynamicValues, - VendorCharts: b.Config.VendorCharts(), + RecipeResult: recipeResult, + ComponentValues: componentValues, + Version: b.Config.Version(), + RepoURL: b.Config.RepoURL(), + TargetRevision: b.Config.TargetRevision(), + IncludeChecksums: b.Config.IncludeChecksums(), + DataFiles: dataFiles, + ComponentPreManifests: componentPreManifests, + ComponentManifests: componentManifests, + DynamicValues: dynamicValues, + VendorCharts: b.Config.VendorCharts(), }, nil case config.DeployerHelmfile: diff --git a/pkg/bundler/deployer/flux/doc.go b/pkg/bundler/deployer/flux/doc.go index ba804fccb..d40a9e2cd 100644 --- a/pkg/bundler/deployer/flux/doc.go +++ b/pkg/bundler/deployer/flux/doc.go @@ -35,6 +35,12 @@ Components are deployed in order using Flux dependsOn references. The deployment order is determined by the recipe's DeploymentOrder field. Each component depends on the component immediately preceding it in the order. +When a component declares pre-manifests (ComponentRef.PreManifestFiles, or +synthesized bundler manifests like the GKE critical-priority ResourceQuota), +the generator emits a -pre HelmRelease ahead of the primary chart and +rewires the primary's dependsOn to point at -pre. The chain becomes: +previous → -pre → -post → next. + # Source Deduplication Multiple components sharing the same upstream repository (e.g., two charts from @@ -79,11 +85,16 @@ components produce an ErrCodeInvalidRequest error at generation time. │ └── helmrepo-helm-ngc-nvidia-com-nvidia.yaml ├── cert-manager/ │ └── helmrelease.yaml # HelmRelease (HelmRepository source) + ├── gpu-operator-pre/ # Synthesized when PreManifestFiles is non-empty + │ ├── Chart.yaml # Local Helm chart for pre-phase manifest templates + │ ├── helmrelease.yaml # HelmRelease (GitRepository source); the primary dependsOn this + │ └── templates/ + │ └── gke-critical-pods-quota.yaml # e.g. synthesized GKE ResourceQuota (issue #915) ├── gpu-operator/ - │ └── helmrelease.yaml # HelmRelease (HelmRepository source) + │ └── helmrelease.yaml # HelmRelease (HelmRepository source); dependsOn gpu-operator-pre ├── gpu-operator-post/ │ ├── Chart.yaml # Local Helm chart for manifest templates - │ ├── helmrelease.yaml # HelmRelease (GitRepository source) + │ ├── helmrelease.yaml # HelmRelease (GitRepository source); dependsOn gpu-operator │ └── templates/ │ └── dcgm-exporter.yaml # Manifest template rendered by Helm controller └── nodewright-customizations/ diff --git a/pkg/bundler/deployer/flux/flux.go b/pkg/bundler/deployer/flux/flux.go index e50cdfdc9..807fc1540 100644 --- a/pkg/bundler/deployer/flux/flux.go +++ b/pkg/bundler/deployer/flux/flux.go @@ -44,6 +44,11 @@ const ( fileReadme = "README.md" ) +// summaryTypeHelmRelease is the value placed in ComponentSummary.Type for +// every row of the README's Components table — every emitted release +// (pre / primary / post) is a Flux HelmRelease CR. +const summaryTypeHelmRelease = "HelmRelease" + //go:embed templates/configmap-values.yaml.tmpl var configMapTemplate string @@ -125,6 +130,14 @@ type Generator struct { // components. Components without manifests do not appear in the map. ComponentManifests map[string]map[string][]byte + // ComponentPreManifests maps component name → manifest path → rendered bytes. + // Emitted as a -pre HelmRelease that the primary HelmRelease + // dependsOn, ensuring pre-phase manifests reconcile before the chart. + // Wired by the bundler from ComponentRef.PreManifestFiles and the + // synthesized GKE critical-priority ResourceQuota (see issue #915). + // Components without pre-manifests do not appear in the map. + ComponentPreManifests map[string]map[string][]byte + // DynamicValues maps component names to their dynamic value paths. // When non-empty, dynamic paths are split from inline values into a // ConfigMap and referenced via spec.valuesFrom in the HelmRelease. @@ -202,6 +215,10 @@ func (g *Generator) Generate(ctx context.Context, outputDir string) (*deployer.O } } + if err := g.detectInjectedReleaseCollisions(sortedRefs); err != nil { + return nil, err + } + // Create sources directory. sourcesDir, err := deployer.SafeJoin(outputDir, "sources") if err != nil { @@ -258,7 +275,7 @@ func (g *Generator) Generate(ctx context.Context, outputDir string) (*deployer.O // Write README.md. readmeData := ReadmeData{ BundlerVersion: deployer.NormalizeVersionWithDefault(g.Version), - Components: buildComponentSummaries(sortedRefs, g.ComponentManifests), + Components: buildComponentSummaries(sortedRefs, g.ComponentPreManifests, g.ComponentManifests), } if err := writeTemplate(output, readmeTemplate, readmeData, outputDir, fileReadme, "failed to write README.md"); err != nil { @@ -337,25 +354,52 @@ func (g *Generator) generateComponentResources(ctx context.Context, ref recipe.C fmt.Sprintf("failed to create component directory %s", ref.Name), err) } - dependsOn := buildDependsOn(sortedRefs, index) + primaryDependsOn := g.buildPrimaryDependsOn(sortedRefs, index) + hasPreManifests := len(g.ComponentPreManifests[ref.Name]) > 0 hasManifests := len(g.ComponentManifests[ref.Name]) > 0 var resources []string + // Emit the -pre HelmRelease BEFORE the primary when pre-manifests + // exist, and rewire the primary's dependsOn to point at the pre release. + // Chain becomes: previous → -pre → -post → next. + if hasPreManifests { + preName := ref.Name + "-pre" + preDir, preDirErr := deployer.SafeJoin(outputDir, preName) + if preDirErr != nil { + return nil, preDirErr + } + if mkErr := os.MkdirAll(preDir, 0750); mkErr != nil { + return nil, errors.Wrap(errors.ErrCodeInternal, + fmt.Sprintf("failed to create pre directory %s", preName), mkErr) + } + preWroteCM, preErr := g.generateManifestHelmChart(ref.Name, preName, ref.Namespace, preDir, + g.ComponentPreManifests[ref.Name], gitSources, primaryDependsOn, output) + if preErr != nil { + return nil, preErr + } + resources = append(resources, filepath.Join(preName, fileHelmRelease)) + if preWroteCM { + resources = append(resources, filepath.Join(preName, fileConfigMap)) + } + // Primary chart now waits for the pre release. + primaryDependsOn = []DependsOnRef{{Name: preName}} + } + switch ref.Type { //nolint:exhaustive // only Helm is supported; default rejects others case recipe.ComponentTypeHelm: // Manifest-only Helm component: no chart or source, only manifests. // Package as a local Helm chart so Flux renders the templates natively. if ref.Chart == "" && ref.Source == "" && hasManifests { wroteCM, genErr := g.generateManifestHelmChart(ref.Name, ref.Name, ref.Namespace, compDir, - g.ComponentManifests[ref.Name], gitSources, dependsOn, output) + g.ComponentManifests[ref.Name], gitSources, primaryDependsOn, output) if genErr != nil { return nil, genErr } - res := []string{filepath.Join(ref.Name, fileHelmRelease)} + resources = append(resources, filepath.Join(ref.Name, fileHelmRelease)) if wroteCM { - res = append(res, filepath.Join(ref.Name, fileConfigMap)) + resources = append(resources, filepath.Join(ref.Name, fileConfigMap)) } - return res, nil + return resources, nil } // Vendored Helm component: pull chart tarball, write wrapper, @@ -364,7 +408,7 @@ func (g *Generator) generateComponentResources(ctx context.Context, ref recipe.C // manifests (the existing flow handles them correctly). if g.VendorCharts && isVendorable(ref) { wroteCM, rec, vendErr := g.generateVendoredHelmComponent( - ctx, ref, compDir, dependsOn, gitSources, puller, output) + ctx, ref, compDir, primaryDependsOn, gitSources, puller, output) if vendErr != nil { return nil, vendErr } @@ -377,7 +421,7 @@ func (g *Generator) generateComponentResources(ctx context.Context, ref recipe.C "component", ref.Name, "chart", rec.Chart, "version", rec.Version, "sha256", rec.SHA256) } else { - wroteCM, helmErr := g.generateHelmComponent(ref, compDir, dependsOn, helmSources, output) + wroteCM, helmErr := g.generateHelmComponent(ref, compDir, primaryDependsOn, helmSources, output) if helmErr != nil { return nil, helmErr } @@ -470,15 +514,67 @@ func filterEnabled(refs []recipe.ComponentRef) []recipe.ComponentRef { return enabled } -// buildDependsOn returns a dependsOn reference to the immediate predecessor -// in the deployment order. All components produce HelmReleases, so every -// component at index N depends on component at index N-1. -func buildDependsOn(sortedRefs []recipe.ComponentRef, index int) []DependsOnRef { +// detectInjectedReleaseCollisions rejects recipes that declare both a +// component "foo" (with pre- or post-manifests) and a separate component +// "foo-pre" / "foo-post". The injection rule would synthesize a HelmRelease +// that collides with the explicitly-declared one. Mirrors the rule in +// pkg/bundler/deployer/localformat/writer.go. +func (g *Generator) detectInjectedReleaseCollisions(sortedRefs []recipe.ComponentRef) error { + declared := make(map[string]struct{}, len(sortedRefs)) + for _, ref := range sortedRefs { + declared[ref.Name] = struct{}{} + } + for _, ref := range sortedRefs { + if len(g.ComponentPreManifests[ref.Name]) > 0 { + if _, clash := declared[ref.Name+"-pre"]; clash { + return errors.New(errors.ErrCodeInvalidRequest, + fmt.Sprintf("component %q has preManifestFiles and would inject %q-pre, but a component named %q-pre is already declared in the recipe — rename one to avoid collision", + ref.Name, ref.Name, ref.Name)) + } + } + // Post injection only fires for mixed components (chart/source + + // post-manifests); manifest-only components fold their manifests + // into the primary release and never emit a -post folder. + hasChartOrSource := ref.Chart != "" || ref.Source != "" + if hasChartOrSource && len(g.ComponentManifests[ref.Name]) > 0 { + if _, clash := declared[ref.Name+"-post"]; clash { + return errors.New(errors.ErrCodeInvalidRequest, + fmt.Sprintf("component %q is mixed (helm + manifests) and would inject %q-post, but a component named %q-post is already declared in the recipe — rename one to avoid collision", + ref.Name, ref.Name, ref.Name)) + } + } + } + return nil +} + +// buildPrimaryDependsOn returns the dependsOn reference for the head of the +// current component's chain (the -pre release if pre-manifests exist, +// otherwise the primary HelmRelease). The reference targets the previous +// component's TERMINAL release — its -post folder when the previous +// component has post-manifests, otherwise . This ensures the next +// component waits for the previous component's full chain +// (pre → primary → post) to reconcile before starting. +func (g *Generator) buildPrimaryDependsOn(sortedRefs []recipe.ComponentRef, index int) []DependsOnRef { if index == 0 { return nil } prev := sortedRefs[index-1] - return []DependsOnRef{{Name: prev.Name}} + return []DependsOnRef{{Name: terminalReleaseNameFor(prev, g.ComponentManifests)}} +} + +// terminalReleaseNameFor returns the name of the LAST HelmRelease emitted for a +// component. Mixed components (chart/source + post-manifests) terminate at +// -post; manifest-only and chart-only components terminate at . +// Pre-manifests never extend the tail — they live BEFORE the primary. +// +// Free function so the README renderer (buildComponentSummaries) can reuse it +// without a Generator handle. +func terminalReleaseNameFor(ref recipe.ComponentRef, postManifests map[string]map[string][]byte) string { + hasChart := ref.Chart != "" || ref.Source != "" + if hasChart && len(postManifests[ref.Name]) > 0 { + return ref.Name + "-post" + } + return ref.Name } // nonAlphanumericRe collapses runs of non-DNS characters into a single hyphen. @@ -526,7 +622,9 @@ func sourceResourcePaths(helmSources map[string]*HelmRepoSourceData, gitSources } // buildComponentSummaries builds the component summary list for the README. -func buildComponentSummaries(sortedRefs []recipe.ComponentRef, manifests map[string]map[string][]byte) []ComponentSummary { +// The list mirrors the actual HelmRelease graph (pre → primary → post) so the +// rendered "Depends On" column matches what Flux will reconcile. +func buildComponentSummaries(sortedRefs []recipe.ComponentRef, preManifests, manifests map[string]map[string][]byte) []ComponentSummary { summaries := make([]ComponentSummary, 0, len(sortedRefs)) for i, ref := range sortedRefs { version := ref.Version @@ -534,17 +632,34 @@ func buildComponentSummaries(sortedRefs []recipe.ComponentRef, manifests map[str version = ref.Tag } - dependsOnStr := "-" - if deps := buildDependsOn(sortedRefs, i); len(deps) > 0 { - dependsOnStr = deps[0].Name + // Terminal of the previous component (its -post when post-manifests + // exist, otherwise ). Head of the chain has no previous, so "-". + previousTerminal := "-" + if i > 0 { + previousTerminal = terminalReleaseNameFor(sortedRefs[i-1], manifests) + } + + // When pre-manifests exist, generation inserts a -pre HelmRelease + // before the primary and rewires the primary's dependsOn to point at it. + // Reflect that in the README so the table matches the generated CRs. + primaryDependsOn := previousTerminal + if len(preManifests[ref.Name]) > 0 { + preName := ref.Name + "-pre" + summaries = append(summaries, ComponentSummary{ + Name: preName, + Type: summaryTypeHelmRelease, + Namespace: ref.Namespace, + DependsOnStr: previousTerminal, + }) + primaryDependsOn = preName } summaries = append(summaries, ComponentSummary{ Name: ref.Name, - Type: "HelmRelease", + Type: summaryTypeHelmRelease, Version: version, Namespace: ref.Namespace, - DependsOnStr: dependsOnStr, + DependsOnStr: primaryDependsOn, }) // Mixed components (Helm chart + manifests) produce a post HelmRelease @@ -553,7 +668,7 @@ func buildComponentSummaries(sortedRefs []recipe.ComponentRef, manifests map[str if isMixed { summaries = append(summaries, ComponentSummary{ Name: ref.Name + "-post", - Type: "HelmRelease", + Type: summaryTypeHelmRelease, Namespace: ref.Namespace, DependsOnStr: ref.Name, }) diff --git a/pkg/bundler/deployer/flux/flux_test.go b/pkg/bundler/deployer/flux/flux_test.go index 7d4cb4db0..46ca95cc1 100644 --- a/pkg/bundler/deployer/flux/flux_test.go +++ b/pkg/bundler/deployer/flux/flux_test.go @@ -18,6 +18,7 @@ import ( "context" "crypto/sha256" "encoding/hex" + stderrors "errors" "flag" "fmt" "os" @@ -28,6 +29,7 @@ import ( "gopkg.in/yaml.v3" "github.com/NVIDIA/aicr/pkg/bundler/deployer/localformat" + "github.com/NVIDIA/aicr/pkg/errors" "github.com/NVIDIA/aicr/pkg/recipe" ) @@ -1002,12 +1004,29 @@ func TestSanitizeSourceName(t *testing.T) { } } -func TestBuildDependsOn(t *testing.T) { +func TestBuildPrimaryDependsOn(t *testing.T) { + // Mixed component (cert-manager has source + post-manifests) → its + // terminal release is cert-manager-post. Manifest-only components + // (manifest-only has post-manifests but no source/chart) fold + // manifests into the primary, so terminal is the component name. refs := []recipe.ComponentRef{ - {Name: "cert-manager", Namespace: "cert-manager", Type: recipe.ComponentTypeHelm}, + {Name: "cert-manager", Namespace: "cert-manager", Type: recipe.ComponentTypeHelm, + Chart: "cert-manager", Source: "https://charts.jetstack.io"}, {Name: "manifest-only", Namespace: "default", Type: recipe.ComponentTypeHelm}, - {Name: "gpu-operator", Namespace: "gpu-operator", Type: recipe.ComponentTypeHelm}, - {Name: "network-operator", Namespace: "network-operator", Type: recipe.ComponentTypeHelm}, + {Name: "gpu-operator", Namespace: "gpu-operator", Type: recipe.ComponentTypeHelm, + Chart: "gpu-operator", Source: "https://helm.ngc.nvidia.com/nvidia"}, + {Name: "network-operator", Namespace: "network-operator", Type: recipe.ComponentTypeHelm, + Chart: "network-operator", Source: "https://helm.ngc.nvidia.com/nvidia"}, + } + g := &Generator{ + ComponentManifests: map[string]map[string][]byte{ + // cert-manager is mixed (chart + post-manifests) → terminal is cert-manager-post. + "cert-manager": {"crds.yaml": []byte("---")}, + // manifest-only has post-manifests but no chart/source → manifests fold + // into the primary; terminal stays "manifest-only". + "manifest-only": {"cm.yaml": []byte("---")}, + // gpu-operator is chart-only here → no -post → terminal is "gpu-operator". + }, } tests := []struct { @@ -1017,19 +1036,19 @@ func TestBuildDependsOn(t *testing.T) { wantDep string }{ {"first has no deps", 0, 0, ""}, - {"second depends on first", 1, 1, "cert-manager"}, - {"third depends on second", 2, 1, "manifest-only"}, - {"fourth depends on third", 3, 1, "gpu-operator"}, + {"second depends on cert-manager-post (prev is mixed)", 1, 1, "cert-manager-post"}, + {"third depends on manifest-only (prev folds into primary)", 2, 1, "manifest-only"}, + {"fourth depends on gpu-operator (prev is chart-only, no -post)", 3, 1, "gpu-operator"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := buildDependsOn(refs, tt.index) + got := g.buildPrimaryDependsOn(refs, tt.index) if len(got) != tt.wantLen { - t.Errorf("buildDependsOn() returned %d deps, want %d", len(got), tt.wantLen) + t.Errorf("buildPrimaryDependsOn() returned %d deps, want %d", len(got), tt.wantLen) } if tt.wantLen > 0 && got[0].Name != tt.wantDep { - t.Errorf("buildDependsOn() dep name = %q, want %q", got[0].Name, tt.wantDep) + t.Errorf("buildPrimaryDependsOn() dep name = %q, want %q", got[0].Name, tt.wantDep) } }) } @@ -1647,3 +1666,363 @@ func extractSourceName(t *testing.T, yamlContent string) string { } return "" } + +// TestGenerate_WithPreManifests pins the contract that components with +// pre-manifests emit a -pre HelmRelease BEFORE the primary, and +// that the primary's dependsOn points at -pre instead of the +// previous component. Regression guard for issue #923 (the GKE +// ResourceQuota fix from PR #921 was silently dropped on flux because +// the deployer didn't consume ComponentPreManifests). +func TestGenerate_WithPreManifests(t *testing.T) { + ctx := context.Background() + outputDir := t.TempDir() + + recipeResult := &recipe.RecipeResult{} + recipeResult.Metadata.Version = testVersion + recipeResult.ComponentRefs = []recipe.ComponentRef{ + { + Name: "cert-manager", + Namespace: "cert-manager", + Chart: "cert-manager", + Version: "v1.14.0", + Type: recipe.ComponentTypeHelm, + Source: "https://charts.jetstack.io", + }, + { + Name: "gpu-operator", + Namespace: "gpu-operator", + Chart: "gpu-operator", + Version: "v25.3.3", + Type: recipe.ComponentTypeHelm, + Source: "https://helm.ngc.nvidia.com/nvidia", + }, + } + recipeResult.DeploymentOrder = []string{"cert-manager", "gpu-operator"} + + preManifests := map[string]map[string][]byte{ + "gpu-operator": { + "gke-critical-pods-quota.yaml": []byte("apiVersion: v1\nkind: ResourceQuota\nmetadata:\n name: aicr-gke-critical-pods\n namespace: gpu-operator\nspec:\n hard:\n pods: \"32\"\n"), + }, + } + + g := &Generator{ + RecipeResult: recipeResult, + ComponentValues: map[string]map[string]any{"gpu-operator": {"driver": map[string]any{"enabled": true}}}, + ComponentPreManifests: preManifests, + Version: "v0.9.0", + RepoURL: "https://github.com/my-org/gitops.git", + } + + _, err := g.Generate(ctx, outputDir) + if err != nil { + t.Fatalf("Generate() error = %v", err) + } + + // Pre folder: Chart.yaml + templates/ + helmrelease.yaml. + preDir := filepath.Join(outputDir, "gpu-operator-pre") + for _, rel := range []string{"Chart.yaml", "helmrelease.yaml", filepath.Join("templates", "gke-critical-pods-quota.yaml")} { + path := filepath.Join(preDir, rel) + if _, statErr := os.Stat(path); os.IsNotExist(statErr) { + t.Errorf("expected gpu-operator-pre/%s to exist", rel) + } + } + + // Pre HelmRelease depends on the previous component (cert-manager), + // preserving the deployment-order chain. + preHR := readFile(t, filepath.Join(preDir, "helmrelease.yaml")) + if !strings.Contains(preHR, "name: cert-manager") { + t.Errorf("expected pre HelmRelease to depend on cert-manager; got:\n%s", preHR) + } + + // Primary HelmRelease depends on gpu-operator-pre, NOT cert-manager. + primaryHR := readFile(t, filepath.Join(outputDir, "gpu-operator", "helmrelease.yaml")) + if !strings.Contains(primaryHR, "name: gpu-operator-pre") { + t.Errorf("expected primary HelmRelease to depend on gpu-operator-pre; got:\n%s", primaryHR) + } + if strings.Contains(primaryHR, "name: cert-manager") { + t.Errorf("primary HelmRelease should no longer depend on cert-manager directly; got:\n%s", primaryHR) + } + + // Root kustomization references the pre folder. + rootKustom := readFile(t, filepath.Join(outputDir, "kustomization.yaml")) + if !strings.Contains(rootKustom, "gpu-operator-pre/helmrelease.yaml") { + t.Errorf("expected root kustomization.yaml to include gpu-operator-pre/helmrelease.yaml; got:\n%s", rootKustom) + } +} + +// TestGenerate_PreAndPostManifests pins the full chain when both pre +// and post manifests are present on the same component, and asserts +// the *next* component depends on the previous component's TERMINAL +// release (-post), not its primary. Chain shape: +// +// gpu-operator-pre → gpu-operator → gpu-operator-post → gke-nccl-tcpxo +// +// This is the realistic GKE shape (synthesized quota pre + dcgm-exporter +// post + downstream component) and a regression guard for the issue +// Codex caught: before the fix, the next component depended on +// "gpu-operator" so Flux could reconcile it in parallel with the post +// manifests, defeating the chain's purpose. +func TestGenerate_PreAndPostManifests(t *testing.T) { + ctx := context.Background() + outputDir := t.TempDir() + + recipeResult := &recipe.RecipeResult{} + recipeResult.Metadata.Version = testVersion + recipeResult.ComponentRefs = []recipe.ComponentRef{ + { + Name: "gpu-operator", + Namespace: "gpu-operator", + Chart: "gpu-operator", + Version: "v25.3.3", + Type: recipe.ComponentTypeHelm, + Source: "https://helm.ngc.nvidia.com/nvidia", + }, + { + Name: "gke-nccl-tcpxo", + Namespace: "kube-system", + Chart: "gke-nccl-tcpxo", + Version: "v1.0.0", + Type: recipe.ComponentTypeHelm, + Source: "https://helm.ngc.nvidia.com/nvidia", + }, + } + recipeResult.DeploymentOrder = []string{"gpu-operator", "gke-nccl-tcpxo"} + + g := &Generator{ + RecipeResult: recipeResult, + ComponentValues: map[string]map[string]any{ + "gpu-operator": {"driver": map[string]any{"enabled": true}}, + "gke-nccl-tcpxo": {}, + }, + ComponentPreManifests: map[string]map[string][]byte{ + "gpu-operator": { + "quota.yaml": []byte("apiVersion: v1\nkind: ResourceQuota\nmetadata:\n name: q\n namespace: gpu-operator\n"), + }, + }, + ComponentManifests: map[string]map[string][]byte{ + "gpu-operator": { + "dcgm-exporter.yaml": []byte("apiVersion: apps/v1\nkind: DaemonSet\nmetadata:\n name: dcgm-exporter\n"), + }, + }, + Version: "v0.9.0", + RepoURL: "https://github.com/my-org/gitops.git", + } + + if _, err := g.Generate(ctx, outputDir); err != nil { + t.Fatalf("Generate() error = %v", err) + } + + // Pre is head of the chain (gpu-operator is index 0, no preceding component). + preHR := readFile(t, filepath.Join(outputDir, "gpu-operator-pre", "helmrelease.yaml")) + if strings.Contains(preHR, "dependsOn:") { + t.Errorf("expected pre HelmRelease to have no dependsOn (head of chain); got:\n%s", preHR) + } + + // Primary depends on -pre. + primaryHR := readFile(t, filepath.Join(outputDir, "gpu-operator", "helmrelease.yaml")) + if !strings.Contains(primaryHR, "name: gpu-operator-pre") { + t.Errorf("expected primary to depend on gpu-operator-pre; got:\n%s", primaryHR) + } + + // Post depends on the primary. + postHR := readFile(t, filepath.Join(outputDir, "gpu-operator-post", "helmrelease.yaml")) + if !strings.Contains(postHR, "name: gpu-operator") { + t.Errorf("expected post HelmRelease to depend on gpu-operator; got:\n%s", postHR) + } + + // Next component depends on the TERMINAL release of the previous + // component (gpu-operator-post), not its primary. + nextHR := readFile(t, filepath.Join(outputDir, "gke-nccl-tcpxo", "helmrelease.yaml")) + if !strings.Contains(nextHR, "name: gpu-operator-post") { + t.Errorf("expected next component to depend on gpu-operator-post (terminal of previous chain); got:\n%s", nextHR) + } + + // README's "Components" table must mirror the actual HelmRelease + // graph. A reader skimming the bundle README should see rows for + // gpu-operator-pre and gpu-operator-post alongside the primary, with + // dependsOn pointing at the correct chain links — not at the + // previous component (which is what the README used to render). + readme := readFile(t, filepath.Join(outputDir, "README.md")) + for _, want := range []string{ + "| gpu-operator-pre |", + "| gpu-operator-post |", + } { + if !strings.Contains(readme, want) { + t.Errorf("expected README.md to contain row %q; got:\n%s", want, readme) + } + } + if !strings.Contains(readme, "| gpu-operator | HelmRelease | v25.3.3 | gpu-operator | gpu-operator-pre |") { + t.Errorf("expected README.md to show primary gpu-operator depending on gpu-operator-pre; got:\n%s", readme) + } + if !strings.Contains(readme, "| gke-nccl-tcpxo | HelmRelease | v1.0.0 | kube-system | gpu-operator-post |") { + t.Errorf("expected README.md to show gke-nccl-tcpxo depending on gpu-operator-post; got:\n%s", readme) + } +} + +// TestGenerate_VendoredChartWithPreManifests verifies that the +// pre-manifest rewire works on the vendored-chart code path too. The +// vendored branch (g.generateVendoredHelmComponent) receives the same +// rewired primaryDependsOn, but the dedicated pre/post tests only +// exercise the non-vendored helm path — a future refactor that +// re-shadowed primaryDependsOn on the vendored branch would slip +// through without this guard. +func TestGenerate_VendoredChartWithPreManifests(t *testing.T) { + ctx := context.Background() + outputDir := t.TempDir() + + recipeResult := &recipe.RecipeResult{} + recipeResult.Metadata.Version = testVersion + recipeResult.ComponentRefs = []recipe.ComponentRef{ + { + Name: "gpu-operator", + Namespace: "gpu-operator", + Chart: "gpu-operator", + Version: "v25.3.3", + Type: recipe.ComponentTypeHelm, + Source: "https://helm.ngc.nvidia.com/nvidia", + }, + } + recipeResult.DeploymentOrder = []string{"gpu-operator"} + + g := &Generator{ + RecipeResult: recipeResult, + ComponentValues: map[string]map[string]any{"gpu-operator": {"driver": map[string]any{"enabled": true}}}, + ComponentPreManifests: map[string]map[string][]byte{ + "gpu-operator": { + "quota.yaml": []byte("apiVersion: v1\nkind: ResourceQuota\nmetadata:\n name: q\n namespace: gpu-operator\n"), + }, + }, + Version: "v0.9.0", + RepoURL: "https://github.com/my-org/gitops.git", + VendorCharts: true, + Puller: &stubChartPuller{}, + } + + if _, err := g.Generate(ctx, outputDir); err != nil { + t.Fatalf("Generate() error = %v", err) + } + + // Pre folder is emitted alongside the vendored wrapper chart. + preHRPath := filepath.Join(outputDir, "gpu-operator-pre", "helmrelease.yaml") + if _, statErr := os.Stat(preHRPath); os.IsNotExist(statErr) { + t.Fatal("expected gpu-operator-pre/helmrelease.yaml to exist on vendored path") + } + + // Primary HelmRelease (the vendored wrapper) depends on -pre, not on + // the previous component / chain head. + primaryHR := readFile(t, filepath.Join(outputDir, "gpu-operator", "helmrelease.yaml")) + if !strings.Contains(primaryHR, "name: gpu-operator-pre") { + t.Errorf("expected vendored primary HelmRelease to depend on gpu-operator-pre; got:\n%s", primaryHR) + } + + // Vendored wrapper still references its local GitRepository chart, not + // a HelmRepository — sanity-check that the vendored path is actually + // the one we exercised. + if !strings.Contains(primaryHR, "kind: GitRepository") { + t.Errorf("expected vendored primary HelmRelease to reference GitRepository source; got:\n%s", primaryHR) + } +} + +// TestGenerate_PreManifestsCollision asserts that a recipe declaring +// both component "foo" (with pre-manifests) and a separate component +// "foo-pre" is rejected at bundle time, mirroring the localformat +// writer's guard. +func TestGenerate_PreManifestsCollision(t *testing.T) { + ctx := context.Background() + outputDir := t.TempDir() + + recipeResult := &recipe.RecipeResult{} + recipeResult.Metadata.Version = testVersion + recipeResult.ComponentRefs = []recipe.ComponentRef{ + { + Name: "foo", + Namespace: "foo", + Chart: "foo", + Version: "v1.0.0", + Type: recipe.ComponentTypeHelm, + Source: "https://example.com/charts", + }, + { + Name: "foo-pre", + Namespace: "foo", + Chart: "foo-pre", + Version: "v1.0.0", + Type: recipe.ComponentTypeHelm, + Source: "https://example.com/charts", + }, + } + recipeResult.DeploymentOrder = []string{"foo", "foo-pre"} + + g := &Generator{ + RecipeResult: recipeResult, + ComponentPreManifests: map[string]map[string][]byte{ + "foo": { + "quota.yaml": []byte("apiVersion: v1\nkind: ResourceQuota\nmetadata:\n name: q\n"), + }, + }, + Version: "v0.9.0", + } + + _, err := g.Generate(ctx, outputDir) + if err == nil { + t.Fatal("expected collision error, got nil") + } + if !stderrors.Is(err, errors.New(errors.ErrCodeInvalidRequest, "")) { + t.Errorf("expected ErrCodeInvalidRequest, got: %v", err) + } + if !strings.Contains(err.Error(), `would inject "foo"-pre`) { + t.Errorf("expected collision error to name the offending pair, got: %v", err) + } +} + +// TestGenerate_PostManifestsCollision asserts that a recipe declaring +// both a mixed component "foo" (chart + post-manifests) and a separate +// component "foo-post" is rejected at bundle time. Mirrors the +// pre-manifest rule and the parity declared in the localformat writer. +func TestGenerate_PostManifestsCollision(t *testing.T) { + ctx := context.Background() + outputDir := t.TempDir() + + recipeResult := &recipe.RecipeResult{} + recipeResult.Metadata.Version = testVersion + recipeResult.ComponentRefs = []recipe.ComponentRef{ + { + Name: "foo", + Namespace: "foo", + Chart: "foo", + Version: "v1.0.0", + Type: recipe.ComponentTypeHelm, + Source: "https://example.com/charts", + }, + { + Name: "foo-post", + Namespace: "foo", + Chart: "foo-post", + Version: "v1.0.0", + Type: recipe.ComponentTypeHelm, + Source: "https://example.com/charts", + }, + } + recipeResult.DeploymentOrder = []string{"foo", "foo-post"} + + g := &Generator{ + RecipeResult: recipeResult, + ComponentManifests: map[string]map[string][]byte{ + "foo": { + "cm.yaml": []byte("apiVersion: v1\nkind: ConfigMap\nmetadata:\n name: c\n"), + }, + }, + Version: "v0.9.0", + } + + _, err := g.Generate(ctx, outputDir) + if err == nil { + t.Fatal("expected collision error, got nil") + } + if !stderrors.Is(err, errors.New(errors.ErrCodeInvalidRequest, "")) { + t.Errorf("expected ErrCodeInvalidRequest, got: %v", err) + } + if !strings.Contains(err.Error(), `would inject "foo"-post`) { + t.Errorf("expected collision error to name the offending pair, got: %v", err) + } +}