From 2e3665c3ada66dfbc537c86780bd4a2eca2fb3fc Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Tue, 26 May 2026 15:10:21 -0700 Subject: [PATCH] fix(kwok): make argocd OCI repoURL per-lane (follow-up to #1047) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1047 stripped the per-recipe segment from OCI_IN_CLUSTER_REF for both deployer lanes, but the assignment lives inside the shared `argocd-oci|argocd-helm-oci)` case branch, so plain argocd-oci was incorrectly switched to the parent-only form too. Result: argocd-oci's root `nvidia-stack` app dials oci://…/aicr:<tag> while the artifact is pushed to oci://…/aicr/<recipe>:<tag>, so the OCI artifact lookup 404s on every PR. (Caught by Codex review after #1047 merged.) Make the assignment per-lane to restore the working argocd-oci contract while keeping the parent-only form for argocd-helm-oci: - argocd-oci -> oci://…/aicr/<recipe> (full path, unchanged from pre-#1047 behavior; root app references the artifact by full path) - argocd-helm-oci -> oci://…/aicr (parent-only; the parent App template appends .Chart.Name) The log_info line is split per-lane too so each prints the URL it will actually dereference. --- kwok/scripts/validate-scheduling.sh | 38 ++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/kwok/scripts/validate-scheduling.sh b/kwok/scripts/validate-scheduling.sh index 0967f58d9..bc0cf6f98 100755 --- a/kwok/scripts/validate-scheduling.sh +++ b/kwok/scripts/validate-scheduling.sh @@ -756,17 +756,27 @@ generate_bundle() { # pass it through to `helm install --set repoURL=…` without # duplicating the runner→service-DNS rewrite rule. # - # Per PR #1032's contract change (and #1035's enforcement on the - # parent App template), --set repoURL must carry the PARENT - # NAMESPACE ONLY — without the per-recipe chart name.
The - # argocd-helm parent Application appends .Chart.Name via its - # separate `source.chart` field; path-based child Applications - # append /{{ .Chart.Name }} via their template, so both halves - # resolve to the same artifact regardless of which Argo source - # type the cluster picks. The pushed artifact lives at - # oci://…/aicr/<recipe>:<tag>; the recipe segment is the chart - # name, which Argo appends itself. - OCI_IN_CLUSTER_REF="oci://registry.aicr-registry.svc.cluster.local:5000/aicr" + # Per-lane assignment — the two lanes resolve the in-cluster + # OCI URL differently: + # + # - argocd-oci: the bundle's `nvidia-stack` root application + # references the artifact by its full path. Pass the full + # "aicr/<recipe>" form here so `--repo` and the root app's + # `repoURL` both match the pushed artifact at + # oci://…/aicr/<recipe>:<tag>. + # + # - argocd-helm-oci: per PR #1032's contract change (and + # #1035's enforcement on the parent App template), --set + # repoURL must carry the PARENT NAMESPACE ONLY — without + # the per-recipe chart name. The argocd-helm parent + # Application appends .Chart.Name via its separate + # `source.chart` field; passing the full path here would + # resolve to oci://…/aicr/<recipe>/<recipe>:<tag> and 404. + if [[ "$DEPLOYER" == "argocd-helm-oci" ]]; then + OCI_IN_CLUSTER_REF="oci://registry.aicr-registry.svc.cluster.local:5000/aicr" + else + OCI_IN_CLUSTER_REF="oci://registry.aicr-registry.svc.cluster.local:5000/aicr/${recipe}" + fi local in_cluster_repo="$OCI_IN_CLUSTER_REF" # Map our deployer-matrix name to aicr's --deployer value.
@@ -774,7 +784,11 @@ generate_bundle() { [[ "$DEPLOYER" == "argocd-helm-oci" ]] && deployer_arg="argocd-helm" log_info "Bundling for ${deployer_arg}, pushing to ${OCI_REF}" - log_info "Argo CD will pull from ${in_cluster_repo}/${recipe}:${tag} (parent namespace + .Chart.Name appended by the parent App)" + if [[ "$DEPLOYER" == "argocd-helm-oci" ]]; then + log_info "Argo CD will pull from ${in_cluster_repo}/${recipe}:${tag} (parent namespace + .Chart.Name appended by the parent App)" + else + log_info "Argo CD will pull from ${in_cluster_repo}:${tag}" + fi # When --output is an oci:// reference, `aicr bundle` writes the # local bundle to ./bundle (relative to CWD) — there's no way to # redirect it to an absolute path. cd into WORK_DIR so the local