diff --git a/.github/workflows/test-helm.yml b/.github/workflows/test-helm.yml
new file mode 100644
index 00000000..d69c4233
--- /dev/null
+++ b/.github/workflows/test-helm.yml
@@ -0,0 +1,210 @@
+# CI for the published Helm chart: installs the newest chart package on a Kind
+# cluster, checks that the deployed images match charts/<chart>_meta.yml, then
+# uninstalls the release and verifies that its pods are removed.
+name: Test Helm Chart
+
+on:
+  pull_request:
+    branches:
+      - gh-pages
+  push:
+    branches:
+      - gh-pages
+
+env:
+  NAMESPACE: kube-amd-network
+  HELM_INSTALL_NAME: amd-network-operator
+
+jobs:
+  test-helm:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          # The history search in the next step only sees these commits.
+          fetch-depth: 10
+      - name: Find newly added chart package
+        run: |
+          latest_chart=""
+          for commit in $(git rev-list HEAD); do
+            # Get all added files in this commit under charts
+            diff_output=$(git diff-tree --no-commit-id --diff-filter=A -r "$commit" -- charts/)
+
+            # Print the output for debugging
+            echo "Commit $commit added files:"
+            echo "$diff_output"
+
+            # Keep the path column (field 6 of the raw diff-tree output), filter for .tgz files and pick the first one
+            new_file=$(echo "$diff_output" | awk '{print $6}' | grep '\.tgz$' | head -n 1 || true)
+            if [ -n "$new_file" ]; then
+              latest_chart="$new_file"
+              break
+            fi
+          done
+
+          if [ -z "$latest_chart" ]; then
+            echo "No new .tgz file found in history."
+            exit 1
+          fi
+
+          # remove directory prefix (charts/) and suffix (.tgz)
+          filename=$(basename "$latest_chart" .tgz)
+          echo "LATEST_CHART version: $filename"
+          echo "LATEST_CHART=$filename" >> "$GITHUB_ENV"
+      - name: Set up Kind cluster
+        uses: helm/kind-action@v1.12.0
+      - name: Setup kubectl (latest stable)
+        id: kubectl
+        uses: azure/setup-kubectl@v4
+        with:
+          version: latest
+      - name: Ensure kubectl in PATH
+        run: |
+          if ! which kubectl >/dev/null 2>&1; then
+            echo "kubectl not found in PATH, adding manually..."
+            echo "${{ steps.kubectl.outputs.kubectl-path }}" >> "$GITHUB_PATH"
+          else
+            echo "kubectl found at $(which kubectl)"
+          fi
+      - name: Install Helm
+        run: curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+      - name: Install Cert Manager
+        run: |
+          helm repo add jetstack https://charts.jetstack.io --force-update &&
+          helm install cert-manager jetstack/cert-manager \
+            --namespace cert-manager \
+            --create-namespace \
+            --version v1.15.1 \
+            --set crds.enabled=true
+      - name: Verify kubectl and list current pods
+        run: kubectl version && kubectl get pods -A
+      - name: Install Network Operator chart (Push)
+        if: github.event_name == 'push'
+        run: |
+          helm repo add rocm https://rocm.github.io/network-operator && helm repo update &&
+          helm install ${{ env.HELM_INSTALL_NAME }} rocm/network-operator-charts \
+            --namespace ${{ env.NAMESPACE }} \
+            --create-namespace \
+            --version "${LATEST_CHART#network-operator-charts-}"
+      - name: Install Network Operator chart (Pull request)
+        if: github.event_name == 'pull_request'
+        run: |
+          # Print the working directory and chart listing for debugging, then install the local package
+          echo "LATEST_CHART version: $LATEST_CHART" &&
+          pwd && ls && ls charts &&
+          helm install ${{ env.HELM_INSTALL_NAME }} charts/$LATEST_CHART.tgz \
+            --namespace ${{ env.NAMESPACE }} \
+            --create-namespace
+      - name: Wait for all pods to be ready
+        run: |
+          set -e
+          # Background watcher
+          kubectl get pods -A -w &
+          WATCH_PID=$!
+
+          # Wait for readiness
+          if ! kubectl wait --for=condition=Ready pod --all -n ${{ env.NAMESPACE }} --timeout=300s; then
+            echo "❌ Timeout waiting for pods"
+            echo "📋 Final pod states:"
+            kubectl get pods -A -o wide
+            kill $WATCH_PID || true
+            # This collects logs from all pods
+            for pod in $(kubectl get pods -n ${{ env.NAMESPACE }} -o name); do
+              echo "--- Logs for $pod ---"
+              kubectl logs $pod -n ${{ env.NAMESPACE }} --all-containers --tail=100 || true
+              echo "--- Describe $pod ---"
+              kubectl describe $pod -n ${{ env.NAMESPACE }} || true
+            done
+            exit 1
+          fi
+          kill $WATCH_PID || true
+          echo "✅ All pods are Ready!"
+      - name: Install yq
+        run: |
+          wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64
+          chmod +x /usr/local/bin/yq
+      - name: Validate images in pods and CR
+        env:
+          NAMESPACE: ${{ env.NAMESPACE }}
+          BASE_PREFIX: ${{ env.HELM_INSTALL_NAME }}-
+        run: |
+          set -e
+          # LATEST_CHART is exported by the "Find newly added chart package" step
+          YAML_FILE="charts/${LATEST_CHART}_meta.yml"
+          if [ -z "$LATEST_CHART" ] || [ ! -f "$YAML_FILE" ]; then
+            echo "❌ Validation file '$YAML_FILE' not found or variable is empty."
+            ls -l
+            exit 1
+          fi
+          echo "🔍 Validating images using $YAML_FILE"
+
+          # Check pods by prefix
+          # (exit 1 inside the loop ends the pipeline's last command, so set -e then fails the step)
+          yq -r '.images[] | select(.k8s_comp_prefix != null) | .target as $t | .k8s_comp_prefix[] | "\($t) \(. )"' "$YAML_FILE" | \
+          while read -r target suffix; do
+            prefix="${BASE_PREFIX}${suffix}"
+            pods=$(kubectl get pods -n "$NAMESPACE" --no-headers -o custom-columns=NAME:.metadata.name | grep "^$prefix" || true)
+
+            if [ -z "$pods" ]; then
+              echo "⚠️ No pods found for prefix '$prefix'"
+              continue
+            fi
+
+            for pod in $pods; do
+              images=$(kubectl get pod "$pod" -n "$NAMESPACE" -o jsonpath='{.spec.containers[*].image}')
+              found_match=false
+              for img in $images; do
+                if [ "$img" = "$target" ]; then
+                  echo "✅ Pod $pod is using expected image: $target"
+                  found_match=true
+                fi
+              done
+
+              if [ "$found_match" = false ]; then
+                echo "❌ Pod $pod is NOT using expected image."
+                echo " Expected: $target"
+                echo " Found: $images"
+                exit 1
+              fi
+            done
+          done
+
+          # Check CR paths
+          yq -r '.images[] | select(.cr_path != null) | .target as $t | .cr_path[] | "\($t) \(. )"' "$YAML_FILE" | \
+          while read -r target crpath; do
+            value=$(kubectl get deviceconfigs -n "$NAMESPACE" default -o yaml | yq "$crpath")
+            if [ "$value" = "$target" ]; then
+              echo "✅ CR field '$crpath' matches expected image: $target"
+            else
+              echo "❌ CR field '$crpath' does NOT match expected image."
+              echo " Expected: $target"
+              echo " Found: $value"
+              exit 1
+            fi
+          done
+      - name: Helm uninstall
+        run: helm uninstall ${{ env.HELM_INSTALL_NAME }} -n ${{ env.NAMESPACE }}
+
+      - name: Validate all pods are removed
+        run: |
+          set -ex
+          # Background watcher
+          kubectl get pods -A -w &
+          WATCH_PID=$!
+
+          # Wait for all pods to be removed
+          for i in $(seq 1 30); do
+            pod_count=$(kubectl get pods -n ${{ env.NAMESPACE }} --no-headers 2>/dev/null | wc -l)
+            if [ "$pod_count" -eq 0 ]; then
+              echo "✅ All pods are removed from namespace ${{ env.NAMESPACE }}!"
+              kill $WATCH_PID || true
+              exit 0
+            fi
+            echo "Waiting for pods to be removed ... ($pod_count pods remain)"
+            sleep 5
+          done
+          echo "❌ Timeout waiting for pods to be removed from namespace ${{ env.NAMESPACE }}."
+          kubectl get pods -n ${{ env.NAMESPACE }} -o wide || true
+          kill $WATCH_PID || true
+          exit 1
diff --git a/charts/network-operator-charts-v1.0.0.tgz b/charts/network-operator-charts-v1.0.0.tgz
new file mode 100644
index 00000000..b7aa6747
Binary files /dev/null and b/charts/network-operator-charts-v1.0.0.tgz differ
diff --git a/charts/network-operator-charts-v1.0.0_meta.yml b/charts/network-operator-charts-v1.0.0_meta.yml
new file mode 100644
index 00000000..13110db4
--- /dev/null
+++ b/charts/network-operator-charts-v1.0.0_meta.yml
@@ -0,0 +1,18 @@
+# Expected images for chart network-operator-charts-v1.0.0, read by
+# .github/workflows/test-helm.yml (keys: target, k8s_comp_prefix, cr_path).
+images:
+  # controller
+  - target: docker.io/rocm/network-operator:v1.0.0
+    k8s_comp_prefix:
+      - network-operator-charts-controller
+  # operands
+  # device-plugin
+  - target: docker.io/rocm/k8s-network-device-plugin:v1.0.0
+  # node-labeler
+  - target: docker.io/rocm/k8s-network-node-labeller:v1.0.0
+  # cni-plugins
+  - target: docker.io/rocm/k8s-cni-plugins:v1.0.0
+  # device-metrics-exporter
+  - target: docker.io/rocm/device-metrics-exporter:nic-v1.0.0
+  # utils
+  - target: docker.io/rocm/network-operator-utils:v1.0.0
diff --git a/index.yaml b/index.yaml
index 7d69fbd3..a03bc4b1 100644
--- a/index.yaml
+++ b/index.yaml
@@ -2,8 +2,8 @@ apiVersion: v1
 entries:
   network-operator-charts:
   - apiVersion: v2
-    appVersion: dev
-    created: "2025-10-14T17:32:32.881764484Z"
+    appVersion: v1.0.0
+    created: "2025-10-22T18:11:48.136290444Z"
     dependencies:
     - condition: node-feature-discovery.enabled
       name: node-feature-discovery
@@ -19,7 +19,7 @@ entries:
       version: v1.0.0
     description: AMD Network Operator simplifies the deployment and management of
       AMD AINICs within Kubernetes clusters.
-    digest: 54be700c26de9c98eb1ab1ad8da3e9aadae68d49118b91963565da2067592316
+    digest: 43684f1fc0eae61acbce9c9940afc931b9ec9a427cf9ff6d341e2a9cdc745e65
     home: https://github.com/ROCm/network-operator
     icon: https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/helm/logo.png
     keywords:
@@ -43,4 +43,4 @@ entries:
     urls:
     - https://rocm.github.io/network-operator/charts/network-operator-charts-v1.0.0.tgz
     version: v1.0.0
-generated: "2025-10-14T17:32:32.863923134Z"
+generated: "2025-10-22T18:11:48.120802375Z"