From 8bb9605b88e4feb6f2253f73cbd16e67975a688c Mon Sep 17 00:00:00 2001 From: Yuvarani Shankar Date: Fri, 5 Jun 2026 08:32:57 -0700 Subject: [PATCH] fix: add missing device-plugin ServiceAccount for OLM deployments (NETOP-174) (#328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The device-plugin DaemonSet failed on OpenShift OLM deployments because the amd-network-operator-device-plugin ServiceAccount was never created. SCC rules for device-plugin, cni-plugins, and kmm-module-loader were in the CSV permissions section (namespace-scoped RoleBindings) instead of clusterPermissions (ClusterRoleBindings), which is required for SCC access. - Move device-plugin, cni-plugins, kmm-module-loader SCC rules to clusterPermissions in the CSV - Remove unused kmm-device-plugin SA (GPU operator leftover) - Fix RoleBinding → ClusterRoleBinding in kustomize and Helm RBAC - Add missing cni-plugins SA and RBAC to OpenShift Helm chart Co-authored-by: Yuva Shankar <11082310+yuva29@users.noreply.github.com> Co-authored-by: Claude Opus 4 (1M context) # Conflicts: # bundle/manifests/amd-network-operator.clusterserviceversion.yaml --- Makefile | 2 +- config/rbac/cni_plugins_role_binding.yaml | 3 +- ...e.yaml => device_plugin_cluster_role.yaml} | 0 ...g.yaml => device_plugin_role_binding.yaml} | 3 +- ...aml => device_plugin_service_account.yaml} | 0 .../rbac/kmm_module_loader_role_binding.yaml | 3 +- config/rbac/kustomization.yaml | 6 ++-- .../metadata-patch/values.yaml | 3 ++ ...plugin-rbac.yaml => cni-plugins-rbac.yaml} | 18 +++++----- .../template-patch/device-plugin-rbac.yaml | 34 +++++++++++++++++++ .../kmm-module-loader-rbac.yaml | 2 +- .../template-patch/serviceaccount.yaml | 12 ++++++- .../testdata/device_plugin_test.yaml | 2 +- 13 files changed, 66 insertions(+), 22 deletions(-) rename config/rbac/{kmm_device_plugin_cluster_role.yaml => device_plugin_cluster_role.yaml} (100%) rename config/rbac/{kmm_device_plugin_role_binding.yaml => device_plugin_role_binding.yaml} (85%) rename config/rbac/{kmm_device_plugin_service_account.yaml => device_plugin_service_account.yaml} (100%) rename hack/openshift-patch/template-patch/{kmm-device-plugin-rbac.yaml => cni-plugins-rbac.yaml} (54%) create mode 100644 hack/openshift-patch/template-patch/device-plugin-rbac.yaml diff --git a/Makefile b/Makefile index 1b4dff25..85bd6850 100644 --- a/Makefile +++ b/Makefile @@ -444,7 +444,7 @@ bundle-build: operator-sdk manifests kustomize cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) cd config/manager-base && $(KUSTOMIZE) edit set image controller=$(IMG) OPERATOR_SDK="${OPERATOR_SDK}" \ - BUNDLE_GEN_FLAGS="${BUNDLE_GEN_FLAGS} --extra-service-accounts amd-network-operator-kmm-device-plugin,amd-network-operator-kmm-module-loader,amd-network-operator-node-labeller,amd-network-operator-metrics-exporter,amd-network-operator-metrics-exporter-rbac-proxy,amd-network-operator-test-runner,amd-network-operator-config-manager,amd-network-operator-utils-container,amd-network-operator-cni-plugins,amd-network-operator-device-plugin" \ + BUNDLE_GEN_FLAGS="${BUNDLE_GEN_FLAGS} --extra-service-accounts amd-network-operator-device-plugin,amd-network-operator-kmm-module-loader,amd-network-operator-node-labeller,amd-network-operator-metrics-exporter,amd-network-operator-metrics-exporter-rbac-proxy,amd-network-operator-test-runner,amd-network-operator-config-manager,amd-network-operator-utils-container,amd-network-operator-cni-plugins" \ PKG=amd-network-operator \ SOURCE_DIR=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) \ KUBECTL_CMD=${KUBECTL_CMD} ./hack/generate-bundle diff --git a/config/rbac/cni_plugins_role_binding.yaml b/config/rbac/cni_plugins_role_binding.yaml index 601f0822..18cb5988 100644 --- a/config/rbac/cni_plugins_role_binding.yaml +++ b/config/rbac/cni_plugins_role_binding.yaml @@ -1,8 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: cni-plugins - namespace: system roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/config/rbac/kmm_device_plugin_cluster_role.yaml b/config/rbac/device_plugin_cluster_role.yaml similarity index 100% rename from config/rbac/kmm_device_plugin_cluster_role.yaml rename to config/rbac/device_plugin_cluster_role.yaml diff --git a/config/rbac/kmm_device_plugin_role_binding.yaml b/config/rbac/device_plugin_role_binding.yaml similarity index 85% rename from config/rbac/kmm_device_plugin_role_binding.yaml rename to config/rbac/device_plugin_role_binding.yaml index e0ce75cb..c23aa66a 100644 --- a/config/rbac/kmm_device_plugin_role_binding.yaml +++ b/config/rbac/device_plugin_role_binding.yaml @@ -1,8 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: device-plugin - namespace: system roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/config/rbac/kmm_device_plugin_service_account.yaml b/config/rbac/device_plugin_service_account.yaml similarity index 100% rename from config/rbac/kmm_device_plugin_service_account.yaml rename to config/rbac/device_plugin_service_account.yaml diff --git a/config/rbac/kmm_module_loader_role_binding.yaml b/config/rbac/kmm_module_loader_role_binding.yaml index fe83d7a9..1c71aca8 100644 --- a/config/rbac/kmm_module_loader_role_binding.yaml +++ b/config/rbac/kmm_module_loader_role_binding.yaml @@ -1,8 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: kmm-module-loader - namespace: system roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 6557bcce..c38c9128 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -7,9 +7,9 @@ resources: - kmm_module_loader_role_binding.yaml - kmm_module_loader_service_account.yaml - kmm_module_loader_cluster_role.yaml - - kmm_device_plugin_role_binding.yaml - - kmm_device_plugin_service_account.yaml - - kmm_device_plugin_cluster_role.yaml + - device_plugin_role_binding.yaml + - device_plugin_service_account.yaml + - device_plugin_cluster_role.yaml - node_labeller_service_account.yaml - node_labeller_cluster_role.yaml - node_labeller_role_binding.yaml diff --git a/hack/openshift-patch/metadata-patch/values.yaml b/hack/openshift-patch/metadata-patch/values.yaml index dcd63245..133db087 100644 --- a/hack/openshift-patch/metadata-patch/values.yaml +++ b/hack/openshift-patch/metadata-patch/values.yaml @@ -72,3 +72,6 @@ nodeLabeller: metricsExporter: serviceAccount: annotations: {} +cniPlugins: + serviceAccount: + annotations: {} diff --git a/hack/openshift-patch/template-patch/kmm-device-plugin-rbac.yaml b/hack/openshift-patch/template-patch/cni-plugins-rbac.yaml similarity index 54% rename from hack/openshift-patch/template-patch/kmm-device-plugin-rbac.yaml rename to hack/openshift-patch/template-patch/cni-plugins-rbac.yaml index e81a5133..dd95a4c6 100644 --- a/hack/openshift-patch/template-patch/kmm-device-plugin-rbac.yaml +++ b/hack/openshift-patch/template-patch/cni-plugins-rbac.yaml @@ -1,10 +1,10 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ include "helm-charts-openshift.fullname" . }}-kmm-device-plugin + name: {{ include "helm-charts-openshift.fullname" . }}-cni-plugins labels: - app.kubernetes.io/component: amd-gpu - app.kubernetes.io/part-of: amd-gpu + app.kubernetes.io/component: amd-nic + app.kubernetes.io/part-of: amd-nic {{- include "helm-charts-openshift.labels" . | nindent 4 }} rules: - apiGroups: @@ -17,18 +17,18 @@ rules: - use --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: - name: {{ include "helm-charts-openshift.fullname" . }}-kmm-device-plugin + name: {{ include "helm-charts-openshift.fullname" . }}-cni-plugins labels: - app.kubernetes.io/component: amd-gpu - app.kubernetes.io/part-of: amd-gpu + app.kubernetes.io/component: amd-nic + app.kubernetes.io/part-of: amd-nic {{- include "helm-charts-openshift.labels" . | nindent 4 }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: '{{ include "helm-charts-openshift.fullname" . }}-kmm-device-plugin' + name: '{{ include "helm-charts-openshift.fullname" . }}-cni-plugins' subjects: - kind: ServiceAccount - name: amd-gpu-operator-kmm-device-plugin + name: amd-network-operator-cni-plugins namespace: '{{ .Release.Namespace }}' diff --git a/hack/openshift-patch/template-patch/device-plugin-rbac.yaml b/hack/openshift-patch/template-patch/device-plugin-rbac.yaml new file mode 100644 index 00000000..e5331770 --- /dev/null +++ b/hack/openshift-patch/template-patch/device-plugin-rbac.yaml @@ -0,0 +1,34 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "helm-charts-openshift.fullname" . }}-device-plugin + labels: + app.kubernetes.io/component: amd-nic + app.kubernetes.io/part-of: amd-nic + {{- include "helm-charts-openshift.labels" . | nindent 4 }} +rules: +- apiGroups: + - security.openshift.io + resourceNames: + - privileged + resources: + - securitycontextconstraints + verbs: + - use +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "helm-charts-openshift.fullname" . }}-device-plugin + labels: + app.kubernetes.io/component: amd-nic + app.kubernetes.io/part-of: amd-nic + {{- include "helm-charts-openshift.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: '{{ include "helm-charts-openshift.fullname" . }}-device-plugin' +subjects: +- kind: ServiceAccount + name: amd-network-operator-device-plugin + namespace: '{{ .Release.Namespace }}' diff --git a/hack/openshift-patch/template-patch/kmm-module-loader-rbac.yaml b/hack/openshift-patch/template-patch/kmm-module-loader-rbac.yaml index 5d985069..08bfcbd7 100644 --- a/hack/openshift-patch/template-patch/kmm-module-loader-rbac.yaml +++ b/hack/openshift-patch/template-patch/kmm-module-loader-rbac.yaml @@ -17,7 +17,7 @@ rules: - use --- apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding +kind: ClusterRoleBinding metadata: name: {{ include "helm-charts-openshift.fullname" . }}-kmm-module-loader labels: diff --git a/hack/openshift-patch/template-patch/serviceaccount.yaml b/hack/openshift-patch/template-patch/serviceaccount.yaml index 08460ac9..14e85cf8 100644 --- a/hack/openshift-patch/template-patch/serviceaccount.yaml +++ b/hack/openshift-patch/template-patch/serviceaccount.yaml @@ -51,6 +51,16 @@ metadata: app.kubernetes.io/component: amd-nic app.kubernetes.io/part-of: amd-nic {{- include "helm-charts-openshift.labels" . | nindent 4 }} - annotations: annotations: {{- toYaml .Values.metricsExporter.serviceAccount.annotations | nindent 4 }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: amd-network-operator-cni-plugins + labels: + app.kubernetes.io/component: amd-nic + app.kubernetes.io/part-of: amd-nic + {{- include "helm-charts-openshift.labels" . | nindent 4 }} + annotations: + {{- toYaml .Values.cniPlugins.serviceAccount.annotations | nindent 4 }} diff --git a/internal/kmmmodule/testdata/device_plugin_test.yaml b/internal/kmmmodule/testdata/device_plugin_test.yaml index 381cea6c..be37ffab 100644 --- a/internal/kmmmodule/testdata/device_plugin_test.yaml +++ b/internal/kmmmodule/testdata/device_plugin_test.yaml @@ -5,7 +5,7 @@ metadata: namespace: moduleNamespace spec: devicePlugin: - serviceAccountName: "amd-gpu-operator-kmm-device-plugin" + serviceAccountName: "amd-network-operator-device-plugin" container: image: rocm/k8s-device-plugin:latest volumeMounts: