From 4412e821ba0287ec7f4d32135214389c8c931761 Mon Sep 17 00:00:00 2001 From: Yuvarani Shankar Date: Tue, 9 Jun 2026 19:33:54 -0700 Subject: [PATCH] fix: clean up GPU operator metadata leftovers from OLM bundle and Helm charts (#330) Remove GPU operator artifacts that were copied but never adapted for the network operator. - Remove gpu-nfd-default-rule.yaml from K8s Helm chart (detects GPU PCI IDs, creates GPU labels the network operator never uses) - Add "Networking" to the OperatorHub categories (the existing "AI/Machine Learning" and "Monitoring" entries are kept) - Replace GPU keywords (AI, Deep Learning) with NIC-relevant keywords (NIC, AINIC, RDMA, SR-IOV, CNI) - Fix 23 URN descriptors referencing deviceconfigs (GPU CRD) to networkconfigs (network operator CRD) in both the Go API type markers and the CSV base (the bundle CSV is not touched by this patch; see Conflicts below) - Fix maintainer name/email mismatches (GPU operator names paired with network operator emails) - Update image annotations to reference network operator images instead of GPU device-plugin images - Replace GPU device-plugin icon URL with network operator diagram in both K8s and OpenShift Chart.yaml Co-authored-by: Yuva Shankar <11082310+yuva29@users.noreply.github.com> Co-authored-by: Claude Opus 4 (1M context) # Conflicts: # bundle/manifests/amd-network-operator.clusterserviceversion.yaml --- api/v1alpha1/networkconfig_types.go | 46 ++-- ...etwork-operator.clusterserviceversion.yaml | 68 +++--- config/samples/amd.com_networkconfigs.yaml | 8 +- hack/k8s-patch/metadata-patch/Chart.yaml | 7 +- .../template-patch/gpu-nfd-default-rule.yaml | 212 ------------------ .../openshift-patch/metadata-patch/Chart.yaml | 7 +- .../templates/gpu-nfd-default-rule.yaml | 212 ------------------ 7 files changed, 71 insertions(+), 489 deletions(-) delete mode 100644 hack/k8s-patch/template-patch/gpu-nfd-default-rule.yaml delete mode 100644 helm-charts-k8s/templates/gpu-nfd-default-rule.yaml diff --git a/api/v1alpha1/networkconfig_types.go b/api/v1alpha1/networkconfig_types.go index 91fbf1d4..7977d877 100644 --- 
a/api/v1alpha1/networkconfig_types.go +++ b/api/v1alpha1/networkconfig_types.go @@ -219,7 +219,7 @@ type DriverSpec struct { // NOTE: currently only for OpenShift cluster // set to true to use source image to build driver image on the fly // otherwise use installer debian/rpm packages from radeon repo to build driver image - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="UseSourceImage",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:useSourceImage"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="UseSourceImage",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:useSourceImage"} UseSourceImage *bool `json:"useSourceImage,omitempty"` // radeon repo URL for fetching amdnetwork installer if building driver image on the fly @@ -262,7 +262,7 @@ type DriverSpec struct { ImageSign ImageSignSpec `json:"imageSign,omitempty"` // image build configs - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ImageBuild",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:imageBuild"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ImageBuild",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:imageBuild"} // +optional ImageBuild ImageBuildSpec `json:"imageBuild,omitempty"` @@ -272,7 +272,7 @@ type DriverSpec struct { UpgradePolicy *DriverUpgradePolicySpec `json:"upgradePolicy,omitempty"` // tolerations for kmm module object - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Tolerations",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:tolerations"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Tolerations",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:tolerations"} // +optional Tolerations []v1.Toleration `json:"tolerations,omitempty"` } @@ -398,7 +398,7 @@ type ImageBuildSpec struct { // Use spec.driver.imageRegistrySecret for authentication with private registries. 
// NOTE: this field won't apply for OpenShift since OpenShift is using its own DriverToolKit image to build driver image // +kubebuilder:default=docker.io - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BaseImageRegistry",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:baseImageRegistry"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BaseImageRegistry",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:baseImageRegistry"} BaseImageRegistry string `json:"baseImageRegistry,omitempty"` // SourceImageRepo specifies the image repository for the driver source code (OpenShift only). @@ -406,12 +406,12 @@ type ImageBuildSpec struct { // based on cluster RHEL version and spec.driver.version (format: coreos--). // Default: docker.io/rocm/amdainic-driver // Use spec.driver.imageRegistrySecret for authentication with private registries. - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="SourceImageRepo",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:sourceImageRepo"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="SourceImageRepo",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:sourceImageRepo"} SourceImageRepo string `json:"sourceImageRepo,omitempty"` // TLS settings for fetching base image // this field will be applied to SourceImageRepo as well - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BaseImageRegistryTLS",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:baseImageRegistryTLS"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BaseImageRegistryTLS",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:baseImageRegistryTLS"} BaseImageRegistryTLS RegistryTLS `json:"baseImageRegistryTLS,omitempty"` } @@ -543,14 +543,14 @@ type MetricsExporterSpec struct { HostNetwork *bool `json:"hostNetwork,omitempty"` // Prometheus configuration for metrics exporter - 
//+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Prometheus",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:prometheus"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Prometheus",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:prometheus"} // +optional Prometheus *PrometheusConfig `json:"prometheus,omitempty"` } type PrometheusConfig struct { // ServiceMonitor configuration for Prometheus integration - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ServiceMonitor",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:serviceMonitor"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ServiceMonitor",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:serviceMonitor"} // +optional ServiceMonitor *ServiceMonitorConfig `json:"serviceMonitor,omitempty"` } @@ -558,60 +558,60 @@ type PrometheusConfig struct { // ServiceMonitorConfig provides configuration for ServiceMonitor type ServiceMonitorConfig struct { // Enable or disable ServiceMonitor creation (default false) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:enable"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:enable"} // +optional Enable *bool `json:"enable,omitempty"` // How frequently to scrape metrics. 
Accepts values with time unit suffix: "30s", "1m", "2h", "500ms" - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Interval",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:interval"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Interval",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:interval"} // +optional // +kubebuilder:validation:Pattern=`^([0-9]+)(ms|s|m|h)$` Interval string `json:"interval,omitempty"` // AttachMetadata defines if Prometheus should attach node metadata to the target - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="AttachMetadata",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:attachMetadata"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="AttachMetadata",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:attachMetadata"} // +optional AttachMetadata *monitoringv1.AttachMetadata `json:"attachMetadata,omitempty"` // HonorLabels chooses the metric's labels on collisions with target labels (default true) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="HonorLabels",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:honorLabels"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="HonorLabels",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:honorLabels"} // +optional // +kubebuilder:default=true HonorLabels *bool `json:"honorLabels,omitempty"` // HonorTimestamps controls whether the scrape endpoints honor timestamps (default false) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="HonorTimestamps",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:honorTimestamps"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="HonorTimestamps",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:honorTimestamps"} // +optional HonorTimestamps *bool `json:"honorTimestamps,omitempty"` // Additional labels to add to the ServiceMonitor 
(default release: prometheus) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Labels",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:labels"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Labels",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:labels"} // +optional Labels map[string]string `json:"labels,omitempty"` // RelabelConfigs to apply to samples before ingestion - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Relabelings",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:relabelings"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Relabelings",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:relabelings"} // +optional Relabelings []monitoringv1.RelabelConfig `json:"relabelings,omitempty"` // Relabeling rules applied to individual scraped metrics - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="MetricRelabelings",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:metricRelabelings"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="MetricRelabelings",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:metricRelabelings"} // +optional MetricRelabelings []monitoringv1.RelabelConfig `json:"metricRelabelings,omitempty"` // Optional Prometheus authorization configuration for accessing the endpoint - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Authorization",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:authorization"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Authorization",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:authorization"} // +optional Authorization *monitoringv1.SafeAuthorization `json:"authorization,omitempty"` // Path to bearer token file to be used by Prometheus (e.g., service account token path) // Deprecated: Use Authorization instead. This field is kept for backward compatibility. 
- //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BearerTokenFile",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:bearerTokenFile"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="BearerTokenFile",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:bearerTokenFile"} // +optional BearerTokenFile string `json:"bearerTokenFile,omitempty"` // TLS settings used by Prometheus to connect to the metrics endpoint - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="TLSConfig",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:tlsConfig"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="TLSConfig",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:tlsConfig"} // +optional TLSConfig *monitoringv1.TLSConfig `json:"tlsConfig,omitempty"` } @@ -619,11 +619,11 @@ type ServiceMonitorConfig struct { // StaticAuthConfig contains static authorization configuration for kube-rbac-proxy type StaticAuthConfig struct { // Enables static authorization using client certificate CN - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:enable"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:enable"} Enable bool `json:"enable,omitempty"` // Expected CN (Common Name) from client cert (e.g., Prometheus SA identity) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ClientName",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:clientName"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ClientName",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:clientName"} ClientName string `json:"clientName,omitempty"` } @@ -651,12 +651,12 @@ type KubeRbacConfig struct { Secret *v1.LocalObjectReference `json:"secret,omitempty"` // Reference to a configmap containing the client CA 
(key: ca.crt) for mTLS client validation - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ClientCAConfigMap",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:clientCAConfigMap"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ClientCAConfigMap",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:clientCAConfigMap"} // +optional ClientCAConfigMap *v1.LocalObjectReference `json:"clientCAConfigMap,omitempty"` // Optional static RBAC rules based on client certificate Common Name (CN) - //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="StaticAuthorization",xDescriptors={"urn:alm:descriptor:com.amd.deviceconfigs:staticAuthorization"} + //+operator-sdk:csv:customresourcedefinitions:type=spec,displayName="StaticAuthorization",xDescriptors={"urn:alm:descriptor:com.amd.networkconfigs:staticAuthorization"} // +optional StaticAuthorization *StaticAuthConfig `json:"staticAuthorization,omitempty"` } diff --git a/config/manifests/bases/amd-network-operator.clusterserviceversion.yaml b/config/manifests/bases/amd-network-operator.clusterserviceversion.yaml index cd7e8c45..f5489c6c 100644 --- a/config/manifests/bases/amd-network-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/amd-network-operator.clusterserviceversion.yaml @@ -4,12 +4,12 @@ metadata: annotations: alm-examples: '[]' capabilities: Seamless Upgrades - categories: AI/Machine Learning,Monitoring + categories: AI/Machine Learning,Networking,Monitoring containerImage: registry.test.pensando.io:5000/amd-network-operator:dev description: |- Operator responsible for deploying AMD Network kernel drivers, device plugin, device test runner and device metrics exporter For more information, visit [documentation](https://instinct.docs.amd.com/projects/network-operator/en/latest/) - devicePluginImage: docker.io/rocm/k8s-device-plugin:rhubi-latest + devicePluginImage: docker.io/rocm/k8s-network-device-plugin:v0.0.1 
features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "false" features.operators.openshift.io/proxy-aware: "true" @@ -17,8 +17,8 @@ metadata: features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" - metricsExporterImage: docker.io/rocm/device-metrics-exporter:v1.2.0 - nodelabellerImage: docker.io/rocm/k8s-device-plugin:labeller-rhubi-latest + metricsExporterImage: docker.io/rocm/device-metrics-exporter:nic-v0.0.1 + nodelabellerImage: docker.io/rocm/k8s-network-node-labeller:v0.0.1 operatorframework.io/cluster-monitoring: "true" operatorframework.io/suggested-namespace: openshift-amd-network operators.openshift.io/valid-subscription: '[]' @@ -253,7 +253,7 @@ spec: displayName: ImageBuild path: driver.imageBuild x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:imageBuild + - urn:alm:descriptor:com.amd.networkconfigs:imageBuild - description: 'image registry to fetch base image for building driver image, default value is docker.io, the builder will search for corresponding OS base image from given registry e.g. if your worker node is using Ubuntu @@ -264,13 +264,13 @@ spec: displayName: BaseImageRegistry path: driver.imageBuild.baseImageRegistry x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:baseImageRegistry + - urn:alm:descriptor:com.amd.networkconfigs:baseImageRegistry - description: TLS settings for fetching base image this field will be applied to SourceImageRepo as well displayName: BaseImageRegistryTLS path: driver.imageBuild.baseImageRegistryTLS x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:baseImageRegistryTLS + - urn:alm:descriptor:com.amd.networkconfigs:baseImageRegistryTLS - description: If true, check if the container image already exists using plain HTTP. 
displayName: Insecure @@ -291,7 +291,7 @@ spec: displayName: SourceImageRepo path: driver.imageBuild.sourceImageRepo x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:sourceImageRepo + - urn:alm:descriptor:com.amd.networkconfigs:sourceImageRepo - description: secrets used for pull/push images from/to private registry specified in driversImage displayName: ImageRegistrySecret @@ -337,7 +337,7 @@ spec: displayName: Tolerations path: driver.tolerations x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:tolerations + - urn:alm:descriptor:com.amd.networkconfigs:tolerations - description: policy to upgrade the drivers displayName: UpgradePolicy path: driver.upgradePolicy @@ -389,7 +389,7 @@ spec: displayName: UseSourceImage path: driver.useSourceImage x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:useSourceImage + - urn:alm:descriptor:com.amd.networkconfigs:useSourceImage - description: 'version of the drivers source code, can be used as part of image of dockerfile source image default value for different OS is: ubuntu: 1.117.1-a-42, coreOS: 1.117.1-a-42' @@ -455,75 +455,75 @@ spec: displayName: Prometheus path: metricsExporter.prometheus x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:prometheus + - urn:alm:descriptor:com.amd.networkconfigs:prometheus - description: ServiceMonitor configuration for Prometheus integration displayName: ServiceMonitor path: metricsExporter.prometheus.serviceMonitor x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:serviceMonitor + - urn:alm:descriptor:com.amd.networkconfigs:serviceMonitor - description: AttachMetadata defines if Prometheus should attach node metadata to the target displayName: AttachMetadata path: metricsExporter.prometheus.serviceMonitor.attachMetadata x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:attachMetadata + - urn:alm:descriptor:com.amd.networkconfigs:attachMetadata - description: Optional Prometheus authorization configuration for accessing the 
endpoint displayName: Authorization path: metricsExporter.prometheus.serviceMonitor.authorization x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:authorization + - urn:alm:descriptor:com.amd.networkconfigs:authorization - description: 'Path to bearer token file to be used by Prometheus (e.g., service account token path) Deprecated: Use Authorization instead. This field is kept for backward compatibility.' displayName: BearerTokenFile path: metricsExporter.prometheus.serviceMonitor.bearerTokenFile x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:bearerTokenFile + - urn:alm:descriptor:com.amd.networkconfigs:bearerTokenFile - description: Enable or disable ServiceMonitor creation (default false) displayName: Enable path: metricsExporter.prometheus.serviceMonitor.enable x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:enable + - urn:alm:descriptor:com.amd.networkconfigs:enable - description: HonorLabels chooses the metric's labels on collisions with target labels (default true) displayName: HonorLabels path: metricsExporter.prometheus.serviceMonitor.honorLabels x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:honorLabels + - urn:alm:descriptor:com.amd.networkconfigs:honorLabels - description: HonorTimestamps controls whether the scrape endpoints honor timestamps (default false) displayName: HonorTimestamps path: metricsExporter.prometheus.serviceMonitor.honorTimestamps x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:honorTimestamps + - urn:alm:descriptor:com.amd.networkconfigs:honorTimestamps - description: 'How frequently to scrape metrics. 
Accepts values with time unit suffix: "30s", "1m", "2h", "500ms"' displayName: Interval path: metricsExporter.prometheus.serviceMonitor.interval x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:interval + - urn:alm:descriptor:com.amd.networkconfigs:interval - description: 'Additional labels to add to the ServiceMonitor (default release: prometheus)' displayName: Labels path: metricsExporter.prometheus.serviceMonitor.labels x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:labels + - urn:alm:descriptor:com.amd.networkconfigs:labels - description: Relabeling rules applied to individual scraped metrics displayName: MetricRelabelings path: metricsExporter.prometheus.serviceMonitor.metricRelabelings x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:metricRelabelings + - urn:alm:descriptor:com.amd.networkconfigs:metricRelabelings - description: RelabelConfigs to apply to samples before ingestion displayName: Relabelings path: metricsExporter.prometheus.serviceMonitor.relabelings x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:relabelings + - urn:alm:descriptor:com.amd.networkconfigs:relabelings - description: TLS settings used by Prometheus to connect to the metrics endpoint displayName: TLSConfig path: metricsExporter.prometheus.serviceMonitor.tlsConfig x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:tlsConfig + - urn:alm:descriptor:com.amd.networkconfigs:tlsConfig - description: optional kube-rbac-proxy config to provide rbac services displayName: RbacConfig path: metricsExporter.rbacConfig @@ -534,7 +534,7 @@ spec: displayName: ClientCAConfigMap path: metricsExporter.rbacConfig.clientCAConfigMap x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:clientCAConfigMap + - urn:alm:descriptor:com.amd.networkconfigs:clientCAConfigMap - description: disable https protecting the proxy endpoint displayName: DisableHttps path: metricsExporter.rbacConfig.disableHttps @@ -561,18 +561,18 @@ spec: displayName: 
StaticAuthorization path: metricsExporter.rbacConfig.staticAuthorization x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:staticAuthorization + - urn:alm:descriptor:com.amd.networkconfigs:staticAuthorization - description: Expected CN (Common Name) from client cert (e.g., Prometheus SA identity) displayName: ClientName path: metricsExporter.rbacConfig.staticAuthorization.clientName x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:clientName + - urn:alm:descriptor:com.amd.networkconfigs:clientName - description: Enables static authorization using client certificate CN displayName: Enable path: metricsExporter.rbacConfig.staticAuthorization.enable x-descriptors: - - urn:alm:descriptor:com.amd.deviceconfigs:enable + - urn:alm:descriptor:com.amd.networkconfigs:enable - description: Selector describes on which nodes to enable metrics exporter displayName: Selector path: metricsExporter.selector @@ -833,9 +833,11 @@ spec: keywords: - AMD - Network - - AI - - Deep Learning - - Hardware + - NIC + - AINIC + - RDMA + - SR-IOV + - CNI - Driver - Monitoring links: @@ -843,11 +845,13 @@ spec: url: https://github.com/ROCm/network-operator maintainers: - email: Sundaramurthy.Gurunathan@amd.com - name: Yan Sun + name: Sundaramurthy Gurunathan - email: Yuvarani.Shankar@amd.com - name: Farshad Ghodsian + name: Yuvarani Shankar - email: shrey.ajmera@amd.com name: Shrey Ajmera + - email: Yan.Sun3@amd.com + name: Yan Sun maturity: stable provider: name: Advanced Micro Devices, Inc. 
diff --git a/config/samples/amd.com_networkconfigs.yaml b/config/samples/amd.com_networkconfigs.yaml index ef126695..cc41cfe4 100644 --- a/config/samples/amd.com_networkconfigs.yaml +++ b/config/samples/amd.com_networkconfigs.yaml @@ -44,12 +44,12 @@ spec: devicePlugin: # Specify the device plugin image - # default value is rocm/k8s-device-plugin:latest - devicePluginImage: rocm/k8s-device-plugin:latest + # default value is rocm/k8s-network-device-plugin:v0.0.1 + devicePluginImage: rocm/k8s-network-device-plugin:v0.0.1 # Specify the node labeller image - # default value is rocm/k8s-device-plugin:labeller-latest - nodeLabellerImage: rocm/k8s-device-plugin:labeller-latest + # default value is rocm/k8s-network-node-labeller:v0.0.1 + nodeLabellerImage: rocm/k8s-network-node-labeller:v0.0.1 # Specifythe node to be managed by this NetworkConfig Custom Resource selector: diff --git a/hack/k8s-patch/metadata-patch/Chart.yaml b/hack/k8s-patch/metadata-patch/Chart.yaml index 19c4995a..99e0eb95 100644 --- a/hack/k8s-patch/metadata-patch/Chart.yaml +++ b/hack/k8s-patch/metadata-patch/Chart.yaml @@ -5,7 +5,7 @@ type: application home: https://github.com/ROCm/network-operator sources: - https://github.com/ROCm/network-operator -icon: https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/helm/logo.png +icon: https://raw.githubusercontent.com/ROCm/network-operator/main/docs/_static/amd-network-operator-diagram.jpg maintainers: - name: Sundara Gurunathan - name: Yuvarani Shankar @@ -16,8 +16,9 @@ keywords: - hardware - amd - network - - ai - - deep learning + - nic + - ainic + - rdma - monitoring kubeVersion: ">= 1.29.0-0" diff --git a/hack/k8s-patch/template-patch/gpu-nfd-default-rule.yaml b/hack/k8s-patch/template-patch/gpu-nfd-default-rule.yaml deleted file mode 100644 index f7f809b7..00000000 --- a/hack/k8s-patch/template-patch/gpu-nfd-default-rule.yaml +++ /dev/null @@ -1,212 +0,0 @@ -{{- if .Values.installdefaultNFDRule }} -apiVersion: nfd.k8s-sigs.io/v1alpha1 
-kind: NodeFeatureRule -metadata: - name: {{ .Release.Name }}-gpu-label-nfd-rule - # the PCI info is from these websites: - # source1: https://admin.pci-ids.ucw.cz/read/PC/1002 - # source2: https://devicehunt.com/view/type/pci/vendor/1002 -spec: - rules: - - name: amd-vgpu - labels: - feature.node.kubernetes.io/amd-vgpu: "true" - matchAny: - # AMD Instinct - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7410"]} # MI210 VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74b5"]} # MI300X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74b9"]} # Mi325X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75b0"]} # Mi350X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75b3"]} # Mi355X VF - # AMD Radeon Pro - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7461"]} # Radeon Pro V710 MxGPU - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73ae"]} # Radeon Pro V620 MxGPU - - name: amd-gpu - labels: - feature.node.kubernetes.io/amd-gpu: "true" - matchAny: - # AMD Instinct - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75a3"]} # MI355X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75a0"]} # MI350X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a5"]} # MI325X - - matchFeatures: - - feature: 
pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a2"]} # MI308X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74b6"]} # MI308X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a8"]} # MI308X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a0"]} # MI300A - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a1"]} # MI300X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a9"]} # MI300X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74bd"]} # MI300X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740f"]} # MI210 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7408"]} # MI250X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740c"]} # MI250/MI250X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["738c"]} # MI100 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["738e"]} # MI100 - # AMD Radeon Pro - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7460"]} # V710 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: 
["7448"]} # W7900 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["744a"]} # W7900 Dual Slot - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["745e"]} # W7800 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a2"]} # W6900X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a3"]} # W6800 GL-XL - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73ab"]} # W6800X / W6800X Duo - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a1"]} # V620 - # AMD Radeon - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7550"]} # RX 9070 / 9070 XT - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["744c"]} # RX 7900 XT / 7900 XTX / 7900 GRE / 7900M - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73af"]} # RX 6900 XT - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73bf"]} # RX 6800 / 6800 XT / 6900 XT - - name: amd-gpu-mi210 - labels: - feature.node.kubernetes.io/amd-gpu-mi210: "true" - matchAny: - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740f"]} # MI210 - - name: amd-gpu-mi300x - labels: - feature.node.kubernetes.io/amd-gpu-mi300x: "true" - matchAny: - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: 
["1002"]} - device: {op: In, value: ["74a1"]} # MI300X -{{- end }} \ No newline at end of file diff --git a/hack/openshift-patch/metadata-patch/Chart.yaml b/hack/openshift-patch/metadata-patch/Chart.yaml index 15377169..3489c27f 100644 --- a/hack/openshift-patch/metadata-patch/Chart.yaml +++ b/hack/openshift-patch/metadata-patch/Chart.yaml @@ -5,7 +5,7 @@ type: application home: https://github.com/ROCm/network-operator sources: - https://github.com/ROCm/network-operator -icon: https://raw.githubusercontent.com/ROCm/k8s-device-plugin/master/helm/logo.png +icon: https://raw.githubusercontent.com/ROCm/network-operator/main/docs/_static/amd-network-operator-diagram.jpg maintainers: - name: Sundara Gurunathan - name: Yuvarani Shankar @@ -15,8 +15,9 @@ keywords: - hardware - amd - network - - ai - - deep learning + - nic + - ainic + - rdma - monitoring kubeVersion: ">= 1.29.0-0" diff --git a/helm-charts-k8s/templates/gpu-nfd-default-rule.yaml b/helm-charts-k8s/templates/gpu-nfd-default-rule.yaml deleted file mode 100644 index f7f809b7..00000000 --- a/helm-charts-k8s/templates/gpu-nfd-default-rule.yaml +++ /dev/null @@ -1,212 +0,0 @@ -{{- if .Values.installdefaultNFDRule }} -apiVersion: nfd.k8s-sigs.io/v1alpha1 -kind: NodeFeatureRule -metadata: - name: {{ .Release.Name }}-gpu-label-nfd-rule - # the PCI info is from these websites: - # source1: https://admin.pci-ids.ucw.cz/read/PC/1002 - # source2: https://devicehunt.com/view/type/pci/vendor/1002 -spec: - rules: - - name: amd-vgpu - labels: - feature.node.kubernetes.io/amd-vgpu: "true" - matchAny: - # AMD Instinct - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7410"]} # MI210 VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74b5"]} # MI300X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, 
value: ["74b9"]} # Mi325X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75b0"]} # Mi350X VF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75b3"]} # Mi355X VF - # AMD Radeon Pro - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7461"]} # Radeon Pro V710 MxGPU - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73ae"]} # Radeon Pro V620 MxGPU - - name: amd-gpu - labels: - feature.node.kubernetes.io/amd-gpu: "true" - matchAny: - # AMD Instinct - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75a3"]} # MI355X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["75a0"]} # MI350X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a5"]} # MI325X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a2"]} # MI308X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74b6"]} # MI308X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a8"]} # MI308X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a0"]} # MI300A - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a1"]} # MI300X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, 
value: ["1002"]} - device: {op: In, value: ["74a9"]} # MI300X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74bd"]} # MI300X HF - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740f"]} # MI210 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7408"]} # MI250X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740c"]} # MI250/MI250X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["738c"]} # MI100 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["738e"]} # MI100 - # AMD Radeon Pro - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7460"]} # V710 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7448"]} # W7900 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["744a"]} # W7900 Dual Slot - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["745e"]} # W7800 - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a2"]} # W6900X - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a3"]} # W6800 GL-XL - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73ab"]} # W6800X / W6800X Duo - - 
matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73a1"]} # V620 - # AMD Radeon - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["7550"]} # RX 9070 / 9070 XT - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["744c"]} # RX 7900 XT / 7900 XTX / 7900 GRE / 7900M - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73af"]} # RX 6900 XT - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["73bf"]} # RX 6800 / 6800 XT / 6900 XT - - name: amd-gpu-mi210 - labels: - feature.node.kubernetes.io/amd-gpu-mi210: "true" - matchAny: - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["740f"]} # MI210 - - name: amd-gpu-mi300x - labels: - feature.node.kubernetes.io/amd-gpu-mi300x: "true" - matchAny: - - matchFeatures: - - feature: pci.device - matchExpressions: - vendor: {op: In, value: ["1002"]} - device: {op: In, value: ["74a1"]} # MI300X -{{- end }} \ No newline at end of file