From a4ee7529d94bf4186bd86fe63086003af7f083e5 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Tue, 24 Feb 2026 13:40:07 +0200
Subject: [PATCH 01/23] Remove v3io from
 MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__USER_SPACE and
 MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__MONITORING_APPLICATION

plus removes MLRUN_MODEL_ENDPOINT_MONITORING__ENDPOINT_STORE_CONNECTION
---
 charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
index a284aa5c..e6221ad3 100644
--- a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
+++ b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
@@ -17,13 +17,14 @@ data:
   MLRUN_HTTPDB__REAL_PATH: s3://
   MLRUN_ARTIFACT_PATH: s3://{{ $bucket_name }}/projects/{{ `{{run.project}}` }}/artifacts
   MLRUN_FEATURE_STORE__DATA_PREFIXES__DEFAULT: s3://{{ $bucket_name }}/projects/{project}/FeatureStore/{name}/{kind}
+  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__USER_SPACE: s3://{{ $bucket_name }}/projects/{project}/model-endpoints/{kind}
+  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__MONITORING_APPLICATION: s3://{{ $bucket_name }}/users/pipelines/{project}/monitoring-apps/
   MLRUN_FEATURE_STORE__DATA_PREFIXES__NOSQL: ""
   MLRUN_CE__MODE: {{ .Values.mlrun.ce.mode }}
   MLRUN_CE__VERSION: {{ .Chart.Version }}
   MLRUN_DEFAULT_TENSORBOARD_LOGS_PATH: /home/jovyan/data/tensorboard/{{ `{{project}} `}}
   MLRUN_FEATURE_STORE__DEFAULT_TARGETS: parquet
   MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__DEFAULT: s3://{{ $bucket_name }}/projects/{{ `{{project}}` }}/model-endpoints/{{ `{{kind}}` }}
-  MLRUN_MODEL_ENDPOINT_MONITORING__ENDPOINT_STORE_CONNECTION: "{{ template "mlrun-ce.mlrun.modelMonitoring.DSN" . }}"
   MLRUN_GRAFANA_URL: http://{{ .Values.global.externalHostAddress }}:{{ index .Values "kube-prometheus-stack" "grafana" "service" "nodePort" }}
   MLRUN_DEFAULT_FUNCTION_POD_RESOURCES__LIMITS__CPU: "{{ .Values.mlrun.defaultFunctionPodResources.limits.cpu | default "" }}"
   MLRUN_DEFAULT_FUNCTION_POD_RESOURCES__LIMITS__MEMORY: "{{ .Values.mlrun.defaultFunctionPodResources.limits.memory | default "" }}"

From ce7601ea112a83aedee03d537063ee546ca06ee9 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 25 Feb 2026 11:59:29 +0200
Subject: [PATCH 02/23] fix formatting

---
 charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
index e6221ad3..f37779e5 100644
--- a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
+++ b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml
@@ -17,8 +17,8 @@ data:
   MLRUN_HTTPDB__REAL_PATH: s3://
   MLRUN_ARTIFACT_PATH: s3://{{ $bucket_name }}/projects/{{ `{{run.project}}` }}/artifacts
   MLRUN_FEATURE_STORE__DATA_PREFIXES__DEFAULT: s3://{{ $bucket_name }}/projects/{project}/FeatureStore/{name}/{kind}
-  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__USER_SPACE: s3://{{ $bucket_name }}/projects/{project}/model-endpoints/{kind}
-  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__MONITORING_APPLICATION: s3://{{ $bucket_name }}/users/pipelines/{project}/monitoring-apps/
+  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__USER_SPACE: s3://{{ $bucket_name }}/projects/{{ `{{project}}` }}/model-endpoints/{{ `{{kind}}` }}
+  MLRUN_MODEL_ENDPOINT_MONITORING__STORE_PREFIXES__MONITORING_APPLICATION: s3://{{ $bucket_name }}/users/pipelines/{{ `{{project}}` }}/monitoring-apps/
   MLRUN_FEATURE_STORE__DATA_PREFIXES__NOSQL: ""
   MLRUN_CE__MODE: {{ .Values.mlrun.ce.mode }}
   MLRUN_CE__VERSION: {{ .Chart.Version }}

From 061e2a482dccb8fff2577cd9872f8990225eef56 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 25 Feb 2026 14:44:40 +0200
Subject: [PATCH 03/23] fix version and remove deprecated dsn

---
 charts/mlrun-ce/Chart.yaml             |  2 +-
 charts/mlrun-ce/templates/_helpers.tpl | 13 -------------
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index 92bc7cb0..eb248c10 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.11.0-rc9
+version: 0.11.0-rc.12
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 27f76052..069e4e10 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -286,19 +286,6 @@ Pipelines labels
 {{ include "mlrun-ce.pipelines.selectorLabels" . }}
 {{- end -}}
 
-{{/*
-Model monitoring DSN
-*/}}
-{{- define "mlrun-ce.mlrun.modelMonitoring.DSN" -}}
-{{- if .Values.mlrun.modelMonitoring.dsn -}}
-{{ .Values.mlrun.modelMonitoring.dsn }}
-{{- else -}}
-{{- if eq "mysql" .Values.mlrun.httpDB.dbType -}}
-{{ .Values.mlrun.httpDB.dsn }}_model_monitoring
-{{- end -}}
-{{- end -}}
-{{- end -}}
-
 {{/*
 TimescaleDB helpers
 */}}

From 0b2243ff11bbb65014cb41880a42e4ba28843f88 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 25 Mar 2026 11:40:11 +0200
Subject: [PATCH 04/23] adding otel to ce

---
 .github/workflows/release.yml                 |   1 +
 charts/mlrun-ce/README.md                     | 278 +++++++++-
 .../mlrun-ce/admin_installation_values.yaml   |  25 +
 ..._admin_cluster_ip_installation_values.yaml |  16 +
 .../non_admin_installation_values.yaml        |  16 +
 charts/mlrun-ce/requirements.lock             |   7 +-
 charts/mlrun-ce/requirements.yaml             |   4 +
 charts/mlrun-ce/templates/NOTES.txt           |  50 ++
 charts/mlrun-ce/templates/_helpers.tpl        |  68 +++
 .../jupyter-notebook/deployment.yaml          |  16 +
 .../templates/opentelemetry/collector.yaml    | 102 ++++
 .../opentelemetry/instrumentation.yaml        |  86 +++
 .../opentelemetry/namespace-label.yaml        |  15 +
 .../templates/opentelemetry/rbac.yaml         |  58 +++
 charts/mlrun-ce/values.yaml                   | 221 ++++++++
 tests/helm-template-test.sh                   | 489 ++++++++++++++++++
 tests/kind-test.sh                            |  48 ++
 17 files changed, 1495 insertions(+), 5 deletions(-)
 create mode 100644 charts/mlrun-ce/templates/opentelemetry/collector.yaml
 create mode 100644 charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
 create mode 100644 charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
 create mode 100644 charts/mlrun-ce/templates/opentelemetry/rbac.yaml
 create mode 100755 tests/helm-template-test.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a2cfb4f2..4d8caf98 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -46,6 +46,7 @@ jobs:
           helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
           helm repo add strimzi https://strimzi.io/charts/
           helm repo add seaweedfs https://seaweedfs.github.io/seaweedfs/helm
+          helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
 
       - name: Run chart-releaser
         uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f
diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index 00fb5425..0da23123 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -14,6 +14,7 @@ The Open source MLRun ce chart includes the following stack:
 * Spark Operator - https://github.com/GoogleCloudPlatform/spark-on-k8s-operator
 * Pipelines - https://github.com/kubeflow/pipelines
 * Prometheus stack - https://github.com/prometheus-community/helm-charts
+* OpenTelemetry Operator - https://github.com/open-telemetry/opentelemetry-operator (observability)
 
 ## Prerequisites
 
@@ -36,6 +37,7 @@ kubectl create namespace mlrun
 Add the mlrun ce helm chart repo
 ```bash
 helm repo add mlrun https://mlrun.github.io/ce
+helm repo update
 ```
 
 To work with the open source MLRun stack, you must an accessible docker-registry. The registry's URL and credentials
@@ -64,6 +66,97 @@ helm --namespace mlrun \
     mlrun/mlrun-ce
 ```
 
+### Complete Installation with OpenTelemetry (From Scratch)
+
+This section provides a complete step-by-step guide to install MLRun CE with full OpenTelemetry observability enabled.
+
+> **Note:** OpenTelemetry is **disabled by default**. Follow these steps to enable it.
+
+#### Step 1: Create the namespace
+
+```bash
+kubectl create namespace mlrun
+```
+
+#### Step 2: Add the Helm repository
+
+```bash
+helm repo add mlrun https://mlrun.github.io/ce
+helm repo update
+```
+
+#### Step 3: Create the docker registry secret
+
+```bash
+kubectl --namespace mlrun create secret docker-registry registry-credentials \
+    --docker-username <registry-username> \
+    --docker-password <login-password> \
+    --docker-server <server URL, e.g. https://index.docker.io/v1/> \
+    --docker-email <user-email>
+```
+
+#### Step 4: Install MLRun CE with OpenTelemetry Enabled
+
+```bash
+helm --namespace mlrun \
+    install my-mlrun \
+    --wait \
+    --timeout 15m \
+    --set global.registry.url=<registry URL e.g. index.docker.io/iguazio> \
+    --set global.registry.secretName=registry-credentials \
+    --set opentelemetry-operator.enabled=true \
+    --set opentelemetry.namespaceLabel.enabled=true \
+    --set opentelemetry.collector.enabled=true \
+    --set opentelemetry.collector.scrapeMode=otel \
+    --set opentelemetry.instrumentation.enabled=true \
+    mlrun/mlrun-ce
+```
+
+> **Important:** When enabling OpenTelemetry, set `opentelemetry.collector.scrapeMode=otel` to collect metrics 
+> via the OTEL sidecar and prevent duplicate metrics. The default is `direct` (for when OTEL is disabled).
+
+The installation will:
+- Deploy the OpenTelemetry Operator
+- Create an OpenTelemetryCollector CR (sidecar mode)
+- Create an Instrumentation CR for Python auto-instrumentation
+- Label the namespace with `opentelemetry.io/inject=enabled`
+- Configure Prometheus to scrape OTEL sidecar metrics (port 8889)
+
+#### Step 5: Verify OpenTelemetry Installation
+
+Check that the OpenTelemetry resources are created:
+
+```bash
+# Check the namespace label
+kubectl get namespace mlrun --show-labels | grep opentelemetry
+
+# Check the OpenTelemetry Collector CR
+kubectl -n mlrun get opentelemetrycollectors
+
+# Check the Instrumentation CR
+kubectl -n mlrun get instrumentations
+
+# Check that the OTEL operator is running
+kubectl -n mlrun get pods | grep opentelemetry
+```
+
+#### Step 6: Verify Jupyter has OTEL Sidecar Annotations
+
+```bash
+kubectl -n mlrun get deployment -l app.kubernetes.io/component=jupyter-notebook \
+    -o jsonpath='{.items[0].spec.template.metadata.annotations}' | jq .
+```
+
+You should see annotations such as the following (others, e.g. `prometheus.io/scrape-mode` and `prometheus.io/path`, are also set):
+```json
+{
+  "instrumentation.opentelemetry.io/inject-python": "my-mlrun-otel-instrumentation",
+  "prometheus.io/port": "8889",
+  "prometheus.io/scrape": "true",
+  "sidecar.opentelemetry.io/inject": "my-mlrun-otel-collector"
+}
+```
+
 ### Installing MLRun-ce on minikube
 
 The Open source MLRun ce uses node ports for simplicity. If your kubernetes cluster is running inside a VM, 
@@ -89,6 +182,185 @@ following values:
 Additional configurable values are documented in the `values.yaml`, and the `values.yaml` of all sub charts. 
 Override those [in the normal methods](https://helm.sh/docs/chart_template_guide/values_files/).
 
+### Configuring OpenTelemetry (Observability)
+
+MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces from your ML workloads. 
+The operator runs in **sidecar mode**, automatically injecting collector containers into annotated pods.
+
+> **Note:** OpenTelemetry is **disabled by default**. See below for how to enable it.
+
+#### Namespace Labeling
+
+The OpenTelemetry Operator **only monitors namespaces** with the label `opentelemetry.io/inject=enabled`.
+The chart applies this label to the MLRun namespace when `opentelemetry.namespaceLabel.enabled=true`.
+
+The resulting namespace metadata contains:
+```yaml
+# Automatically added to your namespace when opentelemetry.namespaceLabel.enabled=true
+labels:
+  opentelemetry.io/inject: "enabled"
+```
+
+For custom namespaces that need OpenTelemetry instrumentation, add the label manually:
+```bash
+kubectl label namespace <your-namespace> opentelemetry.io/inject=enabled
+```
+
+> **Note:** The controller namespace (where the operator runs) does **NOT** need this label,
+> as only the operator itself runs there - no workloads require instrumentation.
+
+#### Default Configuration
+
+By default, OpenTelemetry is **disabled**. When enabled, it provides:
+- Namespace labeling for OTEL operator webhook targeting
+- Sidecar collector injection for instrumented pods
+- Python auto-instrumentation for Jupyter notebooks
+- Prometheus metrics export on port 8889
+
+#### Enabling OpenTelemetry
+
+To install **with** OpenTelemetry enabled:
+
+```bash
+helm --namespace mlrun install my-mlrun \
+    --set global.registry.url=<registry-url> \
+    --set global.registry.secretName=registry-credentials \
+    --set opentelemetry-operator.enabled=true \
+    --set opentelemetry.namespaceLabel.enabled=true \
+    --set opentelemetry.collector.enabled=true \
+    --set opentelemetry.collector.scrapeMode=otel \
+    --set opentelemetry.instrumentation.enabled=true \
+    mlrun/mlrun-ce
+```
+
+To **enable** OpenTelemetry on an existing installation:
+
+```bash
+helm --namespace mlrun upgrade my-mlrun \
+    --set opentelemetry-operator.enabled=true \
+    --set opentelemetry.namespaceLabel.enabled=true \
+    --set opentelemetry.collector.enabled=true \
+    --set opentelemetry.collector.scrapeMode=otel \
+    --set opentelemetry.instrumentation.enabled=true \
+    mlrun/mlrun-ce
+```
+
+To **disable** OpenTelemetry (default):
+
+```bash
+helm --namespace mlrun upgrade my-mlrun \
+    --set opentelemetry-operator.enabled=false \
+    --set opentelemetry.collector.enabled=false \
+    --set opentelemetry.instrumentation.enabled=false \
+    --set opentelemetry.namespaceLabel.enabled=false \
+    --set opentelemetry.collector.scrapeMode=direct \
+    mlrun/mlrun-ce
+```
+
+#### Custom Resource Limits
+
+Configure collector sidecar resources:
+
+```bash
+helm --namespace mlrun install my-mlrun \
+    --set opentelemetry.collector.resources.requests.cpu=100m \
+    --set opentelemetry.collector.resources.requests.memory=128Mi \
+    --set opentelemetry.collector.resources.limits.cpu=500m \
+    --set opentelemetry.collector.resources.limits.memory=512Mi \
+    mlrun/mlrun-ce
+```
+
+#### Enabling Java Auto-Instrumentation
+
+To enable Java auto-instrumentation (disabled by default):
+
+```bash
+helm --namespace mlrun install my-mlrun \
+    --set opentelemetry.instrumentation.java.enabled=true \
+    mlrun/mlrun-ce
+```
+
+#### Adding OpenTelemetry to Custom Workloads
+
+To instrument your own deployments with the OTEL sidecar and Python auto-instrumentation:
+
+1. Ensure your namespace has the OpenTelemetry label:
+   ```bash
+   kubectl label namespace <your-namespace> opentelemetry.io/inject=enabled
+   ```
+
+2. Add these annotations to your pod spec:
+   ```yaml
+   metadata:
+     annotations:
+       sidecar.opentelemetry.io/inject: "<release-name>-otel-collector"
+       instrumentation.opentelemetry.io/inject-python: "<release-name>-otel-instrumentation"
+       prometheus.io/scrape: "true"
+       prometheus.io/scrape-mode: "otel"
+       prometheus.io/port: "8889"
+   ```
+
+#### Preventing Prometheus/OTEL Metric Overlap
+
+To prevent duplicate metrics when using both Prometheus direct scraping and OpenTelemetry, 
+MLRun CE uses a **scrape-mode** annotation system:
+
+| Scrape Mode | Description | Use Case |
+|-------------|-------------|----------|
+| `direct` | Direct Prometheus scraping only | **Default** - When OTEL is disabled |
+| `otel` | Metrics collected via OTEL sidecar only | **Recommended when OTEL enabled** |
+| `both` | Both OTEL and direct scraping | Debugging/transition only |
+
+> **Note:** The default scrape mode is `direct`. When enabling OpenTelemetry, you must pass
+> `--set opentelemetry.collector.scrapeMode=otel` to collect metrics via the OTEL sidecar.
+
+**How it works:**
+- OTEL-collected metrics have the `mlrun_otel_` prefix and `metrics_source=otel_collector` label
+- Direct-scraped metrics have `metrics_source=direct_scrape` label
+- Prometheus scrape configs filter based on `prometheus.io/scrape-mode` annotation
+
+**Configure scrape mode when enabling OTEL:**
+```bash
+helm --namespace mlrun install my-mlrun \
+    --set opentelemetry-operator.enabled=true \
+    --set opentelemetry.collector.enabled=true \
+    --set opentelemetry.collector.scrapeMode=otel \
+    --set opentelemetry.instrumentation.enabled=true \
+    mlrun/mlrun-ce
+```
+
+**Query metrics by source in Prometheus:**
+```promql
+# OTEL-collected metrics only
+{metrics_source="otel_collector"}
+
+# Direct-scraped metrics only  
+{metrics_source="direct_scrape"}
+
+# OTEL metrics use prefix
+mlrun_otel_http_server_duration_seconds_bucket{...}
+```
+
+#### Split Installation (Admin/Non-Admin)
+
+For multi-tenant clusters, install the operator (which provides the CRDs) once in a controller namespace, and the collector and instrumentation CRs in each user namespace:
+
+**Controller namespace (admin):**
+```bash
+# Operator only - no namespace label needed (no instrumented workloads here)
+helm --namespace controller install mlrun-controller \
+    -f admin_installation_values.yaml \
+    mlrun/mlrun-ce
+```
+
+**User namespace (non-admin):**
+```bash
+# Collector CRs + namespace label applied automatically
+helm --namespace mlrun install my-mlrun \
+    -f non_admin_installation_values.yaml \
+    mlrun/mlrun-ce
+```
+
 ### Working with ECR
 
 To work with ECR, you must create a secret with your AWS credentials and a secret with ECR Token while providing both secret names to the helm install command.
@@ -282,6 +554,6 @@ Refer to the [**Kubeflow documentation**](https://www.kubeflow.org/docs/started/
 
 This table shows the versions of the main components in the MLRun CE chart:
 
-| MLRun CE   | MLRun  | Nuclio | Jupyter | MPI Operator | SeaweedFS | Spark Operator | Pipelines | Kube-Prometheus-Stack |
-|------------|--------|--------|---------|--------------|-----------|----------------|-----------|-----------------------|
-| **0.11.0** | 1.11.0 | 1.15.9 | 4.5.0   | 0.2.3        | 4.0.407   | 2.1.0          | 2.14.3    | 72.1.1                |
+| MLRun CE   | MLRun  | Nuclio | Jupyter | MPI Operator | SeaweedFS | Spark Operator | Pipelines | Kube-Prometheus-Stack | OpenTelemetry Operator |
+|------------|--------|--------|---------|--------------|-----------|----------------|-----------|-----------------------|------------------------|
+| **0.11.0** | 1.11.0 | 1.15.9 | 4.5.0   | 0.2.3        | 4.0.407   | 2.1.0          | 2.14.3    | 72.1.1                | 0.78.1                 |
diff --git a/charts/mlrun-ce/admin_installation_values.yaml b/charts/mlrun-ce/admin_installation_values.yaml
index c9b2bf23..9de962a9 100644
--- a/charts/mlrun-ce/admin_installation_values.yaml
+++ b/charts/mlrun-ce/admin_installation_values.yaml
@@ -57,3 +57,28 @@ strimzi-kafka-operator:
   
 kafka:
   enabled: false
+
+# OpenTelemetry Operator - enabled for CRD installation at cluster level
+opentelemetry-operator:
+  enabled: true
+  admissionWebhooks:
+    certManager:
+      enabled: false
+    autoGenerateCert:
+      enabled: true
+    # Only apply webhooks to namespaces with the opentelemetry label
+    namespaceSelector:
+      matchLabels:
+        opentelemetry.io/inject: "enabled"
+
+# OpenTelemetry CRs - disabled at admin level, enabled in user namespaces
+# Note: Controller namespace does NOT need the opentelemetry label since
+# no workloads are instrumented here - only the operator runs here
+opentelemetry:
+  namespaceLabel:
+    enabled: false
+  collector:
+    enabled: false
+  instrumentation:
+    enabled: false
+
diff --git a/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml b/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml
index a98463ad..87c5e58f 100644
--- a/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml
+++ b/charts/mlrun-ce/non_admin_cluster_ip_installation_values.yaml
@@ -64,3 +64,19 @@ kafka:
 
 kube-prometheus-stack:
   enabled: false
+
+# OpenTelemetry Operator - disabled, CRDs installed at controller level
+opentelemetry-operator:
+  enabled: false
+
+# OpenTelemetry CRs - enabled for user namespace
+# The namespace will be labeled with opentelemetry.io/inject=enabled
+# so the operator can inject sidecars into pods
+opentelemetry:
+  namespaceLabel:
+    enabled: true
+  collector:
+    enabled: true
+  instrumentation:
+    enabled: true
+
diff --git a/charts/mlrun-ce/non_admin_installation_values.yaml b/charts/mlrun-ce/non_admin_installation_values.yaml
index d84f02ee..793c38a4 100644
--- a/charts/mlrun-ce/non_admin_installation_values.yaml
+++ b/charts/mlrun-ce/non_admin_installation_values.yaml
@@ -58,3 +58,19 @@ kafka:
 
 kube-prometheus-stack:
   enabled: false
+
+# OpenTelemetry Operator - disabled, CRDs installed at controller level
+opentelemetry-operator:
+  enabled: false
+
+# OpenTelemetry CRs - enabled for user namespace
+# The namespace will be labeled with opentelemetry.io/inject=enabled
+# so the operator can inject sidecars into pods
+opentelemetry:
+  namespaceLabel:
+    enabled: true
+  collector:
+    enabled: true
+  instrumentation:
+    enabled: true
+
diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock
index fd36b055..9b938f12 100644
--- a/charts/mlrun-ce/requirements.lock
+++ b/charts/mlrun-ce/requirements.lock
@@ -20,5 +20,8 @@ dependencies:
 - name: strimzi-kafka-operator
   repository: https://strimzi.io/charts/
   version: 0.48.0
-digest: sha256:f87ec580f73178cfc897d57e26f5d7b049900f1b7ef75bfe198ca327eb2ed06d
-generated: "2026-02-12T23:52:46.490844+02:00"
+- name: opentelemetry-operator
+  repository: https://open-telemetry.github.io/opentelemetry-helm-charts
+  version: 0.78.1
+digest: sha256:4a47a90d97b21b41cd3bb7f7e9b70b56b42b95fe067bb012e4d490fa1912e18f
+generated: "2026-03-24T16:04:27.962041+02:00"
diff --git a/charts/mlrun-ce/requirements.yaml b/charts/mlrun-ce/requirements.yaml
index 900754a0..918f2195 100644
--- a/charts/mlrun-ce/requirements.yaml
+++ b/charts/mlrun-ce/requirements.yaml
@@ -25,3 +25,7 @@ dependencies:
     repository: "https://strimzi.io/charts/"
     version: "0.48.0"
     condition: strimzi-kafka-operator.enabled
+  - name: opentelemetry-operator
+    repository: "https://open-telemetry.github.io/opentelemetry-helm-charts"
+    version: "0.78.1"
+    condition: opentelemetry-operator.enabled
diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index 980d3b54..a84d79e5 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -120,5 +120,55 @@ TimescaleDB is available at:
 {{- end }}
 {{- end }}
 
+{{- if index .Values "opentelemetry-operator" "enabled" }}
+{{- "\n" }}
+OpenTelemetry Operator is enabled!
+-  Operator manages OpenTelemetryCollector and Instrumentation CRs
+-  Namespace selector: opentelemetry.io/inject=enabled
+{{- if .Values.opentelemetry.collector.enabled }}
+{{- "\n" }}
+OpenTelemetry Collector (sidecar mode):
+-  Collector CR: {{ .Release.Name }}-otel-collector
+-  Mode: {{ .Values.opentelemetry.collector.mode }}
+-  OTLP gRPC endpoint: localhost:{{ .Values.opentelemetry.collector.otlp.grpcPort }} (inside pod)
+-  OTLP HTTP endpoint: localhost:{{ .Values.opentelemetry.collector.otlp.httpPort }} (inside pod)
+-  Prometheus metrics port: {{ .Values.opentelemetry.collector.prometheus.port }}
+-  Prometheus scrape mode: {{ .Values.opentelemetry.collector.scrapeMode }}
+{{- if eq .Values.opentelemetry.collector.scrapeMode "direct" }}
+
+⚠️  WARNING: Scrape mode is "direct" - OTEL sidecar metrics will NOT be collected!
+   To collect metrics via OTEL, upgrade the release with: --set opentelemetry.collector.scrapeMode=otel
+{{- end }}
+{{- end }}
+{{- if .Values.opentelemetry.instrumentation.enabled }}
+{{- "\n" }}
+OpenTelemetry Auto-Instrumentation:
+-  Instrumentation CR: {{ .Release.Name }}-otel-instrumentation
+{{- if .Values.opentelemetry.instrumentation.python.enabled }}
+-  Python auto-instrumentation: enabled
+{{- end }}
+{{- if .Values.opentelemetry.instrumentation.java.enabled }}
+-  Java auto-instrumentation: enabled
+{{- end }}
+{{- end }}
+{{- if .Values.opentelemetry.namespaceLabel.enabled }}
+{{- "\n" }}
+Namespace Label:
+-  Namespace {{ .Release.Namespace }} is labeled with: {{ .Values.opentelemetry.namespaceLabel.key }}={{ .Values.opentelemetry.namespaceLabel.value }}
+{{- end }}
+{{- "\n" }}
+{{/* Show the active mode once in the heading, not on a single mode's row. */}}Prometheus Scrape Modes (current: {{ .Values.opentelemetry.collector.scrapeMode }}):
+-  "otel"   : Metrics collected via OTEL sidecar only (recommended)
+-  "direct" : Direct Prometheus scraping only
+-  "both"   : Both methods active (for debugging)
+{{- "\n" }}
+To add OTEL instrumentation to your pods, add these annotations:
+  sidecar.opentelemetry.io/inject: "{{ .Release.Name }}-otel-collector"
+  instrumentation.opentelemetry.io/inject-python: "{{ .Release.Name }}-otel-instrumentation"
+  prometheus.io/scrape: "true"
+  prometheus.io/scrape-mode: "otel"
+  prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
+{{- end }}
+
 Happy MLOPSing!!! :]
 {{- end }}
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 069e4e10..1a0bfe2a 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -348,3 +348,71 @@ TimescaleDB connection string for MLRun model monitoring
 postgresql://{{ .Values.timescaledb.auth.username | urlquery }}:{{ .Values.timescaledb.auth.password | urlquery }}@{{ include "mlrun-ce.timescaledb.fullname" . }}:{{ .Values.timescaledb.service.port }}/{{ .Values.timescaledb.auth.database }}
 {{- end }}
 
+{{/*
+=============================================================================
+OpenTelemetry helpers
+=============================================================================
+*/}}
+
+{{/*
+OpenTelemetry Collector short name: opentelemetry.collector.nameOverride when set, otherwise "otel-collector"; cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "mlrun-ce.otel.collector.name" -}}
+{{- default "otel-collector" .Values.opentelemetry.collector.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+OpenTelemetry Collector fullname: fullnameOverride when set; else the release name alone if it already contains the short name, otherwise "<release>-<short name>". Always cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "mlrun-ce.otel.collector.fullname" -}}
+{{- if .Values.opentelemetry.collector.fullnameOverride }}
+{{- .Values.opentelemetry.collector.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default "otel-collector" .Values.opentelemetry.collector.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+OpenTelemetry Instrumentation short name: opentelemetry.instrumentation.nameOverride when set, otherwise "otel-instrumentation"; cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "mlrun-ce.otel.instrumentation.name" -}}
+{{- default "otel-instrumentation" .Values.opentelemetry.instrumentation.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+OpenTelemetry Instrumentation fullname: fullnameOverride when set; else the release name alone if it already contains the short name, otherwise "<release>-<short name>". Always cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "mlrun-ce.otel.instrumentation.fullname" -}}
+{{- if .Values.opentelemetry.instrumentation.fullnameOverride }}
+{{- .Values.opentelemetry.instrumentation.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default "otel-instrumentation" .Values.opentelemetry.instrumentation.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+OpenTelemetry common labels: the output of "mlrun-ce.common.labels" followed by the OpenTelemetry selector labels.
+*/}}
+{{- define "mlrun-ce.otel.labels" -}}
+{{ include "mlrun-ce.common.labels" . }}
+{{ include "mlrun-ce.otel.selectorLabels" . }}
+{{- end }}
+
+{{/*
+OpenTelemetry selector labels: the output of "mlrun-ce.common.selectorLabels" plus app.kubernetes.io/component: opentelemetry.
+*/}}
+{{- define "mlrun-ce.otel.selectorLabels" -}}
+{{ include "mlrun-ce.common.selectorLabels" . }}
+app.kubernetes.io/component: opentelemetry
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
index 6e5374f2..83135a1e 100644
--- a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
+++ b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
@@ -14,6 +14,22 @@ spec:
     metadata:
       labels:
         {{- include "mlrun-ce.jupyter.selectorLabels" . | nindent 8 }}
+      {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+      annotations:
+        # OpenTelemetry sidecar injection; the value is the name of the OpenTelemetryCollector CR to inject
+        sidecar.opentelemetry.io/inject: "{{ include "mlrun-ce.otel.collector.fullname" . }}"
+        # Python auto-instrumentation injection; the value is the name of the Instrumentation CR to apply
+        instrumentation.opentelemetry.io/inject-python: "{{ include "mlrun-ce.otel.instrumentation.fullname" . }}"
+        # Prometheus scraping configuration
+        # scrape-mode controls how metrics are collected to prevent duplicates:
+        #   "otel"   - Only OTEL sidecar metrics (recommended)
+        #   "both"   - Both OTEL and direct scraping (debugging)
+        #   "direct" - Only direct scraping (OTEL metrics ignored)
+        prometheus.io/scrape: "true"
+        prometheus.io/scrape-mode: {{ .Values.opentelemetry.collector.scrapeMode | quote }}
+        prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
+        prometheus.io/path: "/metrics"
+      {{- end }}
     spec:
       {{- with .Values.jupyterNotebook.image.pullSecrets }}
       imagePullSecrets:
diff --git a/charts/mlrun-ce/templates/opentelemetry/collector.yaml b/charts/mlrun-ce/templates/opentelemetry/collector.yaml
new file mode 100644
index 00000000..e32f3d4a
--- /dev/null
+++ b/charts/mlrun-ce/templates/opentelemetry/collector.yaml
@@ -0,0 +1,102 @@
+{{- if .Values.opentelemetry.collector.enabled }}
+apiVersion: opentelemetry.io/v1beta1
+kind: OpenTelemetryCollector
+metadata:
+  name: {{ include "mlrun-ce.otel.collector.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    # Delay collector CR creation until after CRDs are installed by the operator
+    helm.sh/hook: post-install,post-upgrade
+    helm.sh/hook-weight: "10"
+spec:
+  mode: {{ .Values.opentelemetry.collector.mode }}
+  resources:
+    {{- toYaml .Values.opentelemetry.collector.resources | nindent 4 }}
+  # Pod annotations for Prometheus scraping
+  podAnnotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
+    prometheus.io/path: "/metrics"
+  config:
+    receivers:
+      otlp:
+        protocols:
+          grpc:
+            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
+          http:
+            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.httpPort }}
+
+    processors:
+      # Batch processor for efficient metric export
+      batch:
+        send_batch_size: 10000
+        timeout: 10s
+      # Memory limiter to prevent OOM
+      memory_limiter:
+        check_interval: 1s
+        limit_percentage: 80
+        spike_limit_percentage: 25
+      # Resource detection for Kubernetes metadata
+      resourcedetection:
+        detectors:
+          - env
+          - system
+        timeout: 5s
+        override: false
+
+    exporters:
+      # Prometheus exporter for metrics - scraped by Prometheus
+      prometheus:
+        endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.prometheus.port }}
+        # Metric namespace prefix helps distinguish OTEL-collected metrics
+        # from directly-scraped metrics in Prometheus queries
+        namespace: {{ .Values.opentelemetry.collector.prometheus.namespace }}
+        const_labels:
+          collector_mode: {{ .Values.opentelemetry.collector.mode | quote }}  # mirrors spec.mode
+          metrics_source: otel_collector
+        resource_to_telemetry_conversion:
+          enabled: true
+      # Debug exporter for troubleshooting (logs to stdout)
+      debug:
+        verbosity: basic
+        sampling_initial: 5
+        sampling_thereafter: 200
+
+    extensions:
+      health_check:
+        endpoint: 0.0.0.0:13133
+
+    service:
+      extensions:
+        - health_check
+      pipelines:
+        # Metrics pipeline: OTLP -> processing -> Prometheus export
+        metrics:
+          receivers:
+            - otlp
+          processors:
+            - memory_limiter
+            - resourcedetection
+            - batch
+          exporters:
+            - prometheus
+            - debug
+        # Traces pipeline: OTLP -> processing -> debug (no trace backend configured yet)
+        traces:
+          receivers:
+            - otlp
+          processors:
+            - memory_limiter
+            - resourcedetection
+            - batch
+          exporters:
+            - debug
+      telemetry:
+        logs:
+          level: info
+        metrics:
+          address: 0.0.0.0:8888
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml b/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
new file mode 100644
index 00000000..7b9f0767
--- /dev/null
+++ b/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
@@ -0,0 +1,86 @@
+{{- if .Values.opentelemetry.instrumentation.enabled }}
+apiVersion: opentelemetry.io/v1alpha1
+kind: Instrumentation
+metadata:
+  name: {{ include "mlrun-ce.otel.instrumentation.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    # Delay instrumentation CR creation until after CRDs are installed by the operator
+    helm.sh/hook: post-install,post-upgrade
+    helm.sh/hook-weight: "10"
+spec:
+  # Propagators for distributed tracing context
+  propagators:
+    {{- toYaml .Values.opentelemetry.instrumentation.propagators | nindent 4 }}
+
+  # Sampler configuration
+  sampler:
+    type: {{ .Values.opentelemetry.instrumentation.sampler.type }}
+    argument: {{ .Values.opentelemetry.instrumentation.sampler.argument | quote }}
+
+  # Environment variables injected into instrumented pods
+  env:
+    # Service name will be auto-detected from pod metadata
+    - name: OTEL_SERVICE_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.labels['app.kubernetes.io/name']
+    # Downward-API helpers. Declared before OTEL_RESOURCE_ATTRIBUTES because
+    # Kubernetes expands $(VAR) only from variables defined earlier in the
+    # env list; a later definition would leave the reference unexpanded.
+    - name: OTEL_RESOURCE_ATTRIBUTES_NAMESPACE
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.namespace
+    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.name
+    # Resource attributes for better observability. k8s.container.name is
+    # intentionally not set: metadata.name is the pod name, and the downward
+    # API has no fieldRef for the container name.
+    - name: OTEL_RESOURCE_ATTRIBUTES
+      value: >-
+        k8s.namespace.name=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE),
+        k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),
+        service.namespace=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE)
+    # Export metrics via OTLP to the sidecar collector
+    - name: OTEL_METRICS_EXPORTER
+      value: otlp
+    - name: OTEL_TRACES_EXPORTER
+      value: otlp
+    - name: OTEL_LOGS_EXPORTER
+      value: none
+
+  # Python auto-instrumentation configuration
+  {{- if .Values.opentelemetry.instrumentation.python.enabled }}
+  python:
+    image: {{ .Values.opentelemetry.instrumentation.python.image.repository }}:{{ .Values.opentelemetry.instrumentation.python.image.tag }}
+    resourceRequirements:
+      {{- toYaml .Values.opentelemetry.instrumentation.python.resources | nindent 6 }}
+    env:
+      # Python-specific OTEL configuration
+      - name: OTEL_PYTHON_LOG_CORRELATION
+        value: "true"
+      - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
+        value: "false"
+      # Disable specific instrumentations that might cause issues
+      - name: OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
+        value: ""
+  {{- end }}
+
+  # Java auto-instrumentation configuration
+  {{- if .Values.opentelemetry.instrumentation.java.enabled }}
+  java:
+    image: {{ .Values.opentelemetry.instrumentation.java.image.repository }}:{{ .Values.opentelemetry.instrumentation.java.image.tag }}
+    resourceRequirements:
+      {{- toYaml .Values.opentelemetry.instrumentation.java.resources | nindent 6 }}
+    env:
+      # Java-specific OTEL configuration
+      - name: OTEL_INSTRUMENTATION_COMMON_DEFAULT_ENABLED
+        value: "true"
+  {{- end }}
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
new file mode 100644
index 00000000..985bd4fb
--- /dev/null
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -0,0 +1,15 @@
+{{- if and (or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled) .Values.opentelemetry.namespaceLabel.enabled -}}
+# Label the namespace for OpenTelemetry operator webhook injection
+# The operator will only inject sidecars into namespaces with this label
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ .Release.Namespace }}
+  labels:
+{{ include "mlrun-ce.otel.labels" . | indent 4 }}
+    {{ .Values.opentelemetry.namespaceLabel.key }}: {{ .Values.opentelemetry.namespaceLabel.value | quote }}
+  annotations:
+    # Keep the namespace on uninstall: Helm must never delete the release namespace.
+    # NOTE: the namespace should already exist; Helm only updates it if it can adopt it (Helm ownership metadata).
+    helm.sh/resource-policy: keep
+{{- end -}}
diff --git a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
new file mode 100644
index 00000000..0ffe62b4
--- /dev/null
+++ b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
@@ -0,0 +1,58 @@
+{{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+---
+# ServiceAccount for OpenTelemetry collector sidecar
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: otel-collector
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+---
+# Role for OpenTelemetry collector to access Kubernetes resources
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: otel-collector
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+rules:
+  # Allow reading pod metadata for resource detection
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+      - namespaces
+    verbs:
+      - get
+      - list
+      - watch
+  # Allow reading configmaps for collector configuration
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+    verbs:
+      - get
+      - list
+      - watch
+---
+# RoleBinding for OpenTelemetry collector
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: otel-collector
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: otel-collector
+subjects:
+  - kind: ServiceAccount
+    name: otel-collector
+    namespace: {{ .Release.Namespace }}
+{{- end }}
+
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index bed3e5b0..2999f6d5 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -533,6 +533,103 @@ kube-prometheus-stack:
     service:
       type: NodePort
       nodePort: 30020
+    prometheusSpec:
+      # Additional scrape configs for OpenTelemetry collector sidecars
+      # This creates clear separation between direct scraping and OTEL-collected metrics
+      additionalScrapeConfigs:
+        # Job for scraping OTEL collector sidecars (metrics on port 8889)
+        # Only scrapes pods with prometheus.io/scrape-mode: "otel" or "both"
+        - job_name: 'otel-collector-sidecars'
+          kubernetes_sd_configs:
+            - role: pod
+          relabel_configs:
+            # Only scrape pods with OTEL sidecar (port 8889)
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port]
+              action: keep
+              regex: "8889"
+            # Only scrape if scrape-mode is "otel" or "both" (or legacy scrape=true with port 8889)
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_mode]
+              action: keep
+              regex: (otel|both|)
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+              action: keep
+              regex: true
+            # Set metrics path
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+              action: replace
+              target_label: __metrics_path__
+              regex: (.+)
+            # Set target address with port
+            - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+              action: replace
+              regex: ([^:]+)(?::\d+)?;(\d+)
+              replacement: $1:$2
+              target_label: __address__
+            # Add kubernetes labels
+            - action: labelmap
+              regex: __meta_kubernetes_pod_label_(.+)
+            - source_labels: [__meta_kubernetes_namespace]
+              action: replace
+              target_label: kubernetes_namespace
+            - source_labels: [__meta_kubernetes_pod_name]
+              action: replace
+              target_label: kubernetes_pod_name
+          # Add metric relabeling to identify OTEL-sourced metrics
+          metric_relabel_configs:
+            - action: replace
+              target_label: metrics_source
+              replacement: otel_collector
+        
+        # Job for direct application scraping (non-OTEL)
+        # Only scrapes pods with prometheus.io/scrape-mode: "direct" or "both"
+        # Excludes OTEL sidecar port (8889)
+        - job_name: 'kubernetes-pods-direct'
+          kubernetes_sd_configs:
+            - role: pod
+          relabel_configs:
+            # Only scrape pods with prometheus.io/scrape=true
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+              action: keep
+              regex: true
+            # Exclude OTEL sidecar port (8889) - those are handled by otel-collector-sidecars job
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port]
+              action: drop
+              regex: "8889"
+            # Only scrape if scrape-mode is "direct" or "both" or not set (default to direct)
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_mode]
+              action: keep
+              regex: (direct|both|)
+            # Set metrics path
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+              action: replace
+              target_label: __metrics_path__
+              regex: (.+)
+              replacement: $1
+            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+              action: replace
+              target_label: __metrics_path__
+              regex: ()
+              replacement: /metrics
+            # Set target address with port
+            - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+              action: replace
+              regex: ([^:]+)(?::\d+)?;(\d+)
+              replacement: $1:$2
+              target_label: __address__
+            # Add kubernetes labels
+            - action: labelmap
+              regex: __meta_kubernetes_pod_label_(.+)
+            - source_labels: [__meta_kubernetes_namespace]
+              action: replace
+              target_label: kubernetes_namespace
+            - source_labels: [__meta_kubernetes_pod_name]
+              action: replace
+              target_label: kubernetes_pod_name
+          # Add metric relabeling to identify direct-scraped metrics
+          metric_relabel_configs:
+            - action: replace
+              target_label: metrics_source
+              replacement: direct_scrape
   kube-state-metrics:
     fullnameOverride: state-metrics
 prometheus-node-exporter:
@@ -645,3 +742,127 @@ kafka:
     # Empty means "use the release namespace"
     # Example: "controller" if that's where you installed the operator
     operatorNamespace: ""
+
+# =============================================================================
+# OpenTelemetry Operator configuration
+# Installs the OpenTelemetry Operator for managing collectors and instrumentation
+# =============================================================================
+opentelemetry-operator:
+  enabled: false
+  # Admission webhooks configuration
+  admissionWebhooks:
+    certManager:
+      enabled: false
+    autoGenerateCert:
+      enabled: true
+    # Only apply webhooks to namespaces with the opentelemetry label
+    # This ensures the operator only monitors labeled namespaces
+    namespaceSelector:
+      matchLabels:
+        opentelemetry.io/inject: "enabled"
+  manager:
+    # Collector image used by the operator when creating collectors
+    collectorImage:
+      repository: otel/opentelemetry-collector-contrib
+      tag: 0.115.0
+    # Auto-instrumentation images
+    autoInstrumentationImage:
+      python:
+        repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python
+        tag: 0.50b0
+      java:
+        repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
+        tag: 2.10.0
+    resources:
+      requests:
+        cpu: 50m
+        memory: 64Mi
+      limits:
+        cpu: 200m
+        memory: 256Mi
+
+# =============================================================================
+# OpenTelemetry Collector and Instrumentation CRs
+# These are managed separately from the operator for admin/non-admin split
+# =============================================================================
+opentelemetry:
+  # Namespace label for enabling OpenTelemetry monitoring
+  # The namespace must have this label for the operator to inject sidecars
+  namespaceLabel:
+    enabled: false
+    key: "opentelemetry.io/inject"
+    value: "enabled"
+  
+  # OpenTelemetry Collector configuration (sidecar mode)
+  collector:
+    enabled: false
+    nameOverride: ""
+    fullnameOverride: ""
+    # Sidecar mode - collector runs as a sidecar in instrumented pods
+    mode: sidecar
+    # Collector sidecar container resources
+    resources:
+      requests:
+        cpu: 50m
+        memory: 64Mi
+      limits:
+        cpu: 200m
+        memory: 256Mi
+    # Prometheus exporter port for metrics
+    prometheus:
+      port: 8889
+      # Metric prefix added to all OTEL-collected metrics
+      # Helps distinguish OTEL metrics from directly-scraped metrics
+      namespace: mlrun_otel
+    # OTLP receiver configuration
+    otlp:
+      grpcPort: 4317
+      httpPort: 4318
+    # Prometheus scrape mode for OTEL-instrumented pods
+    # Options:
+    #   - "direct": Direct Prometheus scraping only (default when OTEL disabled)
+    #   - "otel": Metrics collected via OTEL sidecar only (recommended when OTEL enabled)
+    #   - "both": Both OTEL sidecar and direct scraping (for debugging/transition)
+    # When enabling OTEL, set this to "otel" to prevent duplicate metrics
+    scrapeMode: "direct"
+
+  # Instrumentation configuration for auto-instrumentation
+  instrumentation:
+    enabled: false
+    nameOverride: ""
+    fullnameOverride: ""
+    # Propagators for distributed tracing context
+    propagators:
+      - tracecontext
+      - baggage
+    # Sampler configuration
+    sampler:
+      type: parentbased_traceidratio
+      argument: "1"
+    # Python auto-instrumentation
+    python:
+      enabled: true
+      image:
+        repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python
+        tag: 0.50b0
+      resources:
+        requests:
+          cpu: 50m
+          memory: 64Mi
+        limits:
+          cpu: 200m
+          memory: 256Mi
+    # Java auto-instrumentation (disabled by default, enable if needed)
+    java:
+      enabled: false
+      image:
+        repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
+        tag: 2.10.0
+      resources:
+        requests:
+          cpu: 50m
+          memory: 64Mi
+        limits:
+          cpu: 500m
+          memory: 512Mi
+
diff --git a/tests/helm-template-test.sh b/tests/helm-template-test.sh
new file mode 100755
index 00000000..8b9d79c2
--- /dev/null
+++ b/tests/helm-template-test.sh
@@ -0,0 +1,489 @@
+#!/usr/bin/env bash
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Helm template tests for MLRun CE chart
+# Validates that templates render correctly with various configurations
+
+set -o nounset
+set -o pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CHART_DIR="${SCRIPT_DIR}/../charts/mlrun-ce"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+TESTS_PASSED=0
+TESTS_FAILED=0
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
+log_test() { echo -e "${GREEN}[TEST]${NC} $1"; }
+log_pass() { echo -e "${GREEN}[PASS]${NC} $1"; ((TESTS_PASSED++)) || true; }
+log_fail() { echo -e "${RED}[FAIL]${NC} $1"; ((TESTS_FAILED++)) || true; }
+
+# Render a specific template and return the output
+render_template() {
+    local template="$1"
+    shift
+    helm template test "${CHART_DIR}" \
+        --skip-schema-validation \
+        --show-only "${template}" \
+        "$@" 2>/dev/null
+}
+
+# Render all templates and return the output
+render_all() {
+    helm template test "${CHART_DIR}" \
+        --skip-schema-validation \
+        "$@" 2>/dev/null
+}
+
+# Check if output contains a literal string (fixed-string match, not a regex)
+assert_contains() {
+    local output="$1"
+    local expected="$2"
+    local test_name="$3"
+    # -F: dots etc. match literally; "--": the needle may start with "-".
+    # Here-string: no pipe, so pipefail cannot report a SIGPIPE'd echo as a miss.
+    if grep -qF -- "$expected" <<<"$output"; then
+        log_pass "$test_name"
+        return 0
+    fi
+    log_fail "$test_name - expected to find: $expected"
+    return 1
+}
+
+# Check if output does NOT contain a literal string (fixed-string match)
+assert_not_contains() {
+    local output="$1"
+    local not_expected="$2"
+    local test_name="$3"
+    # -F/"--" keep the needle literal; the here-string avoids a pipe whose
+    # SIGPIPE status (under pipefail) would be misread as "not found" -> pass.
+    if grep -qF -- "$not_expected" <<<"$output"; then
+        log_fail "$test_name - should not contain: $not_expected"
+        return 1
+    fi
+    log_pass "$test_name"
+    return 0
+}
+
+# Pass when the rendered output is non-empty, fail otherwise
+assert_renders() {
+    local rendered="$1"
+    local label="$2"
+
+    # Guard clause: an empty string means the template produced nothing
+    if [[ -z "$rendered" ]]; then
+        log_fail "$label - template produced no output"
+        return 1
+    fi
+    log_pass "$label"
+    return 0
+}
+
+# Check that a template does NOT render (empty output or helm's "could not find template")
+assert_not_renders() {
+    local template="$1"
+    local test_name="$2"
+    shift 2
+    # Run helm directly and keep stderr: render_template discards it, so any chart
+    # error (empty stdout) used to pass. Registry URL set as in the suite's other renders.
+    local output
+    output=$(helm template test "${CHART_DIR}" --skip-schema-validation \
+        --set global.registry.url=test.io --show-only "$template" "$@" 2>&1) || true
+    if [[ -z "$output" ]] || grep -qF "could not find template" <<<"$output"; then
+        log_pass "$test_name"
+        return 0
+    fi
+    log_fail "$test_name - template should not render"
+    return 1
+}
+
+# ============================================================================
+# OpenTelemetry Tests
+# ============================================================================
+
+test_otel_collector_default() {
+    log_test "OpenTelemetry Collector - Enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/collector.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.collector.enabled=true)
+
+    assert_renders "$output" "Collector CR renders"
+    assert_contains "$output" "kind: OpenTelemetryCollector" "Has correct kind"
+    assert_contains "$output" "mode: sidecar" "Uses sidecar mode"
+    assert_contains "$output" "prometheus:" "Has Prometheus exporter"
+    assert_contains "$output" "endpoint: 0.0.0.0:8889" "Prometheus on port 8889"
+    assert_contains "$output" "otlp:" "Has OTLP receiver"
+    assert_contains "$output" "helm.sh/hook: post-install,post-upgrade" "Has Helm hooks"
+}
+
+test_otel_collector_disabled() {
+    log_test "OpenTelemetry Collector - Disabled (default)"
+
+    assert_not_renders "templates/opentelemetry/collector.yaml" \
+        "Collector CR does not render when disabled (default)"
+}
+
+test_otel_collector_resources() {
+    log_test "OpenTelemetry Collector - Custom resources"
+
+    local output
+    output=$(render_template "templates/opentelemetry/collector.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.collector.enabled=true \
+        --set opentelemetry.collector.resources.requests.cpu=100m \
+        --set opentelemetry.collector.resources.requests.memory=128Mi \
+        --set opentelemetry.collector.resources.limits.cpu=500m \
+        --set opentelemetry.collector.resources.limits.memory=512Mi)
+
+    assert_contains "$output" "cpu: 100m" "Custom CPU request"
+    assert_contains "$output" "memory: 128Mi" "Custom memory request"
+    assert_contains "$output" "cpu: 500m" "Custom CPU limit"
+    assert_contains "$output" "memory: 512Mi" "Custom memory limit"
+}
+
+test_otel_instrumentation_default() {
+    log_test "OpenTelemetry Instrumentation - Enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.instrumentation.enabled=true)
+
+    assert_renders "$output" "Instrumentation CR renders"
+    assert_contains "$output" "kind: Instrumentation" "Has correct kind"
+    assert_contains "$output" "tracecontext" "Has tracecontext propagator"
+    assert_contains "$output" "baggage" "Has baggage propagator"
+    assert_contains "$output" "parentbased_traceidratio" "Has sampler type"
+    assert_contains "$output" "python:" "Has Python instrumentation"
+    assert_contains "$output" "autoinstrumentation-python" "Uses Python auto-instrumentation image"
+}
+
+test_otel_instrumentation_disabled() {
+    log_test "OpenTelemetry Instrumentation - Disabled (default)"
+
+    assert_not_renders "templates/opentelemetry/instrumentation.yaml" \
+        "Instrumentation CR does not render when disabled (default)"
+}
+
+test_otel_instrumentation_java_enabled() {
+    log_test "OpenTelemetry Instrumentation - Java enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.instrumentation.enabled=true \
+        --set opentelemetry.instrumentation.java.enabled=true)
+
+    assert_contains "$output" "java:" "Has Java instrumentation section"
+    assert_contains "$output" "autoinstrumentation-java" "Uses Java auto-instrumentation image"
+}
+
+test_otel_rbac_default() {
+    log_test "OpenTelemetry RBAC - Enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/rbac.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.collector.enabled=true)
+
+    assert_renders "$output" "RBAC renders"
+    assert_contains "$output" "kind: ServiceAccount" "Has ServiceAccount"
+    assert_contains "$output" "kind: Role" "Has Role"
+    assert_contains "$output" "kind: RoleBinding" "Has RoleBinding"
+    assert_contains "$output" "name: otel-collector" "Has correct name"
+}
+
+test_otel_rbac_disabled() {
+    log_test "OpenTelemetry RBAC - Disabled (default)"
+
+    assert_not_renders "templates/opentelemetry/rbac.yaml" \
+        "RBAC does not render when OTEL disabled (default)"
+}
+
+test_jupyter_otel_annotations() {
+    log_test "Jupyter Deployment - OTEL annotations when enabled"
+
+    local output
+    output=$(render_template "templates/jupyter-notebook/deployment.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.collector.enabled=true \
+        --set opentelemetry.instrumentation.enabled=true)
+
+    assert_contains "$output" "sidecar.opentelemetry.io/inject:" "Has sidecar injection annotation"
+    assert_contains "$output" "instrumentation.opentelemetry.io/inject-python:" "Has Python instrumentation annotation"
+    assert_contains "$output" 'prometheus.io/scrape: "true"' "Has Prometheus scrape annotation"
+    assert_contains "$output" 'prometheus.io/scrape-mode:' "Has Prometheus scrape-mode annotation"
+    assert_contains "$output" 'prometheus.io/port: "8889"' "Has Prometheus port annotation"
+}
+
+test_jupyter_no_otel_annotations_when_disabled() {
+    log_test "Jupyter Deployment - No OTEL annotations when disabled (default)"
+
+    local output
+    output=$(render_template "templates/jupyter-notebook/deployment.yaml" \
+        --set global.registry.url=test.io)
+
+    assert_not_contains "$output" "sidecar.opentelemetry.io/inject:" "No sidecar injection when disabled (default)"
+    assert_not_contains "$output" "instrumentation.opentelemetry.io/inject-python:" "No instrumentation when disabled (default)"
+}
+
+# ============================================================================
+# Admin/Non-Admin Installation Tests
+# ============================================================================
+
+test_admin_values_otel() {
+    log_test "Admin installation - OTEL operator enabled, CRs disabled"
+
+    # Collector should not render
+    assert_not_renders "templates/opentelemetry/collector.yaml" \
+        "Collector CR not rendered with admin values" \
+        -f "${CHART_DIR}/admin_installation_values.yaml"
+
+    # Instrumentation should not render
+    assert_not_renders "templates/opentelemetry/instrumentation.yaml" \
+        "Instrumentation CR not rendered with admin values" \
+        -f "${CHART_DIR}/admin_installation_values.yaml"
+}
+
+test_non_admin_values_otel() {
+    log_test "Non-admin installation - OTEL CRs enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/collector.yaml" \
+        --set global.registry.url=test.io \
+        -f "${CHART_DIR}/non_admin_installation_values.yaml")
+
+    assert_renders "$output" "Collector CR renders with non-admin values"
+
+    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+        --set global.registry.url=test.io \
+        -f "${CHART_DIR}/non_admin_installation_values.yaml")
+
+    assert_renders "$output" "Instrumentation CR renders with non-admin values"
+}
+
+test_namespace_label_enabled() {
+    log_test "Namespace Label - Enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/namespace-label.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.namespaceLabel.enabled=true \
+        --set opentelemetry.collector.enabled=true)
+
+    assert_renders "$output" "Namespace label renders"
+    assert_contains "$output" "kind: Namespace" "Has correct kind"
+    assert_contains "$output" "opentelemetry.io/inject" "Has OTEL inject label key"
+    assert_contains "$output" '"enabled"' "Has OTEL inject label value"
+}
+
+test_namespace_label_disabled() {
+    log_test "Namespace Label - Disabled (default)"
+
+    assert_not_renders "templates/opentelemetry/namespace-label.yaml" \
+        "Namespace label not rendered when disabled (default)"
+}
+
+test_admin_namespace_label_disabled() {
+    log_test "Admin installation - Namespace label disabled"
+
+    assert_not_renders "templates/opentelemetry/namespace-label.yaml" \
+        "Namespace label not rendered with admin values" \
+        -f "${CHART_DIR}/admin_installation_values.yaml"
+}
+
+test_non_admin_namespace_label_enabled() {
+    log_test "Non-admin installation - Namespace label enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/namespace-label.yaml" \
+        --set global.registry.url=test.io \
+        -f "${CHART_DIR}/non_admin_installation_values.yaml")
+
+    assert_renders "$output" "Namespace label renders with non-admin values"
+    assert_contains "$output" "opentelemetry.io/inject" "Has OTEL inject label"
+}
+
+test_otel_operator_namespace_selector() {
+    log_test "OTEL Operator - Namespace selector configured"
+
+    local output
+    output=$(render_all \
+        --set global.registry.url=test.io \
+        --set opentelemetry-operator.enabled=true)
+
+    # The selector should be in the MutatingWebhookConfiguration. The final grep
+    # drains its input (no -q) so pipefail never sees an upstream SIGPIPE as failure.
+    if grep -A5 "namespaceSelector:" <<<"$output" | grep -F "opentelemetry.io/inject" >/dev/null; then
+        log_pass "Has namespace selector in webhook configuration"
+    else
+        log_fail "Namespace selector not found in webhook configuration"
+    fi
+}
+
+# ============================================================================
+# Prometheus Integration Tests
+# ============================================================================
+
+test_prometheus_otel_scrape_config() {
+    log_test "Prometheus - OTEL scrape configuration"
+
+    local output
+    output=$(render_all --set global.registry.url=test.io)
+
+    # The scrape config is in a Secret as base64, extract and decode it
+    local secret_data
+    secret_data=$(echo "$output" | grep "additional-scrape-configs.yaml:" | head -1 | sed 's/.*: "//' | sed 's/"$//' || true)
+
+    if [[ -n "$secret_data" ]]; then
+        local decoded
+        decoded=$(echo "$secret_data" | base64 -d 2>/dev/null || true)
+
+        if echo "$decoded" | grep -q "otel-collector-sidecars"; then
+            log_pass "Has OTEL collector scrape job"
+        else
+            log_fail "Has OTEL collector scrape job - not found in decoded config"
+        fi
+
+        if echo "$decoded" | grep -q "prometheus_io_port"; then
+            log_pass "Has pod annotation relabeling"
+        else
+            log_fail "Has pod annotation relabeling - not found in decoded config"
+        fi
+    else
+        log_fail "Prometheus scrape config secret not found"
+    fi
+}
+
+# ============================================================================
+# Full Chart Render Test
+# ============================================================================
+
+test_full_chart_renders() {
+    log_test "Full chart renders without errors"
+
+    # Run the render as the `if` condition: a failing render then reaches log_fail
+    # even under errexit, instead of relying on $? after a bare assignment.
+    local output
+    if output=$(render_all --set global.registry.url=test.io 2>&1) && [[ -n "$output" ]]; then
+        log_pass "Full chart renders successfully"
+    else
+        log_fail "Full chart failed to render"
+    fi
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+main() {
+    log_info "Running Helm template tests for MLRun CE"
+    log_info "Chart directory: ${CHART_DIR}"
+    echo ""
+
+    # Ensure dependencies are up to date
+    log_info "Updating Helm dependencies..."
+    helm dependency update "${CHART_DIR}" > /dev/null 2>&1
+
+    echo ""
+    echo "========================================"
+    echo "OpenTelemetry Collector Tests"
+    echo "========================================"
+    test_otel_collector_default
+    test_otel_collector_disabled
+    test_otel_collector_resources
+
+    echo ""
+    echo "========================================"
+    echo "OpenTelemetry Instrumentation Tests"
+    echo "========================================"
+    test_otel_instrumentation_default
+    test_otel_instrumentation_disabled
+    test_otel_instrumentation_java_enabled
+
+    echo ""
+    echo "========================================"
+    echo "OpenTelemetry RBAC Tests"
+    echo "========================================"
+    test_otel_rbac_default
+    test_otel_rbac_disabled
+
+    echo ""
+    echo "========================================"
+    echo "Jupyter OTEL Integration Tests"
+    echo "========================================"
+    test_jupyter_otel_annotations
+    test_jupyter_no_otel_annotations_when_disabled
+
+    echo ""
+    echo "========================================"
+    echo "Admin/Non-Admin Installation Tests"
+    echo "========================================"
+    test_admin_values_otel
+    test_non_admin_values_otel
+
+    echo ""
+    echo "========================================"
+    echo "Namespace Label Tests"
+    echo "========================================"
+    test_namespace_label_enabled
+    test_namespace_label_disabled
+    test_admin_namespace_label_disabled
+    test_non_admin_namespace_label_enabled
+    test_otel_operator_namespace_selector
+
+    echo ""
+    echo "========================================"
+    echo "Prometheus Integration Tests"
+    echo "========================================"
+    test_prometheus_otel_scrape_config
+
+    echo ""
+    echo "========================================"
+    echo "Full Chart Tests"
+    echo "========================================"
+    test_full_chart_renders
+
+    echo ""
+    echo "========================================"
+    echo "Test Summary"
+    echo "========================================"
+    echo -e "Passed: ${GREEN}${TESTS_PASSED}${NC}"
+    echo -e "Failed: ${RED}${TESTS_FAILED}${NC}"
+
+    if [[ ${TESTS_FAILED} -gt 0 ]]; then
+        log_error "Some tests failed!"
+        exit 1
+    else
+        log_info "All tests passed!"
+        exit 0
+    fi
+}
+
+main "$@"
+
+
+
+
diff --git a/tests/kind-test.sh b/tests/kind-test.sh
index c99a182e..94b41c7f 100755
--- a/tests/kind-test.sh
+++ b/tests/kind-test.sh
@@ -93,6 +93,7 @@ setup_helm_repos() {
     helm repo add spark-operator https://kubeflow.github.io/spark-operator 2>/dev/null || true
     helm repo add kube-prometheus-stack https://prometheus-community.github.io/helm-charts 2>/dev/null || true
     helm repo add kafka https://charts.bitnami.com/bitnami 2>/dev/null || true
+    helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts 2>/dev/null || true
     helm repo update
 }
 
@@ -220,6 +221,53 @@ verify_installation() {
     else
         log_warn "TimescaleDB pod not found"
     fi
+
+    # Verify OpenTelemetry CRDs and resources
+    echo ""
+    log_info "Verifying OpenTelemetry..."
+
+    # Check if OpenTelemetry Operator is installed (CRDs exist)
+    if kubectl get crd opentelemetrycollectors.opentelemetry.io &>/dev/null; then
+        log_info "OpenTelemetryCollector CRD exists"
+
+        # Check for collector CR
+        local collector
+        collector=$(kubectl get opentelemetrycollectors -n "${NAMESPACE}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+        if [[ -n "${collector}" ]]; then
+            log_info "OpenTelemetryCollector CR found: ${collector}"
+            kubectl get opentelemetrycollectors -n "${NAMESPACE}" "${collector}" -o yaml 2>/dev/null | grep -E "mode:|status:" | head -5 || true
+        else
+            log_warn "No OpenTelemetryCollector CR found in namespace ${NAMESPACE}"
+        fi
+    else
+        log_warn "OpenTelemetryCollector CRD not found - operator may not be installed"
+    fi
+
+    if kubectl get crd instrumentations.opentelemetry.io &>/dev/null; then
+        log_info "Instrumentation CRD exists"
+
+        # Check for instrumentation CR
+        local instrumentation
+        instrumentation=$(kubectl get instrumentations -n "${NAMESPACE}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+        if [[ -n "${instrumentation}" ]]; then
+            log_info "Instrumentation CR found: ${instrumentation}"
+        else
+            log_warn "No Instrumentation CR found in namespace ${NAMESPACE}"
+        fi
+    else
+        log_warn "Instrumentation CRD not found - operator may not be installed"
+    fi
+
+    # Check if Jupyter pod has OTEL sidecar annotations
+    echo ""
+    log_info "Checking Jupyter deployment for OTEL annotations..."
+    local jupyter_annotations
+    jupyter_annotations=$(kubectl get deployment -n "${NAMESPACE}" -l app.kubernetes.io/component=jupyter-notebook -o jsonpath='{.items[0].spec.template.metadata.annotations}' 2>/dev/null || echo "")
+    if echo "${jupyter_annotations}" | grep -q "sidecar.opentelemetry.io/inject"; then
+        log_info "Jupyter has OTEL sidecar injection annotation"
+    else
+        log_warn "Jupyter does not have OTEL sidecar injection annotation"
+    fi
 }
 
 delete_cluster() {

From 40baa802bec47898e2e96415ac27b5d61e61a43c Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 25 Mar 2026 11:54:41 +0200
Subject: [PATCH 05/23] fix requirements.lock

---
 charts/mlrun-ce/requirements.lock | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock
index 35c43691..f631004c 100644
--- a/charts/mlrun-ce/requirements.lock
+++ b/charts/mlrun-ce/requirements.lock
@@ -20,10 +20,8 @@ dependencies:
 - name: strimzi-kafka-operator
   repository: https://strimzi.io/charts/
   version: 0.48.0
-digest: sha256:f7f2ab0eaec5fb3097c09946f6de510a602293fd7f9c59c40539991b5449a6d1
-generated: "2026-03-08T12:42:39.145588+02:00"
 - name: opentelemetry-operator
   repository: https://open-telemetry.github.io/opentelemetry-helm-charts
   version: 0.78.1
-digest: sha256:4a47a90d97b21b41cd3bb7f7e9b70b56b42b95fe067bb012e4d490fa1912e18f
-generated: "2026-03-24T16:04:27.962041+02:00"
+digest: sha256:9f6ea4d6c60baabe3a9fb2a9c286f5c70a97bbf76ecba15ddaef7f39c56269ae
+generated: "2026-03-25T11:50:15.589709+02:00"

From ff914ac4888b27cf26c46c6e968f2e775c90e7d0 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Thu, 26 Mar 2026 16:10:34 +0200
Subject: [PATCH 06/23] fixes

---
 charts/mlrun-ce/templates/_helpers.tpl        | 158 ++++++++++++++++++
 .../templates/opentelemetry/collector.yaml    | 114 ++-----------
 .../opentelemetry/crd-readiness-job.yaml      |  82 +++++++++
 .../opentelemetry/instrumentation.yaml        |  98 ++---------
 .../opentelemetry/namespace-label.yaml        |  36 +++-
 .../templates/opentelemetry/rbac.yaml         | 110 ++++++++++++
 charts/mlrun-ce/values.yaml                   |  12 +-
 tests/helm-template-test.sh                   |  71 ++++----
 8 files changed, 447 insertions(+), 234 deletions(-)
 create mode 100644 charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml

diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 292fe3e2..709b5f37 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -416,3 +416,161 @@ OpenTelemetry selector labels
 app.kubernetes.io/component: opentelemetry
 {{- end }}
 
+{{/*
+OpenTelemetryCollector CR manifest for use in the CRD readiness job
+*/}}
+{{- define "mlrun-ce.otel.collector.manifest" -}}
+apiVersion: opentelemetry.io/v1beta1
+kind: OpenTelemetryCollector
+metadata:
+  name: {{ include "mlrun-ce.otel.collector.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+spec:
+  mode: {{ .Values.opentelemetry.collector.mode }}
+  upgradeStrategy: automatic
+  managementState: managed
+  resources:
+    {{- toYaml .Values.opentelemetry.collector.resources | nindent 4 }}
+  podAnnotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
+    prometheus.io/path: "/metrics"
+  config:
+    receivers:
+      otlp:
+        protocols:
+          grpc:
+            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
+          http:
+            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.httpPort }}
+    processors:
+      batch:
+        send_batch_size: 10000
+        timeout: 10s
+      memory_limiter:
+        check_interval: 1s
+        limit_percentage: 80
+        spike_limit_percentage: 25
+      resourcedetection:
+        detectors:
+          - env
+          - system
+        timeout: 5s
+        override: false
+    exporters:
+      prometheus:
+        endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.prometheus.port }}
+        namespace: {{ .Values.opentelemetry.collector.prometheus.namespace }}
+        const_labels:
+          collector_mode: sidecar
+          metrics_source: otel_collector
+        resource_to_telemetry_conversion:
+          enabled: true
+      debug:
+        verbosity: basic
+        sampling_initial: 5
+        sampling_thereafter: 200
+    extensions:
+      health_check:
+        endpoint: 0.0.0.0:13133
+    service:
+      extensions:
+        - health_check
+      pipelines:
+        metrics:
+          receivers:
+            - otlp
+          processors:
+            - memory_limiter
+            - resourcedetection
+            - batch
+          exporters:
+            - prometheus
+            - debug
+        traces:
+          receivers:
+            - otlp
+          processors:
+            - memory_limiter
+            - resourcedetection
+            - batch
+          exporters:
+            - debug
+      telemetry:
+        logs:
+          level: info
+        metrics:
+          address: 0.0.0.0:8888
+{{- end }}
+
+{{/*
+Instrumentation CR manifest for use in the CRD readiness job
+*/}}
+{{- define "mlrun-ce.otel.instrumentation.manifest" -}}
+apiVersion: opentelemetry.io/v1alpha1
+kind: Instrumentation
+metadata:
+  name: {{ include "mlrun-ce.otel.instrumentation.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+spec:
+  propagators:
+    {{- toYaml .Values.opentelemetry.instrumentation.propagators | nindent 4 }}
+  sampler:
+    type: {{ .Values.opentelemetry.instrumentation.sampler.type }}
+    argument: {{ .Values.opentelemetry.instrumentation.sampler.argument | quote }}
+  env:
+    - name: OTEL_SERVICE_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.labels['app.kubernetes.io/name']
+    - name: OTEL_RESOURCE_ATTRIBUTES
+      value: >-
+        k8s.namespace.name=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE),
+        k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),
+        k8s.container.name=$(OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME),
+        service.namespace=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE)
+    - name: OTEL_RESOURCE_ATTRIBUTES_NAMESPACE
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.namespace
+    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.name
+    - name: OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.name
+    - name: OTEL_METRICS_EXPORTER
+      value: otlp
+    - name: OTEL_TRACES_EXPORTER
+      value: otlp
+    - name: OTEL_LOGS_EXPORTER
+      value: none
+  {{- if .Values.opentelemetry.instrumentation.python.enabled }}
+  python:
+    image: {{ .Values.opentelemetry.instrumentation.python.image.repository }}:{{ .Values.opentelemetry.instrumentation.python.image.tag }}
+    resourceRequirements:
+      {{- toYaml .Values.opentelemetry.instrumentation.python.resources | nindent 6 }}
+    env:
+      - name: OTEL_PYTHON_LOG_CORRELATION
+        value: "true"
+      - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
+        value: "false"
+      - name: OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
+        value: ""
+  {{- end }}
+  {{- if .Values.opentelemetry.instrumentation.java.enabled }}
+  java:
+    image: {{ .Values.opentelemetry.instrumentation.java.image.repository }}:{{ .Values.opentelemetry.instrumentation.java.image.tag }}
+    resourceRequirements:
+      {{- toYaml .Values.opentelemetry.instrumentation.java.resources | nindent 6 }}
+    env:
+      - name: OTEL_INSTRUMENTATION_COMMON_DEFAULT_ENABLED
+        value: "true"
+  {{- end }}
+{{- end }}
diff --git a/charts/mlrun-ce/templates/opentelemetry/collector.yaml b/charts/mlrun-ce/templates/opentelemetry/collector.yaml
index e32f3d4a..e1dd53ee 100644
--- a/charts/mlrun-ce/templates/opentelemetry/collector.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/collector.yaml
@@ -1,102 +1,14 @@
-{{- if .Values.opentelemetry.collector.enabled }}
-apiVersion: opentelemetry.io/v1beta1
-kind: OpenTelemetryCollector
-metadata:
-  name: {{ include "mlrun-ce.otel.collector.fullname" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
-  annotations:
-    # Delay collector CR creation until after CRDs are installed by the operator
-    helm.sh/hook: post-install,post-upgrade
-    helm.sh/hook-weight: "10"
-spec:
-  mode: {{ .Values.opentelemetry.collector.mode }}
-  resources:
-    {{- toYaml .Values.opentelemetry.collector.resources | nindent 4 }}
-  # Pod annotations for Prometheus scraping
-  podAnnotations:
-    prometheus.io/scrape: "true"
-    prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
-    prometheus.io/path: "/metrics"
-  config:
-    receivers:
-      otlp:
-        protocols:
-          grpc:
-            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
-          http:
-            endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.otlp.httpPort }}
-
-    processors:
-      # Batch processor for efficient metric export
-      batch:
-        send_batch_size: 10000
-        timeout: 10s
-      # Memory limiter to prevent OOM
-      memory_limiter:
-        check_interval: 1s
-        limit_percentage: 80
-        spike_limit_percentage: 25
-      # Resource detection for Kubernetes metadata
-      resourcedetection:
-        detectors:
-          - env
-          - system
-        timeout: 5s
-        override: false
-
-    exporters:
-      # Prometheus exporter for metrics - scraped by Prometheus
-      prometheus:
-        endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.prometheus.port }}
-        # Metric namespace prefix helps distinguish OTEL-collected metrics
-        # from directly-scraped metrics in Prometheus queries
-        namespace: {{ .Values.opentelemetry.collector.prometheus.namespace }}
-        const_labels:
-          collector_mode: sidecar
-          metrics_source: otel_collector
-        resource_to_telemetry_conversion:
-          enabled: true
-      # Debug exporter for troubleshooting (logs to stdout)
-      debug:
-        verbosity: basic
-        sampling_initial: 5
-        sampling_thereafter: 200
-
-    extensions:
-      health_check:
-        endpoint: 0.0.0.0:13133
-
-    service:
-      extensions:
-        - health_check
-      pipelines:
-        # Metrics pipeline: OTLP -> processing -> Prometheus export
-        metrics:
-          receivers:
-            - otlp
-          processors:
-            - memory_limiter
-            - resourcedetection
-            - batch
-          exporters:
-            - prometheus
-            - debug
-        # Traces pipeline: OTLP -> processing -> debug (no trace backend configured yet)
-        traces:
-          receivers:
-            - otlp
-          processors:
-            - memory_limiter
-            - resourcedetection
-            - batch
-          exporters:
-            - debug
-      telemetry:
-        logs:
-          level: info
-        metrics:
-          address: 0.0.0.0:8888
-{{- end }}
+{{/*
+OpenTelemetryCollector CR is created by the crd-readiness-job.yaml via a Helm post-install/post-upgrade hook.
+This solves the race condition between the OpenTelemetry Operator starting up and registering its CRDs,
+and Helm trying to create this CR.
+
+The actual CR manifest is defined in _helpers.tpl as "mlrun-ce.otel.collector.manifest" and is applied
+by the job after it confirms the CRD is available.
+
+To see the CR configuration, check:
+- templates/_helpers.tpl: defines the manifest
+- templates/opentelemetry/crd-readiness-job.yaml: creates the CR
+- values.yaml: opentelemetry.collector.* settings
+*/}}
 
diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
new file mode 100644
index 00000000..eec873f2
--- /dev/null
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -0,0 +1,82 @@
+{{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+---
+# Job to wait for OpenTelemetry CRDs to be available and then create the CRs
+# This solves the race condition between the operator starting and CR creation
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ .Release.Name }}-otel-cr-creator
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    # Run as a post-install and post-upgrade hook
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "10"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  ttlSecondsAfterFinished: 300
+  backoffLimit: 6
+  template:
+    metadata:
+      labels:
+        {{- include "mlrun-ce.otel.selectorLabels" . | nindent 8 }}
+    spec:
+      restartPolicy: OnFailure
+      serviceAccountName: {{ .Release.Name }}-otel-cr-creator
+      containers:
+        - name: cr-creator
+          image: bitnami/kubectl:latest
+          command:
+            - /bin/bash
+            - -c
+            - |
+              set -e
+              
+              echo "Waiting for OpenTelemetry CRDs to be available..."
+              
+              # Wait for the OpenTelemetryCollector CRD
+              {{- if .Values.opentelemetry.collector.enabled }}
+              echo "Waiting for OpenTelemetryCollector CRD..."
+              until kubectl get crd opentelemetrycollectors.opentelemetry.io &>/dev/null; do
+                echo "Waiting for opentelemetrycollectors.opentelemetry.io CRD..."
+                sleep 5
+              done
+              echo "OpenTelemetryCollector CRD is available!"
+              {{- end }}
+              
+              # Wait for the Instrumentation CRD
+              {{- if .Values.opentelemetry.instrumentation.enabled }}
+              echo "Waiting for Instrumentation CRD..."
+              until kubectl get crd instrumentations.opentelemetry.io &>/dev/null; do
+                echo "Waiting for instrumentations.opentelemetry.io CRD..."
+                sleep 5
+              done
+              echo "Instrumentation CRD is available!"
+              {{- end }}
+              
+              # Wait a bit more for the operator to be fully ready
+              echo "Waiting for operator webhook to be ready..."
+              sleep 10
+              
+              {{- if .Values.opentelemetry.collector.enabled }}
+              # Create or update the OpenTelemetryCollector CR
+              echo "Creating/updating OpenTelemetryCollector CR..."
+              cat <<'EOF' | kubectl apply -f -
+              {{- include "mlrun-ce.otel.collector.manifest" . | nindent 14 }}
+              EOF
+              echo "OpenTelemetryCollector CR created/updated!"
+              {{- end }}
+              
+              {{- if .Values.opentelemetry.instrumentation.enabled }}
+              # Create or update the Instrumentation CR
+              echo "Creating/updating Instrumentation CR..."
+              cat <<'EOF' | kubectl apply -f -
+              {{- include "mlrun-ce.otel.instrumentation.manifest" . | nindent 14 }}
+              EOF
+              echo "Instrumentation CR created/updated!"
+              {{- end }}
+              
+              echo "All OpenTelemetry CRs have been created successfully!"
+{{- end }}
+
diff --git a/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml b/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
index 7b9f0767..b79b9198 100644
--- a/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/instrumentation.yaml
@@ -1,86 +1,14 @@
-{{- if .Values.opentelemetry.instrumentation.enabled }}
-apiVersion: opentelemetry.io/v1alpha1
-kind: Instrumentation
-metadata:
-  name: {{ include "mlrun-ce.otel.instrumentation.fullname" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
-  annotations:
-    # Delay instrumentation CR creation until after CRDs are installed by the operator
-    helm.sh/hook: post-install,post-upgrade
-    helm.sh/hook-weight: "10"
-spec:
-  # Propagators for distributed tracing context
-  propagators:
-    {{- toYaml .Values.opentelemetry.instrumentation.propagators | nindent 4 }}
-
-  # Sampler configuration
-  sampler:
-    type: {{ .Values.opentelemetry.instrumentation.sampler.type }}
-    argument: {{ .Values.opentelemetry.instrumentation.sampler.argument | quote }}
-
-  # Environment variables injected into instrumented pods
-  env:
-    # Service name will be auto-detected from pod metadata
-    - name: OTEL_SERVICE_NAME
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.labels['app.kubernetes.io/name']
-    # Resource attributes for better observability
-    - name: OTEL_RESOURCE_ATTRIBUTES
-      value: >-
-        k8s.namespace.name=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE),
-        k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),
-        k8s.container.name=$(OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME),
-        service.namespace=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE)
-    - name: OTEL_RESOURCE_ATTRIBUTES_NAMESPACE
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.namespace
-    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.name
-    - name: OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.name
-    # Export metrics via OTLP to the sidecar collector
-    - name: OTEL_METRICS_EXPORTER
-      value: otlp
-    - name: OTEL_TRACES_EXPORTER
-      value: otlp
-    - name: OTEL_LOGS_EXPORTER
-      value: none
-
-  # Python auto-instrumentation configuration
-  {{- if .Values.opentelemetry.instrumentation.python.enabled }}
-  python:
-    image: {{ .Values.opentelemetry.instrumentation.python.image.repository }}:{{ .Values.opentelemetry.instrumentation.python.image.tag }}
-    resourceRequirements:
-      {{- toYaml .Values.opentelemetry.instrumentation.python.resources | nindent 6 }}
-    env:
-      # Python-specific OTEL configuration
-      - name: OTEL_PYTHON_LOG_CORRELATION
-        value: "true"
-      - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
-        value: "false"
-      # Disable specific instrumentations that might cause issues
-      - name: OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
-        value: ""
-  {{- end }}
-
-  # Java auto-instrumentation configuration
-  {{- if .Values.opentelemetry.instrumentation.java.enabled }}
-  java:
-    image: {{ .Values.opentelemetry.instrumentation.java.image.repository }}:{{ .Values.opentelemetry.instrumentation.java.image.tag }}
-    resourceRequirements:
-      {{- toYaml .Values.opentelemetry.instrumentation.java.resources | nindent 6 }}
-    env:
-      # Java-specific OTEL configuration
-      - name: OTEL_INSTRUMENTATION_COMMON_DEFAULT_ENABLED
-        value: "true"
-  {{- end }}
-{{- end }}
+{{/*
+Instrumentation CR is created by the crd-readiness-job.yaml via a Helm post-install/post-upgrade hook.
+This solves the race condition between the OpenTelemetry Operator starting up and registering its CRDs,
+and Helm trying to create this CR.
+
+The actual CR manifest is defined in _helpers.tpl as "mlrun-ce.otel.instrumentation.manifest" and is applied
+by the job after it confirms the CRD is available.
+
+To see the CR configuration, check:
+- templates/_helpers.tpl: defines the manifest
+- templates/opentelemetry/crd-readiness-job.yaml: creates the CR
+- values.yaml: opentelemetry.instrumentation.* settings
+*/}}
 
diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
index 985bd4fb..d86d3402 100644
--- a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -1,15 +1,33 @@
 {{- if and (or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled) .Values.opentelemetry.namespaceLabel.enabled -}}
-# Label the namespace for OpenTelemetry operator webhook injection
-# The operator will only inject sidecars into namespaces with this label
-apiVersion: v1
-kind: Namespace
+# Post-install/post-upgrade hook Job that labels the release namespace for
+# OpenTelemetry operator webhook injection, avoiding Helm ownership conflicts
+apiVersion: batch/v1
+kind: Job
 metadata:
-  name: {{ .Release.Namespace }}
+  name: {{ .Release.Name }}-namespace-label
+  namespace: {{ .Release.Namespace }}
   labels:
 {{ include "mlrun-ce.otel.labels" . | indent 4 }}
-    {{ .Values.opentelemetry.namespaceLabel.key }}: {{ .Values.opentelemetry.namespaceLabel.value | quote }}
   annotations:
-    # This resource only patches the existing namespace with the required label
-    # It does not create the namespace (namespace should already exist)
-    helm.sh/resource-policy: keep
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "5"  # after the RBAC hooks (weight 0) that create this Job's ServiceAccount
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  ttlSecondsAfterFinished: 60
+  template:
+    metadata:
+      name: {{ .Release.Name }}-namespace-label
+    spec:
+      serviceAccountName: {{ .Release.Name }}-otel-cr-creator
+      restartPolicy: Never
+      containers:
+      - name: label-namespace
+        image: bitnami/kubectl:latest
+        command:
+        - /bin/sh
+        - -ec  # -e: fail the Job if kubectl label fails instead of reporting success
+        - |
+          echo "Labeling namespace {{ .Release.Namespace }} for OpenTelemetry..."
+          kubectl label namespace {{ .Release.Namespace }} {{ .Values.opentelemetry.namespaceLabel.key }}={{ .Values.opentelemetry.namespaceLabel.value }} --overwrite
+          echo "Namespace labeled successfully!"
 {{- end -}}
diff --git a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
index 0ffe62b4..5dcf746b 100644
--- a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
@@ -9,6 +9,19 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
 ---
+# ServiceAccount for the CR creator job
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ .Release.Name }}-otel-cr-creator
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+---
 # Role for OpenTelemetry collector to access Kubernetes resources
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
@@ -54,5 +67,102 @@ subjects:
   - kind: ServiceAccount
     name: otel-collector
     namespace: {{ .Release.Namespace }}
+---
+# ClusterRole for the CR creator job to read CRDs and label namespaces (cluster-scoped)
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ .Release.Name }}-otel-crd-reader
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+rules:
+  # Allow reading CRDs to check availability (CRDs are cluster-scoped)
+  - apiGroups:
+      - apiextensions.k8s.io
+    resources:
+      - customresourcedefinitions
+    verbs:
+      - get
+      - list
+  # Allow labeling only the release namespace (resourceNames) for OTEL injection
+  - apiGroups: [""]
+    resources: ["namespaces"]
+    resourceNames:
+      - {{ .Release.Namespace | quote }}
+    verbs:
+      - get
+      - patch
+      - update
+---
+# ClusterRoleBinding for the CR creator job
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ .Release.Name }}-otel-crd-reader
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ .Release.Name }}-otel-crd-reader
+subjects:
+  - kind: ServiceAccount
+    name: {{ .Release.Name }}-otel-cr-creator
+    namespace: {{ .Release.Namespace }}
+---
+# Role for the CR creator job to create OpenTelemetry CRs (namespace-scoped)
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ .Release.Name }}-otel-cr-creator
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+rules:
+  # Allow creating/updating OpenTelemetry CRs
+  - apiGroups:
+      - opentelemetry.io
+    resources:
+      - opentelemetrycollectors
+      - instrumentations
+    verbs:
+      - create
+      - get
+      - patch
+      - update
+      - list
+---
+# RoleBinding for the CR creator job
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ .Release.Name }}-otel-cr-creator
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ .Release.Name }}-otel-cr-creator
+subjects:
+  - kind: ServiceAccount
+    name: {{ .Release.Name }}-otel-cr-creator
+    namespace: {{ .Release.Namespace }}
 {{- end }}
 
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 5c864740..7850c51c 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -800,8 +800,8 @@ opentelemetry-operator:
   manager:
     # Collector image used by the operator when creating collectors
     collectorImage:
-      repository: otel/opentelemetry-collector-contrib
-      tag: 0.115.0
+      repository: otel/opentelemetry-collector
+      tag: 0.116.0
     # Auto-instrumentation images
     autoInstrumentationImage:
       python:
@@ -810,6 +810,8 @@ opentelemetry-operator:
       java:
         repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
         tag: 2.10.0
+    # Feature gates as string (comma-separated)
+    featureGates: ""
     resources:
       requests:
         cpu: 50m
@@ -830,12 +832,14 @@ opentelemetry:
     key: "opentelemetry.io/inject"
     value: "enabled"
 
-  # OpenTelemetry Collector configuration (sidecar mode)
+  # OpenTelemetry Collector configuration (SIDECAR mode)
+  # In sidecar mode, the collector is injected into pods via webhook
+  # Not as a standalone deployment
   collector:
     enabled: false
     nameOverride: ""
     fullnameOverride: ""
-    # Sidecar mode - collector runs as a sidecar in instrumented pods
+    # SIDECAR mode - collector is injected into pods by the operator
     mode: sidecar
     # Collector sidecar container resources
     resources:
diff --git a/tests/helm-template-test.sh b/tests/helm-template-test.sh
index 8b9d79c2..6acd789f 100755
--- a/tests/helm-template-test.sh
+++ b/tests/helm-template-test.sh
@@ -122,34 +122,40 @@ assert_not_renders() {
 # ============================================================================
 
 test_otel_collector_default() {
-    log_test "OpenTelemetry Collector - Enabled"
+    log_test "OpenTelemetry Collector - Enabled (via CRD Readiness Job)"
 
     local output
-    output=$(render_template "templates/opentelemetry/collector.yaml" \
+    # The collector CR is now created by the crd-readiness-job, not directly
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
         --set global.registry.url=test.io \
         --set opentelemetry.collector.enabled=true)
 
-    assert_renders "$output" "Collector CR renders"
-    assert_contains "$output" "kind: OpenTelemetryCollector" "Has correct kind"
+    assert_renders "$output" "CRD Readiness Job renders"
+    assert_contains "$output" "kind: Job" "Has correct kind"
+    assert_contains "$output" "kind: OpenTelemetryCollector" "Job contains OpenTelemetryCollector CR"
     assert_contains "$output" "mode: sidecar" "Uses sidecar mode"
     assert_contains "$output" "prometheus:" "Has Prometheus exporter"
     assert_contains "$output" "endpoint: 0.0.0.0:8889" "Prometheus on port 8889"
     assert_contains "$output" "otlp:" "Has OTLP receiver"
-    assert_contains "$output" "helm.sh/hook: post-install,post-upgrade" "Has Helm hooks"
+    assert_contains "$output" "helm.sh/hook" "Has Helm hooks"
+    assert_contains "$output" "post-install,post-upgrade" "Has correct hook triggers"
+    assert_contains "$output" "upgradeStrategy: automatic" "Has upgradeStrategy"
+    assert_contains "$output" "managementState: managed" "Has managementState"
 }
 
 test_otel_collector_disabled() {
     log_test "OpenTelemetry Collector - Disabled (default)"
 
-    assert_not_renders "templates/opentelemetry/collector.yaml" \
-        "Collector CR does not render when disabled (default)"
+    # When disabled, the crd-readiness-job should not render
+    assert_not_renders "templates/opentelemetry/crd-readiness-job.yaml" \
+        "CRD Readiness Job does not render when collector disabled (default)"
 }
 
 test_otel_collector_resources() {
     log_test "OpenTelemetry Collector - Custom resources"
 
     local output
-    output=$(render_template "templates/opentelemetry/collector.yaml" \
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
         --set global.registry.url=test.io \
         --set opentelemetry.collector.enabled=true \
         --set opentelemetry.collector.resources.requests.cpu=100m \
@@ -164,15 +170,15 @@ test_otel_collector_resources() {
 }
 
 test_otel_instrumentation_default() {
-    log_test "OpenTelemetry Instrumentation - Enabled"
+    log_test "OpenTelemetry Instrumentation - Enabled (via CRD Readiness Job)"
 
     local output
-    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
         --set global.registry.url=test.io \
         --set opentelemetry.instrumentation.enabled=true)
 
-    assert_renders "$output" "Instrumentation CR renders"
-    assert_contains "$output" "kind: Instrumentation" "Has correct kind"
+    assert_renders "$output" "CRD Readiness Job renders for Instrumentation"
+    assert_contains "$output" "kind: Instrumentation" "Job contains Instrumentation CR"
     assert_contains "$output" "tracecontext" "Has tracecontext propagator"
     assert_contains "$output" "baggage" "Has baggage propagator"
     assert_contains "$output" "parentbased_traceidratio" "Has sampler type"
@@ -183,15 +189,16 @@ test_otel_instrumentation_default() {
 test_otel_instrumentation_disabled() {
     log_test "OpenTelemetry Instrumentation - Disabled (default)"
 
-    assert_not_renders "templates/opentelemetry/instrumentation.yaml" \
-        "Instrumentation CR does not render when disabled (default)"
+    # When both collector and instrumentation are disabled, the job should not render
+    assert_not_renders "templates/opentelemetry/crd-readiness-job.yaml" \
+        "CRD Readiness Job does not render when instrumentation disabled (default)"
 }
 
 test_otel_instrumentation_java_enabled() {
     log_test "OpenTelemetry Instrumentation - Java enabled"
 
     local output
-    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
         --set global.registry.url=test.io \
         --set opentelemetry.instrumentation.enabled=true \
         --set opentelemetry.instrumentation.java.enabled=true)
@@ -213,6 +220,8 @@ test_otel_rbac_default() {
     assert_contains "$output" "kind: Role" "Has Role"
     assert_contains "$output" "kind: RoleBinding" "Has RoleBinding"
     assert_contains "$output" "name: otel-collector" "Has correct name"
+    assert_contains "$output" "kind: ClusterRole" "Has ClusterRole for CRD access"
+    assert_contains "$output" "otel-cr-creator" "Has CR creator ServiceAccount"
 }
 
 test_otel_rbac_disabled() {
@@ -256,14 +265,9 @@ test_jupyter_no_otel_annotations_when_disabled() {
 test_admin_values_otel() {
     log_test "Admin installation - OTEL operator enabled, CRs disabled"
 
-    # Collector should not render
-    assert_not_renders "templates/opentelemetry/collector.yaml" \
-        "Collector CR not rendered with admin values" \
-        -f "${CHART_DIR}/admin_installation_values.yaml"
-
-    # Instrumentation should not render
-    assert_not_renders "templates/opentelemetry/instrumentation.yaml" \
-        "Instrumentation CR not rendered with admin values" \
+    # CRD readiness job should not render when CRs are disabled
+    assert_not_renders "templates/opentelemetry/crd-readiness-job.yaml" \
+        "CRD Readiness Job not rendered with admin values" \
         -f "${CHART_DIR}/admin_installation_values.yaml"
 }
 
@@ -271,17 +275,13 @@ test_non_admin_values_otel() {
     log_test "Non-admin installation - OTEL CRs enabled"
 
     local output
-    output=$(render_template "templates/opentelemetry/collector.yaml" \
-        --set global.registry.url=test.io \
-        -f "${CHART_DIR}/non_admin_installation_values.yaml")
-
-    assert_renders "$output" "Collector CR renders with non-admin values"
-
-    output=$(render_template "templates/opentelemetry/instrumentation.yaml" \
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
         --set global.registry.url=test.io \
         -f "${CHART_DIR}/non_admin_installation_values.yaml")
 
-    assert_renders "$output" "Instrumentation CR renders with non-admin values"
+    assert_renders "$output" "CRD Readiness Job renders with non-admin values"
+    assert_contains "$output" "kind: OpenTelemetryCollector" "Has Collector CR"
+    assert_contains "$output" "kind: Instrumentation" "Has Instrumentation CR"
 }
 
 test_namespace_label_enabled() {
@@ -293,10 +293,11 @@ test_namespace_label_enabled() {
         --set opentelemetry.namespaceLabel.enabled=true \
         --set opentelemetry.collector.enabled=true)
 
-    assert_renders "$output" "Namespace label renders"
-    assert_contains "$output" "kind: Namespace" "Has correct kind"
+    assert_renders "$output" "Namespace label job renders"
+    assert_contains "$output" "kind: Job" "Has correct kind (Job)"
+    assert_contains "$output" "helm.sh/hook" "Has post-install hook annotation"
+    assert_contains "$output" "kubectl label namespace" "Has kubectl label command"
     assert_contains "$output" "opentelemetry.io/inject" "Has OTEL inject label key"
-    assert_contains "$output" '"enabled"' "Has OTEL inject label value"
 }
 
 test_namespace_label_disabled() {
@@ -322,7 +323,7 @@ test_non_admin_namespace_label_enabled() {
         --set global.registry.url=test.io \
         -f "${CHART_DIR}/non_admin_installation_values.yaml")
 
-    assert_renders "$output" "Namespace label renders with non-admin values"
+    assert_renders "$output" "Namespace label job renders with non-admin values"
     assert_contains "$output" "opentelemetry.io/inject" "Has OTEL inject label"
 }
 

From 78b175a63298b1c8c2e42fb6e39ce9ff58bb5668 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Mon, 30 Mar 2026 14:47:26 +0300
Subject: [PATCH 07/23] Run OTel RBAC hooks first, pin collector image, patch operator schema

---
 .gitignore                                    |  4 ++
 charts/mlrun-ce/templates/_helpers.tpl        |  5 +-
 .../opentelemetry/crd-readiness-job.yaml      |  1 +
 .../opentelemetry/namespace-label.yaml        |  1 +
 .../templates/opentelemetry/rbac.yaml         | 10 ++--
 charts/mlrun-ce/values.yaml                   | 52 +++++++++++--------
 tests/package.sh                              | 36 +++++++++++++
 7 files changed, 77 insertions(+), 32 deletions(-)

diff --git a/.gitignore b/.gitignore
index cc8f86f3..4cda4743 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,7 @@ charts/mlrun-ce/charts/*
 **/.DS_Store
 *.DS_Store
 **/__pycache__
+# Packaged chart tarballs (generated by make package)
+charts/mlrun-ce/mlrun-ce-*.tgz
+# MLRun project directories created by test scripts
+otlp-pro/
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index a445e846..3cfa5d7b 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -496,12 +496,9 @@ spec:
   mode: {{ .Values.opentelemetry.collector.mode }}
   upgradeStrategy: automatic
   managementState: managed
+  image: {{ (index .Values "opentelemetry-operator").manager.collectorImage.repository }}:{{ (index .Values "opentelemetry-operator").manager.collectorImage.tag }}
   resources:
     {{- toYaml .Values.opentelemetry.collector.resources | nindent 4 }}
-  podAnnotations:
-    prometheus.io/scrape: "true"
-    prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
-    prometheus.io/path: "/metrics"
   config:
     receivers:
       otlp:
diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index eec873f2..2a731da3 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -14,6 +14,7 @@ metadata:
     "helm.sh/hook": post-install,post-upgrade
     "helm.sh/hook-weight": "10"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-timeout": "300s"
 spec:
   ttlSecondsAfterFinished: 300
   backoffLimit: 6
diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
index d86d3402..11040521 100644
--- a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -12,6 +12,7 @@ metadata:
     "helm.sh/hook": post-install,post-upgrade
     "helm.sh/hook-weight": "-10"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-timeout": "120s"
 spec:
   ttlSecondsAfterFinished: 60
   template:
diff --git a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
index 5dcf746b..eb360bfd 100644
--- a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
@@ -19,7 +19,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
     "helm.sh/hook": post-install,post-upgrade
-    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-weight": "-20"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
 ---
 # Role for OpenTelemetry collector to access Kubernetes resources
@@ -77,7 +77,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
     "helm.sh/hook": post-install,post-upgrade
-    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-weight": "-20"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
 rules:
   # Allow reading CRDs to check availability (CRDs are cluster-scoped)
@@ -107,7 +107,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
     "helm.sh/hook": post-install,post-upgrade
-    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-weight": "-20"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
 roleRef:
   apiGroup: rbac.authorization.k8s.io
@@ -128,7 +128,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
     "helm.sh/hook": post-install,post-upgrade
-    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-weight": "-20"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
 rules:
   # Allow creating/updating OpenTelemetry CRs
@@ -154,7 +154,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
     "helm.sh/hook": post-install,post-upgrade
-    "helm.sh/hook-weight": "0"
+    "helm.sh/hook-weight": "-20"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
 roleRef:
   apiGroup: rbac.authorization.k8s.io
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 40497364..d4d2c638 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -574,33 +574,25 @@ kube-prometheus-stack:
       # This creates clear separation between direct scraping and OTEL-collected metrics
       additionalScrapeConfigs:
         # Job for scraping OTEL collector sidecars (metrics on port 8889)
-        # Only scrapes pods with prometheus.io/scrape-mode: "otel" or "both"
+        # Discovers any pod with sidecar.opentelemetry.io/inject annotation — no per-function
+        # Prometheus annotations required. Port 8889 is the standard OTel prometheus exporter port.
         - job_name: 'otel-collector-sidecars'
           kubernetes_sd_configs:
             - role: pod
           relabel_configs:
-            # Only scrape pods with OTEL sidecar (port 8889)
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port]
+            # Only scrape pods that have the OTel sidecar injected
+            - source_labels: [__meta_kubernetes_pod_annotation_sidecar_opentelemetry_io_inject]
               action: keep
-              regex: "8889"
-            # Only scrape if scrape-mode is "otel" or "both" (or legacy scrape=true with port 8889)
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_mode]
-              action: keep
-              regex: (otel|both|)
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
-              action: keep
-              regex: true
-            # Set metrics path
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+              regex: .+
+            # Use port 8889 (OTel prometheus exporter) regardless of what the pod exposes
+            - source_labels: [__address__]
               action: replace
-              target_label: __metrics_path__
-              regex: (.+)
-            # Set target address with port
-            - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
-              action: replace
-              regex: ([^:]+)(?::\d+)?;(\d+)
-              replacement: $1:$2
+              regex: ([^:]+)(?::\d+)?
+              replacement: $1:8889
               target_label: __address__
+            # Metrics path is always /metrics for the OTel prometheus exporter
+            - target_label: __metrics_path__
+              replacement: /metrics
             # Add kubernetes labels
             - action: labelmap
               regex: __meta_kubernetes_pod_label_(.+)
@@ -815,9 +807,11 @@ opentelemetry-operator:
   manager:
     # Collector image used by the operator when creating collectors
     collectorImage:
-      repository: otel/opentelemetry-collector
-      tag: 0.116.0
-    # Auto-instrumentation images
+      # Using contrib distribution pinned to 0.108.0 — versions 0.109+ use a dynamically linked
+      # binary in a distroless image that lacks /lib64/ld-linux-x86-64.so.2 and fails to exec.
+      repository: otel/opentelemetry-collector-contrib
+      tag: 0.108.0
+    # Auto-instrumentation images (all fields required by the sub-chart schema)
     autoInstrumentationImage:
       python:
         repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python
@@ -825,6 +819,18 @@ opentelemetry-operator:
       java:
         repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
         tag: 2.10.0
+      nodejs:
+        repository: ""
+        tag: ""
+      dotnet:
+        repository: ""
+        tag: ""
+      go:
+        repository: ""
+        tag: ""
+      apacheHttpd:
+        repository: ""
+        tag: ""
     # Feature gates as string (comma-separated)
     featureGates: ""
     resources:
diff --git a/tests/package.sh b/tests/package.sh
index d8f847bc..fa8150ed 100755
--- a/tests/package.sh
+++ b/tests/package.sh
@@ -25,6 +25,42 @@ echo "Installing chart dependencies"
 cd "$dirname"/../charts/mlrun-ce
 helm dependency update
 
+# Patch opentelemetry-operator sub-chart schema: the upstream chart has
+# "examples": "" (string) for featureGates, but JSON Schema requires an array.
+# Helm v4 enforces metaschema validation strictly and rejects the install otherwise.
+echo "Patching opentelemetry-operator schema (featureGates.examples string -> array)..."
+python3 - <<'PYEOF'
+import json, os, subprocess, sys, tarfile, tempfile
+
+tgz = "charts/opentelemetry-operator-0.78.1.tgz"
+if not os.path.exists(tgz):
+    print(f"  {tgz} not found, skipping patch")
+    sys.exit(0)
+
+with tempfile.TemporaryDirectory() as tmp:
+    with tarfile.open(tgz, "r:gz") as t:
+        t.extractall(tmp)
+    schema_path = os.path.join(tmp, "opentelemetry-operator", "values.schema.json")
+    with open(schema_path) as f:
+        schema = json.load(f)
+    fg = schema["properties"]["manager"]["properties"]["featureGates"]
+    if not isinstance(fg.get("examples"), str):
+        # Nothing to fix: leave the downloaded tarball untouched instead of repacking it.
+        print("  Already correct, no patch needed")
+        sys.exit(0)
+    fg["examples"] = [fg["examples"]]
+    with open(schema_path, "w") as f:
+        json.dump(schema, f, indent=2)
+    print("  Patched featureGates.examples")
+    # Repack the modified chart without macOS metadata (COPYFILE_DISABLE=1)
+    env = os.environ.copy()
+    env["COPYFILE_DISABLE"] = "1"
+    subprocess.run(
+        ["tar", "czf", os.path.abspath(tgz), "opentelemetry-operator"],
+        cwd=tmp, env=env, check=True
+    )
+PYEOF
+
 # Create MLRun CE tarball
 helm package .
 exit 0

From 73ba28781fa92a837b793cfc5536522502ba716a Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Sun, 5 Apr 2026 17:54:40 +0300
Subject: [PATCH 08/23] Switch OTel collector to deployment mode, drop scrapeMode

---
 charts/mlrun-ce/README.md                     | 104 ++++-----------
 .../non_admin_installation_values.yaml        |   4 +-
 charts/mlrun-ce/templates/NOTES.txt           |  41 +++---
 charts/mlrun-ce/templates/_helpers.tpl        |  37 +++---
 .../jupyter-notebook/deployment.yaml          |  19 +--
 .../opentelemetry/namespace-label.yaml        |  10 +-
 .../templates/opentelemetry/rbac.yaml         |  20 +--
 .../metadata-envoy-deployment.yaml            |   3 +
 .../deployments/metadata-grpc-deployment.yaml |   3 +
 .../deployments/metadata-writer.yaml          |   3 +
 .../ml-pipeline-persistenceagent.yaml         |   3 +
 .../ml-pipeline-scheduledworkflow.yaml        |   3 +
 .../pipelines/deployments/ml-pipeline-ui.yaml |   3 +
 .../deployments/ml-pipeline-viewer-crd.yaml   |   3 +
 .../ml-pipeline-visualizationserver.yaml      |   3 +
 .../pipelines/deployments/ml-pipeline.yaml    |   3 +
 .../pipelines/deployments/mysql.yaml          |   3 +
 .../deployments/workflow-controller.yaml      |   3 +
 .../templates/timescaledb/statefulset.yaml    |   3 +
 charts/mlrun-ce/values.yaml                   | 122 +++++++-----------
 tests/helm-template-test.sh                   |  48 ++++---
 tests/kind-test.sh                            |  14 +-
 22 files changed, 195 insertions(+), 260 deletions(-)

diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index 3a8e6157..dce3c7cd 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -107,20 +107,16 @@ helm --namespace mlrun \
     --set opentelemetry-operator.enabled=true \
     --set opentelemetry.namespaceLabel.enabled=true \
     --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.collector.scrapeMode=otel \
     --set opentelemetry.instrumentation.enabled=true \
     mlrun/mlrun-ce
 ```
 
-> **Important:** When enabling OpenTelemetry, set `opentelemetry.collector.scrapeMode=otel` to collect metrics 
-> via the OTEL sidecar and prevent duplicate metrics. The default is `direct` (for when OTEL is disabled).
-
 The installation will:
 - Deploy the OpenTelemetry Operator
-- Create an OpenTelemetryCollector CR (sidecar mode)
+- Create an OpenTelemetryCollector CR (deployment mode — one collector per namespace)
 - Create an Instrumentation CR for Python auto-instrumentation
-- Label the namespace with `opentelemetry.io/inject=enabled`
-- Configure Prometheus to scrape OTEL sidecar metrics (port 8889)
+- Label and annotate the namespace so all Python pods are auto-instrumented automatically
+- Configure Prometheus to scrape OTEL collector metrics (port 8889)
 
 #### Step 5: Verify OpenTelemetry Installation
 
@@ -140,21 +136,14 @@ kubectl -n mlrun get instrumentations
 kubectl -n mlrun get pods | grep opentelemetry
 ```
 
-#### Step 6: Verify Jupyter has OTEL Sidecar Annotations
+#### Step 6: Verify OTel Pod Labels and Namespace Annotation
 
 ```bash
-kubectl -n mlrun get deployment -l app.kubernetes.io/component=jupyter-notebook \
-    -o jsonpath='{.items[0].spec.template.metadata.annotations}' | jq .
-```
+# Check that the namespace has the instrumentation annotation (enables auto-instrumentation for all Python pods)
+kubectl get namespace mlrun -o jsonpath='{.metadata.annotations}' | jq .
 
-You should see annotations like:
-```json
-{
-  "instrumentation.opentelemetry.io/inject-python": "my-mlrun-otel-instrumentation",
-  "prometheus.io/port": "8889",
-  "prometheus.io/scrape": "true",
-  "sidecar.opentelemetry.io/inject": "my-mlrun-otel-collector"
-}
+# Check pod labels — all chart-managed pods should have mlrun.io/otel=true
+kubectl -n mlrun get pods --show-labels | grep mlrun.io/otel
 ```
 
 ### Installing MLRun-ce on minikube
@@ -185,7 +174,7 @@ Override those [in the normal methods](https://helm.sh/docs/chart_template_guide
 ### Configuring OpenTelemetry (Observability)
 
 MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces from your ML workloads. 
-The operator runs in **sidecar mode**, automatically injecting collector containers into annotated pods.
+The operator runs one collector **Deployment** per namespace. Instrumented pods send OTLP metrics to the collector, which exports them to Prometheus.
 
 > **Note:** OpenTelemetry is **disabled by default**. See below for how to enable it.
 
@@ -212,10 +201,10 @@ kubectl label namespace <your-namespace> opentelemetry.io/inject=enabled
 #### Default Configuration
 
 By default, OpenTelemetry is **disabled**. When enabled, it provides:
-- Namespace labeling for OTEL operator webhook targeting
-- Sidecar collector injection for instrumented pods
-- Python auto-instrumentation for Jupyter notebooks
-- Prometheus metrics export on port 8889
+- A single OTel Collector Deployment per namespace (OTLP receiver → Prometheus exporter on port 8889)
+- Namespace-level Python auto-instrumentation (all Python pods in the namespace are instrumented automatically)
+- `mlrun.io/otel: "true"` label on Jupyter, SeaweedFS, and Nuclio function pods
+- Prometheus scrapes the collector pod (not individual pods)
 
 #### Enabling OpenTelemetry
 
@@ -228,7 +217,6 @@ helm --namespace mlrun install my-mlrun \
     --set opentelemetry-operator.enabled=true \
     --set opentelemetry.namespaceLabel.enabled=true \
     --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.collector.scrapeMode=otel \
     --set opentelemetry.instrumentation.enabled=true \
     mlrun/mlrun-ce
 ```
@@ -240,7 +228,6 @@ helm --namespace mlrun upgrade my-mlrun \
     --set opentelemetry-operator.enabled=true \
     --set opentelemetry.namespaceLabel.enabled=true \
     --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.collector.scrapeMode=otel \
     --set opentelemetry.instrumentation.enabled=true \
     mlrun/mlrun-ce
 ```
@@ -253,13 +240,12 @@ helm --namespace mlrun upgrade my-mlrun \
     --set opentelemetry.collector.enabled=false \
     --set opentelemetry.instrumentation.enabled=false \
     --set opentelemetry.namespaceLabel.enabled=false \
-    --set opentelemetry.collector.scrapeMode=direct \
     mlrun/mlrun-ce
 ```
 
 #### Custom Resource Limits
 
-Configure collector sidecar resources:
+Configure collector resources:
 
 ```bash
 helm --namespace mlrun install my-mlrun \
@@ -282,63 +268,23 @@ helm --namespace mlrun install my-mlrun \
 
 #### Adding OpenTelemetry to Custom Workloads
 
-To instrument your own deployments with the OTEL sidecar and Python auto-instrumentation:
-
-1. Ensure your namespace has the OpenTelemetry label:
-   ```bash
-   kubectl label namespace <your-namespace> opentelemetry.io/inject=enabled
-   ```
-
-2. Add these annotations to your pod spec:
-   ```yaml
-   metadata:
-     annotations:
-       sidecar.opentelemetry.io/inject: "<release-name>-otel-collector"
-       instrumentation.opentelemetry.io/inject-python: "<release-name>-otel-instrumentation"
-       prometheus.io/scrape: "true"
-       prometheus.io/scrape-mode: "otel"
-       prometheus.io/port: "8889"
-   ```
-
-#### Preventing Prometheus/OTEL Metric Overlap
-
-To prevent duplicate metrics when using both Prometheus direct scraping and OpenTelemetry, 
-MLRun CE uses a **scrape-mode** annotation system:
-
-| Scrape Mode | Description | Use Case |
-|-------------|-------------|----------|
-| `direct` | Direct Prometheus scraping only | **Default** - When OTEL is disabled |
-| `otel` | Metrics collected via OTEL sidecar only | **Recommended when OTEL enabled** |
-| `both` | Both OTEL and direct scraping | Debugging/transition only |
-
-> **Note:** The default scrape mode is `direct`. When enabling OpenTelemetry, you must set 
-> `--set opentelemetry.collector.scrapeMode=otel` to collect metrics via the OTEL sidecar.
-
-**How it works:**
-- OTEL-collected metrics have the `mlrun_otel_` prefix and `metrics_source=otel_collector` label
-- Direct-scraped metrics have `metrics_source=direct_scrape` label
-- Prometheus scrape configs filter based on `prometheus.io/scrape-mode` annotation
+Python instrumentation is applied **namespace-wide** — any Python pod in the MLRun namespace is automatically instrumented when OTel is enabled. No per-pod annotations are required.
 
-**Configure scrape mode when enabling OTEL:**
+For pods in other namespaces, annotate the namespace directly:
 ```bash
-helm --namespace mlrun install my-mlrun \
-    --set opentelemetry-operator.enabled=true \
-    --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.collector.scrapeMode=otel \
-    --set opentelemetry.instrumentation.enabled=true \
-    mlrun/mlrun-ce
+kubectl annotate namespace <your-namespace> \
+    instrumentation.opentelemetry.io/inject-python=<release-name>-otel-instrumentation
 ```
 
-**Query metrics by source in Prometheus:**
-```promql
-# OTEL-collected metrics only
-{metrics_source="otel_collector"}
-
-# Direct-scraped metrics only  
-{metrics_source="direct_scrape"}
+The `mlrun.io/otel: "true"` label is applied to: **Jupyter**, **SeaweedFS** (master, volume, filer, s3, admin), and **Nuclio function pods** (via `functionDefaults.metadata.labels`). This label is used for Prometheus metric filtering and enrichment.
 
-# OTEL metrics use prefix
+**Query OTEL-collected metrics in Prometheus:**
+```promql
+# OTEL metrics use the mlrun_otel_ prefix
 mlrun_otel_http_server_duration_seconds_bucket{...}
+
+# Filter by source
+{metrics_source="otel_collector"}
 ```
 
 #### Split Installation (Admin/Non-Admin)
diff --git a/charts/mlrun-ce/non_admin_installation_values.yaml b/charts/mlrun-ce/non_admin_installation_values.yaml
index f7939235..2d32c68c 100644
--- a/charts/mlrun-ce/non_admin_installation_values.yaml
+++ b/charts/mlrun-ce/non_admin_installation_values.yaml
@@ -88,8 +88,8 @@ opentelemetry-operator:
   enabled: false
 
 # OpenTelemetry CRs - enabled for user namespace
-# The namespace will be labeled with opentelemetry.io/inject=enabled
-# so the operator can inject sidecars into pods
+# The namespace will be labeled and annotated for OTel deployment-mode collection
+# and namespace-wide Python auto-instrumentation.
 opentelemetry:
   namespaceLabel:
     enabled: true
diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index 005dfa1b..31d6d90d 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -134,25 +134,19 @@ OpenTelemetry Operator is enabled!
 -  Namespace selector: opentelemetry.io/inject=enabled
 {{- if .Values.opentelemetry.collector.enabled }}
 {{- "\n" }}
-OpenTelemetry Collector (sidecar mode):
--  Collector CR: {{ .Release.Name }}-otel-collector
+OpenTelemetry Collector (deployment mode):
+-  Collector CR: {{ include "mlrun-ce.otel.collector.fullname" . }}
 -  Mode: {{ .Values.opentelemetry.collector.mode }}
--  OTLP gRPC endpoint: localhost:{{ .Values.opentelemetry.collector.otlp.grpcPort }} (inside pod)
--  OTLP HTTP endpoint: localhost:{{ .Values.opentelemetry.collector.otlp.httpPort }} (inside pod)
--  Prometheus metrics port: {{ .Values.opentelemetry.collector.prometheus.port }}
--  Prometheus scrape mode: {{ .Values.opentelemetry.collector.scrapeMode }}
-{{- if eq .Values.opentelemetry.collector.scrapeMode "direct" }}
-
-⚠️  WARNING: Scrape mode is "direct" - OTEL sidecar metrics will NOT be collected!
-   To collect metrics via OTEL, reinstall with: --set opentelemetry.collector.scrapeMode=otel
-{{- end }}
+-  OTLP gRPC endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
+-  OTLP HTTP endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.httpPort }}
+-  Prometheus metrics port: {{ .Values.opentelemetry.collector.prometheus.port }} (scraped by Prometheus from the collector pod)
 {{- end }}
 {{- if .Values.opentelemetry.instrumentation.enabled }}
 {{- "\n" }}
 OpenTelemetry Auto-Instrumentation:
--  Instrumentation CR: {{ .Release.Name }}-otel-instrumentation
+-  Instrumentation CR: {{ include "mlrun-ce.otel.instrumentation.fullname" . }}
 {{- if .Values.opentelemetry.instrumentation.python.enabled }}
--  Python auto-instrumentation: enabled
+-  Python auto-instrumentation: enabled (namespace-wide via namespace annotation)
 {{- end }}
 {{- if .Values.opentelemetry.instrumentation.java.enabled }}
 -  Java auto-instrumentation: enabled
@@ -160,21 +154,16 @@ OpenTelemetry Auto-Instrumentation:
 {{- end }}
 {{- if .Values.opentelemetry.namespaceLabel.enabled }}
 {{- "\n" }}
-Namespace Label:
--  Namespace {{ .Release.Namespace }} is labeled with: {{ .Values.opentelemetry.namespaceLabel.key }}={{ .Values.opentelemetry.namespaceLabel.value }}
+Namespace OTel configuration:
+-  Label: {{ .Values.opentelemetry.namespaceLabel.key }}={{ .Values.opentelemetry.namespaceLabel.value }}
+{{- if .Values.opentelemetry.instrumentation.enabled }}
+-  Python instrumentation annotation set on namespace {{ .Release.Namespace }} (applies to all pods in it)
 {{- end }}
+{{- end }}
+{{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
 {{- "\n" }}
-Prometheus Scrape Modes:
--  "otel"   : Metrics collected via OTEL sidecar only (recommended)
--  "direct" : Direct Prometheus scraping only (current: {{ .Values.opentelemetry.collector.scrapeMode }})
--  "both"   : Both methods active (for debugging)
-{{- "\n" }}
-To add OTEL instrumentation to your pods, add these annotations:
-  sidecar.opentelemetry.io/inject: "{{ .Release.Name }}-otel-collector"
-  instrumentation.opentelemetry.io/inject-python: "{{ .Release.Name }}-otel-instrumentation"
-  prometheus.io/scrape: "true"
-  prometheus.io/scrape-mode: "otel"
-  prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
+Pods labeled with mlrun.io/otel=true: Jupyter, Kubeflow Pipelines, TimescaleDB, SeaweedFS (master/volume/filer/s3/admin), and Nuclio function pods.
+{{- end }}
 {{- end }}
 
 Happy MLOPSing!!! :]
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 3cfa5d7b..08bc2499 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -423,7 +423,7 @@ OpenTelemetry helpers
 OpenTelemetry Collector name
 */}}
 {{- define "mlrun-ce.otel.collector.name" -}}
-{{- default "otel-collector" .Values.opentelemetry.collector.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- default "otel" .Values.opentelemetry.collector.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
 {{/*
@@ -433,7 +433,7 @@ OpenTelemetry Collector fullname
 {{- if .Values.opentelemetry.collector.fullnameOverride }}
 {{- .Values.opentelemetry.collector.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
-{{- $name := default "otel-collector" .Values.opentelemetry.collector.nameOverride }}
+{{- $name := default "otel" .Values.opentelemetry.collector.nameOverride }}
 {{- if contains $name .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
@@ -526,7 +526,7 @@ spec:
         endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.prometheus.port }}
         namespace: {{ .Values.opentelemetry.collector.prometheus.namespace }}
         const_labels:
-          collector_mode: sidecar
+          collector_mode: deployment
           metrics_source: otel_collector
         resource_to_telemetry_conversion:
           enabled: true
@@ -579,6 +579,8 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
 spec:
+  exporter:
+    endpoint: http://{{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.httpPort }}
   propagators:
     {{- toYaml .Values.opentelemetry.instrumentation.propagators | nindent 4 }}
   sampler:
@@ -589,24 +591,6 @@ spec:
       valueFrom:
         fieldRef:
           fieldPath: metadata.labels['app.kubernetes.io/name']
-    - name: OTEL_RESOURCE_ATTRIBUTES
-      value: >-
-        k8s.namespace.name=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE),
-        k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME),
-        k8s.container.name=$(OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME),
-        service.namespace=$(OTEL_RESOURCE_ATTRIBUTES_NAMESPACE)
-    - name: OTEL_RESOURCE_ATTRIBUTES_NAMESPACE
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.namespace
-    - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.name
-    - name: OTEL_RESOURCE_ATTRIBUTES_CONTAINER_NAME
-      valueFrom:
-        fieldRef:
-          fieldPath: metadata.name
     - name: OTEL_METRICS_EXPORTER
       value: otlp
     - name: OTEL_TRACES_EXPORTER
@@ -624,7 +608,7 @@ spec:
       - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
         value: "false"
       - name: OTEL_PYTHON_DISABLED_INSTRUMENTATIONS
-        value: ""
+        value: "aws_lambda"
   {{- end }}
   {{- if .Values.opentelemetry.instrumentation.java.enabled }}
   java:
@@ -636,3 +620,12 @@ spec:
         value: "true"
   {{- end }}
 {{- end }}
+
+{{/*
+OTel pod label — marks a pod as OTel-monitored for metric enrichment and discovery.
+Namespace-level instrumentation annotation (set by namespace-label job) handles Python auto-instrumentation.
+Wrap usage with: {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+*/}}
+{{- define "mlrun-ce.otel.podLabels" -}}
+mlrun.io/otel: "true"
+{{- end }}
diff --git a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
index 83135a1e..08108481 100644
--- a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
+++ b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
@@ -14,22 +14,9 @@ spec:
     metadata:
       labels:
         {{- include "mlrun-ce.jupyter.selectorLabels" . | nindent 8 }}
-      {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-      annotations:
-        # OpenTelemetry sidecar injection
-        sidecar.opentelemetry.io/inject: "{{ include "mlrun-ce.otel.collector.fullname" . }}"
-        # Python auto-instrumentation injection
-        instrumentation.opentelemetry.io/inject-python: "{{ include "mlrun-ce.otel.instrumentation.fullname" . }}"
-        # Prometheus scraping configuration
-        # scrape-mode controls how metrics are collected to prevent duplicates:
-        #   "otel"   - Only OTEL sidecar metrics (recommended)
-        #   "both"   - Both OTEL and direct scraping (debugging)
-        #   "direct" - Only direct scraping (OTEL metrics ignored)
-        prometheus.io/scrape: "true"
-        prometheus.io/scrape-mode: {{ .Values.opentelemetry.collector.scrapeMode | quote }}
-        prometheus.io/port: "{{ .Values.opentelemetry.collector.prometheus.port }}"
-        prometheus.io/path: "/metrics"
-      {{- end }}
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       {{- with .Values.jupyterNotebook.image.pullSecrets }}
       imagePullSecrets:
diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
index 11040521..60cfb3b8 100644
--- a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -9,7 +9,7 @@ metadata:
   labels:
 {{ include "mlrun-ce.otel.labels" . | indent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade
     "helm.sh/hook-weight": "-10"
     "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
     "helm.sh/hook-timeout": "120s"
@@ -30,5 +30,11 @@ spec:
         - |
           echo "Labeling namespace {{ .Release.Namespace }} for OpenTelemetry..."
           kubectl label namespace {{ .Release.Namespace }} {{ .Values.opentelemetry.namespaceLabel.key }}={{ .Values.opentelemetry.namespaceLabel.value }} --overwrite
-          echo "Namespace labeled successfully!"
+          {{- if .Values.opentelemetry.instrumentation.enabled }}
+          echo "Annotating namespace for namespace-wide Python auto-instrumentation..."
+          kubectl annotate namespace {{ .Release.Namespace }} \
+            instrumentation.opentelemetry.io/inject-python={{ include "mlrun-ce.otel.instrumentation.fullname" . }} \
+            --overwrite
+          {{- end }}
+          echo "Namespace configured for OpenTelemetry successfully!"
 {{- end -}}
diff --git a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
index eb360bfd..9eec1971 100644
--- a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
@@ -18,9 +18,9 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
     "helm.sh/hook-weight": "-20"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-delete-policy": before-hook-creation
 ---
 # Role for OpenTelemetry collector to access Kubernetes resources
 apiVersion: rbac.authorization.k8s.io/v1
@@ -76,9 +76,9 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
     "helm.sh/hook-weight": "-20"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-delete-policy": before-hook-creation
 rules:
   # Allow reading CRDs to check availability (CRDs are cluster-scoped)
   - apiGroups:
@@ -106,9 +106,9 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
     "helm.sh/hook-weight": "-20"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-delete-policy": before-hook-creation
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
@@ -127,9 +127,9 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
     "helm.sh/hook-weight": "-20"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-delete-policy": before-hook-creation
 rules:
   # Allow creating/updating OpenTelemetry CRs
   - apiGroups:
@@ -153,9 +153,9 @@ metadata:
   labels:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
   annotations:
-    "helm.sh/hook": post-install,post-upgrade
+    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
     "helm.sh/hook-weight": "-20"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+    "helm.sh/hook-delete-policy": before-hook-creation
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: Role
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
index 0801bac9..8a702d9c 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         application-crd-id: kubeflow-pipelines
         component: metadata-envoy
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - image: {{ .Values.pipelines.images.metadataEnvoy.repository }}:{{ .Values.pipelines.images.metadataEnvoy.tag }}
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
index 00e7fb9a..f3fae663 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
@@ -25,6 +25,9 @@ spec:
       labels:
         application-crd-id: kubeflow-pipelines
         component: metadata-grpc-server
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - args:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
index d2800d1e..04f68b05 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
@@ -25,6 +25,9 @@ spec:
       labels:
         app: metadata-writer
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
index 5dbd6604..04af9784 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline-persistenceagent
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
index c27442ad..a3634401 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline-scheduledworkflow
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
index e8cac85f..459223e8 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline-ui
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
index 89f25c23..e34dfb52 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline-viewer-crd
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
index 6db618a7..b6f79527 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline-visualizationserver
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - image: {{ .Values.pipelines.images.visualizationServer.repository }}:{{ .Values.pipelines.images.visualizationServer.tag }}
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
index 42ece191..4a4a1a00 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
@@ -27,6 +27,9 @@ spec:
       labels:
         app: ml-pipeline
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
         - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml b/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
index 40791425..db7d4893 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
@@ -22,6 +22,9 @@ spec:
       labels:
         app: mysql
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       {{- if .Values.pipelines.db.securityContext }}
       securityContext:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml b/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
index 83be1799..9ec903e1 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
@@ -24,6 +24,9 @@ spec:
       labels:
         app: workflow-controller
         application-crd-id: kubeflow-pipelines
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       containers:
       - args:
diff --git a/charts/mlrun-ce/templates/timescaledb/statefulset.yaml b/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
index 79f5a7f8..7001a93e 100644
--- a/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
+++ b/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
@@ -15,6 +15,9 @@ spec:
     metadata:
       labels:
         {{- include "mlrun-ce.timescaledb.selectorLabels" . | nindent 8 }}
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
+        {{- end }}
     spec:
       {{- with .Values.timescaledb.nodeSelector }}
       nodeSelector:
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index d4d2c638..583bca11 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -94,6 +94,13 @@ nuclio:
       kind: mlrun
       synchronizationInterval: 10m
       apiAddress: http://mlrun-api-chief:8080/api
+    # Default labels applied to all Nuclio function pods.
+    # mlrun.io/otel marks function pods for OTel metric enrichment; the namespace-level
+    # instrumentation annotation handles Python auto-instrumentation automatically.
+    functionDefaults:
+      metadata:
+        labels:
+          mlrun.io/otel: "true"
 
 mlrun:
   # set the type of filesystem to use: filesystem, s3
@@ -173,6 +180,13 @@ mlrun:
           name: mlrun-override-env
           optional: true
     extraPersistentVolumeMounts: ~
+    # Explicitly expose the Docker image's PYTHONPATH as a K8s env var so that the
+    # OpenTelemetry operator's PYTHONPATH injection (PYTHONPATH=/otel-auto-instrumentation-python:$(PYTHONPATH))
+    # can expand $(PYTHONPATH) correctly. Without this, K8s env var substitution resolves
+    # $(PYTHONPATH) to an empty string (it cannot see Docker image ENV vars), and the
+    # mlrun `services` package path is lost, crashing the API on startup.
+    extraEnvKeyValue:
+      PYTHONPATH: "/mlrun/server/py:/mlrun/server/py/schemas/proto"
 
     # Set mlrun api workers count by setting the minReplicas value.
     # This is recommended for production environments running at high scale.
@@ -323,6 +337,8 @@ seaweedfs:
 
   # Master server - metadata management
   master:
+    podLabels:
+      mlrun.io/otel: "true"
     port: 9333
     # Storage: use PVC instead of default hostPath
     data:
@@ -336,6 +352,8 @@ seaweedfs:
 
   # Volume server - actual data storage
   volume:
+    podLabels:
+      mlrun.io/otel: "true"
     port: 8080
     # Storage: use PVC instead of default hostPath
     dataDirs:
@@ -351,6 +369,8 @@ seaweedfs:
 
   # Filer server - file system interface
   filer:
+    podLabels:
+      mlrun.io/otel: "true"
     port: 8888
     # Storage: use PVC instead of default hostPath
     data:
@@ -368,6 +388,8 @@ seaweedfs:
 
   # S3 API gateway - MLRun connects to this endpoint
   s3:
+    podLabels:
+      mlrun.io/otel: "true"
     enabled: true  # Default is false
     port: 8333
     enableAuth: true  # Default is false
@@ -381,6 +403,8 @@ seaweedfs:
 
   # Admin server - user and policy management UI
   admin:
+    podLabels:
+      mlrun.io/otel: "true"
     enabled: true  # Default is false
     port: 23646
     secret:
@@ -570,30 +594,30 @@ kube-prometheus-stack:
       type: NodePort
       nodePort: 30020
     prometheusSpec:
-      # Additional scrape configs for OpenTelemetry collector sidecars
-      # This creates clear separation between direct scraping and OTEL-collected metrics
+      # Additional scrape configs for OpenTelemetry collector Deployment.
+      # In deployment mode, one collector pod runs per namespace and receives OTLP from all
+      # instrumented pods. Prometheus scrapes only the collector (port 8889), not individual pods.
       additionalScrapeConfigs:
-        # Job for scraping OTEL collector sidecars (metrics on port 8889)
-        # Discovers any pod with sidecar.opentelemetry.io/inject annotation — no per-function
-        # Prometheus annotations required. Port 8889 is the standard OTel prometheus exporter port.
-        - job_name: 'otel-collector-sidecars'
+        # Scrape the OTel Collector Deployment pod.
+        # Discovers pods with app.kubernetes.io/component=opentelemetry-collector label
+        # (applied automatically by the OTel operator to collector pods).
+        - job_name: 'otel-collector'
           kubernetes_sd_configs:
             - role: pod
           relabel_configs:
-            # Only scrape pods that have the OTel sidecar injected
-            - source_labels: [__meta_kubernetes_pod_annotation_sidecar_opentelemetry_io_inject]
+            # Only scrape the OTel collector pod
+            - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
               action: keep
-              regex: .+
-            # Use port 8889 (OTel prometheus exporter) regardless of what the pod exposes
+              regex: opentelemetry-collector
+            # Use port 8889 (OTel prometheus exporter)
             - source_labels: [__address__]
               action: replace
               regex: ([^:]+)(?::\d+)?
               replacement: $1:8889
               target_label: __address__
-            # Metrics path is always /metrics for the OTel prometheus exporter
             - target_label: __metrics_path__
               replacement: /metrics
-            # Add kubernetes labels
+            # Propagate pod labels as metric labels
             - action: labelmap
               regex: __meta_kubernetes_pod_label_(.+)
             - source_labels: [__meta_kubernetes_namespace]
@@ -602,62 +626,10 @@ kube-prometheus-stack:
             - source_labels: [__meta_kubernetes_pod_name]
               action: replace
               target_label: kubernetes_pod_name
-          # Add metric relabeling to identify OTEL-sourced metrics
           metric_relabel_configs:
             - action: replace
               target_label: metrics_source
               replacement: otel_collector
-
-        # Job for direct application scraping (non-OTEL)
-        # Only scrapes pods with prometheus.io/scrape-mode: "direct" or "both"
-        # Excludes OTEL sidecar port (8889)
-        - job_name: 'kubernetes-pods-direct'
-          kubernetes_sd_configs:
-            - role: pod
-          relabel_configs:
-            # Only scrape pods with prometheus.io/scrape=true
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
-              action: keep
-              regex: true
-            # Exclude OTEL sidecar port (8889) - those are handled by otel-collector-sidecars job
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port]
-              action: drop
-              regex: "8889"
-            # Only scrape if scrape-mode is "direct" or "both" or not set (default to direct)
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_mode]
-              action: keep
-              regex: (direct|both|)
-            # Set metrics path
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
-              action: replace
-              target_label: __metrics_path__
-              regex: (.+)
-              replacement: $1
-            - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
-              action: replace
-              target_label: __metrics_path__
-              regex: ()
-              replacement: /metrics
-            # Set target address with port
-            - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
-              action: replace
-              regex: ([^:]+)(?::\d+)?;(\d+)
-              replacement: $1:$2
-              target_label: __address__
-            # Add kubernetes labels
-            - action: labelmap
-              regex: __meta_kubernetes_pod_label_(.+)
-            - source_labels: [__meta_kubernetes_namespace]
-              action: replace
-              target_label: kubernetes_namespace
-            - source_labels: [__meta_kubernetes_pod_name]
-              action: replace
-              target_label: kubernetes_pod_name
-          # Add metric relabeling to identify direct-scraped metrics
-          metric_relabel_configs:
-            - action: replace
-              target_label: metrics_source
-              replacement: direct_scrape
   kube-state-metrics:
     fullnameOverride: state-metrics
 prometheus-node-exporter:
@@ -846,22 +818,23 @@ opentelemetry-operator:
 # These are managed separately from the operator for admin/non-admin split
 # =============================================================================
 opentelemetry:
-  # Namespace label for enabling OpenTelemetry monitoring
-  # The namespace must have this label for the operator to inject sidecars
+  # Namespace label for enabling OpenTelemetry monitoring.
+  # When enabled, a pre-install/pre-upgrade hook Job labels the namespace; if instrumentation is also
+  # enabled, it annotates the namespace with instrumentation.opentelemetry.io/inject-python as well.
   namespaceLabel:
     enabled: false
     key: "opentelemetry.io/inject"
     value: "enabled"
 
-  # OpenTelemetry Collector configuration (SIDECAR mode)
-  # In sidecar mode, the collector is injected into pods via webhook
-  # Not as a standalone deployment
+  # OpenTelemetry Collector configuration (DEPLOYMENT mode)
+  # A single collector Deployment runs per namespace, receiving OTLP from instrumented pods
+  # and exporting metrics to Prometheus.
   collector:
     enabled: false
     nameOverride: ""
     fullnameOverride: ""
-    # SIDECAR mode - collector is injected into pods by the operator
-    mode: sidecar
+    # DEPLOYMENT mode - one collector pod per namespace, not injected as a sidecar
+    mode: deployment
     # Collector sidecar container resources
     resources:
       requests:
@@ -880,13 +853,6 @@ opentelemetry:
     otlp:
       grpcPort: 4317
       httpPort: 4318
-    # Prometheus scrape mode for OTEL-instrumented pods
-    # Options:
-    #   - "direct": Direct Prometheus scraping only (default when OTEL disabled)
-    #   - "otel": Metrics collected via OTEL sidecar only (recommended when OTEL enabled)
-    #   - "both": Both OTEL sidecar and direct scraping (for debugging/transition)
-    # When enabling OTEL, set this to "otel" to prevent duplicate metrics
-    scrapeMode: "direct"
 
   # Instrumentation configuration for auto-instrumentation
   instrumentation:
diff --git a/tests/helm-template-test.sh b/tests/helm-template-test.sh
index 6acd789f..ccdef572 100755
--- a/tests/helm-template-test.sh
+++ b/tests/helm-template-test.sh
@@ -133,7 +133,7 @@ test_otel_collector_default() {
     assert_renders "$output" "CRD Readiness Job renders"
     assert_contains "$output" "kind: Job" "Has correct kind"
     assert_contains "$output" "kind: OpenTelemetryCollector" "Job contains OpenTelemetryCollector CR"
-    assert_contains "$output" "mode: sidecar" "Uses sidecar mode"
+    assert_contains "$output" "mode: deployment" "Uses deployment mode"
     assert_contains "$output" "prometheus:" "Has Prometheus exporter"
     assert_contains "$output" "endpoint: 0.0.0.0:8889" "Prometheus on port 8889"
     assert_contains "$output" "otlp:" "Has OTLP receiver"
@@ -231,8 +231,8 @@ test_otel_rbac_disabled() {
         "RBAC does not render when OTEL disabled (default)"
 }
 
-test_jupyter_otel_annotations() {
-    log_test "Jupyter Deployment - OTEL annotations when enabled"
+test_jupyter_otel_labels() {
+    log_test "Jupyter Deployment - OTel label applied when enabled"
 
     local output
     output=$(render_template "templates/jupyter-notebook/deployment.yaml" \
@@ -240,22 +240,19 @@ test_jupyter_otel_annotations() {
         --set opentelemetry.collector.enabled=true \
         --set opentelemetry.instrumentation.enabled=true)
 
-    assert_contains "$output" "sidecar.opentelemetry.io/inject:" "Has sidecar injection annotation"
-    assert_contains "$output" "instrumentation.opentelemetry.io/inject-python:" "Has Python instrumentation annotation"
-    assert_contains "$output" 'prometheus.io/scrape: "true"' "Has Prometheus scrape annotation"
-    assert_contains "$output" 'prometheus.io/scrape-mode:' "Has Prometheus scrape-mode annotation"
-    assert_contains "$output" 'prometheus.io/port: "8889"' "Has Prometheus port annotation"
+    assert_contains "$output" 'mlrun.io/otel: "true"' "Has OTel pod label"
+    assert_not_contains "$output" "sidecar.opentelemetry.io/inject:" "No sidecar annotation (deployment mode)"
+    assert_not_contains "$output" "prometheus.io/scrape:" "No per-pod Prometheus annotation (collector scrapes)"
 }
 
-test_jupyter_no_otel_annotations_when_disabled() {
-    log_test "Jupyter Deployment - No OTEL annotations when disabled (default)"
+test_jupyter_no_otel_label_when_disabled() {
+    log_test "Jupyter Deployment - No OTel label when disabled (default)"
 
     local output
     output=$(render_template "templates/jupyter-notebook/deployment.yaml" \
         --set global.registry.url=test.io)
 
-    assert_not_contains "$output" "sidecar.opentelemetry.io/inject:" "No sidecar injection when disabled (default)"
-    assert_not_contains "$output" "instrumentation.opentelemetry.io/inject-python:" "No instrumentation when disabled (default)"
+    assert_not_contains "$output" 'mlrun.io/otel' "No OTel label when disabled (default)"
 }
 
 # ============================================================================
@@ -300,6 +297,20 @@ test_namespace_label_enabled() {
     assert_contains "$output" "opentelemetry.io/inject" "Has OTEL inject label key"
 }
 
+test_namespace_label_with_instrumentation_annotation() {
+    log_test "Namespace Label - Instrumentation annotation added when instrumentation enabled"
+
+    local output
+    output=$(render_template "templates/opentelemetry/namespace-label.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.namespaceLabel.enabled=true \
+        --set opentelemetry.collector.enabled=true \
+        --set opentelemetry.instrumentation.enabled=true)
+
+    assert_contains "$output" "kubectl annotate namespace" "Has kubectl annotate command"
+    assert_contains "$output" "instrumentation.opentelemetry.io/inject-python" "Has Python instrumentation namespace annotation"
+}
+
 test_namespace_label_disabled() {
     log_test "Namespace Label - Disabled (default)"
 
@@ -362,16 +373,16 @@ test_prometheus_otel_scrape_config() {
         local decoded
         decoded=$(echo "$secret_data" | base64 -d 2>/dev/null || true)
 
-        if echo "$decoded" | grep -q "otel-collector-sidecars"; then
+        if echo "$decoded" | grep -q "otel-collector"; then
             log_pass "Has OTEL collector scrape job"
         else
             log_fail "Has OTEL collector scrape job - not found in decoded config"
         fi
 
-        if echo "$decoded" | grep -q "prometheus_io_port"; then
-            log_pass "Has pod annotation relabeling"
+        if echo "$decoded" | grep -q "opentelemetry-collector"; then
+            log_pass "Has collector pod label selector"
         else
-            log_fail "Has pod annotation relabeling - not found in decoded config"
+            log_fail "Has collector pod label selector - not found in decoded config"
         fi
     else
         log_fail "Prometheus scrape config secret not found"
@@ -435,8 +446,8 @@ main() {
     echo "========================================"
     echo "Jupyter OTEL Integration Tests"
     echo "========================================"
-    test_jupyter_otel_annotations
-    test_jupyter_no_otel_annotations_when_disabled
+    test_jupyter_otel_labels
+    test_jupyter_no_otel_label_when_disabled
 
     echo ""
     echo "========================================"
@@ -453,6 +464,7 @@ main() {
     test_namespace_label_disabled
     test_admin_namespace_label_disabled
     test_non_admin_namespace_label_enabled
+    test_namespace_label_with_instrumentation_annotation
     test_otel_operator_namespace_selector
 
     echo ""
diff --git a/tests/kind-test.sh b/tests/kind-test.sh
index 5ef2b37a..f5ee4333 100755
--- a/tests/kind-test.sh
+++ b/tests/kind-test.sh
@@ -832,15 +832,15 @@ verify_multi_ns() {
         log_warn "Instrumentation CRD not found - operator may not be installed"
     fi
 
-    # Check if Jupyter pod has OTEL sidecar annotations
+    # Check if Jupyter pod has mlrun.io/otel label (deployment mode - no sidecar injection)
     echo ""
-    log_info "Checking Jupyter deployment for OTEL annotations..."
-    local jupyter_annotations
-    jupyter_annotations=$(kubectl get deployment -n "${NAMESPACE}" -l app.kubernetes.io/component=jupyter-notebook -o jsonpath='{.items[0].spec.template.metadata.annotations}' 2>/dev/null || echo "")
-    if echo "${jupyter_annotations}" | grep -q "sidecar.opentelemetry.io/inject"; then
-        log_info "Jupyter has OTEL sidecar injection annotation"
+    log_info "Checking Jupyter deployment for OTEL pod label..."
+    local jupyter_labels
+    jupyter_labels=$(kubectl get deployment -n "${NAMESPACE}" -l app.kubernetes.io/component=jupyter-notebook -o jsonpath='{.items[0].spec.template.metadata.labels}' 2>/dev/null || echo "")
+    if echo "${jupyter_labels}" | grep -q "mlrun.io/otel"; then
+        log_info "Jupyter has mlrun.io/otel=true pod label (deployment mode)"
     else
-        log_warn "Jupyter does not have OTEL sidecar injection annotation"
+        log_warn "Jupyter does not have mlrun.io/otel label (OTel may be disabled)"
     fi
 }
 

From d71d89310d10ed3c9a1801ed978cb83b96171349 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Sun, 5 Apr 2026 18:06:53 +0300
Subject: [PATCH 09/23] bump chart version

---
 charts/mlrun-ce/Chart.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index d433a0ae..7095d780 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.11.0-rc.28
+version: 0.11.0-rc.29
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png

From 8b0be8481062faf0af3ec9c28e7517ef9cbcd465 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Sun, 5 Apr 2026 18:09:24 +0300
Subject: [PATCH 10/23] bump chart version

---
 charts/mlrun-ce/Chart.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index 7095d780..1a52ad3f 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.11.0-rc.29
+version: 0.11.0-rc.30
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png

From aedbbcfb241ed64d122ba1db95c2b9b27682480e Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Sun, 5 Apr 2026 18:18:00 +0300
Subject: [PATCH 11/23] fix lint

---
 charts/mlrun-ce/values.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 8d9041e7..8bb76629 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -893,4 +893,3 @@ opentelemetry:
         limits:
           cpu: 500m
           memory: 512Mi
-

From 3cc4d4694c64dab0874d900ac7762faa650c36f1 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Thu, 9 Apr 2026 10:46:30 +0300
Subject: [PATCH 12/23] documentation fixes

---
 charts/mlrun-ce/README.md           | 82 -----------------------------
 charts/mlrun-ce/templates/NOTES.txt |  1 -
 2 files changed, 83 deletions(-)

diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index dce3c7cd..7bfbf44b 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -37,7 +37,6 @@ kubectl create namespace mlrun
 Add the mlrun ce helm chart repo
 ```bash
 helm repo add mlrun https://mlrun.github.io/ce
-helm repo update
 ```
 
 To work with the open source MLRun stack, you must an accessible docker-registry. The registry's URL and credentials
@@ -206,87 +205,6 @@ By default, OpenTelemetry is **disabled**. When enabled, it provides:
 - `mlrun.io/otel: "true"` label on Jupyter, SeaweedFS, and Nuclio function pods
 - Prometheus scrapes the collector pod (not individual pods)
 
-#### Enabling OpenTelemetry
-
-To install **with** OpenTelemetry enabled:
-
-```bash
-helm --namespace mlrun install my-mlrun \
-    --set global.registry.url=<registry-url> \
-    --set global.registry.secretName=registry-credentials \
-    --set opentelemetry-operator.enabled=true \
-    --set opentelemetry.namespaceLabel.enabled=true \
-    --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.instrumentation.enabled=true \
-    mlrun/mlrun-ce
-```
-
-To **enable** OpenTelemetry on an existing installation:
-
-```bash
-helm --namespace mlrun upgrade my-mlrun \
-    --set opentelemetry-operator.enabled=true \
-    --set opentelemetry.namespaceLabel.enabled=true \
-    --set opentelemetry.collector.enabled=true \
-    --set opentelemetry.instrumentation.enabled=true \
-    mlrun/mlrun-ce
-```
-
-To **disable** OpenTelemetry (default):
-
-```bash
-helm --namespace mlrun upgrade my-mlrun \
-    --set opentelemetry-operator.enabled=false \
-    --set opentelemetry.collector.enabled=false \
-    --set opentelemetry.instrumentation.enabled=false \
-    --set opentelemetry.namespaceLabel.enabled=false \
-    mlrun/mlrun-ce
-```
-
-#### Custom Resource Limits
-
-Configure collector resources:
-
-```bash
-helm --namespace mlrun install my-mlrun \
-    --set opentelemetry.collector.resources.requests.cpu=100m \
-    --set opentelemetry.collector.resources.requests.memory=128Mi \
-    --set opentelemetry.collector.resources.limits.cpu=500m \
-    --set opentelemetry.collector.resources.limits.memory=512Mi \
-    mlrun/mlrun-ce
-```
-
-#### Enabling Java Auto-Instrumentation
-
-To enable Java auto-instrumentation (disabled by default):
-
-```bash
-helm --namespace mlrun install my-mlrun \
-    --set opentelemetry.instrumentation.java.enabled=true \
-    mlrun/mlrun-ce
-```
-
-#### Adding OpenTelemetry to Custom Workloads
-
-Python instrumentation is applied **namespace-wide** — any Python pod in the MLRun namespace is automatically instrumented when OTel is enabled. No per-pod annotations are required.
-
-For pods in other namespaces, annotate the namespace directly:
-```bash
-kubectl annotate namespace <your-namespace> \
-    instrumentation.opentelemetry.io/inject-python=<release-name>-otel-instrumentation
-```
-
-The `mlrun.io/otel: "true"` label is applied to: **Jupyter**, **SeaweedFS** (master, volume, filer, s3, admin), and **Nuclio function pods** (via `functionDefaults.metadata.labels`). This label is used for Prometheus metric filtering and enrichment.
-
-**Query OTEL-collected metrics in Prometheus:**
-```promql
-# OTEL metrics use the mlrun_otel_ prefix
-mlrun_otel_http_server_duration_seconds_bucket{...}
-
-# Filter by source
-{metrics_source="otel_collector"}
-```
-
 #### Split Installation (Admin/Non-Admin)
 
 For multi-tenant clusters, install the operator CRDs at the cluster level and collectors in user namespaces:
diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index 31d6d90d..bd7997ee 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -136,7 +136,6 @@ OpenTelemetry Operator is enabled!
 {{- "\n" }}
 OpenTelemetry Collector (deployment mode):
 -  Collector CR: {{ include "mlrun-ce.otel.collector.fullname" . }}
--  Mode: {{ .Values.opentelemetry.collector.mode }}
 -  OTLP gRPC endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
 -  OTLP HTTP endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.httpPort }}
 -  Prometheus metrics port: {{ .Values.opentelemetry.collector.prometheus.port }} (scraped by Prometheus from the collector pod)

From f7cca6a93775d3c14916c6ad74756a8110adbe78 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Thu, 9 Apr 2026 17:39:48 +0300
Subject: [PATCH 13/23] Trim OTel README, add rollout RBAC, unlabel pipelines pods

---
 charts/mlrun-ce/README.md                     | 101 +++---------------
 charts/mlrun-ce/templates/NOTES.txt           |   2 +-
 .../templates/opentelemetry/rbac.yaml         |  10 ++
 .../metadata-envoy-deployment.yaml            |   3 -
 .../deployments/metadata-grpc-deployment.yaml |   3 -
 .../deployments/metadata-writer.yaml          |   3 -
 .../ml-pipeline-persistenceagent.yaml         |   3 -
 .../ml-pipeline-scheduledworkflow.yaml        |   3 -
 .../pipelines/deployments/ml-pipeline-ui.yaml |   3 -
 .../deployments/ml-pipeline-viewer-crd.yaml   |   3 -
 .../ml-pipeline-visualizationserver.yaml      |   3 -
 .../pipelines/deployments/ml-pipeline.yaml    |   3 -
 .../pipelines/deployments/mysql.yaml          |   3 -
 .../deployments/workflow-controller.yaml      |   3 -
 14 files changed, 25 insertions(+), 121 deletions(-)

diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index 7bfbf44b..69444ac7 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -65,42 +65,16 @@ helm --namespace mlrun \
     mlrun/mlrun-ce
 ```
 
-### Complete Installation with OpenTelemetry (From Scratch)
+### Installing with OpenTelemetry Enabled
 
-This section provides a complete step-by-step guide to install MLRun CE with full OpenTelemetry observability enabled.
+> **Note:** OpenTelemetry is **disabled by default**. Follow the standard [Installing the Chart](#installing-the-chart) steps, adding the OTel flags below.
 
-> **Note:** OpenTelemetry is **disabled by default**. Follow these steps to enable it.
-
-#### Step 1: Create the namespace
-
-```bash
-kubectl create namespace mlrun
-```
-
-#### Step 2: Add the Helm repository
-
-```bash
-helm repo add mlrun https://mlrun.github.io/ce
-helm repo update
-```
-
-#### Step 3: Create the docker registry secret
-
-```bash
-kubectl --namespace mlrun create secret docker-registry registry-credentials \
-    --docker-username <registry-username> \
-    --docker-password <login-password> \
-    --docker-server <server URL, e.g. https://index.docker.io/v1/> \
-    --docker-email <user-email>
-```
-
-#### Step 4: Install MLRun CE with OpenTelemetry Enabled
+To install with OpenTelemetry enabled, run the standard install command with the OTel flags added:
 
 ```bash
 helm --namespace mlrun \
     install my-mlrun \
     --wait \
-    --timeout 15m \
     --set global.registry.url=<registry URL e.g. index.docker.io/iguazio> \
     --set global.registry.secretName=registry-credentials \
     --set opentelemetry-operator.enabled=true \
@@ -110,41 +84,14 @@ helm --namespace mlrun \
     mlrun/mlrun-ce
 ```
 
-The installation will:
-- Deploy the OpenTelemetry Operator
-- Create an OpenTelemetryCollector CR (deployment mode — one collector per namespace)
-- Create an Instrumentation CR for Python auto-instrumentation
-- Label and annotate the namespace so all Python pods are auto-instrumented automatically
-- Configure Prometheus to scrape OTEL collector metrics (port 8889)
-
-#### Step 5: Verify OpenTelemetry Installation
-
-Check that the OpenTelemetry resources are created:
+To verify the OpenTelemetry resources were created:
 
 ```bash
-# Check the namespace label
-kubectl get namespace mlrun --show-labels | grep opentelemetry
-
-# Check the OpenTelemetry Collector CR
 kubectl -n mlrun get opentelemetrycollectors
-
-# Check the Instrumentation CR
 kubectl -n mlrun get instrumentations
-
-# Check that the OTEL operator is running
 kubectl -n mlrun get pods | grep opentelemetry
 ```
 
-#### Step 6: Verify OTel Pod Labels and Namespace Annotation
-
-```bash
-# Check that the namespace has the instrumentation annotation (enables auto-instrumentation for all Python pods)
-kubectl get namespace mlrun -o jsonpath='{.metadata.annotations}' | jq .
-
-# Check pod labels — all chart-managed pods should have mlrun.io/otel=true
-kubectl -n mlrun get pods --show-labels | grep mlrun.io/otel
-```
-
 ### Installing MLRun-ce on minikube
 
 The Open source MLRun ce uses node ports for simplicity. If your kubernetes cluster is running inside a VM, 
@@ -172,46 +119,27 @@ Override those [in the normal methods](https://helm.sh/docs/chart_template_guide
 
 ### Configuring OpenTelemetry (Observability)
 
-MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces from your ML workloads. 
-The operator runs one collector **Deployment** per namespace. Instrumented pods send OTLP metrics to the collector, which exports them to Prometheus.
-
-> **Note:** OpenTelemetry is **disabled by default**. See below for how to enable it.
+MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces. When enabled, it deploys a single collector per namespace (deployment mode) — instrumented pods send OTLP data to the collector, which exports metrics to Prometheus on port 8889. All Python pods in the namespace are auto-instrumented, and the `mlrun.io/otel: "true"` label is applied to Jupyter, SeaweedFS, TimescaleDB, and Nuclio function pods for metric enrichment.
 
-#### Namespace Labeling
+For a fresh install with OTel, see [Installing with OpenTelemetry Enabled](#installing-with-opentelemetry-enabled).
 
-The OpenTelemetry Operator **only monitors namespaces** with the label `opentelemetry.io/inject=enabled`.
-This is automatically applied to the MLRun namespace when OpenTelemetry is enabled.
+To enable OTel on an existing installation:
 
-When enabling OpenTelemetry, the namespace is labeled automatically:
-```yaml
-# Automatically added to your namespace when opentelemetry.namespaceLabel.enabled=true
-labels:
-  opentelemetry.io/inject: "enabled"
-```
-
-For custom namespaces that need OpenTelemetry instrumentation, add the label manually:
 ```bash
-kubectl label namespace <your-namespace> opentelemetry.io/inject=enabled
+helm --namespace mlrun upgrade my-mlrun \
+    --set opentelemetry-operator.enabled=true \
+    --set opentelemetry.namespaceLabel.enabled=true \
+    --set opentelemetry.collector.enabled=true \
+    --set opentelemetry.instrumentation.enabled=true \
+    mlrun/mlrun-ce
 ```
 
-> **Note:** The controller namespace (where the operator runs) does **NOT** need this label,
-> as only the operator itself runs there - no workloads require instrumentation.
-
-#### Default Configuration
-
-By default, OpenTelemetry is **disabled**. When enabled, it provides:
-- A single OTel Collector Deployment per namespace (OTLP receiver → Prometheus exporter on port 8889)
-- Namespace-level Python auto-instrumentation (all Python pods in the namespace are instrumented automatically)
-- `mlrun.io/otel: "true"` label on Jupyter, SeaweedFS, and Nuclio function pods
-- Prometheus scrapes the collector pod (not individual pods)
-
 #### Split Installation (Admin/Non-Admin)
 
-For multi-tenant clusters, install the operator CRDs at the cluster level and collectors in user namespaces:
+For multi-tenant clusters, install the operator at the cluster level and the collector CRs in each user namespace:
 
 **Controller namespace (admin):**
 ```bash
-# Operator only - no namespace label needed (no instrumented workloads here)
 helm --namespace controller install mlrun-controller \
     -f admin_installation_values.yaml \
     mlrun/mlrun-ce
@@ -219,7 +147,6 @@ helm --namespace controller install mlrun-controller \
 
 **User namespace (non-admin):**
 ```bash
-# Collector CRs + namespace label applied automatically
 helm --namespace mlrun install my-mlrun \
     -f non_admin_installation_values.yaml \
     mlrun/mlrun-ce
diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index bd7997ee..2e7c9598 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -161,7 +161,7 @@ Namespace OTel configuration:
 {{- end }}
 {{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
 {{- "\n" }}
-Pods labeled with mlrun.io/otel=true: Jupyter, SeaweedFS (master/volume/filer/s3/admin), and Nuclio function pods.
+Pods labeled with mlrun.io/otel=true: Jupyter, SeaweedFS (master/volume/filer/s3/admin), TimescaleDB, and Nuclio function pods (via functionDefaults).
 {{- end }}
 {{- end }}
 
diff --git a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
index 9eec1971..fd978318 100644
--- a/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/rbac.yaml
@@ -143,6 +143,17 @@ rules:
       - patch
       - update
       - list
+  # Allow rollout status (needs watch) and rollout restart (needs patch) once the webhook is ready
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+      - statefulsets
+    verbs:
+      - get
+      - list
+      - watch
+      - patch
 ---
 # RoleBinding for the CR creator job
 apiVersion: rbac.authorization.k8s.io/v1
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
index 8a702d9c..0801bac9 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-envoy-deployment.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         application-crd-id: kubeflow-pipelines
         component: metadata-envoy
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - image: {{ .Values.pipelines.images.metadataEnvoy.repository }}:{{ .Values.pipelines.images.metadataEnvoy.tag }}
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
index f3fae663..00e7fb9a 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-grpc-deployment.yaml
@@ -25,9 +25,6 @@ spec:
       labels:
         application-crd-id: kubeflow-pipelines
         component: metadata-grpc-server
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - args:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml b/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
index 04f68b05..d2800d1e 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/metadata-writer.yaml
@@ -25,9 +25,6 @@ spec:
       labels:
         app: metadata-writer
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
index 04af9784..5dbd6604 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-persistenceagent.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline-persistenceagent
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
index a3634401..c27442ad 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-scheduledworkflow.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline-scheduledworkflow
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
index 459223e8..e8cac85f 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-ui.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline-ui
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
index e34dfb52..89f25c23 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-viewer-crd.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline-viewer-crd
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
index b6f79527..6db618a7 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline-visualizationserver.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline-visualizationserver
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - image: {{ .Values.pipelines.images.visualizationServer.repository }}:{{ .Values.pipelines.images.visualizationServer.tag }}
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
index 4a4a1a00..42ece191 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/ml-pipeline.yaml
@@ -27,9 +27,6 @@ spec:
       labels:
         app: ml-pipeline
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
         - env:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml b/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
index db7d4893..40791425 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/mysql.yaml
@@ -22,9 +22,6 @@ spec:
       labels:
         app: mysql
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       {{- if .Values.pipelines.db.securityContext }}
       securityContext:
diff --git a/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml b/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
index 9ec903e1..83be1799 100644
--- a/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
+++ b/charts/mlrun-ce/templates/pipelines/deployments/workflow-controller.yaml
@@ -24,9 +24,6 @@ spec:
       labels:
         app: workflow-controller
         application-crd-id: kubeflow-pipelines
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       containers:
       - args:

From 6f6193c54c97c517a9afc714440f561e94533e5b Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Thu, 9 Apr 2026 17:39:54 +0300
Subject: [PATCH 14/23] Restart OTel-labeled workloads once operator is ready

---
 .../opentelemetry/crd-readiness-job.yaml        | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index 2a731da3..94ce109c 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -79,5 +79,22 @@ spec:
               {{- end }}
               
               echo "All OpenTelemetry CRs have been created successfully!"
+
+              # Wait for the operator rollout (webhook readiness); POSIX redirect so plain sh cannot background it
+              echo "Waiting for OpenTelemetry operator webhook to be ready..."
+              until kubectl -n {{ .Release.Namespace }} rollout status deployment \
+                -l app.kubernetes.io/name=opentelemetry-operator --timeout=60s >/dev/null 2>&1; do
+                sleep 5
+              done
+
+              # Restart workloads selected by mlrun.io/otel=true (-l matches the Deployment/
+              # StatefulSet object's own labels) so new pods pass through the webhook and get
+              # OTel auto-instrumentation; covers pods that started before the webhook was ready.
+              echo "Restarting instrumented deployments and statefulsets..."
+              kubectl -n {{ .Release.Namespace }} rollout restart deployment \
+                -l mlrun.io/otel=true 2>/dev/null || true
+              kubectl -n {{ .Release.Namespace }} rollout restart statefulset \
+                -l mlrun.io/otel=true 2>/dev/null || true
+              echo "Rollout restarts triggered — pods will be re-created with OTel injection."
 {{- end }}
 

From 4f56e6d95f396990c3a1ac43a7e18550ea497e7d Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Sun, 12 Apr 2026 12:19:02 +0300
Subject: [PATCH 15/23] change method to push to prometheus

---
 charts/mlrun-ce/templates/NOTES.txt    |  2 +-
 charts/mlrun-ce/templates/_helpers.tpl | 14 +++----
 charts/mlrun-ce/values.yaml            | 57 +++++++-------------------
 tests/package.sh                       | 46 +++++++++++++++++++++
 4 files changed, 67 insertions(+), 52 deletions(-)

diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index 2e7c9598..efb991bc 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -138,7 +138,7 @@ OpenTelemetry Collector (deployment mode):
 -  Collector CR: {{ include "mlrun-ce.otel.collector.fullname" . }}
 -  OTLP gRPC endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.grpcPort }}
 -  OTLP HTTP endpoint: {{ include "mlrun-ce.otel.collector.fullname" . }}-collector:{{ .Values.opentelemetry.collector.otlp.httpPort }}
--  Prometheus metrics port: {{ .Values.opentelemetry.collector.prometheus.port }} (scraped by Prometheus from the collector pod)
+-  Metrics export: collector pushes via OTLP to Prometheus at /api/v1/otlp/v1/metrics
 {{- end }}
 {{- if .Values.opentelemetry.instrumentation.enabled }}
 {{- "\n" }}
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 08bc2499..a949c3e8 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -522,14 +522,10 @@ spec:
         timeout: 5s
         override: false
     exporters:
-      prometheus:
-        endpoint: 0.0.0.0:{{ .Values.opentelemetry.collector.prometheus.port }}
-        namespace: {{ .Values.opentelemetry.collector.prometheus.namespace }}
-        const_labels:
-          collector_mode: deployment
-          metrics_source: otel_collector
-        resource_to_telemetry_conversion:
-          enabled: true
+      otlphttp/prometheus:
+        endpoint: http://prometheus-operated.{{ .Release.Namespace }}.svc:9090/api/v1/otlp
+        tls:
+          insecure: true
       debug:
         verbosity: basic
         sampling_initial: 5
@@ -549,7 +545,7 @@ spec:
             - resourcedetection
             - batch
           exporters:
-            - prometheus
+            - otlphttp/prometheus
             - debug
         traces:
           receivers:
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 8bb76629..246e0cab 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -594,42 +594,14 @@ kube-prometheus-stack:
       type: NodePort
       nodePort: 30020
     prometheusSpec:
-      # Additional scrape configs for OpenTelemetry collector Deployment.
-      # In deployment mode, one collector pod runs per namespace and receives OTLP from all
-      # instrumented pods. Prometheus scrapes only the collector (port 8889), not individual pods.
-      additionalScrapeConfigs:
-        # Scrape the OTel Collector Deployment pod.
-        # Discovers pods with app.kubernetes.io/component=opentelemetry-collector label
-        # (applied automatically by the OTel operator to collector pods).
-        - job_name: 'otel-collector'
-          kubernetes_sd_configs:
-            - role: pod
-          relabel_configs:
-            # Only scrape the OTel collector pod
-            - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
-              action: keep
-              regex: opentelemetry-collector
-            # Use port 8889 (OTel prometheus exporter)
-            - source_labels: [__address__]
-              action: replace
-              regex: ([^:]+)(?::\d+)?
-              replacement: $1:8889
-              target_label: __address__
-            - target_label: __metrics_path__
-              replacement: /metrics
-            # Propagate pod labels as metric labels
-            - action: labelmap
-              regex: __meta_kubernetes_pod_label_(.+)
-            - source_labels: [__meta_kubernetes_namespace]
-              action: replace
-              target_label: kubernetes_namespace
-            - source_labels: [__meta_kubernetes_pod_name]
-              action: replace
-              target_label: kubernetes_pod_name
-          metric_relabel_configs:
-            - action: replace
-              target_label: metrics_source
-              replacement: otel_collector
+      # Enable OTLP write receiver so the OTel collector can push metrics directly
+      # to Prometheus at /api/v1/otlp/v1/metrics instead of Prometheus scraping the collector.
+      # The feature flag enables the receiver on Prometheus v2.x; v3 replaced it with --web.enable-otlp-receiver, so both are set.
+      enableFeatures:
+        - otlp-write-receiver
+      additionalArgs:
+        - name: web.enable-otlp-receiver
+          value: ""
   kube-state-metrics:
     fullnameOverride: state-metrics
   prometheus-node-exporter:
@@ -765,6 +737,13 @@ spark:
 # =============================================================================
 opentelemetry-operator:
   enabled: false
+  # CRDs are bootstrapped by the parent chart's crds/ directory (tiny stubs applied
+  # before any templates so the type is established immediately on fresh install).
+  # Disable sub-chart CRD rendering to avoid ownership conflicts with the crds/ stubs.
+  # The stubs use x-kubernetes-preserve-unknown-fields so the operator can still
+  # manage CRs; the operator's admission webhook handles CR validation.
+  crds:
+    create: false
   # Admission webhooks configuration
   admissionWebhooks:
     certManager:
@@ -843,12 +822,6 @@ opentelemetry:
       limits:
         cpu: 200m
         memory: 256Mi
-    # Prometheus exporter port for metrics
-    prometheus:
-      port: 8889
-      # Metric prefix added to all OTEL-collected metrics
-      # Helps distinguish OTEL metrics from directly-scraped metrics
-      namespace: mlrun_otel
     # OTLP receiver configuration
     otlp:
       grpcPort: 4317
diff --git a/tests/package.sh b/tests/package.sh
index fa8150ed..575c3546 100755
--- a/tests/package.sh
+++ b/tests/package.sh
@@ -61,6 +61,52 @@ with tempfile.TemporaryDirectory() as tmp:
     )
 PYEOF
 
+# Slim down the opentelemetry-operator sub-chart by replacing large conf/crds/ files
+# with empty stubs. The CRDs are managed by the parent chart's crds/ directory instead
+# (crds.create: false in values.yaml). Keeping the full 542 KB CRD files would push
+# the Helm release Secret over the Kubernetes 3 MB API request limit.
+echo "Slimming opentelemetry-operator conf/crds/ (replacing with empty stubs)..."
+python3 - <<'PYEOF'
+import os, subprocess, tarfile, tempfile
+
+tgz = "charts/opentelemetry-operator-0.78.1.tgz"
+if not os.path.exists(tgz):
+    print(f"  {tgz} not found, skipping")
+    raise SystemExit(0)
+
+# Stub content: preserves the {{- if .Values.crds.create }} guard so the template
+# renders correctly (empty output) whether crds.create is true or false.
+STUB = b"{{- if .Values.crds.create }}\n{{- end }}\n"
+
+crd_files = (
+    "opentelemetry-operator/conf/crds/crd-opentelemetrycollector.yaml",
+    "opentelemetry-operator/conf/crds/crd-opentelemetryinstrumentation.yaml",
+    "opentelemetry-operator/conf/crds/crd-opentelemetry.io_opampbridges.yaml",
+)
+
+with tempfile.TemporaryDirectory() as tmp:
+    with tarfile.open(tgz, "r:gz") as t:
+        t.extractall(tmp)
+
+    for rel in crd_files:
+        path = os.path.join(tmp, rel)
+        if os.path.exists(path):
+            orig = os.path.getsize(path)
+            with open(path, "wb") as f:
+                f.write(STUB)
+            print(f"  {os.path.basename(rel)}: {orig} -> {len(STUB)} bytes")
+        else:
+            print(f"  {rel} not found, skipping")
+
+    # Repack over the original archive; COPYFILE_DISABLE=1 is passed to tar (presumably to skip macOS ._* files).
+    env = os.environ.copy()
+    env["COPYFILE_DISABLE"] = "1"
+    subprocess.run(
+        ["tar", "czf", os.path.abspath(tgz), "opentelemetry-operator"],
+        cwd=tmp, env=env, check=True
+    )
+PYEOF
+
 # Create MLRun CE tarball
 helm package .
 exit 0

From 3aa7420439f49aee098a07b0cc182b73506d8cf5 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Tue, 14 Apr 2026 10:15:37 +0300
Subject: [PATCH 16/23] change method to push to prometheus

---
 charts/mlrun-ce/README.md              | 18 +-----------------
 charts/mlrun-ce/templates/_helpers.tpl |  2 +-
 charts/mlrun-ce/values.yaml            |  2 ++
 3 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index 69444ac7..f6b3b308 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -134,23 +134,7 @@ helm --namespace mlrun upgrade my-mlrun \
     mlrun/mlrun-ce
 ```
 
-#### Split Installation (Admin/Non-Admin)
-
-For multi-tenant clusters, install the operator at the cluster level and the collector CRs in each user namespace:
-
-**Controller namespace (admin):**
-```bash
-helm --namespace controller install mlrun-controller \
-    -f admin_installation_values.yaml \
-    mlrun/mlrun-ce
-```
-
-**User namespace (non-admin):**
-```bash
-helm --namespace mlrun install my-mlrun \
-    -f non_admin_installation_values.yaml \
-    mlrun/mlrun-ce
-```
+> **Note:** The above assumes a single-namespace installation. For multi-namespace (admin/non-admin) deployments, refer to the MLRun documentation.
 
 ### Working with ECR
 
diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index a949c3e8..5918e769 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -494,7 +494,7 @@ metadata:
     {{- include "mlrun-ce.otel.labels" . | nindent 4 }}
 spec:
   mode: {{ .Values.opentelemetry.collector.mode }}
-  upgradeStrategy: automatic
+  upgradeStrategy: {{ .Values.opentelemetry.collector.upgradeStrategy | default "automatic" }}
   managementState: managed
   image: {{ (index .Values "opentelemetry-operator").manager.collectorImage.repository }}:{{ (index .Values "opentelemetry-operator").manager.collectorImage.tag }}
   resources:
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 246e0cab..1213c5a7 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -814,6 +814,8 @@ opentelemetry:
     fullnameOverride: ""
     # DEPLOYMENT mode - one collector pod per namespace, not injected as a sidecar
     mode: deployment
+    # Set to "none" to prevent automatic collector upgrades when the operator is upgraded
+    upgradeStrategy: automatic
     # Collector sidecar container resources
     resources:
       requests:

From 36c4b3f56f2cbd31b4c708cb9a0012796ce57bd0 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 09:55:41 +0300
Subject: [PATCH 17/23] remove labeling s3 and TimescaleDB fix jupyter bug.
 update documentation accordingly. add request and limit for crdReadinessJob
 and namespaceLabelJob

---
 charts/mlrun-ce/README.md                     |  2 +-
 charts/mlrun-ce/templates/NOTES.txt           |  3 +-
 .../jupyter-notebook/deployment.yaml          | 29 ++++++++++
 .../opentelemetry/crd-readiness-job.yaml      |  7 +++
 .../opentelemetry/namespace-label.yaml        |  7 +++
 .../templates/timescaledb/statefulset.yaml    |  3 -
 charts/mlrun-ce/values.yaml                   | 29 ++++++----
 tests/helm-template-test.sh                   | 55 +++++--------------
 8 files changed, 80 insertions(+), 55 deletions(-)

diff --git a/charts/mlrun-ce/README.md b/charts/mlrun-ce/README.md
index f6b3b308..61aedbe3 100644
--- a/charts/mlrun-ce/README.md
+++ b/charts/mlrun-ce/README.md
@@ -119,7 +119,7 @@ Override those [in the normal methods](https://helm.sh/docs/chart_template_guide
 
 ### Configuring OpenTelemetry (Observability)
 
-MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces. When enabled, it deploys a single collector per namespace (deployment mode) — instrumented pods send OTLP data to the collector, which exports metrics to Prometheus on port 8889. All Python pods in the namespace are auto-instrumented, and the `mlrun.io/otel: "true"` label is applied to Jupyter, SeaweedFS, TimescaleDB, and Nuclio function pods for metric enrichment.
+MLRun CE includes the OpenTelemetry Operator for collecting metrics and traces. When enabled, it deploys a single collector per namespace (deployment mode) — instrumented pods push OTLP data to the collector, which forwards metrics to Prometheus via the OTLP endpoint. Python auto-instrumentation is applied namespace-wide via a webhook, and the `mlrun.io/otel: "true"` label is applied to Jupyter and Nuclio function pods to mark them for metric enrichment and trigger OTel injection on restart.
 
 For a fresh install with OTel, see [Installing with OpenTelemetry Enabled](#installing-with-opentelemetry-enabled).
 
diff --git a/charts/mlrun-ce/templates/NOTES.txt b/charts/mlrun-ce/templates/NOTES.txt
index efb991bc..6ce2f2bd 100644
--- a/charts/mlrun-ce/templates/NOTES.txt
+++ b/charts/mlrun-ce/templates/NOTES.txt
@@ -161,7 +161,8 @@ Namespace OTel configuration:
 {{- end }}
 {{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
 {{- "\n" }}
-Pods labeled with mlrun.io/otel=true: Jupyter, SeaweedFS (master/volume/filer/s3/admin), TimescaleDB, and Nuclio function pods (via functionDefaults).
+Pods labeled with mlrun.io/otel=true: Jupyter and Nuclio function pods (via functionDefaults).
+These Python-based pods receive OTel auto-instrumentation (runtime metrics, traces, HTTP metrics for Nuclio functions).
 {{- end }}
 {{- end }}
 
diff --git a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
index 08108481..176cda96 100644
--- a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
+++ b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
@@ -74,7 +74,36 @@ spec:
         ports:
         - containerPort: 8888
           name: http
+        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+        command:
+          - /bin/bash
+          - -c
+          - |
+            # Extract tar if needed (mirrors mlce-start.sh setup)
+            file_path="${HOME}/.intdata"
+            if [ ! -f "$file_path" ]; then
+              echo "Base data does not exist, extracting home backup..."
+              cd / && tar -xvf /tmp/basehome.tar
+              echo "1" > "$file_path"
+            fi
+            cd "${HOME}"
+            # Add OTel sitecustomize.py to PYTHONPATH so Jupyter's own Python
+            # process bootstraps OTel directly (bypassing start-notebook.py which
+            # strips the path to prevent re-instrumentation of subprocesses).
+            OTEL_PATH=/otel-auto-instrumentation-python/opentelemetry/instrumentation/auto_instrumentation
+            if [ -d "$OTEL_PATH" ]; then
+              export PYTHONPATH="${OTEL_PATH}:${PYTHONPATH:-/otel-auto-instrumentation-python}"
+            fi
+            exec /opt/conda/bin/python3 /opt/conda/bin/jupyter-lab \
+              --ip=0.0.0.0 \
+              --port=8888 \
+              --NotebookApp.token="" \
+              --NotebookApp.password="" \
+              --NotebookApp.allow_origin="*" \
+              --NotebookApp.default_url=/lab
+        {{- else }}
         command: ["/bin/bash", "/usr/local/bin/mlce-start.sh"]
+        {{- end }}
       {{- with .Values.jupyterNotebook.nodeSelector }}
       nodeSelector: {{ toYaml . | nindent 8 }}
       {{- end }}
diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index 94ce109c..bb4081c3 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -28,6 +28,13 @@ spec:
       containers:
         - name: cr-creator
           image: bitnami/kubectl:latest
+          resources:
+            requests:
+              cpu: {{ .Values.opentelemetry.crdReadinessJob.resources.requests.cpu }}
+              memory: {{ .Values.opentelemetry.crdReadinessJob.resources.requests.memory }}
+            limits:
+              cpu: {{ .Values.opentelemetry.crdReadinessJob.resources.limits.cpu }}
+              memory: {{ .Values.opentelemetry.crdReadinessJob.resources.limits.memory }}
           command:
             - /bin/bash
             - -c
diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
index 60cfb3b8..e82b7162 100644
--- a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -24,6 +24,13 @@ spec:
       containers:
       - name: label-namespace
         image: bitnami/kubectl:latest
+        resources:
+          requests:
+            cpu: {{ .Values.opentelemetry.namespaceLabelJob.resources.requests.cpu }}
+            memory: {{ .Values.opentelemetry.namespaceLabelJob.resources.requests.memory }}
+          limits:
+            cpu: {{ .Values.opentelemetry.namespaceLabelJob.resources.limits.cpu }}
+            memory: {{ .Values.opentelemetry.namespaceLabelJob.resources.limits.memory }}
         command:
         - /bin/sh
         - -c
diff --git a/charts/mlrun-ce/templates/timescaledb/statefulset.yaml b/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
index 7001a93e..79f5a7f8 100644
--- a/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
+++ b/charts/mlrun-ce/templates/timescaledb/statefulset.yaml
@@ -15,9 +15,6 @@ spec:
     metadata:
       labels:
         {{- include "mlrun-ce.timescaledb.selectorLabels" . | nindent 8 }}
-        {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
-        {{- include "mlrun-ce.otel.podLabels" . | nindent 8 }}
-        {{- end }}
     spec:
       {{- with .Values.timescaledb.nodeSelector }}
       nodeSelector:
diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml
index 1213c5a7..bb680601 100644
--- a/charts/mlrun-ce/values.yaml
+++ b/charts/mlrun-ce/values.yaml
@@ -337,8 +337,6 @@ seaweedfs:
 
   # Master server - metadata management
   master:
-    podLabels:
-      mlrun.io/otel: "true"
     port: 9333
     # Storage: use PVC instead of default hostPath
     data:
@@ -352,8 +350,6 @@ seaweedfs:
 
   # Volume server - actual data storage
   volume:
-    podLabels:
-      mlrun.io/otel: "true"
     port: 8080
     # Storage: use PVC instead of default hostPath
     dataDirs:
@@ -369,8 +365,6 @@ seaweedfs:
 
   # Filer server - file system interface
   filer:
-    podLabels:
-      mlrun.io/otel: "true"
     port: 8888
     # Storage: use PVC instead of default hostPath
     data:
@@ -388,8 +382,6 @@ seaweedfs:
 
   # S3 API gateway - MLRun connects to this endpoint
   s3:
-    podLabels:
-      mlrun.io/otel: "true"
     enabled: true  # Default is false
     port: 8333
     enableAuth: true  # Default is false
@@ -403,8 +395,6 @@ seaweedfs:
 
   # Admin server - user and policy management UI
   admin:
-    podLabels:
-      mlrun.io/otel: "true"
     enabled: true  # Default is false
     port: 23646
     secret:
@@ -868,3 +858,22 @@ opentelemetry:
         limits:
           cpu: 500m
           memory: 512Mi
+
+  # CRD readiness job resources (kubectl-only container)
+  crdReadinessJob:
+    resources:
+      requests:
+        cpu: 50m
+        memory: 64Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
+  # Namespace label job resources (kubectl-only container)
+  namespaceLabelJob:
+    resources:
+      requests:
+        cpu: 50m
+        memory: 64Mi
+      limits:
+        cpu: 100m
+        memory: 128Mi
diff --git a/tests/helm-template-test.sh b/tests/helm-template-test.sh
index ccdef572..0617e192 100755
--- a/tests/helm-template-test.sh
+++ b/tests/helm-template-test.sh
@@ -134,8 +134,8 @@ test_otel_collector_default() {
     assert_contains "$output" "kind: Job" "Has correct kind"
     assert_contains "$output" "kind: OpenTelemetryCollector" "Job contains OpenTelemetryCollector CR"
     assert_contains "$output" "mode: deployment" "Uses deployment mode"
-    assert_contains "$output" "prometheus:" "Has Prometheus exporter"
-    assert_contains "$output" "endpoint: 0.0.0.0:8889" "Prometheus on port 8889"
+    assert_contains "$output" "otlphttp/prometheus:" "Has OTLP HTTP Prometheus exporter"
+    assert_contains "$output" "/api/v1/otlp" "Pushes to Prometheus OTLP endpoint"
     assert_contains "$output" "otlp:" "Has OTLP receiver"
     assert_contains "$output" "helm.sh/hook" "Has Helm hooks"
     assert_contains "$output" "post-install,post-upgrade" "Has correct hook triggers"
@@ -151,6 +151,18 @@ test_otel_collector_disabled() {
         "CRD Readiness Job does not render when collector disabled (default)"
 }
 
+test_otel_collector_upgrade_strategy() {
+    log_test "OpenTelemetry Collector - upgradeStrategy override"
+
+    local output
+    output=$(render_template "templates/opentelemetry/crd-readiness-job.yaml" \
+        --set global.registry.url=test.io \
+        --set opentelemetry.collector.enabled=true \
+        --set opentelemetry.collector.upgradeStrategy=none)
+
+    assert_contains "$output" "upgradeStrategy: none" "upgradeStrategy can be overridden to none"
+}
+
 test_otel_collector_resources() {
     log_test "OpenTelemetry Collector - Custom resources"
 
@@ -355,39 +367,6 @@ test_otel_operator_namespace_selector() {
     fi
 }
 
-# ============================================================================
-# Prometheus Integration Tests
-# ============================================================================
-
-test_prometheus_otel_scrape_config() {
-    log_test "Prometheus - OTEL scrape configuration"
-
-    local output
-    output=$(render_all --set global.registry.url=test.io)
-
-    # The scrape config is in a Secret as base64, extract and decode it
-    local secret_data
-    secret_data=$(echo "$output" | grep "additional-scrape-configs.yaml:" | head -1 | sed 's/.*: "//' | sed 's/"$//' || true)
-
-    if [[ -n "$secret_data" ]]; then
-        local decoded
-        decoded=$(echo "$secret_data" | base64 -d 2>/dev/null || true)
-
-        if echo "$decoded" | grep -q "otel-collector"; then
-            log_pass "Has OTEL collector scrape job"
-        else
-            log_fail "Has OTEL collector scrape job - not found in decoded config"
-        fi
-
-        if echo "$decoded" | grep -q "opentelemetry-collector"; then
-            log_pass "Has collector pod label selector"
-        else
-            log_fail "Has collector pod label selector - not found in decoded config"
-        fi
-    else
-        log_fail "Prometheus scrape config secret not found"
-    fi
-}
 
 # ============================================================================
 # Full Chart Render Test
@@ -425,6 +404,7 @@ main() {
     echo "========================================"
     test_otel_collector_default
     test_otel_collector_disabled
+    test_otel_collector_upgrade_strategy
     test_otel_collector_resources
 
     echo ""
@@ -467,11 +447,6 @@ main() {
     test_namespace_label_with_instrumentation_annotation
     test_otel_operator_namespace_selector
 
-    echo ""
-    echo "========================================"
-    echo "Prometheus Integration Tests"
-    echo "========================================"
-    test_prometheus_otel_scrape_config
 
     echo ""
     echo "========================================"

From a5e71ef49d7f801536249461aea4a4b890441715 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 10:48:32 +0300
Subject: [PATCH 18/23] another jupyter timing fix

---
 .../templates/jupyter-notebook/deployment.yaml    |  3 +++
 .../opentelemetry/crd-readiness-job.yaml          | 15 ++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
index 176cda96..aa1e43dd 100644
--- a/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
+++ b/charts/mlrun-ce/templates/jupyter-notebook/deployment.yaml
@@ -5,6 +5,9 @@ metadata:
   name: {{ include "mlrun-ce.jupyter.fullname" . }}
   labels:
     {{- include "mlrun-ce.jupyter.labels" . | nindent 4 }}
+    {{- if and .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
+    {{- include "mlrun-ce.otel.podLabels" . | nindent 4 }}
+    {{- end }}
 spec:
   replicas: 1
   selector:
diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index bb4081c3..78df75eb 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -63,16 +63,17 @@ spec:
               echo "Instrumentation CRD is available!"
               {{- end }}
               
-              # Wait a bit more for the operator to be fully ready
-              echo "Waiting for operator webhook to be ready..."
-              sleep 10
-              
               {{- if .Values.opentelemetry.collector.enabled }}
-              # Create or update the OpenTelemetryCollector CR
-              echo "Creating/updating OpenTelemetryCollector CR..."
-              cat <<'EOF' | kubectl apply -f -
+              # Write the OpenTelemetryCollector CR to a temp file and retry applying it
+              # until the operator webhook is ready to accept it (fresh install timing fix).
+              cat > /tmp/collector-cr.yaml <<'EOF'
               {{- include "mlrun-ce.otel.collector.manifest" . | nindent 14 }}
               EOF
+              echo "Creating/updating OpenTelemetryCollector CR (with webhook readiness retry)..."
+              until kubectl apply -f /tmp/collector-cr.yaml 2>&1; do
+                echo "Webhook not ready yet, retrying in 5s..."
+                sleep 5
+              done
               echo "OpenTelemetryCollector CR created/updated!"
               {{- end }}
               

From b15ee61cb99cafdefe317b8af3b5ce21838a3733 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 11:12:11 +0300
Subject: [PATCH 19/23] remove redundant loop for crds check

---
 .../opentelemetry/crd-readiness-job.yaml      | 25 +++----------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index 78df75eb..915b9248 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -41,28 +41,9 @@ spec:
             - |
               set -e
               
-              echo "Waiting for OpenTelemetry CRDs to be available..."
-              
-              # Wait for the OpenTelemetryCollector CRD
-              {{- if .Values.opentelemetry.collector.enabled }}
-              echo "Waiting for OpenTelemetryCollector CRD..."
-              until kubectl get crd opentelemetrycollectors.opentelemetry.io &>/dev/null; do
-                echo "Waiting for opentelemetrycollectors.opentelemetry.io CRD..."
-                sleep 5
-              done
-              echo "OpenTelemetryCollector CRD is available!"
-              {{- end }}
-              
-              # Wait for the Instrumentation CRD
-              {{- if .Values.opentelemetry.instrumentation.enabled }}
-              echo "Waiting for Instrumentation CRD..."
-              until kubectl get crd instrumentations.opentelemetry.io &>/dev/null; do
-                echo "Waiting for instrumentations.opentelemetry.io CRD..."
-                sleep 5
-              done
-              echo "Instrumentation CRD is available!"
-              {{- end }}
-              
+              # CRDs are expected to exist: Helm installs charts/mlrun-ce/crds/ before any
+              # hooks run on a fresh install (crds/ is not applied on upgrade), so no polling here.
+
               {{- if .Values.opentelemetry.collector.enabled }}
               # Write the OpenTelemetryCollector CR to a temp file and retry applying it
               # until the operator webhook is ready to accept it (fresh install timing fix).

From 4f2bda3ae77b9abe570c41fe40685e64bce2654b Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 11:24:32 +0300
Subject: [PATCH 20/23] fix requirements.lock

---
 charts/mlrun-ce/requirements.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/charts/mlrun-ce/requirements.lock b/charts/mlrun-ce/requirements.lock
index 7561e0a0..adda8fc5 100644
--- a/charts/mlrun-ce/requirements.lock
+++ b/charts/mlrun-ce/requirements.lock
@@ -23,5 +23,5 @@ dependencies:
 - name: opentelemetry-operator
   repository: https://open-telemetry.github.io/opentelemetry-helm-charts
   version: 0.78.1
-digest: sha256:9f6ea4d6c60baabe3a9fb2a9c286f5c70a97bbf76ecba15ddaef7f39c56269ae
-generated: "2026-03-25T11:50:15.589709+02:00"
+digest: sha256:50ed77fd11e450e243c05eadac99857b4b0aae92ae73ca9a6c00fc1cdc726f70
+generated: "2026-04-15T11:23:19.249332+03:00"

From 657cab342e305d62273c699fc0277cecabccd6e9 Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 11:26:57 +0300
Subject: [PATCH 21/23] fix rc version

---
 charts/mlrun-ce/Chart.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml
index b91431ec..0edfd0d5 100644
--- a/charts/mlrun-ce/Chart.yaml
+++ b/charts/mlrun-ce/Chart.yaml
@@ -1,6 +1,6 @@
 apiVersion: v1
 name: mlrun-ce
-version: 0.11.0-rc.31
+version: 0.11.0-rc.32
 description: MLRun Open Source Stack
 home: https://iguazio.com
 icon: https://www.iguazio.com/wp-content/uploads/2019/10/Iguazio-Logo.png

From 73c6aa17b2d8c0f2ccc844669ffa6b24452e3f5a Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 11:45:14 +0300
Subject: [PATCH 22/23] fix pin kubectl version in jobs, fix documentation for
 crds readiness, change naming for otel metrics using metadata.name fieldRef

---
 charts/mlrun-ce/templates/_helpers.tpl                      | 3 +--
 .../mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/charts/mlrun-ce/templates/_helpers.tpl b/charts/mlrun-ce/templates/_helpers.tpl
index 5918e769..d207c879 100644
--- a/charts/mlrun-ce/templates/_helpers.tpl
+++ b/charts/mlrun-ce/templates/_helpers.tpl
@@ -586,7 +586,7 @@ spec:
     - name: OTEL_SERVICE_NAME
       valueFrom:
         fieldRef:
-          fieldPath: metadata.labels['app.kubernetes.io/name']
+          fieldPath: metadata.name
     - name: OTEL_METRICS_EXPORTER
       value: otlp
     - name: OTEL_TRACES_EXPORTER
@@ -616,7 +616,6 @@ spec:
         value: "true"
   {{- end }}
 {{- end }}
-..
 {{/*
 OTel pod label — marks a pod as OTel-monitored for metric enrichment and discovery.
 Namespace-level instrumentation annotation (set by namespace-label job) handles Python auto-instrumentation.
diff --git a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
index 915b9248..35c6b566 100644
--- a/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/crd-readiness-job.yaml
@@ -1,7 +1,7 @@
 {{- if or .Values.opentelemetry.collector.enabled .Values.opentelemetry.instrumentation.enabled }}
 ---
-# Job to wait for OpenTelemetry CRDs to be available and then create the CRs
-# This solves the race condition between the operator starting and CR creation
+# Job to create OpenTelemetry CRs after the operator webhook is ready to accept them.
+# Retries kubectl apply until the webhook accepts the CR (fresh install timing fix).
 apiVersion: batch/v1
 kind: Job
 metadata:
@@ -27,7 +27,7 @@ spec:
       serviceAccountName: {{ .Release.Name }}-otel-cr-creator
       containers:
         - name: cr-creator
-          image: bitnami/kubectl:latest
+          image: bitnami/kubectl:1.32
           resources:
             requests:
               cpu: {{ .Values.opentelemetry.crdReadinessJob.resources.requests.cpu }}

From 28cf7bad305ba20b6f531b2dc67002e8cd0ef1cf Mon Sep 17 00:00:00 2001
From: royischoss <royishoss@gmail.com>
Date: Wed, 15 Apr 2026 11:45:56 +0300
Subject: [PATCH 23/23] fix pin kubectl version in jobs, fix documentation for
 crds readiness, change naming for otel metrics using metadata.name fieldRef

---
 charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
index e82b7162..ee8bb5c7 100644
--- a/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
+++ b/charts/mlrun-ce/templates/opentelemetry/namespace-label.yaml
@@ -23,7 +23,7 @@ spec:
       restartPolicy: Never
       containers:
       - name: label-namespace
-        image: bitnami/kubectl:latest
+        image: bitnami/kubectl:1.32
         resources:
           requests:
             cpu: {{ .Values.opentelemetry.namespaceLabelJob.resources.requests.cpu }}