diff --git a/Makefile b/Makefile index 4cfccff752..fb58bbb637 100644 --- a/Makefile +++ b/Makefile @@ -640,6 +640,9 @@ run-perf-test: run-e2e-test: go test -v ./test/e2e/ -timeout 1h -tags=e2e -count=1 -args -image-tag=${TAG} -image-registry=${IMAGE_REGISTRY} -image-namespace=${IMAGE_NAMESPACE} +run-e2ev3-test: + go test -v ./test/e2ev3/ -timeout 1h -tags=e2e -count=1 -args -image-tag=${TAG} -image-registry=${IMAGE_REGISTRY} -image-namespace=${IMAGE_NAMESPACE} + .PHONY: update-hubble update-hubble: @echo "Checking for Hubble updates..." diff --git a/controller/Dockerfile b/controller/Dockerfile index d6b958140c..ff70dafe44 100644 --- a/controller/Dockerfile +++ b/controller/Dockerfile @@ -18,7 +18,8 @@ ARG GOARCH=amd64 # default to amd64 ARG GOOS=linux # default to linux ENV GOARCH=${GOARCH} ENV GOOS=${GOOS} -RUN if [ "$GOOS" = "linux" ] ; then \ +RUN --mount=type=cache,target=/var/cache/tdnf \ + if [ "$GOOS" = "linux" ] ; then \ tdnf install -y clang lld bpftool libbpf-devel; \ fi COPY ./pkg/plugin /go/src/github.com/microsoft/retina/pkg/plugin @@ -68,7 +69,7 @@ RUN --mount=type=cache,target="/root/.cache/go-build" go build -v -o /go/bin/ret # tools image FROM azurelinux-core AS tools -RUN tdnf install -y \ +RUN --mount=type=cache,target=/var/cache/tdnf tdnf install -y \ clang \ iproute \ iptables \ diff --git a/deploy/standard/manifests/controller/helm/retina/crds/monitoring.coreos.com_servicemonitors.yaml b/deploy/standard/manifests/controller/helm/retina/crds/monitoring.coreos.com_servicemonitors.yaml new file mode 100644 index 0000000000..39532119bd --- /dev/null +++ b/deploy/standard/manifests/controller/helm/retina/crds/monitoring.coreos.com_servicemonitors.yaml @@ -0,0 +1,1412 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + operator.prometheus.io/version: 0.90.0 + name: servicemonitors.monitoring.coreos.com +spec: + group: monitoring.coreos.com + names: 
+ categories: + - prometheus-operator + kind: ServiceMonitor + listKind: ServiceMonitorList + plural: servicemonitors + shortNames: + - smon + singular: servicemonitor + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: |- + The `ServiceMonitor` custom resource definition (CRD) defines how `Prometheus` and `PrometheusAgent` can scrape metrics from a group of services. + Among other things, it allows to specify: + * The services to scrape via label selectors. + * The container ports to scrape. + * Authentication credentials to use. + * Target and metric relabeling. + + `Prometheus` and `PrometheusAgent` objects select `ServiceMonitor` objects using label and namespace selectors. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + spec defines the specification of desired Service selection for target discovery by + Prometheus. + properties: + attachMetadata: + description: |- + attachMetadata defines additional metadata which is added to the + discovered targets. + + It requires Prometheus >= v2.37.0. + properties: + node: + description: |- + node when set to true, Prometheus attaches node metadata to the discovered + targets. + + The Prometheus service account must have the `list` and `watch` + permissions on the `Nodes` objects. 
+ type: boolean + type: object + bodySizeLimit: + description: |- + bodySizeLimit when defined, bodySizeLimit specifies a job level limit on the size + of uncompressed response body that will be accepted by Prometheus. + + It requires Prometheus >= v2.28.0. + pattern: (^0|([0-9]*[.])?[0-9]+((K|M|G|T|E|P)i?)?B)$ + type: string + convertClassicHistogramsToNHCB: + description: |- + convertClassicHistogramsToNHCB defines whether to convert all scraped classic histograms into a native histogram with custom buckets. + It requires Prometheus >= v3.0.0. + type: boolean + endpoints: + description: |- + endpoints defines the list of endpoints part of this ServiceMonitor. + Defines how to scrape metrics from Kubernetes [Endpoints](https://kubernetes.io/docs/concepts/services-networking/service/#endpoints) objects. + In most cases, an Endpoints object is backed by a Kubernetes [Service](https://kubernetes.io/docs/concepts/services-networking/service/) object with the same name and labels. + items: + description: |- + Endpoint defines an endpoint serving Prometheus metrics to be scraped by + Prometheus. + properties: + authorization: + description: |- + authorization configures the Authorization header credentials used by + the client. + + Cannot be set at the same time as `basicAuth`, `bearerTokenSecret` or `oauth2`. + properties: + credentials: + description: credentials defines a key of a Secret in the + namespace that contains the credentials for authentication. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: + description: |- + type defines the authentication type. The value is case-insensitive. + + "Basic" is not a supported value. + + Default: "Bearer" + type: string + type: object + basicAuth: + description: |- + basicAuth defines the Basic Authentication credentials used by the + client. + + Cannot be set at the same time as `authorization`, `bearerTokenSecret` or `oauth2`. + properties: + password: + description: |- + password defines a key of a Secret containing the password for + authentication. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + username: + description: |- + username defines a key of a Secret containing the username for + authentication. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + bearerTokenFile: + description: |- + bearerTokenFile defines the file to read bearer token for scraping the target. + + Deprecated: use `authorization` instead. + type: string + bearerTokenSecret: + description: |- + bearerTokenSecret defines a key of a Secret containing the bearer token + used by the client for authentication. The secret needs to be in the + same namespace as the custom resource and readable by the Prometheus + Operator. + + Cannot be set at the same time as `authorization`, `basicAuth` or `oauth2`. + + Deprecated: use `authorization` instead. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + enableHttp2: + description: enableHttp2 can be used to disable HTTP2. + type: boolean + filterRunning: + description: |- + filterRunning when true, the pods which are not running (e.g. either in Failed or + Succeeded state) are dropped during the target discovery. + + If unset, the filtering is enabled. 
+ + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + type: boolean + followRedirects: + description: |- + followRedirects defines whether the client should follow HTTP 3xx + redirects. + type: boolean + honorLabels: + description: |- + honorLabels defines when true the metric's labels when they collide + with the target's labels. + type: boolean + honorTimestamps: + description: |- + honorTimestamps defines whether Prometheus preserves the timestamps + when exposed by the target. + type: boolean + interval: + description: |- + interval at which Prometheus scrapes the metrics from the target. + + If empty, Prometheus uses the global scrape interval. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + metricRelabelings: + description: |- + metricRelabelings defines the relabeling rules to apply to the + samples before ingestion. + items: + description: |- + RelabelConfig allows dynamic rewriting of the label set for targets, alerts, + scraped samples and remote write samples. + + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + properties: + action: + default: replace + description: |- + action to perform based on the regex matching. + + `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. + `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0. + + Default: "Replace" + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + - keepequal + - KeepEqual + - dropequal + - DropEqual + type: string + modulus: + description: |- + modulus to take of the hash of the source label values. + + Only applicable when the action is `HashMod`. 
+ format: int64 + type: integer + regex: + description: regex defines the regular expression against + which the extracted value is matched. + type: string + replacement: + description: |- + replacement value against which a Replace action is performed if the + regular expression matches. + + Regex capture groups are available. + type: string + separator: + description: separator defines the string between concatenated + SourceLabels. + type: string + sourceLabels: + description: |- + sourceLabels defines the source labels select values from existing labels. Their content is + concatenated using the configured Separator and matched against the + configured regular expression. + items: + description: |- + LabelName is a valid Prometheus label name. + For Prometheus 3.x, a label name is valid if it contains UTF-8 characters. + For Prometheus 2.x, a label name is only valid if it contains ASCII characters, letters, numbers, as well as underscores. + type: string + type: array + targetLabel: + description: |- + targetLabel defines the label to which the resulting string is written in a replacement. + + It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, + `KeepEqual` and `DropEqual` actions. + + Regex capture groups are available. + type: string + type: object + type: array + noProxy: + description: |- + noProxy defines a comma-separated string that can contain IPs, CIDR notation, domain names + that should be excluded from proxying. IP and domain names can + contain port numbers. + + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: string + oauth2: + description: |- + oauth2 defines the OAuth2 settings used by the client. + + It requires Prometheus >= 2.27.0. + + Cannot be set at the same time as `authorization`, `basicAuth` or `bearerTokenSecret`. + properties: + clientId: + description: |- + clientId defines a key of a Secret or ConfigMap containing the + OAuth2 client's ID. 
+ properties: + configMap: + description: configMap defines the ConfigMap containing + data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: secret defines the Secret containing data + to use for the targets. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + clientSecret: + description: |- + clientSecret defines a key of a Secret containing the OAuth2 + client's secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + endpointParams: + additionalProperties: + type: string + description: |- + endpointParams configures the HTTP parameters to append to the token + URL. + type: object + noProxy: + description: |- + noProxy defines a comma-separated string that can contain IPs, CIDR notation, domain names + that should be excluded from proxying. IP and domain names can + contain port numbers. + + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: string + proxyConnectHeader: + additionalProperties: + items: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: array + description: |- + proxyConnectHeader optionally specifies headers to send to + proxies during CONNECT requests. + + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: object + x-kubernetes-map-type: atomic + proxyFromEnvironment: + description: |- + proxyFromEnvironment defines whether to use the proxy configuration defined by environment variables (HTTP_PROXY, HTTPS_PROXY, and NO_PROXY). 
+ + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: boolean + proxyUrl: + description: proxyUrl defines the HTTP proxy server to use. + pattern: ^(http|https|socks5)://.+$ + type: string + scopes: + description: scopes defines the OAuth2 scopes used for the + token request. + items: + type: string + type: array + tlsConfig: + description: |- + tlsConfig defines the TLS configuration to use when connecting to the OAuth2 server. + It requires Prometheus >= v2.43.0. + properties: + ca: + description: ca defines the Certificate authority used + when verifying server certificates. + properties: + configMap: + description: configMap defines the ConfigMap containing + data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: secret defines the Secret containing + data to use for the targets. + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + cert: + description: cert defines the Client certificate to + present when doing client-authentication. + properties: + configMap: + description: configMap defines the ConfigMap containing + data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: secret defines the Secret containing + data to use for the targets. + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + insecureSkipVerify: + description: insecureSkipVerify defines how to disable + target certificate validation. 
+ type: boolean + keySecret: + description: keySecret defines the Secret containing + the client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + maxVersion: + description: |- + maxVersion defines the maximum acceptable TLS version. + + It requires Prometheus >= v2.41.0 or Thanos >= v0.31.0. + enum: + - TLS10 + - TLS11 + - TLS12 + - TLS13 + type: string + minVersion: + description: |- + minVersion defines the minimum acceptable TLS version. + + It requires Prometheus >= v2.35.0 or Thanos >= v0.28.0. + enum: + - TLS10 + - TLS11 + - TLS12 + - TLS13 + type: string + serverName: + description: serverName is used to verify the hostname + for the targets. + type: string + type: object + tokenUrl: + description: tokenUrl defines the URL to fetch the token + from. + minLength: 1 + type: string + required: + - clientId + - clientSecret + - tokenUrl + type: object + params: + additionalProperties: + items: + type: string + type: array + description: params define optional HTTP URL parameters. + type: object + path: + description: |- + path defines the HTTP path from which to scrape for metrics. + + If empty, Prometheus uses the default value (e.g. `/metrics`). + type: string + port: + description: |- + port defines the name of the Service port which this endpoint refers to. + + It takes precedence over `targetPort`. 
+ type: string + proxyConnectHeader: + additionalProperties: + items: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: array + description: |- + proxyConnectHeader optionally specifies headers to send to + proxies during CONNECT requests. + + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: object + x-kubernetes-map-type: atomic + proxyFromEnvironment: + description: |- + proxyFromEnvironment defines whether to use the proxy configuration defined by environment variables (HTTP_PROXY, HTTPS_PROXY, and NO_PROXY). + + It requires Prometheus >= v2.43.0, Alertmanager >= v0.25.0 or Thanos >= v0.32.0. + type: boolean + proxyUrl: + description: proxyUrl defines the HTTP proxy server to use. + pattern: ^(http|https|socks5)://.+$ + type: string + relabelings: + description: |- + relabelings defines the relabeling rules to apply the target's + metadata labels. + + The Operator automatically adds relabelings for a few standard Kubernetes fields. + + The original scrape job's name is available via the `__tmp_prometheus_job_name` label. + + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + items: + description: |- + RelabelConfig allows dynamic rewriting of the label set for targets, alerts, + scraped samples and remote write samples. 
+ + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + properties: + action: + default: replace + description: |- + action to perform based on the regex matching. + + `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. + `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0. + + Default: "Replace" + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + - keepequal + - KeepEqual + - dropequal + - DropEqual + type: string + modulus: + description: |- + modulus to take of the hash of the source label values. + + Only applicable when the action is `HashMod`. + format: int64 + type: integer + regex: + description: regex defines the regular expression against + which the extracted value is matched. + type: string + replacement: + description: |- + replacement value against which a Replace action is performed if the + regular expression matches. + + Regex capture groups are available. + type: string + separator: + description: separator defines the string between concatenated + SourceLabels. + type: string + sourceLabels: + description: |- + sourceLabels defines the source labels select values from existing labels. Their content is + concatenated using the configured Separator and matched against the + configured regular expression. + items: + description: |- + LabelName is a valid Prometheus label name. + For Prometheus 3.x, a label name is valid if it contains UTF-8 characters. + For Prometheus 2.x, a label name is only valid if it contains ASCII characters, letters, numbers, as well as underscores. + type: string + type: array + targetLabel: + description: |- + targetLabel defines the label to which the resulting string is written in a replacement. 
+ + It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, + `KeepEqual` and `DropEqual` actions. + + Regex capture groups are available. + type: string + type: object + type: array + scheme: + description: scheme defines the HTTP scheme to use when scraping + the metrics. + enum: + - http + - https + - HTTP + - HTTPS + type: string + scrapeTimeout: + description: |- + scrapeTimeout defines the timeout after which Prometheus considers the scrape to be failed. + + If empty, Prometheus uses the global scrape timeout unless it is less + than the target's scrape interval value in which the latter is used. + The value cannot be greater than the scrape interval otherwise the operator will reject the resource. + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: |- + targetPort defines the name or number of the target port of the `Pod` object behind the + Service. The port must be specified with the container's port property. + x-kubernetes-int-or-string: true + tlsConfig: + description: tlsConfig defines TLS configuration used by the + client. + properties: + ca: + description: ca defines the Certificate authority used when + verifying server certificates. + properties: + configMap: + description: configMap defines the ConfigMap containing + data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: secret defines the Secret containing data + to use for the targets. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + caFile: + description: caFile defines the path to the CA cert in the + Prometheus container to use for the targets. + type: string + cert: + description: cert defines the Client certificate to present + when doing client-authentication. + properties: + configMap: + description: configMap defines the ConfigMap containing + data to use for the targets. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + secret: + description: secret defines the Secret containing data + to use for the targets. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key + must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + certFile: + description: certFile defines the path to the client cert + file in the Prometheus container for the targets. + type: string + insecureSkipVerify: + description: insecureSkipVerify defines how to disable target + certificate validation. + type: boolean + keyFile: + description: keyFile defines the path to the client key + file in the Prometheus container for the targets. + type: string + keySecret: + description: keySecret defines the Secret containing the + client key file for the targets. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + maxVersion: + description: |- + maxVersion defines the maximum acceptable TLS version. + + It requires Prometheus >= v2.41.0 or Thanos >= v0.31.0. + enum: + - TLS10 + - TLS11 + - TLS12 + - TLS13 + type: string + minVersion: + description: |- + minVersion defines the minimum acceptable TLS version. + + It requires Prometheus >= v2.35.0 or Thanos >= v0.28.0. + enum: + - TLS10 + - TLS11 + - TLS12 + - TLS13 + type: string + serverName: + description: serverName is used to verify the hostname for + the targets. + type: string + type: object + trackTimestampsStaleness: + description: |- + trackTimestampsStaleness defines whether Prometheus tracks staleness of + the metrics that have an explicit timestamp present in scraped data. + Has no effect if `honorTimestamps` is false. + + It requires Prometheus >= v2.48.0. + type: boolean + type: object + type: array + fallbackScrapeProtocol: + description: |- + fallbackScrapeProtocol defines the protocol to use if a scrape returns blank, unparseable, or otherwise invalid Content-Type. + + It requires Prometheus >= v3.0.0. + enum: + - PrometheusProto + - OpenMetricsText0.0.1 + - OpenMetricsText1.0.0 + - PrometheusText0.0.4 + - PrometheusText1.0.0 + type: string + jobLabel: + description: |- + jobLabel selects the label from the associated Kubernetes `Service` + object which will be used as the `job` label for all metrics. + + For example if `jobLabel` is set to `foo` and the Kubernetes `Service` + object is labeled with `foo: bar`, then Prometheus adds the `job="bar"` + label to all ingested metrics. 
+ + If the value of this field is empty or if the label doesn't exist for + the given Service, the `job` label of the metrics defaults to the name + of the associated Kubernetes `Service`. + type: string + keepDroppedTargets: + description: |- + keepDroppedTargets defines the per-scrape limit on the number of targets dropped by relabeling + that will be kept in memory. 0 means no limit. + + It requires Prometheus >= v2.47.0. + format: int64 + type: integer + labelLimit: + description: |- + labelLimit defines the per-scrape limit on number of labels that will be accepted for a sample. + + It requires Prometheus >= v2.27.0. + format: int64 + type: integer + labelNameLengthLimit: + description: |- + labelNameLengthLimit defines the per-scrape limit on length of labels name that will be accepted for a sample. + + It requires Prometheus >= v2.27.0. + format: int64 + type: integer + labelValueLengthLimit: + description: |- + labelValueLengthLimit defines the per-scrape limit on length of labels value that will be accepted for a sample. + + It requires Prometheus >= v2.27.0. + format: int64 + type: integer + namespaceSelector: + description: |- + namespaceSelector defines in which namespace(s) Prometheus should discover the services. + By default, the services are discovered in the same namespace as the `ServiceMonitor` object but it is possible to select pods across different/all namespaces. + properties: + any: + description: |- + any defines the boolean describing whether all namespaces are selected in contrast to a + list restricting them. + type: boolean + matchNames: + description: matchNames defines the list of namespace names to + select from. + items: + type: string + type: array + type: object + nativeHistogramBucketLimit: + description: |- + nativeHistogramBucketLimit defines if there are more than this many buckets in a native histogram, + buckets will be merged to stay within the limit. + It requires Prometheus >= v2.45.0. 
+ format: int64 + type: integer + nativeHistogramMinBucketFactor: + anyOf: + - type: integer + - type: string + description: |- + nativeHistogramMinBucketFactor defines if the growth factor of one bucket to the next is smaller than this, + buckets will be merged to increase the factor sufficiently. + It requires Prometheus >= v2.50.0. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + podTargetLabels: + description: |- + podTargetLabels defines the labels which are transferred from the + associated Kubernetes `Pod` object onto the ingested metrics. + items: + type: string + type: array + sampleLimit: + description: |- + sampleLimit defines a per-scrape limit on the number of scraped samples + that will be accepted. + format: int64 + type: integer + scrapeClass: + description: scrapeClass defines the scrape class to apply. + minLength: 1 + type: string + scrapeClassicHistograms: + description: |- + scrapeClassicHistograms defines whether to scrape a classic histogram that is also exposed as a native histogram. + It requires Prometheus >= v2.45.0. + + Notice: `scrapeClassicHistograms` corresponds to the `always_scrape_classic_histograms` field in the Prometheus configuration. + type: boolean + scrapeNativeHistograms: + description: |- + scrapeNativeHistograms defines whether to enable scraping of native histograms. + It requires Prometheus >= v3.8.0. + type: boolean + scrapeProtocols: + description: |- + scrapeProtocols defines the protocols to negotiate during a scrape. It tells clients the + protocols supported by Prometheus in order of preference (from most to least preferred). + + If unset, Prometheus uses its default value. + + It requires Prometheus >= v2.49.0. + items: + description: |- + ScrapeProtocol represents a protocol used by Prometheus for scraping metrics. 
+ Supported values are: + * `OpenMetricsText0.0.1` + * `OpenMetricsText1.0.0` + * `PrometheusProto` + * `PrometheusText0.0.4` + * `PrometheusText1.0.0` + enum: + - PrometheusProto + - OpenMetricsText0.0.1 + - OpenMetricsText1.0.0 + - PrometheusText0.0.4 + - PrometheusText1.0.0 + type: string + type: array + x-kubernetes-list-type: set + selector: + description: selector defines the label selector to select the Kubernetes + `Endpoints` objects to scrape metrics from. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + selectorMechanism: + description: |- + selectorMechanism defines the mechanism used to select the endpoints to scrape. 
+ By default, the selection process relies on relabel configurations to filter the discovered targets. + Alternatively, you can opt in for role selectors, which may offer better efficiency in large clusters. + Which strategy is best for your use case needs to be carefully evaluated. + + It requires Prometheus >= v2.17.0. + enum: + - RelabelConfig + - RoleSelector + type: string + serviceDiscoveryRole: + description: |- + serviceDiscoveryRole defines the service discovery role used to discover targets. + + If set, the value should be either "Endpoints" or "EndpointSlice". + Otherwise it defaults to the value defined in the + Prometheus/PrometheusAgent resource. + enum: + - Endpoints + - EndpointSlice + type: string + targetLabels: + description: |- + targetLabels defines the labels which are transferred from the + associated Kubernetes `Service` object onto the ingested metrics. + items: + type: string + type: array + targetLimit: + description: |- + targetLimit defines a limit on the number of scraped targets that will + be accepted. + format: int64 + type: integer + required: + - endpoints + - selector + type: object + status: + description: |- + status defines the status subresource. It is under active development and is updated only when the + "StatusForConfigurationResources" feature gate is enabled. + + Most recent observed status of the ServiceMonitor. Read-only. + More info: + https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + bindings: + description: bindings defines the list of workload resources (Prometheus, + PrometheusAgent, ThanosRuler or Alertmanager) which select the configuration + resource. + items: + description: WorkloadBinding is a link between a configuration resource + and a workload resource. + properties: + conditions: + description: conditions defines the current state of the configuration + resource when bound to the referenced Workload object. 
+ items: + description: ConfigResourceCondition describes the status + of configuration resources linked to Prometheus, PrometheusAgent, + Alertmanager or ThanosRuler. + properties: + lastTransitionTime: + description: lastTransitionTime defines the time of the + last update to the current status property. + format: date-time + type: string + message: + description: message defines the human-readable message + indicating details for the condition's last transition. + type: string + observedGeneration: + description: |- + observedGeneration defines the .metadata.generation that the + condition was set based upon. For instance, if `.metadata.generation` is + currently 12, but the `.status.conditions[].observedGeneration` is 9, the + condition is out of date with respect to the current state of the object. + format: int64 + type: integer + reason: + description: reason for the condition's last transition. + type: string + status: + description: status of the condition. + minLength: 1 + type: string + type: + description: |- + type of the condition being reported. + Currently, only "Accepted" is supported. + enum: + - Accepted + minLength: 1 + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + group: + description: group defines the group of the referenced resource. + enum: + - monitoring.coreos.com + type: string + name: + description: name defines the name of the referenced object. + minLength: 1 + type: string + namespace: + description: namespace defines the namespace of the referenced + object. + minLength: 1 + type: string + resource: + description: resource defines the type of resource being referenced + (e.g. Prometheus, PrometheusAgent, ThanosRuler or Alertmanager). 
+ enum: + - prometheuses + - prometheusagents + - thanosrulers + - alertmanagers + type: string + required: + - group + - name + - namespace + - resource + type: object + type: array + x-kubernetes-list-map-keys: + - group + - resource + - name + - namespace + x-kubernetes-list-type: map + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/go.mod b/go.mod index 2552fc6360..55d2492385 100644 --- a/go.mod +++ b/go.mod @@ -361,6 +361,7 @@ require ( github.com/Azure/go-autorest/autorest/azure/auth v0.5.12 // indirect github.com/Azure/go-autorest/autorest/azure/cli v0.4.6 // indirect github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect + github.com/Azure/go-workflow v0.1.13 // indirect github.com/Azure/msi-dataplane v0.4.3 // indirect github.com/Crocmagnon/fatcontext v0.7.1 // indirect github.com/Djarvur/go-err113 v0.0.0-20210108212216-aea10b59be24 // indirect @@ -391,6 +392,7 @@ require ( github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20240514230400-03fa26f5508f // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect + github.com/benbjohnson/clock v1.3.5 // indirect github.com/bkielbasa/cyclop v1.2.3 // indirect github.com/blacktop/go-dwarf v1.0.9 // indirect github.com/blacktop/go-macho v1.1.162 // indirect diff --git a/go.sum b/go.sum index 50dcee13ba..28502742e9 100644 --- a/go.sum +++ b/go.sum @@ -142,6 +142,8 @@ github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+Z github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= +github.com/Azure/go-workflow v0.1.13 h1:tqYmmKsw068Uu/OKRIctQEe1H6BZyXs9mJtzocc7jtE= +github.com/Azure/go-workflow v0.1.13/go.mod 
h1:gOt4hadDnP+SzV+ywWJRcM1BSopJ+1rfGlhrTIR040I= github.com/Azure/msi-dataplane v0.4.3 h1:dWPWzY4b54tLIR9T1Q014Xxd/1DxOsMIp6EjRFAJlQY= github.com/Azure/msi-dataplane v0.4.3/go.mod h1:yAfxdJyvcnvSDfSyOFV9qm4fReEQDl+nZLGeH2ZWSmw= github.com/Azure/perf-tests/network/benchmarks/netperf v0.0.0-20241008140716-395a79947d2c h1:TMXh4Z1Z98o4Ob7JYhiNHocITQXHqJSsrB5ts4uwKl8= @@ -295,6 +297,8 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= +github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= +github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/operator/Dockerfile b/operator/Dockerfile index a6e34f5037..5d8ccdbd05 100644 --- a/operator/Dockerfile +++ b/operator/Dockerfile @@ -5,9 +5,14 @@ ARG VERSION ARG APP_INSIGHTS_ID WORKDIR /workspace + +# Cache module download separately from source changes. +COPY go.mod go.sum ./ +RUN go mod download + COPY . . -RUN tdnf install -y jq +RUN --mount=type=cache,target=/var/cache/tdnf tdnf install -y jq # Default linux/architecture. 
ARG GOOS=linux diff --git a/test/e2e/framework/prometheus/prometheus.go b/test/e2e/framework/prometheus/prometheus.go index ebd0cb185c..b65764fe0a 100644 --- a/test/e2e/framework/prometheus/prometheus.go +++ b/test/e2e/framework/prometheus/prometheus.go @@ -81,23 +81,55 @@ func CheckMetricFromBuffer(prometheusMetricData []byte, metricName string, valid return nil } +func formatMetricDetail(name string, mf *promclient.MetricFamily, m *promclient.Metric) string { + var sb strings.Builder + sb.WriteString(name) + sb.WriteString("{") + for i, label := range m.GetLabel() { + if i > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%s=%q", label.GetName(), label.GetValue()) + } + sb.WriteString("}") + + switch mf.GetType() { + case promclient.MetricType_COUNTER: + fmt.Fprintf(&sb, " counter:%v", m.GetCounter().GetValue()) + case promclient.MetricType_GAUGE: + fmt.Fprintf(&sb, " gauge:%v", m.GetGauge().GetValue()) + case promclient.MetricType_HISTOGRAM: + h := m.GetHistogram() + fmt.Fprintf(&sb, " histogram:count=%v sum=%v", h.GetSampleCount(), h.GetSampleSum()) + case promclient.MetricType_SUMMARY: + s := m.GetSummary() + fmt.Fprintf(&sb, " summary:count=%v sum=%v", s.GetSampleCount(), s.GetSampleSum()) + case promclient.MetricType_UNTYPED: + fmt.Fprintf(&sb, " untyped:%v", m.GetUntyped().GetValue()) + } + + return sb.String() +} + func verifyValidMetricPresent(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error { - for _, metric := range data { - if metric.GetName() == metricName { - for _, metric := range metric.GetMetric() { + for _, mf := range data { + if mf.GetName() == metricName { + for _, m := range mf.GetMetric() { // get all labels and values on the metric metricLabels := map[string]string{} - for _, label := range metric.GetLabel() { + for _, label := range m.GetLabel() { metricLabels[label.GetName()] = label.GetValue() } // if valid metric is empty, then we just need to make sure the metric and value is present 
if len(validMetric) == 0 && len(metricLabels) > 0 { + log.Printf("found matching metric: %s", formatMetricDetail(metricName, mf, m)) return nil } if reflect.DeepEqual(metricLabels, validMetric) { + log.Printf("found matching metric: %s", formatMetricDetail(metricName, mf, m)) return nil } } @@ -130,18 +162,19 @@ func getAllPrometheusMetricsFromURL(url string) (map[string]*promclient.MetricFa // verifyValidMetricPresentPartial checks if a metric exists with labels that contain // all the key-value pairs in validMetric (partial matching - the metric can have additional labels) func verifyValidMetricPresentPartial(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error { - for _, metric := range data { - if metric.GetName() == metricName { - for _, metric := range metric.GetMetric() { + for _, mf := range data { + if mf.GetName() == metricName { + for _, m := range mf.GetMetric() { // get all labels and values on the metric metricLabels := map[string]string{} - for _, label := range metric.GetLabel() { + for _, label := range m.GetLabel() { metricLabels[label.GetName()] = label.GetValue() } // if valid metric is empty, then we just need to make sure the metric and value is present if len(validMetric) == 0 && len(metricLabels) > 0 { + log.Printf("found matching metric: %s", formatMetricDetail(metricName, mf, m)) return nil } @@ -155,6 +188,7 @@ func verifyValidMetricPresentPartial(metricName string, data map[string]*promcli } if allMatch { + log.Printf("found matching metric: %s", formatMetricDetail(metricName, mf, m)) return nil } } diff --git a/test/e2e/scenarios/dns/validate-advanced-dns-metric.go b/test/e2e/scenarios/dns/validate-advanced-dns-metric.go index 00badbe8bb..de92a3295b 100644 --- a/test/e2e/scenarios/dns/validate-advanced-dns-metric.go +++ b/test/e2e/scenarios/dns/validate-advanced-dns-metric.go @@ -4,7 +4,6 @@ package dns import ( "fmt" - "log" "github.com/microsoft/retina/test/e2e/framework/constants" 
"github.com/microsoft/retina/test/e2e/framework/kubernetes" @@ -50,8 +49,6 @@ func (v *ValidateAdvancedDNSRequestMetrics) Run() error { if err != nil { return errors.Wrapf(err, "failed to verify advance dns request metrics %s", dnsAdvRequestCountMetricName) } - log.Printf("found metrics matching %+v\n", dnsAdvRequestCountMetricName) - return nil } @@ -106,8 +103,6 @@ func (v *ValidateAdvanceDNSResponseMetrics) Run() error { if err != nil { return errors.Wrapf(err, "failed to verify advance dns response metrics %s", dnsAdvRequestCountMetricName) } - log.Printf("found metrics matching %+v\n", dnsAdvResponseCountMetricName) - return nil } diff --git a/test/e2e/scenarios/dns/validate-basic-dns-metric.go b/test/e2e/scenarios/dns/validate-basic-dns-metric.go index 91662e1bb5..2c047dde3c 100644 --- a/test/e2e/scenarios/dns/validate-basic-dns-metric.go +++ b/test/e2e/scenarios/dns/validate-basic-dns-metric.go @@ -4,7 +4,6 @@ package dns import ( "fmt" - "log" "github.com/microsoft/retina/test/e2e/framework/constants" prom "github.com/microsoft/retina/test/e2e/framework/prometheus" @@ -30,8 +29,6 @@ func (v *validateBasicDNSRequestMetrics) Run() error { if err != nil { return errors.Wrapf(err, "failed to verify basic dns request metrics %s", dnsBasicRequestCountMetricName) } - log.Printf("found metrics matching %+v\n", dnsBasicRequestCountMetricName) - return nil } @@ -64,8 +61,6 @@ func (v *validateBasicDNSResponseMetrics) Run() error { if err != nil { return errors.Wrapf(err, "failed to verify basic dns response metrics %s", dnsBasicResponseCountMetricName) } - log.Printf("found metrics matching %+v\n", dnsBasicResponseCountMetricName) - return nil } diff --git a/test/e2e/scenarios/drop/validate-drop-metric.go b/test/e2e/scenarios/drop/validate-drop-metric.go index 7b647e65f8..e2a759612a 100644 --- a/test/e2e/scenarios/drop/validate-drop-metric.go +++ b/test/e2e/scenarios/drop/validate-drop-metric.go @@ -2,7 +2,6 @@ package drop import ( "fmt" - "log" prom 
"github.com/microsoft/retina/test/e2e/framework/prometheus" ) @@ -45,7 +44,6 @@ func (v *ValidateRetinaDropMetric) Run() error { return fmt.Errorf("failed to verify prometheus metrics %s: %w", dropBytesMetricName, err) } - log.Printf("found metrics matching %+v\n", metric) return nil } diff --git a/test/e2e/scenarios/latency/validate-latency-metric.go b/test/e2e/scenarios/latency/validate-latency-metric.go index 270ae95969..0da7d3341b 100644 --- a/test/e2e/scenarios/latency/validate-latency-metric.go +++ b/test/e2e/scenarios/latency/validate-latency-metric.go @@ -2,7 +2,6 @@ package latency import ( "fmt" - "log" "github.com/microsoft/retina/test/e2e/framework/constants" prom "github.com/microsoft/retina/test/e2e/framework/prometheus" @@ -25,8 +24,6 @@ func (v *ValidateAPIServerLatencyMetric) Run() error { if err != nil { return errors.Wrapf(err, "failed to verify latency metrics %s", latencyBucketMetricName) } - - log.Printf("found metrics matching %s\n", latencyBucketMetricName) return nil } diff --git a/test/e2e/scenarios/tcp/validate-flow-metric.go b/test/e2e/scenarios/tcp/validate-flow-metric.go index 9ade9947b9..8463f7b04f 100644 --- a/test/e2e/scenarios/tcp/validate-flow-metric.go +++ b/test/e2e/scenarios/tcp/validate-flow-metric.go @@ -2,7 +2,6 @@ package flow import ( "fmt" - "log" prom "github.com/microsoft/retina/test/e2e/framework/prometheus" ) @@ -37,7 +36,6 @@ func (v *ValidateRetinaTCPStateMetric) Run() error { } } - log.Printf("found metrics matching %+v\n", validMetrics) return nil } diff --git a/test/e2e/scenarios/tcp/validate-tcp-connection-remote.go b/test/e2e/scenarios/tcp/validate-tcp-connection-remote.go index 70c1c7fb97..9fe94e156e 100644 --- a/test/e2e/scenarios/tcp/validate-tcp-connection-remote.go +++ b/test/e2e/scenarios/tcp/validate-tcp-connection-remote.go @@ -2,7 +2,6 @@ package flow import ( "fmt" - "log" prom "github.com/microsoft/retina/test/e2e/framework/prometheus" ) @@ -30,7 +29,6 @@ func (v 
*ValidateRetinaTCPConnectionRemoteMetric) Run() error { } } - log.Printf("found metrics matching %+v\n", tcpConnectionRemoteMetricName) return nil } diff --git a/test/e2ev3/Makefile b/test/e2ev3/Makefile new file mode 100644 index 0000000000..21887fb218 --- /dev/null +++ b/test/e2ev3/Makefile @@ -0,0 +1,50 @@ +TIMEOUT_KIND ?= 60m +TIMEOUT_AZURE ?= 120m +PROVIDER ?= kind +KUBECONFIG ?= +CREATE_INFRA ?= true +DELETE_INFRA ?= true + +REPO_ROOT := $(shell git rev-parse --show-toplevel) + +GO_TEST = cd $(REPO_ROOT) && go test -v -tags e2e ./test/e2ev3/ -timeout + +# base flags, computed from variables above +FLAGS = -provider=$(PROVIDER) -create-infra=$(CREATE_INFRA) -delete-infra=$(DELETE_INFRA) +ifdef KUBECONFIG +FLAGS += -kubeconfig=$(KUBECONFIG) +endif + +# timeout picked by provider +ifeq ($(PROVIDER),kind) +TIMEOUT = $(TIMEOUT_KIND) +else +TIMEOUT = $(TIMEOUT_AZURE) +endif + +.PHONY: test-e2e test-basic-metrics test-advanced-metrics test-hubble-metrics test-capture \ + test-basic-metrics-exp test-advanced-metrics-exp help + +test-e2e: ## run all e2e scenarios, defaults to kind + $(GO_TEST) $(TIMEOUT) $(FLAGS) + +test-basic-metrics: ## run basic metrics scenarios (drop, tcp, dns) + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/BasicMetrics$$ $(FLAGS) + +test-advanced-metrics: ## run advanced metrics scenarios (dns, latency) + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/AdvancedMetrics$$ $(FLAGS) + +test-hubble-metrics: ## run hubble metrics scenarios (drop, tcp, dns, flows) + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/HubbleMetrics$$ $(FLAGS) + +test-capture: ## run packet capture scenarios + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/Capture$$ $(FLAGS) + +test-basic-metrics-exp: ## run experimental basic metrics scenarios + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/BasicMetricsExperimental$$ $(FLAGS) + +test-advanced-metrics-exp: ## run experimental advanced metrics scenarios + $(GO_TEST) $(TIMEOUT) -run TestE2ERetina/AdvancedMetricsExperimental$$ $(FLAGS) + +help: ## show 
this help + @grep -E '^[a-z-]+:.*## ' $(MAKEFILE_LIST) | awk -F ':.*## ' '{printf " %-26s %s\n", $$1, $$2}' diff --git a/test/e2ev3/README.md b/test/e2ev3/README.md new file mode 100644 index 0000000000..8feb69edef --- /dev/null +++ b/test/e2ev3/README.md @@ -0,0 +1,171 @@ +# Retina E2E Tests (v3) + +End-to-end tests built on [go-workflow](https://github.com/Azure/go-workflow), a DAG-based test orchestration framework. + +## Prerequisites + +- Go 1.24+ +- Docker (required for the Kind provider) + +## Environment Variables + +| Variable | Required | Default | Description | +|---|---|---|---| +| `TAG` | No | `git describe` | Image tag. If unset, images are built from source. | +| `IMAGE_NAMESPACE` | No | `microsoft/retina` | Image namespace | +| `IMAGE_REGISTRY` | No | `ghcr.io` | Container registry | +| `AZURE_SUBSCRIPTION_ID` | Azure only | — | Azure subscription ID | +| `AZURE_LOCATION` | Azure only | — | Azure region (fallback: `LOCATION`) | +| `AZURE_RESOURCE_GROUP` | Azure only | — | Resource group name | +| `CLUSTER_NAME` | Azure only | — | AKS cluster name | +| `HELM_DRIVER` | No | `secrets` | Helm storage driver | + +## Test Flags + +| Flag | Default | Description | +|---|---|---| +| `-provider` | `azure` | Infrastructure provider: `azure` or `kind` | +| `-kubeconfig` | `""` | Path to an existing kubeconfig (skips infra creation) | +| `-create-infra` | `true` | Create infrastructure before tests | +| `-delete-infra` | `true` | Delete infrastructure after tests | + +## Running Tests + +All commands are run from `test/e2ev3/`. + +### Make Targets + +```bash +make test-e2e # Run all scenarios +make test-basic-metrics # Drop, TCP, DNS +make test-advanced-metrics # DNS, latency +make test-hubble-metrics # Hubble drop, TCP, DNS, flows +make test-capture # Packet capture +make test-basic-metrics-exp # Experimental basic metrics +make test-advanced-metrics-exp # Experimental advanced metrics +``` + +The default provider is `kind`. 
When no `TAG` is set, images are built from source automatically using `git describe` as the tag (agent, init, and operator for linux/amd64). For Kind, images are built locally; for Azure, they are built and pushed to the registry. + +Override with Make variables: + +```bash +# Use an existing Kind cluster +make test-basic-metrics KUBECONFIG=$HOME/.kube/config CREATE_INFRA=false DELETE_INFRA=false + +# Run against Azure +make test-e2e PROVIDER=azure +``` + +### Kind (Local) + +With no environment variables, images are built from source and loaded onto a new Kind cluster: + +```bash +make test-e2e +``` + +Or with an explicit tag pointing at pre-built images: + +```bash +TAG=v0.0.1 \ +IMAGE_NAMESPACE=retina \ +IMAGE_REGISTRY=ghcr.io/microsoft \ + go test -v -tags e2e ./test/e2ev3/ \ + -provider=kind \ + -timeout 60m +``` + +Use an existing Kind cluster: + +```bash +TAG=v0.0.1 \ +IMAGE_NAMESPACE=retina \ +IMAGE_REGISTRY=ghcr.io/microsoft \ + go test -v -tags e2e ./test/e2ev3/ \ + -provider=kind \ + -kubeconfig=$HOME/.kube/config \ + -create-infra=false \ + -delete-infra=false \ + -timeout 60m +``` + +### Azure (AKS) + +Create an AKS cluster, run all scenarios, and tear down: + +```bash +TAG=v0.0.1 \ +IMAGE_NAMESPACE=retina \ +IMAGE_REGISTRY=ghcr.io/microsoft \ +AZURE_SUBSCRIPTION_ID= \ +AZURE_LOCATION=eastus2 \ +AZURE_RESOURCE_GROUP=retina-e2e-rg \ +CLUSTER_NAME=retina-e2e \ + go test -v -tags e2e ./test/e2ev3/ \ + -provider=azure \ + -timeout 120m +``` + +Use an existing AKS cluster: + +```bash +TAG=v0.0.1 \ +IMAGE_NAMESPACE=retina \ +IMAGE_REGISTRY=ghcr.io/microsoft \ + go test -v -tags e2e ./test/e2ev3/ \ + -kubeconfig=$HOME/.kube/config \ + -create-infra=false \ + -delete-infra=false \ + -timeout 120m +``` + +### Running a Specific Sub-Test + +> **Note:** The test pipeline runs as a single `flow.Pipe` — there are no Go +> sub-tests. The individual Makefile targets (`test-basic-metrics`, etc.) +> currently run the full pipeline. 
To run a subset, use `-kubeconfig` to point +> at an existing cluster and comment out unwanted steps in +> `retina_e2e_test.go`. + +## Workflow Structure + +Each scenario follows the same DAG pattern: + +``` +create → exec → validate (retry with backoff) → cleanup (always) +``` + +- **Create** — Provision resources (pods, network policies). +- **Exec** — Generate traffic (curl, nslookup). +- **Validate** — Port-forward to Retina or Hubble and assert Prometheus metrics. Retried with exponential backoff. +- **Cleanup** — Delete resources. Runs even if validation fails via `When(flow.Always)`. + +## Directory Layout + +``` +test/e2ev3/ +├── retina_e2e_test.go # Test entry point (declarative pipeline) +├── Makefile # Make targets +├── config/ # E2E config, flags, paths, shared params +│ ├── e2e.go # Config types, env loading, E2EParams +│ └── load_step.go # config.Step — resolves config + image tag +├── pkg/ +│ ├── images/ # Image loading interface + images.Step +│ │ ├── build/ # Build images from source + build.Step +│ │ └── load/ # Load images onto clusters (Kind sideload vs registry pull) +│ ├── infra/ # Infrastructure orchestration + infra.Workflow +│ │ └── providers/ +│ │ ├── azure/ # AKS cluster provisioning (ARM templates) +│ │ └── kind/ # Kind cluster lifecycle (native SDK) +│ ├── kubernetes/ # Reusable K8s steps (Helm, pods, port-forward, exec) +│ ├── prometheus/ # Prometheus metric scraping and validation +│ └── utils/ # Shared utilities +└── workflows/ + ├── basicmetrics/ # Drop, TCP, DNS scenarios + │ └── experimental/ # Experimental basic metrics (conntrack, forward, etc.) + ├── advancedmetrics/ # DNS, latency scenarios (upgraded Helm profile) + │ └── experimental/ # Experimental advanced metrics (drop, forward, etc.) 
+ ├── hubblemetrics/ # Hubble drop, TCP, DNS, flow scenarios + └── capture/ # Packet capture validation +``` diff --git a/test/e2ev3/config/e2e.go b/test/e2ev3/config/e2e.go new file mode 100644 index 0000000000..5d34570092 --- /dev/null +++ b/test/e2ev3/config/e2e.go @@ -0,0 +1,215 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package config + +import ( + "context" + "flag" + "fmt" + "log/slog" + "os/exec" + "os/signal" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/kind" + "github.com/spf13/viper" + "k8s.io/client-go/rest" +) + +// E2EConfig holds all configuration and runtime state for e2e tests. +// Fields are populated incrementally by pipeline steps. +type E2EConfig struct { + Cluster ClusterProvider + Image ImageConfig + Scale ScaleConfig + Helm HelmConfig + Paths Paths +} + +// ClusterProvider abstracts cluster-specific behaviors. +// Implementations live in pkg/infra/providers/{kind,azure}. +type ClusterProvider interface { + ClusterName() string + KubeConfigPath() string + RestConfig() *rest.Config + LoadImages(ctx context.Context, images []string) error + ImagePullPolicy() string + ImagePullSecrets() []map[string]interface{} +} + +// ImageConfig holds container image coordinates. +type ImageConfig struct { + Tag string + Namespace string + Registry string +} + +// ScaleConfig holds scale-test parameters. +type ScaleConfig struct { + Nodes string + NumDeployments string + NumReplicas string + NumNetworkPolicies string + CleanUp string +} + +// HelmConfig holds Helm-specific settings. +type HelmConfig struct { + Driver string +} + +// Flags parsed from test command line. 
+var ( + CreateInfra = flag.Bool("create-infra", true, "create infrastructure for testing") + DeleteInfra = flag.Bool("delete-infra", true, "delete infrastructure after testing") + KubeConfig = flag.String("kubeconfig", "", "path to kubeconfig file") + Provider = flag.String("provider", "azure", "infrastructure provider: azure or kind") + ForceBuild = flag.Bool("force-build", false, "rebuild images even if they already exist locally") +) + +const ( + KubeSystemNamespace = "kube-system" + TestPodNamespace = "kube-system-test" + safetyTimeout = 24 * time.Hour +) + +// Architectures lists the CPU architectures to test across. +// Kind clusters are single-arch (amd64), so arm64 is only tested on Azure. +var Architectures []string + +// Paths holds resolved filesystem paths relative to the repository root. +type Paths struct { + RootDir string + RetinaChart string + HubbleChart string + AdvancedProfile string +} + +// ResolvePaths computes all standard paths from the repository root directory. +func ResolvePaths(rootDir string) *Paths { + return &Paths{ + RootDir: rootDir, + RetinaChart: filepath.Join(rootDir, "deploy", "standard", "manifests", "controller", "helm", "retina"), + HubbleChart: filepath.Join(rootDir, "deploy", "hubble", "manifests", "controller", "helm", "retina"), + AdvancedProfile: filepath.Join(rootDir, "test", "profiles", "advanced", "values.yaml"), + } +} + +// TestContext returns a context with a deadline set to the test deadline minus 1 min to ensure cleanup. +// If the test deadline is not set, a deadline is set to Now + 24h to prevent the test from running indefinitely. 
+func TestContext(t *testing.T) (context.Context, context.CancelFunc) {
+	t.Helper()
+
+	deadline, ok := t.Deadline()
+	if !ok {
+		t.Log("Test deadline disabled, deadline set to Now + 24h to prevent test from running indefinitely")
+		deadline = time.Now().Add(safetyTimeout)
+	}
+	// Reserve one minute before the deadline so cleanup steps can still run.
+	deadline = deadline.Add(-time.Minute)
+
+	// Chain both CancelFuncs. Reassigning cancel (as the previous //nolint'd
+	// version did) dropped the deadline context's CancelFunc, keeping its
+	// timer alive until the deadline fired even after the caller cancelled.
+	deadlineCtx, cancelDeadline := context.WithDeadline(context.Background(), deadline)
+	sigCtx, cancelSignal := signal.NotifyContext(deadlineCtx, syscall.SIGINT, syscall.SIGTERM)
+
+	return sigCtx, func() {
+		cancelSignal()
+		cancelDeadline()
+	}
+}
+
+// DevTag returns a tag derived from git describe, suitable for local dev builds.
+func DevTag(rootDir string) (string, error) {
+	cmd := exec.Command("git", "describe", "--tags", "--always")
+	cmd.Dir = rootDir
+
+	out, err := cmd.Output()
+	if err != nil {
+		return "", fmt.Errorf("git describe: %w", err)
+	}
+	return strings.TrimSpace(string(out)), nil
+}
+
+// LoadE2EConfig reads environment variables via viper and returns a populated E2EConfig.
+func LoadE2EConfig() (*E2EConfig, error) {
+	v := viper.New()
+
+	// Bind each env var explicitly — env var names don't match struct field paths.
+	bindings := map[string]string{
+		"azure.subscriptionid":     "AZURE_SUBSCRIPTION_ID",
+		"azure.location":           "AZURE_LOCATION",
+		"azure.resourcegroup":      "AZURE_RESOURCE_GROUP",
+		"azure.clustername":        "CLUSTER_NAME",
+		"image.tag":                "TAG",
+		"image.namespace":          "IMAGE_NAMESPACE",
+		"image.registry":           "IMAGE_REGISTRY",
+		"scale.nodes":              "NODES",
+		"scale.numdeployments":     "NUM_DEPLOYMENTS",
+		"scale.numreplicas":        "NUM_REPLICAS",
+		"scale.numnetworkpolicies": "NUM_NET_POL",
+		"scale.cleanup":            "CLEANUP",
+		"helm.driver":              "HELM_DRIVER",
+	}
+
+	for key, env := range bindings {
+		if err := v.BindEnv(key, env); err != nil {
+			return nil, fmt.Errorf("binding env %s to %s: %w", env, key, err)
+		}
+	}
+
+	// Also accept LOCATION as a fallback for AZURE_LOCATION.
+ if v.GetString("azure.location") == "" { + if err := v.BindEnv("azure.location", "LOCATION"); err != nil { + return nil, fmt.Errorf("binding env LOCATION: %w", err) + } + } + + // Build the provider-specific cluster config. + var cluster ClusterProvider + switch *Provider { + case "kind": + Architectures = []string{"amd64"} + cluster = &kind.Cluster{ + Name: v.GetString("azure.clustername"), + } + default: + Architectures = []string{"amd64", "arm64"} + cluster = &azure.Cluster{ + SubscriptionID: v.GetString("azure.subscriptionid"), + Location: v.GetString("azure.location"), + ResourceGroup: v.GetString("azure.resourcegroup"), + Name: v.GetString("azure.clustername"), + } + } + + cfg := &E2EConfig{ + Cluster: cluster, + Image: ImageConfig{ + Tag: v.GetString("image.tag"), + Namespace: v.GetString("image.namespace"), + Registry: v.GetString("image.registry"), + }, + Scale: ScaleConfig{ + Nodes: v.GetString("scale.nodes"), + NumDeployments: v.GetString("scale.numdeployments"), + NumReplicas: v.GetString("scale.numreplicas"), + NumNetworkPolicies: v.GetString("scale.numnetworkpolicies"), + CleanUp: v.GetString("scale.cleanup"), + }, + Helm: HelmConfig{ + Driver: v.GetString("helm.driver"), + }, + } + + if cfg.Image.Registry == "" { + cfg.Image.Registry = "ghcr.io" + } + if cfg.Image.Namespace == "" { + cfg.Image.Namespace = "microsoft/retina" + } + + slog.Info("using image", "registry", cfg.Image.Registry, "namespace", cfg.Image.Namespace, "tag", cfg.Image.Tag) + + return cfg, nil +} diff --git a/test/e2ev3/config/load_step.go b/test/e2ev3/config/load_step.go new file mode 100644 index 0000000000..4571ded1e9 --- /dev/null +++ b/test/e2ev3/config/load_step.go @@ -0,0 +1,54 @@ +//go:build e2e + +package config + +import ( + "context" + "fmt" + "log/slog" + "os" + "path/filepath" + + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/kind" +) + +// Step resolves e2e config, paths, and image 
tag. +type Step struct { + Cfg *E2EConfig +} + +func (l *Step) String() string { return "load-config" } + +func (l *Step) Do(ctx context.Context) error { + log := slog.With("step", l.String()) + cfg, err := LoadE2EConfig() + if err != nil { + return fmt.Errorf("load e2e config: %w", err) + } + + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("get cwd: %w", err) + } + *l.Cfg = *cfg + l.Cfg.Paths = *ResolvePaths(filepath.Dir(filepath.Dir(cwd))) + + kubeCfgPath := filepath.Join(l.Cfg.Paths.RootDir, "test", "e2e", "test.pem") + switch c := l.Cfg.Cluster.(type) { + case *kind.Cluster: + c.KubeCfgPath = kubeCfgPath + case *azure.Cluster: + c.KubeCfgPath = kubeCfgPath + } + + if l.Cfg.Image.Tag == "" { + tag, err := DevTag(l.Cfg.Paths.RootDir) + if err != nil { + return fmt.Errorf("generate dev tag: %w", err) + } + l.Cfg.Image.Tag = tag + log.Info("no TAG provided, will build images", "tag", tag) + } + return nil +} diff --git a/test/e2ev3/config/metrics.go b/test/e2ev3/config/metrics.go new file mode 100644 index 0000000000..b4526eb5c2 --- /dev/null +++ b/test/e2ev3/config/metrics.go @@ -0,0 +1,48 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+
+package config
+
+const (
+	// Retina Metrics Port
+	RetinaMetricsPort = "10093"
+
+	// Retina MetricsName
+	RetinaDropMetricName    = "networkobservability_drop_count"
+	RetinaForwardMetricName = "networkobservability_forward_count"
+
+	// Retina Labels
+	RetinaSourceLabel      = "source"
+	RetinaDestinationLabel = "destination"
+	RetinaProtocolLabel    = "protocol"
+	RetinaReasonLabel      = "reason"
+	RetinaDirectionLabel   = "direction"
+
+	// Hubble Metrics Port
+	HubbleMetricsPort = "9965"
+
+	// Hubble MetricsName
+	HubbleDNSQueryMetricName    = "hubble_dns_queries_total"
+	HubbleDNSResponseMetricName = "hubble_dns_responses_total"
+	HubbleFlowMetricName        = "hubble_flows_processed_total"
+	HubbleDropMetricName        = "hubble_drop_total"
+	HubbleTCPFlagsMetricName    = "hubble_tcp_flags_total"
+
+	// Hubble Labels
+	HubbleDestinationLabel = "destination"
+	HubbleSourceLabel      = "source"
+	// NOTE(review): "Retuned" is a typo for "Returned". The label value itself
+	// is correct; renaming the constant would break callers, so fix it in a
+	// coordinated follow-up change.
+	HubbleIPsRetunedLabel = "ips_returned"
+	HubbleQTypesLabel     = "qtypes"
+	HubbleRCodeLabel      = "rcode"
+	HubbleQueryLabel      = "query"
+
+	HubbleProtocolLabel = "protocol"
+	HubbleReasonLabel   = "reason"
+
+	HubbleSubtypeLabel = "subtype"
+	HubbleTypeLabel    = "type"
+	HubbleVerdictLabel = "verdict"
+
+	HubbleFamilyLabel = "family"
+	HubbleFlagLabel   = "flag"
+)
diff --git a/test/e2ev3/config/network.go b/test/e2ev3/config/network.go
new file mode 100644
index 0000000000..7f3904baea
--- /dev/null
+++ b/test/e2ev3/config/network.go
@@ -0,0 +1,18 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+package config
+
+const (
+	MetricsEndpoint = "metrics"
+
+	TCP             = "TCP"
+	UDP             = "UDP"
+	IPV4            = "IPv4"
+	IPTableRuleDrop = "IPTABLE_RULE_DROP"
+	SYN             = "SYN"
+	SYNACK          = "SYN-ACK"
+	ACK             = "ACK"
+	FIN             = "FIN"
+	RST             = "RST"
+)
diff --git a/test/e2ev3/pkg/images/build/build.go b/test/e2ev3/pkg/images/build/build.go
new file mode 100644
index 0000000000..546fc8eaa5
--- /dev/null
+++ b/test/e2ev3/pkg/images/build/build.go
@@ -0,0 +1,103 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license. + +//go:build e2e + +package build + +import ( + "context" + "fmt" + "log/slog" + "os/exec" + "strings" + + "github.com/microsoft/retina/test/e2ev3/config" + "github.com/microsoft/retina/test/e2ev3/pkg/images" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +// Step builds Retina container images by invoking the top-level Makefile. +// It builds the agent, init, and operator images for linux/amd64. +// If all images already exist locally and ForceBuild is false, the build is skipped. +type Step struct { + Cfg *config.E2EConfig +} + +func (b *Step) String() string { return "build-images" } + +func (b *Step) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, b) + img := &b.Cfg.Image + if !*config.ForceBuild && allImagesExist(img.Registry, img.Namespace, img.Tag) { + log.Info("all images already present locally, skipping build") + return nil + } + + push := *config.Provider != "kind" + return b.build(ctx, b.Cfg.Paths.RootDir, img.Registry, img.Namespace, img.Tag, push) +} + +func (b *Step) build(ctx context.Context, rootDir, registry, namespace, tag string, push bool) error { + targets := []string{"retina-image", "retina-operator-image"} + + errs := make(chan error, len(targets)) + for _, target := range targets { + go func(t string) { + errs <- runMake(ctx, rootDir, registry, namespace, tag, push, t) + }(target) + } + + var firstErr error + for range targets { + if err := <-errs; err != nil && firstErr == nil { + firstErr = err + } + } + return firstErr +} + +func runMake(ctx context.Context, rootDir, registry, namespace, tag string, push bool, target string) error { + args := []string{ + target, + "PLATFORM=linux/amd64", + "TAG=" + tag, + "RETINA_PLATFORM_TAG=" + tag, + "IMAGE_REGISTRY=" + registry, + "IMAGE_NAMESPACE=" + namespace, + } + if push { + args = append(args, "BUILDX_ACTION=--push", "OUTPUT_LOCAL=") + } else { + // Load into local docker daemon for Kind sideloading. 
+ // Disable provenance/sbom attestations — Kind's ctr import can't handle them. + args = append(args, "BUILDX_ACTION=--load --provenance=false --sbom=false", "OUTPUT_LOCAL=") + } + + slog.Info("building image", "command", "make "+strings.Join(args, " ")) + + cmd := exec.CommandContext(ctx, "make", args...) + cmd.Dir = rootDir + cmdOut := &utils.SlogWriter{Level: slog.LevelInfo, Source: "make-" + target} + cmd.Stdout = cmdOut + cmd.Stderr = cmdOut + + if err := cmd.Run(); err != nil { + cmdOut.Flush() + return fmt.Errorf("make %s failed: %w", target, err) + } + cmdOut.Flush() + return nil +} + +// allImagesExist returns true if every Retina image is already in the local Docker daemon. +func allImagesExist(registry, namespace, tag string) bool { + for _, ref := range images.RetinaImages(registry, namespace, tag) { + cmd := exec.Command("docker", "image", "inspect", ref) + if err := cmd.Run(); err != nil { + return false + } + } + return true +} + diff --git a/test/e2ev3/pkg/images/step.go b/test/e2ev3/pkg/images/step.go new file mode 100644 index 0000000000..3f7ddbccc5 --- /dev/null +++ b/test/e2ev3/pkg/images/step.go @@ -0,0 +1,33 @@ +package images + +import ( + "context" + "log/slog" + + "github.com/microsoft/retina/test/e2ev3/config" +) + +// Step loads container images into the cluster. +type Step struct { + Cfg *config.E2EConfig +} + +func (l *Step) String() string { return "load-images" } + +func (l *Step) Do(ctx context.Context) error { + log := slog.With("step", l.String()) + p := l.Cfg + imgs := RetinaImages(p.Image.Registry, p.Image.Namespace, p.Image.Tag) + log.Info("loading images into cluster", "count", len(imgs), "cluster", p.Cluster.ClusterName()) + return p.Cluster.LoadImages(ctx, imgs) +} + +// RetinaImages returns the standard Retina image references for the given coordinates. 
+func RetinaImages(registry, namespace, tag string) []string { + base := registry + "/" + namespace + return []string{ + base + "/retina-agent:" + tag, + base + "/retina-init:" + tag, + base + "/retina-operator:" + tag, + } +} diff --git a/test/e2ev3/pkg/infra/azure.go b/test/e2ev3/pkg/infra/azure.go new file mode 100644 index 0000000000..599b048518 --- /dev/null +++ b/test/e2ev3/pkg/infra/azure.go @@ -0,0 +1,64 @@ +package infra + +import ( + "context" + "testing" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure/arm" +) + +// ResolveInfraConfig builds the Azure infrastructure config from viper-loaded values, +// falling back to a random location and generated cluster name when not set. +func ResolveInfraConfig(t *testing.T, ac *azure.Cluster) *azure.InfraConfig { + t.Helper() + + subID := ac.SubscriptionID + if subID == "" { + t.Fatal("AZURE_SUBSCRIPTION_ID must be set") + } + + location := ac.Location + if location == "" { + location = azure.RandomLocation(t) + } + + clusterName := azure.ClusterNameForE2ETest(t, ac.Name) + + rg := ac.ResourceGroup + if rg == "" { + rg = clusterName + } + + return azure.DefaultE2EInfraConfig(subID, rg, location, clusterName) +} + +// AzureSteps returns the workflow steps to deploy Azure infrastructure and +// retrieve the cluster kubeconfig, plus registers teardown via t.Cleanup. 
+func AzureSteps(t *testing.T, cfg *azure.InfraConfig, kubeConfigFilePath string, createInfra, deleteInfra bool) []flow.Steper { + var steps []flow.Steper + + if createInfra { + steps = append(steps, &arm.DeployInfra{Config: cfg}) + } + + steps = append(steps, &azure.GetAKSKubeConfig{ + SubscriptionID: cfg.SubscriptionID, + ResourceGroupName: cfg.ResourceGroupName, + ClusterName: cfg.ClusterName, + Location: cfg.Location, + KubeConfigFilePath: kubeConfigFilePath, + }) + + if deleteInfra { + t.Cleanup(func() { + del := &arm.DeleteInfra{Config: cfg} + if err := del.Do(context.Background()); err != nil { + t.Logf("Failed to delete test infrastructure: %v", err) + } + }) + } + + return steps +} diff --git a/test/e2ev3/pkg/infra/kind.go b/test/e2ev3/pkg/infra/kind.go new file mode 100644 index 0000000000..6020c737ba --- /dev/null +++ b/test/e2ev3/pkg/infra/kind.go @@ -0,0 +1,44 @@ +package infra + +import ( + "context" + "testing" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/kind" +) + +// KindSteps returns the workflow steps to provision a Kind cluster and +// export its kubeconfig, plus registers teardown via t.Cleanup. 
+func KindSteps(t *testing.T, cfg *kind.Config, kubeConfigFilePath string, createInfra, deleteInfra bool) []flow.Steper { + var steps []flow.Steper + + if createInfra { + steps = append(steps, &kind.CreateCluster{Config: cfg}) + } + + steps = append(steps, &kind.ExportKubeConfig{ + ClusterName: cfg.ClusterName, + KubeConfigFilePath: kubeConfigFilePath, + }) + + if createInfra { + steps = append(steps, &kind.InstallNPM{ + KubeConfigFilePath: kubeConfigFilePath, + }) + } + + if deleteInfra { + t.Cleanup(func() { + del := &kind.DeleteCluster{ + ClusterName: cfg.ClusterName, + KubeConfigFilePath: kubeConfigFilePath, + } + if err := del.Do(context.Background()); err != nil { + t.Logf("Failed to delete Kind cluster: %v", err) + } + }) + } + + return steps +} diff --git a/test/e2ev3/pkg/infra/providers/azure/arm/deploy.go b/test/e2ev3/pkg/infra/providers/azure/arm/deploy.go new file mode 100644 index 0000000000..11d5e67638 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/arm/deploy.go @@ -0,0 +1,92 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package arm + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" +) + +const ( + deploymentPollFrequency = 30 * time.Second + deploymentStatusTicker = 60 * time.Second +) + +// DeployInfra is a go-workflow step that generates an ARM template from InfraConfig +// and deploys all e2e infrastructure (resource group, VNet, public IPs, AKS cluster) +// in a single subscription-level ARM deployment. 
+type DeployInfra struct { + Config *azure.InfraConfig +} + +func (d *DeployInfra) String() string { return "deploy-azure-infra" } + +func (d *DeployInfra) Do(ctx context.Context) error { + log := slog.With("step", d.String()) + template := GenerateTemplate(d.Config) + + templateJSON, err := json.MarshalIndent(template, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal ARM template: %w", err) + } + log.Info("generated ARM template", "bytes", len(templateJSON), "cluster", d.Config.ClusterName, "location", d.Config.Location) + + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain Azure CLI credential: %w", err) + } + + client, err := armresources.NewDeploymentsClient(d.Config.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create deployments client: %w", err) + } + + deploymentName := fmt.Sprintf("e2e-%s", d.Config.ClusterName) + log.Info("starting ARM deployment at subscription scope", "deployment", deploymentName) + + poller, err := client.BeginCreateOrUpdateAtSubscriptionScope(ctx, deploymentName, armresources.Deployment{ + Location: to.Ptr(d.Config.Location), + Properties: &armresources.DeploymentProperties{ + Mode: to.Ptr(armresources.DeploymentModeIncremental), + Template: template, + }, + }, nil) + if err != nil { + return fmt.Errorf("failed to begin ARM deployment: %w", err) + } + + notifychan := make(chan struct{}) + go func() { + _, err = poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{ + Frequency: deploymentPollFrequency, + }) + close(notifychan) + }() + + ticker := time.NewTicker(deploymentStatusTicker) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("ARM deployment timed out: %w", ctx.Err()) + case <-ticker.C: + log.Info("waiting for ARM deployment to complete", "deployment", deploymentName) + case <-notifychan: + if err != nil { + return fmt.Errorf("ARM deployment %q failed: %w", deploymentName, err) + } + 
log.Info("ARM deployment completed successfully", "deployment", deploymentName) + return nil + } + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/arm/lifecycle.go b/test/e2ev3/pkg/infra/providers/azure/arm/lifecycle.go new file mode 100644 index 0000000000..fc66192357 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/arm/lifecycle.go @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package arm + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" +) + +// DeleteInfra is a go-workflow step that deletes the resource group created +// by DeployInfra, cascading deletion of all resources within it. +type DeleteInfra struct { + Config *azure.InfraConfig +} + +func (d *DeleteInfra) String() string { return "delete-azure-infra" } + +func (d *DeleteInfra) Do(ctx context.Context) error { + log := slog.With("step", d.String()) + log.Info("deleting resource group and all resources within", "resourceGroup", d.Config.ResourceGroupName) + + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain Azure CLI credential: %w", err) + } + + clientFactory, err := armresources.NewClientFactory(d.Config.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create resource group client: %w", err) + } + + forceDeleteType := "Microsoft.Compute/virtualMachines,Microsoft.Compute/virtualMachineScaleSets" + poller, err := clientFactory.NewResourceGroupsClient().BeginDelete(ctx, d.Config.ResourceGroupName, + &armresources.ResourceGroupsClientBeginDeleteOptions{ + ForceDeletionTypes: &forceDeleteType, + }) + if err != nil { + return fmt.Errorf("failed to begin resource group deletion: %w", err) + } + + 
notifychan := make(chan struct{}) + go func() { + _, err = poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{ + Frequency: deploymentPollFrequency, + }) + close(notifychan) + }() + + ticker := time.NewTicker(deploymentStatusTicker) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("resource group deletion timed out: %w", ctx.Err()) + case <-ticker.C: + log.Info("waiting for resource group deletion", "resourceGroup", d.Config.ResourceGroupName) + case <-notifychan: + if err != nil { + return fmt.Errorf("resource group %q deletion failed: %w", d.Config.ResourceGroupName, err) + } + log.Info("resource group deleted successfully", "resourceGroup", d.Config.ResourceGroupName) + return nil + } + } +} + +// GetKubeConfig is a go-workflow step that retrieves kubeconfig for a cluster +// deployed via ARM template. +type GetKubeConfig struct { + Config *azure.InfraConfig + KubeConfigFilePath string +} + +func (g *GetKubeConfig) String() string { return "get-arm-kubeconfig" } + +func (g *GetKubeConfig) Do(ctx context.Context) error { + step := &azure.GetAKSKubeConfig{ + ClusterName: g.Config.ClusterName, + SubscriptionID: g.Config.SubscriptionID, + ResourceGroupName: g.Config.ResourceGroupName, + Location: g.Config.Location, + KubeConfigFilePath: g.KubeConfigFilePath, + } + return step.Do(ctx) +} diff --git a/test/e2ev3/pkg/infra/providers/azure/arm/template.go b/test/e2ev3/pkg/infra/providers/azure/arm/template.go new file mode 100644 index 0000000000..bf0020aaca --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/arm/template.go @@ -0,0 +1,236 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package arm + +import ( + "encoding/json" + "fmt" + + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" +) + +// GenerateTemplate builds a subscription-level ARM template that creates +// all e2e infrastructure in a single deployment: resource group, VNet with +// subnet, public IPs, and AKS cluster. +func GenerateTemplate(cfg *azure.InfraConfig) map[string]any { + nestedResources := []any{buildVNet(cfg)} + + for _, ip := range cfg.PublicIPs { + nestedResources = append(nestedResources, buildPublicIP(cfg, ip)) + } + + nestedResources = append(nestedResources, buildAKSCluster(cfg)) + + return map[string]any{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": []any{ + buildResourceGroup(cfg), + buildNestedDeployment(cfg, nestedResources), + }, + } +} + +// GenerateTemplateJSON returns the ARM template as pretty-printed JSON bytes. +func GenerateTemplateJSON(cfg *azure.InfraConfig) ([]byte, error) { + template := GenerateTemplate(cfg) + return json.MarshalIndent(template, "", " ") +} + +func buildResourceGroup(cfg *azure.InfraConfig) map[string]any { + return map[string]any{ + "type": "Microsoft.Resources/resourceGroups", + "apiVersion": "2022-09-01", + "name": cfg.ResourceGroupName, + "location": cfg.Location, + } +} + +func buildNestedDeployment(cfg *azure.InfraConfig, resources []any) map[string]any { + return map[string]any{ + "type": "Microsoft.Resources/deployments", + "apiVersion": "2022-09-01", + "name": "e2e-infra-deployment", + "resourceGroup": cfg.ResourceGroupName, + "dependsOn": []string{ + fmt.Sprintf("[resourceId('Microsoft.Resources/resourceGroups', '%s')]", cfg.ResourceGroupName), + }, + "properties": map[string]any{ + "mode": "Incremental", + "template": map[string]any{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": resources, + }, + }, 
+ } +} + +func buildVNet(cfg *azure.InfraConfig) map[string]any { + return map[string]any{ + "type": "Microsoft.Network/virtualNetworks", + "apiVersion": "2023-04-01", + "name": cfg.VnetName, + "location": cfg.Location, + "properties": map[string]any{ + "addressSpace": map[string]any{ + "addressPrefixes": []string{cfg.VnetAddressSpace}, + }, + "flowTimeoutInMinutes": 10, + "subnets": []map[string]any{ + { + "name": cfg.SubnetName, + "properties": map[string]any{ + "addressPrefix": cfg.SubnetAddressSpace, + }, + }, + }, + }, + } +} + +func buildPublicIP(cfg *azure.InfraConfig, ip azure.PublicIPConfig) map[string]any { + return map[string]any{ + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "2023-04-01", + "name": ip.FullName(cfg.ClusterName), + "location": cfg.Location, + "sku": map[string]any{ + "name": "Standard", + "tier": "Regional", + }, + "properties": map[string]any{ + "publicIPAllocationMethod": "Static", + "publicIPAddressVersion": ip.IPVersion, + "ipTags": []map[string]any{ + { + "ipTagType": "FirstPartyUsage", + "tag": "/NonProd", + }, + }, + }, + } +} + +func buildAKSCluster(cfg *azure.InfraConfig) map[string]any { + subnetRef := fmt.Sprintf("[resourceId('Microsoft.Network/virtualNetworks/subnets', '%s', '%s')]", + cfg.VnetName, cfg.SubnetName) + + // Agent pool profiles + pools := make([]map[string]any, 0, len(cfg.AgentPools)) + for _, pool := range cfg.AgentPools { + p := map[string]any{ + "name": pool.Name, + "count": pool.Count, + "vmSize": pool.VMSize, + "osType": pool.OSType, + "mode": pool.Mode, + "maxPods": pool.MaxPods, + "type": "VirtualMachineScaleSets", + "enableNodePublicIP": false, + "scaleDownMode": "Delete", + "vnetSubnetID": subnetRef, + } + if pool.OSSku != "" { + p["osSku"] = pool.OSSku + } + if pool.EnableFIPS { + p["enableFIPS"] = true + } + pools = append(pools, p) + } + + // Outbound public IP references for load balancer + outboundIPs := make([]map[string]any, 0, len(cfg.PublicIPs)) + for _, ip := range 
cfg.PublicIPs { + outboundIPs = append(outboundIPs, map[string]any{ + "id": fmt.Sprintf("[resourceId('Microsoft.Network/publicIPAddresses', '%s')]", + ip.FullName(cfg.ClusterName)), + }) + } + + // Dependencies + deps := []string{ + fmt.Sprintf("[resourceId('Microsoft.Network/virtualNetworks', '%s')]", cfg.VnetName), + } + for _, ip := range cfg.PublicIPs { + deps = append(deps, fmt.Sprintf("[resourceId('Microsoft.Network/publicIPAddresses', '%s')]", + ip.FullName(cfg.ClusterName))) + } + + // Network profile + networkProfile := map[string]any{ + "networkPlugin": cfg.NetworkPlugin, + "loadBalancerSku": "standard", + "outboundType": "loadBalancer", + } + if cfg.NetworkPolicy != "" { + networkProfile["networkPolicy"] = cfg.NetworkPolicy + } + if cfg.PodCidr != "" { + networkProfile["podCidr"] = cfg.PodCidr + } + if cfg.ServiceCidr != "" { + networkProfile["serviceCidr"] = cfg.ServiceCidr + } + if cfg.DNSServiceIP != "" { + networkProfile["dnsServiceIP"] = cfg.DNSServiceIP + } + if cfg.NetworkPluginMode != "" { + networkProfile["networkPluginMode"] = cfg.NetworkPluginMode + } + if len(outboundIPs) > 0 { + networkProfile["loadBalancerProfile"] = map[string]any{ + "outboundIPs": map[string]any{ + "publicIPs": outboundIPs, + }, + } + } + + // Cluster properties + properties := map[string]any{ + "dnsPrefix": cfg.ClusterName, + "enableRBAC": cfg.EnableRBAC, + "enablePodSecurityPolicy": false, + "agentPoolProfiles": pools, + "networkProfile": networkProfile, + } + + if cfg.AutoUpgradeChannel != "" { + properties["autoUpgradeProfile"] = map[string]any{ + "nodeOSUpgradeChannel": cfg.AutoUpgradeChannel, + } + } + + // Add Windows profile if any pool is Windows + for _, pool := range cfg.AgentPools { + if pool.OSType == "Windows" { + properties["windowsProfile"] = map[string]any{ + "adminUsername": cfg.WindowsAdminUsername, + "adminPassword": cfg.WindowsAdminPassword, + } + break + } + } + + return map[string]any{ + "type": "Microsoft.ContainerService/managedClusters", + 
"apiVersion": "2024-01-01", + "name": cfg.ClusterName, + "location": cfg.Location, + "tags": map[string]string{ + "archv2": "", + "tier": "production", + }, + "identity": map[string]any{ + "type": "SystemAssigned", + }, + "sku": map[string]any{ + "name": "Base", + "tier": "Standard", + }, + "properties": properties, + "dependsOn": deps, + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/cluster.go b/test/e2ev3/pkg/infra/providers/azure/cluster.go new file mode 100644 index 0000000000..b0fb68213b --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/cluster.go @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package azure + +import ( + "context" + + "k8s.io/client-go/rest" +) + +// Cluster is a ClusterProvider for Azure Kubernetes Service clusters. +// Images are pulled from a container registry, so LoadImages is a no-op. +type Cluster struct { + SubscriptionID string + Location string + ResourceGroup string + Name string + KubeCfgPath string + RC *rest.Config +} + +func (a *Cluster) ClusterName() string { return a.Name } +func (a *Cluster) KubeConfigPath() string { return a.KubeCfgPath } +func (a *Cluster) RestConfig() *rest.Config { return a.RC } + +func (a *Cluster) LoadImages(_ context.Context, _ []string) error { return nil } +func (a *Cluster) ImagePullPolicy() string { return "Always" } + +func (a *Cluster) ImagePullSecrets() []map[string]interface{} { + return []map[string]interface{}{ + {"name": "acr-credentials"}, + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/config.go b/test/e2ev3/pkg/infra/providers/azure/config.go new file mode 100644 index 0000000000..1b8fe19722 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/config.go @@ -0,0 +1,159 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package azure + +import ( + "crypto/rand" + "encoding/base64" + "fmt" + "math/big" + "os/user" + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +const netObsRGtag = "-e2e-netobs-" + +// AzureLocations is the set of locations randomly chosen when AZURE_LOCATION is unset. +var AzureLocations = []string{"eastus2", "northeurope", "uksouth", "centralindia", "westus2"} + +// RandomLocation picks a random Azure location from AzureLocations. +func RandomLocation(t *testing.T) string { + t.Helper() + nBig, err := rand.Int(rand.Reader, big.NewInt(int64(len(AzureLocations)))) + require.NoError(t, err) + return AzureLocations[nBig.Int64()] +} + +// ClusterNameForE2ETest returns clusterName if set, or generates one from +// the current username and timestamp. +func ClusterNameForE2ETest(t *testing.T, clusterName string) string { + if clusterName == "" { + curuser, err := user.Current() + require.NoError(t, err) + username := curuser.Username + + if len(username) > 8 { + username = username[:8] + t.Logf("Username is too long, truncating to 8 characters: %s", username) + } + clusterName = username + netObsRGtag + strconv.FormatInt(time.Now().Unix(), 10) + t.Logf("CLUSTER_NAME is not set, generating a random cluster name: %s", clusterName) + } + return clusterName +} + +// InfraConfig defines the complete infrastructure configuration for deploying +// all e2e test resources in a single ARM template deployment. 
+type InfraConfig struct { + SubscriptionID string + ResourceGroupName string + Location string + ClusterName string + + // VNet configuration + VnetName string + VnetAddressSpace string + SubnetName string + SubnetAddressSpace string + + // Cluster network configuration + PodCidr string + ServiceCidr string + DNSServiceIP string + + // Public IP configuration + PublicIPs []PublicIPConfig + + // Agent pool configuration + AgentPools []AgentPoolConfig + + // Cluster configuration + NetworkPlugin string + NetworkPolicy string + NetworkPluginMode string + EnableRBAC bool + AutoUpgradeChannel string + + // Windows node configuration + WindowsAdminUsername string + WindowsAdminPassword string +} + +// PublicIPConfig defines a public IP address to create. +type PublicIPConfig struct { + NamePrefix string + IPVersion string // "IPv4" or "IPv6" +} + +// AgentPoolConfig defines an AKS agent pool. +type AgentPoolConfig struct { + Name string + Count int32 + VMSize string + OSType string // "Linux" or "Windows" + OSSku string // "Windows2022", "AzureLinux", etc. Empty for default. + Mode string // "System" or "User" + MaxPods int32 + EnableFIPS bool +} + +// FullName returns the public IP resource name, e.g. "serviceTaggedIp-mycluster-v4". +func (ip PublicIPConfig) FullName(clusterName string) string { + suffix := "v4" + if strings.Contains(ip.IPVersion, "6") { + suffix = "v6" + } + return fmt.Sprintf("%s-%s-%s", ip.NamePrefix, clusterName, suffix) +} + +// DefaultE2EInfraConfig returns the standard infrastructure configuration +// matching the existing e2e test setup (NPM cluster with 4 agent pools). 
+func DefaultE2EInfraConfig(subscriptionID, resourceGroupName, location, clusterName string) *InfraConfig { + return &InfraConfig{ + SubscriptionID: subscriptionID, + ResourceGroupName: resourceGroupName, + Location: location, + ClusterName: clusterName, + + VnetName: "testvnet", + VnetAddressSpace: "10.0.0.0/9", + SubnetName: "testsubnet", + SubnetAddressSpace: "10.0.0.0/12", + + PodCidr: "10.128.0.0/9", + ServiceCidr: "192.168.0.0/28", + DNSServiceIP: "192.168.0.10", + + PublicIPs: []PublicIPConfig{ + {NamePrefix: "serviceTaggedIp", IPVersion: "IPv4"}, + {NamePrefix: "serviceTaggedIp", IPVersion: "IPv6"}, + }, + + AgentPools: []AgentPoolConfig{ + {Name: "nodepool1", Count: 3, VMSize: "Standard_DS4_v2", OSType: "Linux", Mode: "System", MaxPods: 250}, + {Name: "ws22", Count: 1, VMSize: "Standard_DS4_v2", OSType: "Windows", OSSku: "Windows2022", Mode: "User", MaxPods: 250}, + {Name: "azlinux", Count: 1, VMSize: "Standard_D4pls_v5", OSType: "Linux", OSSku: "AzureLinux", Mode: "User", MaxPods: 250, EnableFIPS: true}, + {Name: "arm64", Count: 2, VMSize: "Standard_D4pls_v5", OSType: "Linux", Mode: "User", MaxPods: 250}, + }, + + NetworkPlugin: "azure", + NetworkPolicy: "azure", + EnableRBAC: true, + AutoUpgradeChannel: "node-image", + WindowsAdminUsername: "azureuser", + WindowsAdminPassword: generatePassword(), + } +} + +func generatePassword() string { + b := make([]byte, 16) + _, _ = rand.Read(b) + // Guarantee complexity: uppercase (P), lowercase (w), digit (1), special (!) + return "Pw" + base64.RawStdEncoding.EncodeToString(b)[:12] + "!1" +} diff --git a/test/e2ev3/pkg/infra/providers/azure/delete.go b/test/e2ev3/pkg/infra/providers/azure/delete.go new file mode 100644 index 0000000000..805c592e13 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/delete.go @@ -0,0 +1,89 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package azure + +import ( + "context" + "fmt" + "log/slog" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" +) + +// DeleteResourceGroup is a go-workflow step that deletes a resource group +// and all resources within it. +type DeleteResourceGroup struct { + SubscriptionID string + ResourceGroupName string + Location string +} + +func (d *DeleteResourceGroup) String() string { return "delete-resource-group" } + +func (d *DeleteResourceGroup) Do(ctx context.Context) error { + log := slog.With("step", d.String()) + log.Info("deleting resource group", "resourceGroup", d.ResourceGroupName) + + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + + clientFactory, err := armresources.NewClientFactory(d.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create resource group client: %w", err) + } + + forceDeleteType := "Microsoft.Compute/virtualMachines,Microsoft.Compute/virtualMachineScaleSets" + _, err = clientFactory.NewResourceGroupsClient().BeginDelete(ctx, d.ResourceGroupName, + &armresources.ResourceGroupsClientBeginDeleteOptions{ + ForceDeletionTypes: to.Ptr(forceDeleteType), + }) + if err != nil { + return fmt.Errorf("failed to delete resource group %q: %w", d.ResourceGroupName, err) + } + + log.Info("resource group deleted successfully", "resourceGroup", d.ResourceGroupName) + return nil +} + +// DeleteCluster is a go-workflow step that deletes an AKS cluster. 
type DeleteCluster struct {
	ClusterName       string
	SubscriptionID    string
	ResourceGroupName string
	Location          string
}

// String identifies this step in workflow logs.
func (d *DeleteCluster) String() string { return "delete-aks-cluster" }

// Do deletes the AKS cluster and — unlike DeleteResourceGroup.Do — blocks
// until Azure reports the deletion finished.
func (d *DeleteCluster) Do(ctx context.Context) error {
	log := slog.With("step", d.String())
	log.Info("deleting cluster", "cluster", d.ClusterName, "resourceGroup", d.ResourceGroupName)

	// Azure CLI credential: assumes `az login` has been run in the test environment.
	cred, err := azidentity.NewAzureCLICredential(nil)
	if err != nil {
		return fmt.Errorf("failed to obtain a credential: %w", err)
	}

	clientFactory, err := armcontainerservice.NewClientFactory(d.SubscriptionID, cred, nil)
	if err != nil {
		return fmt.Errorf("failed to create client: %w", err)
	}

	poller, err := clientFactory.NewManagedClustersClient().BeginDelete(ctx, d.ResourceGroupName, d.ClusterName, nil)
	if err != nil {
		return fmt.Errorf("failed to begin cluster deletion: %w", err)
	}

	// Wait for the long-running operation so later steps see a clean state.
	if _, err = poller.PollUntilDone(ctx, nil); err != nil {
		return fmt.Errorf("failed to delete cluster %q: %w", d.ClusterName, err)
	}

	log.Info("cluster deleted successfully", "cluster", d.ClusterName)
	return nil
}
diff --git a/test/e2ev3/pkg/infra/providers/azure/get.go b/test/e2ev3/pkg/infra/providers/azure/get.go
new file mode 100644
index 0000000000..1b7476a4a6
--- /dev/null
+++ b/test/e2ev3/pkg/infra/providers/azure/get.go
@@ -0,0 +1,73 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package azure

import (
	"context"
	"fmt"
	"log/slog"
	"os"

	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
	armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4"
)

// kubeConfigPerms keeps the kubeconfig owner-only, since it embeds credentials.
const kubeConfigPerms = 0o600

// GetAKSKubeConfig is a go-workflow step that retrieves cluster credentials
// and writes the kubeconfig to a file.
+type GetAKSKubeConfig struct { + ClusterName string + SubscriptionID string + ResourceGroupName string + Location string + KubeConfigFilePath string +} + +func (c *GetAKSKubeConfig) String() string { return "get-aks-kubeconfig" } + +func (c *GetAKSKubeConfig) Do(ctx context.Context) error { + log := slog.With("step", c.String()) + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + + clientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + res, err := clientFactory.NewManagedClustersClient().ListClusterUserCredentials(ctx, c.ResourceGroupName, c.ClusterName, nil) + if err != nil { + return fmt.Errorf("failed to get cluster credentials: %w", err) + } + + if err := os.WriteFile(c.KubeConfigFilePath, res.Kubeconfigs[0].Value, kubeConfigPerms); err != nil { + return fmt.Errorf("failed to write kubeconfig to %q: %w", c.KubeConfigFilePath, err) + } + + log.Info("kubeconfig for cluster written", "cluster", c.ClusterName, "path", c.KubeConfigFilePath) + return nil +} + +// GetFQDN returns the FQDN of the given AKS cluster. 
+func GetFQDN(ctx context.Context, subscriptionID, resourceGroupName, clusterName string) (string, error) { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return "", fmt.Errorf("failed to obtain a credential: %w", err) + } + + clientFactory, err := armcontainerservice.NewClientFactory(subscriptionID, cred, nil) + if err != nil { + return "", fmt.Errorf("failed to create client: %w", err) + } + + res, err := clientFactory.NewManagedClustersClient().Get(ctx, resourceGroupName, clusterName, nil) + if err != nil { + return "", fmt.Errorf("failed to get cluster: %w", err) + } + + return *res.Properties.Fqdn, nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster-with-npm.go b/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster-with-npm.go new file mode 100644 index 0000000000..edff3c914d --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster-with-npm.go @@ -0,0 +1,170 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" +) + +var ( + ErrResourceNameTooLong = fmt.Errorf("resource name too long") + ErrEmptyFile = fmt.Errorf("empty file") +) + +const ( + clusterTimeout = 15 * time.Minute + clusterCreateTicker = 30 * time.Second + pollFrequency = 5 * time.Second + AgentARMSKU = "Standard_D4pls_v5" + AuxilaryNodeCount = 1 + AuxilaryARMNodeCount = 2 +) + +type CreateNPMCluster struct { + SubscriptionID string + ResourceGroupName string + Location string + ClusterName string + VnetName string + SubnetName string + PodCidr string + DNSServiceIP string + ServiceCidr string + PublicIPs []string +} + +func (c *CreateNPMCluster) Do(_ context.Context) error { + // Start with default cluster template + 
npmCluster := GetStarterClusterTemplate(c.Location) + + npmCluster.Properties.NetworkProfile.NetworkPolicy = to.Ptr(armcontainerservice.NetworkPolicyAzure) + + //nolint:appendCombine // separate for verbosity + npmCluster.Properties.AgentPoolProfiles = append(npmCluster.Properties.AgentPoolProfiles, &armcontainerservice.ManagedClusterAgentPoolProfile{ //nolint:all + Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets), + // AvailabilityZones: []*string{to.Ptr("1")}, + Count: to.Ptr[int32](AuxilaryNodeCount), + EnableNodePublicIP: to.Ptr(false), + Mode: to.Ptr(armcontainerservice.AgentPoolModeUser), + OSType: to.Ptr(armcontainerservice.OSTypeWindows), + OSSKU: to.Ptr(armcontainerservice.OSSKUWindows2022), + ScaleDownMode: to.Ptr(armcontainerservice.ScaleDownModeDelete), + VMSize: to.Ptr(AgentSKU), + Name: to.Ptr("ws22"), + MaxPods: to.Ptr(int32(MaxPodsPerNode)), + }) + + //nolint:appendCombine // separate for verbosity + npmCluster.Properties.AgentPoolProfiles = append(npmCluster.Properties.AgentPoolProfiles, &armcontainerservice.ManagedClusterAgentPoolProfile{ + Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets), + AvailabilityZones: []*string{to.Ptr("1")}, + Count: to.Ptr[int32](AuxilaryNodeCount), + EnableNodePublicIP: to.Ptr(false), + EnableFIPS: to.Ptr(true), + Mode: to.Ptr(armcontainerservice.AgentPoolModeUser), + OSType: to.Ptr(armcontainerservice.OSTypeLinux), + OSSKU: to.Ptr(armcontainerservice.OSSKUAzureLinux), + ScaleDownMode: to.Ptr(armcontainerservice.ScaleDownModeDelete), + VMSize: to.Ptr(AgentSKU), + Name: to.Ptr("azlinux"), + MaxPods: to.Ptr(int32(MaxPodsPerNode)), + }) + + //nolint:appendCombine // separate for verbosity + npmCluster.Properties.AgentPoolProfiles = append(npmCluster.Properties.AgentPoolProfiles, &armcontainerservice.ManagedClusterAgentPoolProfile{ //nolint:all + Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets), + // AvailabilityZones: []*string{to.Ptr("1")}, + Count: 
to.Ptr[int32](AuxilaryARMNodeCount), + EnableNodePublicIP: to.Ptr(false), + Mode: to.Ptr(armcontainerservice.AgentPoolModeUser), + OSType: to.Ptr(armcontainerservice.OSTypeLinux), + ScaleDownMode: to.Ptr(armcontainerservice.ScaleDownModeDelete), + VMSize: to.Ptr(AgentARMSKU), + Name: to.Ptr("arm64"), + MaxPods: to.Ptr(int32(MaxPodsPerNode)), + }) + + npmCluster.Properties.AutoUpgradeProfile = &armcontainerservice.ManagedClusterAutoUpgradeProfile{ + NodeOSUpgradeChannel: to.Ptr(armcontainerservice.NodeOSUpgradeChannelNodeImage), + } + + if len(c.PublicIPs) > 0 { + publicIPIDs := make([]*armcontainerservice.ResourceReference, 0, len(c.PublicIPs)) + + for _, ipID := range c.PublicIPs { + slog.Info("adding public IP", "id", ipID) + publicIPIDs = append(publicIPIDs, &armcontainerservice.ResourceReference{ + ID: to.Ptr(ipID), + }) + } + + for _, ip := range c.PublicIPs { + slog.Info("public IP", "id", ip) + } + + if npmCluster.Properties.NetworkProfile.LoadBalancerProfile == nil { + npmCluster.Properties.NetworkProfile.LoadBalancerProfile = &armcontainerservice.ManagedClusterLoadBalancerProfile{ + OutboundIPs: &armcontainerservice.ManagedClusterLoadBalancerProfileOutboundIPs{ + PublicIPs: publicIPIDs, + }, + } + } + } + + // Deploy cluster + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx, cancel := context.WithTimeout(context.Background(), clusterTimeout) + defer cancel() + + clientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create az client: %w", err) + } + + slog.Info("when the cluster is ready, use the below command to access and debug") + slog.Info("az aks get-credentials", "resourceGroup", c.ResourceGroupName, "cluster", c.ClusterName, "subscription", c.SubscriptionID) + slog.Info("creating cluster", "cluster", c.ClusterName, "resourceGroup", c.ResourceGroupName) + + poller, err := 
clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, npmCluster, nil) + if err != nil { + return fmt.Errorf("failed to finish the create cluster request: %w", err) + } + + notifychan := make(chan struct{}) + go func() { + _, err = poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{ + Frequency: pollFrequency, + }) + if err != nil { + slog.Error("failed to create cluster", "error", err) + } else { + slog.Info("cluster is ready", "cluster", c.ClusterName) + } + close(notifychan) + }() + + ticker := time.NewTicker(clusterCreateTicker) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("failed to create cluster: %w", ctx.Err()) + case <-ticker.C: + slog.Info("waiting for cluster to be ready", "cluster", c.ClusterName) + case <-notifychan: + if err != nil { + return fmt.Errorf("received notification, failed to create cluster: %w", err) + } + return nil + } + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster.go b/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster.go new file mode 100644 index 0000000000..8c0a825690 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/create-cluster.go @@ -0,0 +1,149 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" +) + +const ( + MaxNumberOfNodes = 3 + MaxPodsPerNode = 250 + AgentSKU = "Standard_DS4_v2" +) + +var defaultClusterCreateTimeout = 30 * time.Minute + +type CreateCluster struct { + SubscriptionID string + ResourceGroupName string + Location string + ClusterName string + podCidr string + vmSize string + networkPluginMode string + Nodes int32 +} + +func (c *CreateCluster) SetPodCidr(podCidr string) *CreateCluster { + c.podCidr = podCidr + return c +} + 
+func (c *CreateCluster) SetVMSize(vmSize string) *CreateCluster { + c.vmSize = vmSize + return c +} + +func (c *CreateCluster) SetNetworkPluginMode(networkPluginMode string) *CreateCluster { + c.networkPluginMode = networkPluginMode + return c +} + +func (c *CreateCluster) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.TODO() + clientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + if c.Nodes == 0 { + c.Nodes = MaxNumberOfNodes + } + + template := GetStarterClusterTemplate(c.Location) + + if c.Nodes > 0 { + template.Properties.AgentPoolProfiles[0].Count = to.Ptr(c.Nodes) + } + + if c.podCidr != "" { + template.Properties.NetworkProfile.PodCidr = to.Ptr(c.podCidr) + } + + if c.vmSize != "" { + template.Properties.AgentPoolProfiles[0].VMSize = to.Ptr(c.vmSize) + } + + if c.networkPluginMode != "" { + template.Properties.NetworkProfile.NetworkPluginMode = to.Ptr(armcontainerservice.NetworkPluginMode(c.networkPluginMode)) + } + + slog.Info("creating cluster", "cluster", c.ClusterName, "location", c.Location) + poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, template, nil) + if err != nil { + return fmt.Errorf("failed to finish the create cluster request: %w", err) + } + _, err = poller.PollUntilDone(ctx, nil) + if err != nil { + return fmt.Errorf("failed to pull the create cluster result: %w", err) + } + slog.Info("cluster created", "cluster", c.ClusterName, "location", c.Location) + + return nil +} + +func GetStarterClusterTemplate(location string) armcontainerservice.ManagedCluster { + id := armcontainerservice.ResourceIdentityTypeSystemAssigned + return armcontainerservice.ManagedCluster{ + Location: to.Ptr(location), + Tags: map[string]*string{ + 
"archv2": to.Ptr(""), + "tier": to.Ptr("production"), + }, + Properties: &armcontainerservice.ManagedClusterProperties{ + AddonProfiles: map[string]*armcontainerservice.ManagedClusterAddonProfile{}, + /* Moving this to a separate stage to enable AMA since it takes some time to provision + AzureMonitorProfile: &armcontainerservice.ManagedClusterAzureMonitorProfile{ + Metrics: &armcontainerservice.ManagedClusterAzureMonitorProfileMetrics{ + Enabled: to.Ptr(true), + }, + }, + */ + AgentPoolProfiles: []*armcontainerservice.ManagedClusterAgentPoolProfile{ + { + Type: to.Ptr(armcontainerservice.AgentPoolTypeVirtualMachineScaleSets), + // AvailabilityZones: []*string{to.Ptr("1")}, + Count: to.Ptr[int32](MaxNumberOfNodes), + EnableNodePublicIP: to.Ptr(false), + Mode: to.Ptr(armcontainerservice.AgentPoolModeSystem), + OSType: to.Ptr(armcontainerservice.OSTypeLinux), + ScaleDownMode: to.Ptr(armcontainerservice.ScaleDownModeDelete), + VMSize: to.Ptr(AgentSKU), + Name: to.Ptr("nodepool1"), + MaxPods: to.Ptr(int32(MaxPodsPerNode)), + }, + }, + KubernetesVersion: to.Ptr(""), + DNSPrefix: to.Ptr("dnsprefix1"), + EnablePodSecurityPolicy: to.Ptr(false), + EnableRBAC: to.Ptr(true), + LinuxProfile: nil, + NetworkProfile: &armcontainerservice.NetworkProfile{ + LoadBalancerSKU: to.Ptr(armcontainerservice.LoadBalancerSKUStandard), + OutboundType: to.Ptr(armcontainerservice.OutboundTypeLoadBalancer), + NetworkPlugin: to.Ptr(armcontainerservice.NetworkPluginAzure), + }, + WindowsProfile: &armcontainerservice.ManagedClusterWindowsProfile{ + AdminPassword: to.Ptr("replacePassword1234$"), + AdminUsername: to.Ptr("azureuser"), + }, + }, + Identity: &armcontainerservice.ManagedClusterIdentity{ + Type: &id, + }, + + SKU: &armcontainerservice.ManagedClusterSKU{ + Name: to.Ptr(armcontainerservice.ManagedClusterSKUName("Base")), + Tier: to.Ptr(armcontainerservice.ManagedClusterSKUTierStandard), + }, + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/create-public-ip.go 
b/test/e2ev3/pkg/infra/providers/azure/legacy/create-public-ip.go new file mode 100644 index 0000000000..7cb639d70a --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/create-public-ip.go @@ -0,0 +1,101 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork" +) + +type CreatePublicIP struct { + SubscriptionID string + ResourceGroupName string + Location string + ClusterName string + IPVersion string + IPPrefix string +} + +func (c *CreatePublicIP) Do(_ context.Context) error { + cred, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), clusterTimeout) + defer cancel() + + publicIPClient, err := armnetwork.NewPublicIPAddressesClient(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("%w: failed to create public IP client", err) + } + + publicIPParams := armnetwork.PublicIPAddress{ + Location: to.Ptr(c.Location), + SKU: &armnetwork.PublicIPAddressSKU{ + Name: to.Ptr(armnetwork.PublicIPAddressSKUNameStandard), + Tier: to.Ptr(armnetwork.PublicIPAddressSKUTierRegional), + }, + Properties: &armnetwork.PublicIPAddressPropertiesFormat{ + PublicIPAllocationMethod: to.Ptr(armnetwork.IPAllocationMethodStatic), + PublicIPAddressVersion: to.Ptr(armnetwork.IPVersion(c.IPVersion)), + IPTags: []*armnetwork.IPTag{ + { + IPTagType: to.Ptr("FirstPartyUsage"), + Tag: to.Ptr("/NonProd"), + }, + }, + }, + } + + var version string + switch c.IPVersion { + case string(armnetwork.IPVersionIPv4): + version = "v4" + case string(armnetwork.IPVersionIPv6): + version = "v6" + default: + return fmt.Errorf("%w: invalid IP version: %s", err, c.IPVersion) + } + + ipName := 
fmt.Sprintf("%s-%s-%s", c.IPPrefix, c.ClusterName, version) + + poller, err := publicIPClient.BeginCreateOrUpdate(ctx, c.ResourceGroupName, ipName, publicIPParams, nil) + if err != nil { + return fmt.Errorf("%w: failed to create public IP address", err) + } + + notifychan := make(chan struct{}) + go func() { + _, err = poller.PollUntilDone(ctx, &runtime.PollUntilDoneOptions{ + Frequency: 5 * time.Second, + }) + if err != nil { + slog.Error("failed to create public IP", "name", ipName, "error", err) + } else { + slog.Info("public IP created", "name", ipName) + } + close(notifychan) + }() + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("failed to create Public IP: %w", ctx.Err()) + case <-ticker.C: + slog.Info("waiting for public IP to be ready", "name", ipName) + case <-notifychan: + if err != nil { + return fmt.Errorf("received notification, failed to create public IP address: %w", err) + } + return nil + } + } +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/create-rg.go b/test/e2ev3/pkg/infra/providers/azure/legacy/create-rg.go new file mode 100644 index 0000000000..9171a8e37d --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/create-rg.go @@ -0,0 +1,40 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" +) + +type CreateResourceGroup struct { + SubscriptionID string + ResourceGroupName string + Location string +} + +func (c *CreateResourceGroup) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armresources.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return 
fmt.Errorf("failed to create resource group client: %w", err) + } + slog.Info("creating resource group", "resourceGroup", c.ResourceGroupName, "location", c.Location) + + _, err = clientFactory.NewResourceGroupsClient().CreateOrUpdate(ctx, c.ResourceGroupName, armresources.ResourceGroup{ + Location: to.Ptr(c.Location), + }, nil) + if err != nil { + return fmt.Errorf("failed to finish the request: %w", err) + } + + slog.Info("resource group created", "resourceGroup", c.ResourceGroupName, "location", c.Location) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/create-vnet.go b/test/e2ev3/pkg/infra/providers/azure/legacy/create-vnet.go new file mode 100644 index 0000000000..6c743e146a --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/create-vnet.go @@ -0,0 +1,94 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armnetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v5" +) + +const FlowTimeoutInMinutes = 10 + +type CreateVNet struct { + SubscriptionID string + ResourceGroupName string + Location string + VnetName string + VnetAddressSpace string +} + +func (c *CreateVNet) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armnetwork.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + slog.Info("creating vnet", "vnet", c.VnetName, "resourceGroup", c.ResourceGroupName) + + poller, err := clientFactory.NewVirtualNetworksClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.VnetName, armnetwork.VirtualNetwork{ + Location: to.Ptr(c.Location), + Properties: &armnetwork.VirtualNetworkPropertiesFormat{ + AddressSpace: &armnetwork.AddressSpace{ + 
AddressPrefixes: []*string{ + to.Ptr(c.VnetAddressSpace), + }, + }, + FlowTimeoutInMinutes: to.Ptr[int32](FlowTimeoutInMinutes), + }, + }, nil) + if err != nil { + return fmt.Errorf("failed to finish the request for create vnet: %w", err) + } + + _, err = poller.PollUntilDone(ctx, nil) + if err != nil { + return fmt.Errorf("failed to pull the result for create vnet: %w", err) + } + return nil +} + +type CreateSubnet struct { + SubscriptionID string + ResourceGroupName string + Location string + VnetName string + SubnetName string + SubnetAddressSpace string +} + +func (c *CreateSubnet) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armnetwork.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + slog.Info("creating subnet", "subnet", c.SubnetName, "vnet", c.VnetName, "resourceGroup", c.ResourceGroupName) + + poller, err := clientFactory.NewSubnetsClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.VnetName, c.SubnetName, armnetwork.Subnet{ + Properties: &armnetwork.SubnetPropertiesFormat{ + AddressPrefix: to.Ptr(c.SubnetAddressSpace), + }, + }, nil) + if err != nil { + return fmt.Errorf("failed to finish the request for create subnet: %w", err) + } + + _, err = poller.PollUntilDone(ctx, nil) + if err != nil { + return fmt.Errorf("failed to pull the result for create subnet: %w", err) + } + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/delete-cluster.go b/test/e2ev3/pkg/infra/providers/azure/legacy/delete-cluster.go new file mode 100644 index 0000000000..9be15315d6 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/delete-cluster.go @@ -0,0 +1,40 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + 
armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" +) + +type DeleteCluster struct { + ClusterName string + SubscriptionID string + ResourceGroupName string + Location string +} + +func (d *DeleteCluster) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armcontainerservice.NewClientFactory(d.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + slog.Info("deleting cluster", "cluster", d.ClusterName, "resourceGroup", d.ResourceGroupName) + poller, err := clientFactory.NewManagedClustersClient().BeginDelete(ctx, d.ResourceGroupName, d.ClusterName, nil) + if err != nil { + return fmt.Errorf("failed to finish the request: %w", err) + } + _, err = poller.PollUntilDone(ctx, nil) + if err != nil { + return fmt.Errorf("failed to pull the result: %w", err) + } + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/delete-rg.go b/test/e2ev3/pkg/infra/providers/azure/legacy/delete-rg.go new file mode 100644 index 0000000000..2921fbb5ff --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/delete-rg.go @@ -0,0 +1,38 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" +) + +type DeleteResourceGroup struct { + SubscriptionID string + ResourceGroupName string + Location string +} + +func (d *DeleteResourceGroup) Do(_ context.Context) error { + slog.Info("deleting resource group", "resourceGroup", d.ResourceGroupName) + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := 
context.Background() + clientFactory, err := armresources.NewClientFactory(d.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create resource group client: %w", err) + } + forceDeleteType := "Microsoft.Compute/virtualMachines,Microsoft.Compute/virtualMachineScaleSets" + _, err = clientFactory.NewResourceGroupsClient().BeginDelete(ctx, d.ResourceGroupName, &armresources.ResourceGroupsClientBeginDeleteOptions{ForceDeletionTypes: to.Ptr(forceDeleteType)}) + if err != nil { + return fmt.Errorf("failed to finish the delete resource group request: %w", err) + } + + slog.Info("resource group deleted successfully", "resourceGroup", d.ResourceGroupName) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/enable-ama.go b/test/e2ev3/pkg/infra/providers/azure/legacy/enable-ama.go new file mode 100644 index 0000000000..44cf44ada1 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/enable-ama.go @@ -0,0 +1,109 @@ +package legacy + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/dashboard/armdashboard" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/monitor/armmonitor" +) + +const fileperms = 0o600 + +type CreateAzureMonitor struct { + SubscriptionID string + ResourceGroupName string + Location string + ClusterName string +} + +func (c *CreateAzureMonitor) Do(_ context.Context) error { + slog.Info(`this will deploy azure monitor workspace and grafana, but as of 1/9/2024, the api docs don't show how to do +az aks update --enable-azure-monitor-metrics \ +-n $NAME \ +-g $CLUSTER_RESOURCE_GROUP \ +--azure-monitor-workspace-resource-id $AZMON_RESOURCE_ID \ +--grafana-resource-id $GRAFANA_RESOURCE_ID +`) + + cred, err := 
azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + + ctx := context.Background() + amaClientFactory, err := armmonitor.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create azure monitor workspace client: %w", err) + } + slog.Info("creating resource group", "resourceGroup", c.ResourceGroupName, "location", c.Location) + + // create azure monitor + _, err = amaClientFactory.NewAzureMonitorWorkspacesClient().Create(ctx, c.ResourceGroupName, "test", armmonitor.AzureMonitorWorkspaceResource{ + Location: &c.Location, + }, &armmonitor.AzureMonitorWorkspacesClientCreateOptions{}) + if err != nil { + return fmt.Errorf("failed to azure monitor workspace: %w", err) + } + + // Create grafana + + granafaClientFactory, err := armdashboard.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create grafana client: %w", err) + } + + _, err = granafaClientFactory.NewGrafanaClient().BeginCreate(ctx, c.ResourceGroupName, "test", armdashboard.ManagedGrafana{}, &armdashboard.GrafanaClientBeginCreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create grafana: %w", err) + } + + slog.Info("azure monitor workspace created", "resourceGroup", c.ResourceGroupName, "location", c.Location) + + // update aks cluster + + ctx, cancel := context.WithTimeout(context.Background(), defaultClusterCreateTimeout) + defer cancel() + aksClientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + + cluster, err := aksClientFactory.NewManagedClustersClient().Get(ctx, c.ResourceGroupName, c.ClusterName, nil) + if err != nil { + return fmt.Errorf("failed to get cluster to enable AMA: %w", err) + } + + // enable Azure Monitor Metrics + cluster.Properties.AzureMonitorProfile.Metrics.Enabled = to.Ptr(true) + + // Marshal the struct 
into a JSON byte array with indentation + jsonData, err := json.MarshalIndent(cluster, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal cluster to JSON for AMA: %w", err) + } + + // Write the JSON data to a file + err = os.WriteFile("cluster.json", jsonData, fileperms) + if err != nil { + return fmt.Errorf("failed to write cluster JSON to file for AMA: %w", err) + } + + poller, err := aksClientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location), nil) + if err != nil { + return fmt.Errorf("failed to finish the update cluster request for AMA: %w", err) + } + + _, err = poller.PollUntilDone(ctx, nil) + if err != nil { + return fmt.Errorf("failed to enable AMA on cluster %s: %w", *cluster.Name, err) + } + + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/get-fqdn.go b/test/e2ev3/pkg/infra/providers/azure/legacy/get-fqdn.go new file mode 100644 index 0000000000..4cfc8613a4 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/get-fqdn.go @@ -0,0 +1,27 @@ +package legacy + +import ( + "context" + "fmt" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" +) + +func GetFqdnFn(subscriptionId, resourceGroupName, clusterName string) (string, error) { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return "", fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armcontainerservice.NewClientFactory(subscriptionId, cred, nil) + if err != nil { + return "", fmt.Errorf("failed to create client: %w", err) + } + res, err := clientFactory.NewManagedClustersClient().Get(ctx, resourceGroupName, clusterName, nil) + if err != nil { + return "", fmt.Errorf("failed to finish the get managed cluster client request: %w", err) + } + + return *res.Properties.Fqdn, 
nil +} diff --git a/test/e2ev3/pkg/infra/providers/azure/legacy/get-kubeconfig.go b/test/e2ev3/pkg/infra/providers/azure/legacy/get-kubeconfig.go new file mode 100644 index 0000000000..8dc80f0188 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/azure/legacy/get-kubeconfig.go @@ -0,0 +1,45 @@ +package legacy + +import ( + "context" + "fmt" + "log/slog" + "os" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + armcontainerservice "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v4" +) + +const KubeConfigPerms = 0o600 + +type GetAKSKubeConfig struct { + ClusterName string + SubscriptionID string + ResourceGroupName string + Location string + KubeConfigFilePath string +} + +func (c *GetAKSKubeConfig) Do(_ context.Context) error { + cred, err := azidentity.NewAzureCLICredential(nil) + if err != nil { + return fmt.Errorf("failed to obtain a credential: %w", err) + } + ctx := context.Background() + clientFactory, err := armcontainerservice.NewClientFactory(c.SubscriptionID, cred, nil) + if err != nil { + return fmt.Errorf("failed to create client: %w", err) + } + res, err := clientFactory.NewManagedClustersClient().ListClusterUserCredentials(ctx, c.ResourceGroupName, c.ClusterName, nil) + if err != nil { + return fmt.Errorf("failed to finish the get managed cluster client request: %w", err) + } + + err = os.WriteFile(c.KubeConfigFilePath, []byte(res.Kubeconfigs[0].Value), KubeConfigPerms) + if err != nil { + return fmt.Errorf("failed to write kubeconfig to file \"%s\": %w", c.KubeConfigFilePath, err) + } + + slog.Info("kubeconfig written", "cluster", c.ClusterName, "resourceGroup", c.ResourceGroupName, "path", c.KubeConfigFilePath) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/kind/cluster.go b/test/e2ev3/pkg/infra/providers/kind/cluster.go new file mode 100644 index 0000000000..7304ff56dc --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/cluster.go @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft 
Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "context" + "fmt" + "log/slog" + "os/exec" + + "k8s.io/client-go/rest" +) + +// Cluster is a ClusterProvider for Kind (Kubernetes in Docker) clusters. +// Images are loaded directly onto cluster nodes via `kind load docker-image`. +type Cluster struct { + Name string + KubeCfgPath string + RC *rest.Config +} + +func (k *Cluster) ClusterName() string { return k.Name } +func (k *Cluster) KubeConfigPath() string { return k.KubeCfgPath } +func (k *Cluster) RestConfig() *rest.Config { return k.RC } + +func (k *Cluster) LoadImages(ctx context.Context, images []string) error { + for _, image := range images { + slog.Info("loading image onto kind cluster", "image", image, "cluster", k.Name) + args := []string{"load", "docker-image", "--name", k.Name, image} + cmd := exec.CommandContext(ctx, "kind", args...) + cmdOut := &slogWriter{level: slog.LevelInfo, source: "kind-load"} + cmd.Stdout = cmdOut + cmd.Stderr = cmdOut + if err := cmd.Run(); err != nil { + return fmt.Errorf("kind load docker-image %s: %w", image, err) + } + cmdOut.Flush() + } + return nil +} + +func (k *Cluster) ImagePullPolicy() string { return "IfNotPresent" } +func (k *Cluster) ImagePullSecrets() []map[string]interface{} { return nil } diff --git a/test/e2ev3/pkg/infra/providers/kind/config.go b/test/e2ev3/pkg/infra/providers/kind/config.go new file mode 100644 index 0000000000..83e599de29 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/config.go @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "fmt" + "os/user" + "time" + + "sigs.k8s.io/kind/pkg/apis/config/v1alpha4" +) + +// Config defines the configuration for a Kind cluster used in e2e tests. +type Config struct { + ClusterName string + NodeImage string + WaitForReady time.Duration + + // V1Alpha4Config is the native Kind cluster configuration. 
+ // If nil, a default single-node cluster is used. + V1Alpha4Config *v1alpha4.Cluster +} + +// DefaultE2EKindConfig returns the standard Kind cluster configuration for e2e testing. +func DefaultE2EKindConfig(clusterName string) *Config { + if clusterName == "" { + clusterName = defaultClusterName() + } + + return &Config{ + ClusterName: clusterName, + WaitForReady: defaultWaitForReady, + V1Alpha4Config: &v1alpha4.Cluster{ + Nodes: []v1alpha4.Node{ + {Role: v1alpha4.ControlPlaneRole}, + {Role: v1alpha4.WorkerRole}, + }, + }, + } +} + +const defaultWaitForReady = 5 * time.Minute + +func defaultClusterName() string { + name := "retina-e2e" + u, err := user.Current() + if err == nil && u.Username != "" { + username := u.Username + if len(username) > 8 { + username = username[:8] + } + name = fmt.Sprintf("retina-e2e-%s", username) + } + return name +} diff --git a/test/e2ev3/pkg/infra/providers/kind/create.go b/test/e2ev3/pkg/infra/providers/kind/create.go new file mode 100644 index 0000000000..d697870dec --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/create.go @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "context" + "fmt" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "sigs.k8s.io/kind/pkg/cluster" +) + +// CreateCluster is a go-workflow step that creates a Kind cluster +// using the native Kind Go SDK. 
+type CreateCluster struct { + Config *Config +} + +func (c *CreateCluster) String() string { return "create-kind-cluster" } + +func (c *CreateCluster) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, c) + provider := cluster.NewProvider() + + clusters, err := provider.List() + if err != nil { + return fmt.Errorf("listing Kind clusters: %w", err) + } + for _, name := range clusters { + if name == c.Config.ClusterName { + log.Info("Kind cluster already exists, skipping creation", "cluster", c.Config.ClusterName) + return nil + } + } + + log.Info("creating Kind cluster", "cluster", c.Config.ClusterName) + + opts := []cluster.CreateOption{ + cluster.CreateWithWaitForReady(c.Config.WaitForReady), + cluster.CreateWithDisplayUsage(false), + cluster.CreateWithDisplaySalutation(false), + } + + if c.Config.NodeImage != "" { + opts = append(opts, cluster.CreateWithNodeImage(c.Config.NodeImage)) + } + + if c.Config.V1Alpha4Config != nil { + opts = append(opts, cluster.CreateWithV1Alpha4Config(c.Config.V1Alpha4Config)) + } + + if err := provider.Create(c.Config.ClusterName, opts...); err != nil { + return fmt.Errorf("failed to create Kind cluster %q: %w", c.Config.ClusterName, err) + } + + log.Info("Kind cluster created successfully", "cluster", c.Config.ClusterName) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/kind/delete.go b/test/e2ev3/pkg/infra/providers/kind/delete.go new file mode 100644 index 0000000000..535fb0ed1d --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/delete.go @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "context" + "fmt" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "sigs.k8s.io/kind/pkg/cluster" +) + +// DeleteCluster is a go-workflow step that deletes a Kind cluster +// using the native Kind Go SDK. 
+type DeleteCluster struct { + ClusterName string + KubeConfigFilePath string +} + +func (d *DeleteCluster) String() string { return "delete-kind-cluster" } + +func (d *DeleteCluster) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, d) + log.Info("deleting Kind cluster", "cluster", d.ClusterName) + + provider := cluster.NewProvider() + + if err := provider.Delete(d.ClusterName, d.KubeConfigFilePath); err != nil { + return fmt.Errorf("failed to delete Kind cluster %q: %w", d.ClusterName, err) + } + + log.Info("Kind cluster deleted successfully", "cluster", d.ClusterName) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/kind/install_npm.go b/test/e2ev3/pkg/infra/providers/kind/install_npm.go new file mode 100644 index 0000000000..d6fab8ebfb --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/install_npm.go @@ -0,0 +1,58 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "context" + "fmt" + "log/slog" + "os" + "os/exec" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +const npmManifestURL = "https://raw.githubusercontent.com/Azure/azure-container-networking/master/npm/azure-npm.yaml" + +// InstallNPM applies Azure Network Policy Manager to enable NetworkPolicy +// enforcement on Kind clusters. 
+type InstallNPM struct { + KubeConfigFilePath string +} + +func (n *InstallNPM) String() string { return "install-azure-npm" } + +func (n *InstallNPM) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, n) + log.Info("installing Azure NPM for NetworkPolicy enforcement") + cmd := exec.CommandContext(ctx, "kubectl", "apply", "-f", npmManifestURL) + if n.KubeConfigFilePath != "" { + cmd.Env = append(os.Environ(), "KUBECONFIG="+n.KubeConfigFilePath) + } + cmdOut := &slogWriter{level: slog.LevelInfo, source: "kubectl-apply"} + cmd.Stdout = cmdOut + cmd.Stderr = cmdOut + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to install Azure NPM: %w", err) + } + cmdOut.Flush() + + // Wait for the DaemonSet to be ready. + log.Info("waiting for Azure NPM DaemonSet to be ready") + waitCmd := exec.CommandContext(ctx, "kubectl", "rollout", "status", "daemonset/azure-npm", + "-n", "kube-system", "--timeout=120s") + if n.KubeConfigFilePath != "" { + waitCmd.Env = append(os.Environ(), "KUBECONFIG="+n.KubeConfigFilePath) + } + waitOut := &slogWriter{level: slog.LevelInfo, source: "kubectl-rollout"} + waitCmd.Stdout = waitOut + waitCmd.Stderr = waitOut + if err := waitCmd.Run(); err != nil { + return fmt.Errorf("Azure NPM DaemonSet not ready: %w", err) + } + waitOut.Flush() + + log.Info("Azure NPM installed successfully") + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/kind/kubeconfig.go b/test/e2ev3/pkg/infra/providers/kind/kubeconfig.go new file mode 100644 index 0000000000..1c52b58455 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/kubeconfig.go @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package kind + +import ( + "context" + "fmt" + "os" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "sigs.k8s.io/kind/pkg/cluster" +) + +const kubeConfigPerms = 0o600 + +// ExportKubeConfig is a go-workflow step that exports the kubeconfig +// for a Kind cluster to a file using the native Kind Go SDK. +type ExportKubeConfig struct { + ClusterName string + KubeConfigFilePath string +} + +func (e *ExportKubeConfig) String() string { return "export-kind-kubeconfig" } + +func (e *ExportKubeConfig) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, e) + log.Info("exporting kubeconfig for Kind cluster", "cluster", e.ClusterName, "path", e.KubeConfigFilePath) + + provider := cluster.NewProvider() + + kubeConfig, err := provider.KubeConfig(e.ClusterName, false) + if err != nil { + return fmt.Errorf("failed to get kubeconfig for Kind cluster %q: %w", e.ClusterName, err) + } + + if err := os.WriteFile(e.KubeConfigFilePath, []byte(kubeConfig), kubeConfigPerms); err != nil { + return fmt.Errorf("failed to write kubeconfig to %q: %w", e.KubeConfigFilePath, err) + } + + log.Info("kubeconfig for Kind cluster written", "cluster", e.ClusterName, "path", e.KubeConfigFilePath) + return nil +} diff --git a/test/e2ev3/pkg/infra/providers/kind/slog_writer.go b/test/e2ev3/pkg/infra/providers/kind/slog_writer.go new file mode 100644 index 0000000000..3964dd7b43 --- /dev/null +++ b/test/e2ev3/pkg/infra/providers/kind/slog_writer.go @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package kind + +import ( + "bytes" + "context" + "log/slog" +) + +// slogWriter is an io.Writer that logs each complete line through slog at the given level. +// Partial lines are buffered until a newline is received. +type slogWriter struct { + level slog.Level + source string + buf []byte +} + +func (w *slogWriter) Write(p []byte) (int, error) { + w.buf = append(w.buf, p...) 
+ for { + idx := bytes.IndexByte(w.buf, '\n') + if idx < 0 { + break + } + line := string(bytes.TrimRight(w.buf[:idx], "\r")) + w.buf = w.buf[idx+1:] + if line != "" { + slog.Log(context.Background(), w.level, line, "source", w.source) + } + } + return len(p), nil +} + +// Flush logs any remaining buffered content not terminated by a newline. +func (w *slogWriter) Flush() { + if len(w.buf) > 0 { + line := string(bytes.TrimRight(w.buf, "\r\n")) + if line != "" { + slog.Log(context.Background(), w.level, line, "source", w.source) + } + w.buf = nil + } +} diff --git a/test/e2ev3/pkg/infra/step.go b/test/e2ev3/pkg/infra/step.go new file mode 100644 index 0000000000..71c2bc878a --- /dev/null +++ b/test/e2ev3/pkg/infra/step.go @@ -0,0 +1,71 @@ +package infra + +import ( + "context" + "fmt" + "testing" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/azure" + "github.com/microsoft/retina/test/e2ev3/pkg/infra/providers/kind" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +// Workflow provisions a cluster via the configured provider. 
+type Workflow struct { + Cfg *config.E2EConfig + T *testing.T +} + +func (s *Workflow) Do(ctx context.Context) error { + p := s.Cfg + kubeCfgPath := p.Cluster.KubeConfigPath() + ctx, _ = utils.StepLogger(ctx, s) + + if *config.KubeConfig != "" { + rc, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath) + if err != nil { + return fmt.Errorf("build rest config: %w", err) + } + setRestConfig(p.Cluster, rc) + return nil + } + + var steps []flow.Steper + switch *config.Provider { + case "kind": + kc := p.Cluster.(*kind.Cluster) + kindCfg := kind.DefaultE2EKindConfig(kc.Name) + kc.Name = kindCfg.ClusterName + steps = KindSteps(s.T, kindCfg, kubeCfgPath, *config.CreateInfra, *config.DeleteInfra) + default: + ac := p.Cluster.(*azure.Cluster) + infraCfg := ResolveInfraConfig(s.T, ac) + steps = AzureSteps(s.T, infraCfg, kubeCfgPath, *config.CreateInfra, *config.DeleteInfra) + } + + inner := new(flow.Workflow) + inner.Add(flow.Pipe(steps...)) + if err := inner.Do(ctx); err != nil { + return err + } + + rc, err := clientcmd.BuildConfigFromFlags("", kubeCfgPath) + if err != nil { + return fmt.Errorf("build rest config: %w", err) + } + setRestConfig(p.Cluster, rc) + return nil +} + +func setRestConfig(c config.ClusterProvider, rc *rest.Config) { + switch t := c.(type) { + case *kind.Cluster: + t.RC = rc + case *azure.Cluster: + t.RC = rc + } +} diff --git a/test/e2ev3/pkg/kubernetes/check-pod-status.go b/test/e2ev3/pkg/kubernetes/check-pod-status.go new file mode 100644 index 0000000000..8d64fc3c41 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/check-pod-status.go @@ -0,0 +1,105 @@ +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + RetryTimeoutPodsReady = 5 * time.Minute + RetryIntervalPodsReady = 5 * time.Second + + printInterval = 5 // print to stdout every 5 
iterations +) + +type WaitPodsReady struct { + RestConfig *rest.Config + Namespace string + LabelSelector string +} + +func (w *WaitPodsReady) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(w.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + return WaitForPodReady(ctx, clientset, w.Namespace, w.LabelSelector) +} + +func WaitForPodReady(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error { + log := slog.Default() + + printIterator := 0 + conditionFunc := wait.ConditionWithContextFunc(func(context.Context) (bool, error) { + defer func() { + printIterator++ + }() + var podList *corev1.PodList + podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + if err != nil { + return false, fmt.Errorf("error listing Pods: %w", err) + } + + if len(podList.Items) == 0 { + log.Info("no pods found", "namespace", namespace, "label", labelSelector) + return false, nil + } + + // check each individual pod to see if it's in Running state + for i := range podList.Items { + + // Check the Pod phase + if podList.Items[i].Status.Phase != corev1.PodRunning { + if printIterator%printInterval == 0 { + log.Info("pod not ready, waiting", "pod", podList.Items[i].Name) + } + return false, nil + } + + // Check all container status. 
+ for j := range podList.Items[i].Status.ContainerStatuses { + if !podList.Items[i].Status.ContainerStatuses[j].Ready { + log.Info("container not ready, waiting", "container", podList.Items[i].Status.ContainerStatuses[j].Name, "pod", podList.Items[i].Name) + return false, nil + } + } + + } + log.Info("all pods running", "namespace", namespace, "label", labelSelector) + return true, nil + }) + + err := wait.PollUntilContextCancel(ctx, RetryIntervalPodsReady, true, conditionFunc) + if err != nil { + PrintPodLogs(ctx, clientset, namespace, labelSelector) + return fmt.Errorf("error waiting for pods in namespace \"%s\" with label \"%s\" to be in Running state: %w", namespace, labelSelector, err) + } + return nil +} + +func CheckContainerRestart(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) error { + var podList *corev1.PodList + podList, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) + if err != nil { + return fmt.Errorf("error listing Pods: %w", err) + } + + for _, pod := range podList.Items { + for istatus := range pod.Status.ContainerStatuses { + status := &pod.Status.ContainerStatuses[istatus] + if status.RestartCount > 0 { + return fmt.Errorf("pod %s has %d container restarts: status: %+v: %w", pod.Name, status.RestartCount, status, ErrPodCrashed) + } + } + } + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/create-agnhost-statefulset.go b/test/e2ev3/pkg/kubernetes/create-agnhost-statefulset.go new file mode 100644 index 0000000000..c32d035d87 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/create-agnhost-statefulset.go @@ -0,0 +1,184 @@ +package kubernetes + +import ( + "context" + "fmt" + "strconv" + "time" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +var ErrLabelMissingFromPod = fmt.Errorf("label 
missing from pod") + +const ( + AgnhostHTTPPort = 80 + AgnhostArchAmd64 = "amd64" + AgnhostArchArm64 = "arm64" +) + +type CreateAgnhostStatefulSet struct { + AgnhostName string + AgnhostNamespace string + ScheduleOnSameNode bool + RestConfig *rest.Config + AgnhostArch string + AgnhostReplicas *int +} + +func (c *CreateAgnhostStatefulSet) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(c.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + ctx, cancel := context.WithTimeout(ctx, defaultTimeoutSeconds*time.Second) + defer cancel() + + // set default arch to amd64 + if c.AgnhostArch == "" { + c.AgnhostArch = AgnhostArchAmd64 + } + + // set default replicas to 1 + replicas := 1 + if c.AgnhostReplicas != nil { + replicas = *c.AgnhostReplicas + } + + agnhostStatefulSet := c.getAgnhostDeployment(c.AgnhostArch, replicas) + + err = CreateResource(ctx, agnhostStatefulSet, clientset) + if err != nil { + return fmt.Errorf("error agnhost component: %w", err) + } + + selector, exists := agnhostStatefulSet.Spec.Selector.MatchLabels["app"] + if !exists { + return fmt.Errorf("missing label \"app=%s\" from agnhost statefulset: %w", c.AgnhostName, ErrLabelMissingFromPod) + } + + labelSelector := fmt.Sprintf("app=%s", selector) + err = WaitForPodReady(ctx, clientset, c.AgnhostNamespace, labelSelector) + if err != nil { + return fmt.Errorf("error waiting for agnhost pod to be ready: %w", err) + } + + return nil +} + +func (c *CreateAgnhostStatefulSet) getAgnhostDeployment(arch string, replicas int) *appsv1.StatefulSet { + if replicas < 1 { + replicas = 1 + } + reps := int32(replicas) //nolint:gosec // replicas controlled by test code + + var affinity *v1.Affinity + if c.ScheduleOnSameNode { + affinity = &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + 
MatchLabels: map[string]string{ + "k8s-app": "agnhost", + }, + }, + }, + }, + }, + } + } else { + affinity = &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + // prefer an even spread across the cluster to avoid scheduling on the same node + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: MaxAffinityWeight, + PodAffinityTerm: v1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "agnhost", + }, + }, + }, + }, + }, + }, + } + } + + return &appsv1.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + Kind: "StatefulSet", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: c.AgnhostName, + Namespace: c.AgnhostNamespace, + }, + Spec: appsv1.StatefulSetSpec{ + ServiceName: c.AgnhostName, + Replicas: &reps, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": c.AgnhostName, + "k8s-app": "agnhost", + }, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": c.AgnhostName, + "k8s-app": "agnhost", + }, + }, + + Spec: v1.PodSpec{ + Affinity: affinity, + NodeSelector: map[string]string{ + "kubernetes.io/os": "linux", + "kubernetes.io/arch": arch, + }, + Containers: []v1.Container{ + { + Name: c.AgnhostName, + Image: "registry.k8s.io/e2e-test-images/agnhost:2.40", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "memory": resource.MustParse("20Mi"), + }, + Limits: v1.ResourceList{ + "memory": resource.MustParse("20Mi"), + }, + }, + Command: []string{ + "/agnhost", + }, + Args: []string{ + "serve-hostname", + "--http", + "--port", + strconv.Itoa(AgnhostHTTPPort), + }, + + Ports: []v1.ContainerPort{ + { + ContainerPort: AgnhostHTTPPort, + }, + }, + Env: []v1.EnvVar{}, + }, + }, + }, + }, + }, + } +} diff --git a/test/e2ev3/pkg/kubernetes/create-kapinger-deployment.go b/test/e2ev3/pkg/kubernetes/create-kapinger-deployment.go new 
file mode 100644 index 0000000000..dba310d973 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/create-kapinger-deployment.go @@ -0,0 +1,247 @@ +package kubernetes + +import ( + "context" + "fmt" + "strconv" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/resource" + metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + KapingerHTTPPort = 8080 + KapingerTCPPort = 8085 + KapingerUDPPort = 8086 + MaxAffinityWeight = 100 +) + +type CreateKapingerDeployment struct { + KapingerNamespace string + KapingerReplicas string + RestConfig *rest.Config +} + +func (c *CreateKapingerDeployment) Do(ctx context.Context) error { + _, err := strconv.Atoi(c.KapingerReplicas) + if err != nil { + return fmt.Errorf("error converting replicas to int for Kapinger replicas: %w", err) + } + + clientset, err := kubernetes.NewForConfig(c.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + resources := []runtime.Object{ + c.GetKapingerService(), + c.GetKapingerServiceAccount(), + c.GetKapingerClusterRole(), + c.GetKapingerClusterRoleBinding(), + c.GetKapingerDeployment(), + } + + for i := range resources { + err = CreateResource(ctx, resources[i], clientset) + if err != nil { + return fmt.Errorf("error kapinger component: %w", err) + } + } + + return nil +} + +func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment { + replicas, err := strconv.ParseInt(c.KapingerReplicas, 10, 32) + if err != nil { + fmt.Println("Error converting replicas to int for Kapinger replicas: ", err) + return nil + } + reps := int32(replicas) + + return &appsv1.Deployment{ + TypeMeta: metaV1.TypeMeta{ + Kind: "Deployment", + APIVersion: "apps/v1", + }, + ObjectMeta: metaV1.ObjectMeta{ + Name: "kapinger", + Namespace: c.KapingerNamespace, + }, 
+ Spec: appsv1.DeploymentSpec{ + Replicas: &reps, + Selector: &metaV1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "kapinger", + }, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metaV1.ObjectMeta{ + Labels: map[string]string{ + "app": "kapinger", + "server": "good", + }, + }, + + Spec: v1.PodSpec{ + NodeSelector: map[string]string{ + "kubernetes.io/os": "linux", + }, + Affinity: &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + // prefer an even spread across the cluster to avoid scheduling on the same node + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: MaxAffinityWeight, + PodAffinityTerm: v1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metaV1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "kapinger", + }, + }, + }, + }, + }, + }, + }, + ServiceAccountName: "kapinger-sa", + Containers: []v1.Container{ + { + Name: "kapinger", + Image: "acnpublic.azurecr.io/kapinger:v0.0.23-9-g23ef222", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "memory": resource.MustParse("20Mi"), + }, + Limits: v1.ResourceList{ + "memory": resource.MustParse("40Mi"), + }, + }, + Ports: []v1.ContainerPort{ + { + ContainerPort: KapingerHTTPPort, + }, + }, + Env: []v1.EnvVar{ + { + Name: "GODEBUG", + Value: "netdns=go", + }, + { + Name: "TARGET_TYPE", + Value: "service", + }, + { + Name: "HTTP_PORT", + Value: strconv.Itoa(KapingerHTTPPort), + }, + { + Name: "TCP_PORT", + Value: strconv.Itoa(KapingerTCPPort), + }, + { + Name: "UDP_PORT", + Value: strconv.Itoa(KapingerUDPPort), + }, + }, + }, + }, + }, + }, + }, + } +} + +func (c *CreateKapingerDeployment) GetKapingerService() *v1.Service { + return &v1.Service{ + TypeMeta: metaV1.TypeMeta{ + Kind: "Service", + APIVersion: "v1", + }, + ObjectMeta: metaV1.ObjectMeta{ + Name: "kapinger-service", + Namespace: c.KapingerNamespace, + Labels: map[string]string{ + "app": "kapinger", + }, + }, + Spec: v1.ServiceSpec{ + 
Selector: map[string]string{ + "app": "kapinger", + }, + Ports: []v1.ServicePort{ + { + Port: KapingerHTTPPort, + Protocol: v1.ProtocolTCP, + TargetPort: intstr.FromInt(KapingerHTTPPort), + }, + }, + }, + } +} + +func (c *CreateKapingerDeployment) GetKapingerServiceAccount() *v1.ServiceAccount { + return &v1.ServiceAccount{ + TypeMeta: metaV1.TypeMeta{ + Kind: "ServiceAccount", + APIVersion: "v1", + }, + ObjectMeta: metaV1.ObjectMeta{ + Name: "kapinger-sa", + Namespace: c.KapingerNamespace, + }, + } +} + +func (c *CreateKapingerDeployment) GetKapingerClusterRole() *rbacv1.ClusterRole { + return &rbacv1.ClusterRole{ + TypeMeta: metaV1.TypeMeta{ + Kind: "ClusterRole", + APIVersion: "rbac.authorization.k8s.io/v1", + }, + ObjectMeta: metaV1.ObjectMeta{ + Name: "kapinger-role", + Namespace: c.KapingerNamespace, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"services", "pods"}, + Verbs: []string{"get", "list"}, + }, + }, + } +} + +func (c *CreateKapingerDeployment) GetKapingerClusterRoleBinding() *rbacv1.ClusterRoleBinding { + return &rbacv1.ClusterRoleBinding{ + TypeMeta: metaV1.TypeMeta{ + Kind: "ClusterRoleBinding", + APIVersion: "rbac.authorization.k8s.io/v1", + }, + ObjectMeta: metaV1.ObjectMeta{ + Name: "kapinger-rolebinding", + Namespace: c.KapingerNamespace, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "kapinger-sa", + Namespace: c.KapingerNamespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: "kapinger-role", + }, + } +} diff --git a/test/e2ev3/pkg/kubernetes/create-namespace.go b/test/e2ev3/pkg/kubernetes/create-namespace.go new file mode 100644 index 0000000000..8d068602c6 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/create-namespace.go @@ -0,0 +1,55 @@ +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type CreateNamespace struct { + Namespace string + RestConfig *rest.Config +} + +func (c *CreateNamespace) Do(ctx context.Context) error { + return CreateNamespaceFn(ctx, c.RestConfig, c.Namespace) +} + +func (c *CreateNamespace) getNamespace() *v1.Namespace { + return &v1.Namespace{ + TypeMeta: metav1.TypeMeta{ + Kind: "Namespace", + APIVersion: "v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: c.Namespace, + }, + } +} + +func CreateNamespaceFn(ctx context.Context, restConfig *rest.Config, namespace string) error { + log := slog.Default() + clientset, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + _, err = clientset.CoreV1().Namespaces().Create(ctx, &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + }, metav1.CreateOptions{}) + if err != nil && !errors.IsAlreadyExists(err) { + return fmt.Errorf("failed to create namespace \"%s\": %w", namespace, err) + } + + log.Info("namespace created", "namespace", namespace) + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/create-network-policy.go b/test/e2ev3/pkg/kubernetes/create-network-policy.go new file mode 100644 index 0000000000..9ad2db06fb --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/create-network-policy.go @@ -0,0 +1,82 @@ +package kubernetes + +import ( + "context" + "fmt" + "strings" + + networkingv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + Egress = "egress" + Ingress = "ingress" +) + +type CreateDenyAllNetworkPolicy struct { + NetworkPolicyNamespace string + RestConfig *rest.Config + DenyAllLabelSelector string +} + +func (c *CreateDenyAllNetworkPolicy) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(c.RestConfig) + if err != nil { + return fmt.Errorf("error 
creating Kubernetes client: %w", err) + } + + networkPolicy := getNetworkPolicy(c.NetworkPolicyNamespace, c.DenyAllLabelSelector) + err = CreateResource(ctx, networkPolicy, clientset) + if err != nil { + return fmt.Errorf("error creating simple deny-all network policy: %w", err) + } + + return nil +} + +func getNetworkPolicy(namespace, labelSelector string) *networkingv1.NetworkPolicy { + labelSelectorSlice := strings.Split(labelSelector, "=") + return &networkingv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deny-all", + Namespace: namespace, + }, + Spec: networkingv1.NetworkPolicySpec{ + PodSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + labelSelectorSlice[0]: labelSelectorSlice[1], + }, + }, + PolicyTypes: []networkingv1.PolicyType{ + networkingv1.PolicyTypeIngress, + networkingv1.PolicyTypeEgress, + }, + Egress: []networkingv1.NetworkPolicyEgressRule{}, + Ingress: []networkingv1.NetworkPolicyIngressRule{}, + }, + } +} + +type DeleteDenyAllNetworkPolicy struct { + NetworkPolicyNamespace string + RestConfig *rest.Config + DenyAllLabelSelector string +} + +func (d *DeleteDenyAllNetworkPolicy) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(d.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + networkPolicy := getNetworkPolicy(d.NetworkPolicyNamespace, d.DenyAllLabelSelector) + err = DeleteResource(ctx, networkPolicy, clientset) + if err != nil { + return fmt.Errorf("error creating simple deny-all network policy: %w", err) + } + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/create-resource.go b/test/e2ev3/pkg/kubernetes/create-resource.go new file mode 100644 index 0000000000..d1f9c03e2e --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/create-resource.go @@ -0,0 +1,225 @@ +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 
"k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" +) + +var ( + ErrUnknownResourceType = fmt.Errorf("unknown resource type") + ErrCreateNilResource = fmt.Errorf("cannot create nil resource") +) + +func CreateResource(ctx context.Context, obj runtime.Object, clientset *kubernetes.Clientset) error { //nolint:gocyclo //this is just boilerplate code + if obj == nil { + return ErrCreateNilResource + } + + switch o := obj.(type) { + case *appsv1.DaemonSet: + slog.Info("creating DaemonSet", "name", o.Name, "namespace", o.Namespace) + client := clientset.AppsV1().DaemonSets(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create DaemonSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update DaemonSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *appsv1.Deployment: + slog.Info("creating Deployment", "name", o.Name, "namespace", o.Namespace) + client := clientset.AppsV1().Deployments(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create Deployment \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update Deployment \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *appsv1.StatefulSet: + slog.Info("creating StatefulSet", "name", o.Name, "namespace", o.Namespace) + client := 
clientset.AppsV1().StatefulSets(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create StatefulSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update StatefulSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.Service: + slog.Info("creating Service", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().Services(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create Service \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update Service \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.ServiceAccount: + slog.Info("creating ServiceAccount", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().ServiceAccounts(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create ServiceAccount \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update ServiceAccount \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.Role: + slog.Info("creating Role", "name", o.Name, "namespace", o.Namespace) + client := clientset.RbacV1().Roles(o.Namespace) + _, err := client.Get(ctx, 
o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create Role \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update Role \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.RoleBinding: + slog.Info("creating RoleBinding", "name", o.Name, "namespace", o.Namespace) + client := clientset.RbacV1().RoleBindings(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create RoleBinding \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update RoleBinding \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.ClusterRole: + slog.Info("creating ClusterRole", "name", o.Name) + client := clientset.RbacV1().ClusterRoles() + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create ClusterRole \"%s\": %w", o.Name, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update ClusterRole \"%s\": %w", o.Name, err) + } + + case *rbacv1.ClusterRoleBinding: + slog.Info("creating ClusterRoleBinding", "name", o.Name) + client := clientset.RbacV1().ClusterRoleBindings() + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create 
ClusterRoleBinding \"%s\": %w", o.Name, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update ClusterRoleBinding \"%s\": %w", o.Name, err) + } + + case *v1.ConfigMap: + slog.Info("creating ConfigMap", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().ConfigMaps(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create ConfigMap \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update ConfigMap \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *networkingv1.NetworkPolicy: + slog.Info("creating NetworkPolicy", "name", o.Name, "namespace", o.Namespace) + client := clientset.NetworkingV1().NetworkPolicies(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create NetworkPolicy \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update NetworkPolicy \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.Secret: + slog.Info("creating Secret", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().Secrets(o.Namespace) + _, err := client.Get(ctx, o.Name, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + _, err = client.Create(ctx, o, metaV1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create Secret \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + return nil + } + _, err = 
client.Update(ctx, o, metaV1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to create/update Secret \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + default: + return fmt.Errorf("unknown object type: %T, err: %w", obj, ErrUnknownResourceType) + } + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/debug.go b/test/e2ev3/pkg/kubernetes/debug.go new file mode 100644 index 0000000000..11dbbbd621 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/debug.go @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package kubernetes + +import ( + "context" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "k8s.io/client-go/rest" +) + +// DebugOnFailure captures diagnostic info when upstream steps fail. +// Add it to a workflow with When(flow.AnyFailed) so it only runs on failure. +type DebugOnFailure struct { + RestConfig *rest.Config + Namespace string + LabelSelector string +} + +func (d *DebugOnFailure) String() string { return "debug-on-failure" } + +func (d *DebugOnFailure) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, d) + log.Info("capturing logs for pods", "namespace", d.Namespace, "label", d.LabelSelector) + getLogs := &GetPodLogs{ + RestConfig: d.RestConfig, + Namespace: d.Namespace, + LabelSelector: d.LabelSelector, + } + if err := getLogs.Do(context.Background()); err != nil { + log.Error("failed to capture logs", "error", err) + } + return nil // never fail the debug step itself +} diff --git a/test/e2ev3/pkg/kubernetes/delete-namespace.go b/test/e2ev3/pkg/kubernetes/delete-namespace.go new file mode 100644 index 0000000000..7bc14fb68e --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/delete-namespace.go @@ -0,0 +1,65 @@ +package kubernetes + +import ( + "context" + "fmt" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "time" + + "k8s.io/apimachinery/pkg/api/errors" + metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" + 
"k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/util/retry" +) + +type DeleteNamespace struct { + Namespace string + RestConfig *rest.Config +} + +func (d *DeleteNamespace) String() string { return "delete-namespace" } + +func (d *DeleteNamespace) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, d) + clientset, err := kubernetes.NewForConfig(d.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + err = clientset.CoreV1().Namespaces().Delete(ctx, d.Namespace, metaV1.DeleteOptions{}) + if err != nil { + if !errors.IsNotFound(err) { + return fmt.Errorf("failed to delete namespace \"%s\": %w", d.Namespace, err) + } + } + + backoff := wait.Backoff{ + Steps: 9, + Duration: 10 * time.Second, + Factor: 2.0, + // Jitter: 0.1, + } + + // Check if namespace was deleted + return retry.OnError(backoff, + func(err error) bool { + log.Info("namespace deletion pending", "error", err) + + return true + }, + func() error { + _, err = clientset.CoreV1().Namespaces().Get(ctx, d.Namespace, metaV1.GetOptions{}) + if errors.IsNotFound(err) { + return nil + } + + if err == nil { + return fmt.Errorf("namespace \"%s\" still exists", d.Namespace) + } + + return err + }, + ) +} diff --git a/test/e2ev3/pkg/kubernetes/delete-resource.go b/test/e2ev3/pkg/kubernetes/delete-resource.go new file mode 100644 index 0000000000..502bacb77e --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/delete-resource.go @@ -0,0 +1,346 @@ +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +var 
ErrDeleteNilResource = fmt.Errorf("cannot create nil resource") + +type ResourceType string + +const ( + DaemonSet ResourceType = "DaemonSet" + Deployment ResourceType = "Deployment" + StatefulSet ResourceType = "StatefulSet" + Service ResourceType = "Service" + ServiceAccount ResourceType = "ServiceAccount" + Role ResourceType = "Role" + RoleBinding ResourceType = "RoleBinding" + ClusterRole ResourceType = "ClusterRole" + ClusterRoleBinding ResourceType = "ClusterRoleBinding" + ConfigMap ResourceType = "ConfigMap" + NetworkPolicy ResourceType = "NetworkPolicy" + Secret ResourceType = "Secret" + Unknown ResourceType = "Unknown" +) + +// Parameters can only be strings, heres to help add guardrails +func TypeString(resourceType ResourceType) string { + ResourceTypes := map[ResourceType]string{ + DaemonSet: "DaemonSet", + Deployment: "Deployment", + StatefulSet: "StatefulSet", + Service: "Service", + ServiceAccount: "ServiceAccount", + Role: "Role", + RoleBinding: "RoleBinding", + ClusterRole: "ClusterRole", + ClusterRoleBinding: "ClusterRoleBinding", + ConfigMap: "ConfigMap", + NetworkPolicy: "NetworkPolicy", + Secret: "Secret", + Unknown: "Unknown", + } + str, ok := ResourceTypes[resourceType] + if !ok { + return ResourceTypes[Unknown] + } + return str +} + +type DeleteKubernetesResource struct { + ResourceType string // can't use enum, breaks parameter parsing, all must be strings + ResourceName string + ResourceNamespace string + RestConfig *rest.Config +} + +func (d *DeleteKubernetesResource) String() string { return "delete-kubernetes-resource" } + +func (d *DeleteKubernetesResource) Do(ctx context.Context) error { + ctx, _ = utils.StepLogger(ctx, d) + // Prevalidation: check resource type before proceeding + restype := ResourceType(d.ResourceType) + if restype == Unknown { + return ErrUnknownResourceType + } + + clientset, err := kubernetes.NewForConfig(d.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + ctx, 
cancel := context.WithTimeout(ctx, defaultTimeoutSeconds*time.Second) + defer cancel() + + res := ResourceType(d.ResourceType) + + var resource runtime.Object + + switch res { + case DaemonSet: + resource = &appsv1.DaemonSet{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case Deployment: + resource = &appsv1.Deployment{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case StatefulSet: + resource = &appsv1.StatefulSet{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case Service: + resource = &v1.Service{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case ServiceAccount: + resource = &v1.ServiceAccount{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case Role: + resource = &rbacv1.Role{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case RoleBinding: + resource = &rbacv1.RoleBinding{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case ClusterRole: + resource = &rbacv1.ClusterRole{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + }, + } + case ClusterRoleBinding: + resource = &rbacv1.ClusterRoleBinding{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + }, + } + case ConfigMap: + resource = &v1.ConfigMap{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case NetworkPolicy: + resource = &networkingv1.NetworkPolicy{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case Secret: + resource = &v1.Secret{ + ObjectMeta: metaV1.ObjectMeta{ + Name: d.ResourceName, + Namespace: d.ResourceNamespace, + }, + } + case Unknown: + return fmt.Errorf("unknown resource type: %s: %w", 
d.ResourceType, ErrUnknownResourceType) + default: + return ErrUnknownResourceType + } + + err = DeleteResource(ctx, resource, clientset) + if err != nil { + return fmt.Errorf("error deleting resource: %w", err) + } + + return nil +} + +func DeleteResource(ctx context.Context, obj runtime.Object, clientset *kubernetes.Clientset) error { //nolint:gocyclo //this is just boilerplate code + if obj == nil { + return ErrCreateNilResource + } + + switch o := obj.(type) { + case *appsv1.DaemonSet: + slog.Info("deleting DaemonSet", "name", o.Name, "namespace", o.Namespace) + client := clientset.AppsV1().DaemonSets(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "DaemonSet", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete DaemonSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *appsv1.Deployment: + slog.Info("deleting Deployment", "name", o.Name, "namespace", o.Namespace) + client := clientset.AppsV1().Deployments(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "Deployment", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete Deployment \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *appsv1.StatefulSet: + slog.Info("deleting StatefulSet", "name", o.Name, "namespace", o.Namespace) + client := clientset.AppsV1().StatefulSets(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "StatefulSet", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete StatefulSet \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.Service: + 
slog.Info("deleting Service", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().Services(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "Service", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete Service \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.ServiceAccount: + slog.Info("deleting ServiceAccount", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().ServiceAccounts(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "ServiceAccount", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete ServiceAccount \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.Role: + slog.Info("deleting Role", "name", o.Name, "namespace", o.Namespace) + client := clientset.RbacV1().Roles(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "Role", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete Role \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.RoleBinding: + slog.Info("deleting RoleBinding", "name", o.Name, "namespace", o.Namespace) + client := clientset.RbacV1().RoleBindings(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "RoleBinding", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete RoleBinding \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.ClusterRole: + 
slog.Info("deleting ClusterRole", "name", o.Name) + client := clientset.RbacV1().ClusterRoles() + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "ClusterRole", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete ClusterRole \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *rbacv1.ClusterRoleBinding: + slog.Info("deleting ClusterRoleBinding", "name", o.Name) + client := clientset.RbacV1().ClusterRoleBindings() + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "ClusterRoleBinding", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete ClusterRoleBinding \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.ConfigMap: + slog.Info("deleting ConfigMap", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().ConfigMaps(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "ConfigMap", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete ConfigMap \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *networkingv1.NetworkPolicy: + slog.Info("deleting NetworkPolicy", "name", o.Name, "namespace", o.Namespace) + client := clientset.NetworkingV1().NetworkPolicies(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "NetworkPolicy", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete NetworkPolicy \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + case *v1.Secret: + 
slog.Info("deleting Secret", "name", o.Name, "namespace", o.Namespace) + client := clientset.CoreV1().Secrets(o.Namespace) + err := client.Delete(ctx, o.Name, metaV1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + slog.Info("resource does not exist", "kind", "Secret", "name", o.Name, "namespace", o.Namespace) + return nil + } + return fmt.Errorf("failed to delete Secret \"%s\" in namespace \"%s\": %w", o.Name, o.Namespace, err) + } + + default: + return fmt.Errorf("unknown object type: %T, err: %w", obj, ErrUnknownResourceType) + } + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/exec-pod.go b/test/e2ev3/pkg/kubernetes/exec-pod.go new file mode 100644 index 0000000000..4b41450929 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/exec-pod.go @@ -0,0 +1,94 @@ +package kubernetes + +import ( + "bytes" + "context" + "fmt" + "log/slog" + "os" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "strings" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/remotecommand" + "k8s.io/client-go/util/retry" + "k8s.io/kubectl/pkg/scheme" +) + +const ExecSubResources = "exec" + +type ExecInPod struct { + PodNamespace string + RestConfig *rest.Config + PodName string + Command string +} + +func (e *ExecInPod) String() string { return "exec-in-pod" } + +func (e *ExecInPod) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, e) + ctx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + clientset, err := kubernetes.NewForConfig(e.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + err = retry.OnError(retry.DefaultRetry, func(err error) bool { + // Retry on every error + return true + }, func() error { + _, execErr := ExecPod(ctx, clientset, e.RestConfig, e.PodNamespace, e.PodName, e.Command) + if execErr != nil { + log.Error("executing command, retrying", "command", e.Command, "error", execErr) + } + 
return execErr + }) + if err != nil { + return fmt.Errorf("error executing command, all retries exhausted [%s]: %w", e.Command, err) + } + + return nil +} + +func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) ([]byte, error) { + slog.Info("executing command", "command", command, "pod", podName, "namespace", namespace) + req := clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName). + Namespace(namespace).SubResource(ExecSubResources) + option := &v1.PodExecOptions{ + Command: strings.Fields(command), + Stdin: true, + Stdout: true, + Stderr: true, + TTY: false, + } + + req.VersionedParams( + option, + scheme.ParameterCodec, + ) + + var buf bytes.Buffer + exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL()) + if err != nil { + return buf.Bytes(), fmt.Errorf("error creating executor: %w", err) + } + + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ + Stdin: os.Stdin, + Stdout: &buf, + Stderr: &buf, + }) + if err != nil { + return buf.Bytes(), fmt.Errorf("error executing command: %w", err) + } + + res := buf.Bytes() + return res, nil +} diff --git a/test/e2ev3/pkg/kubernetes/get-external-crd.go b/test/e2ev3/pkg/kubernetes/get-external-crd.go new file mode 100644 index 0000000000..0b02e7bddd --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/get-external-crd.go @@ -0,0 +1,63 @@ +package kubernetes + +import ( + "fmt" + "io" + "log/slog" + "net/http" + "net/url" + "os" + "path" + "path/filepath" +) + +func downloadExternalCRDs(chartPath string) error { + crdUrls := []string{ + "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/main/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml", + } + + for _, crdUrl := range crdUrls { + crd, err := fetchYAML(crdUrl) + if err != nil { + return err + } + + crdName, err := extractFileName(crdUrl) + if err != nil { + return err + } + + slog.Info("CRD exists", "name", crdName) 
+ slog.Info("writing CRD file", "path", filepath.Join(chartPath, "/crds/"+crdName)) + err = saveToFile(filepath.Join(chartPath, "/crds/"+crdName), crd) + if err != nil { + return err + } + } + return nil +} + +func fetchYAML(url string) ([]byte, error) { + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to get crd source code from %s: %w", url, err) + } + defer resp.Body.Close() + return io.ReadAll(resp.Body) +} + +func extractFileName(rawURL string) (string, error) { + parsedURL, err := url.Parse(rawURL) + if err != nil { + return "", fmt.Errorf("failed to parse url: %w", err) + } + return path.Base(parsedURL.Path), nil +} + +func saveToFile(filename string, data []byte) error { + err := os.WriteFile(filename, data, 0644) + if err != nil { + return fmt.Errorf("failed to write crd.yaml to /crds dir : %w", err) + } + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/get-logs.go b/test/e2ev3/pkg/kubernetes/get-logs.go new file mode 100644 index 0000000000..dba0fb4eed --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/get-logs.go @@ -0,0 +1,70 @@ +package kubernetes + +import ( + "context" + "io" + "log/slog" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type GetPodLogs struct { + RestConfig *rest.Config + Namespace string + LabelSelector string +} + +func (p *GetPodLogs) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, p) + + log.Info("printing pod logs", "namespace", p.Namespace, "labelselector", p.LabelSelector) + + clientset, err := kubernetes.NewForConfig(p.RestConfig) + if err != nil { + log.Error("error creating clientset", "error", err) + } + + PrintPodLogs(ctx, clientset, p.Namespace, p.LabelSelector) + + return nil +} + +func PrintPodLogs(ctx context.Context, clientset *kubernetes.Clientset, namespace, labelSelector string) { + log := slog.Default() + // List 
all the pods in the namespace + pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + log.Error("error listing pods", "error", err) + } + + // Iterate over the pods and get the logs for each pod + for i := range pods.Items { + pod := pods.Items[i] + log.Info("pod logs", "pod", pod.Name) + + // Get the logs for the pod + req := clientset.CoreV1().Pods(namespace).GetLogs(pod.Name, &corev1.PodLogOptions{}) + podLogs, err := req.Stream(ctx) + if err != nil { + log.Error("error getting logs for pod", "pod", pod.Name, "error", err) + } + + // Read the logs + buf, err := io.ReadAll(podLogs) + if err != nil { + log.Error("error reading logs for pod", "pod", pod.Name, "error", err) + } + + podLogs.Close() + + // Print the logs + log.Info("pod log output", "pod", pod.Name, "output", string(buf)) + } +} diff --git a/test/e2ev3/pkg/kubernetes/get-pod-ip.go b/test/e2ev3/pkg/kubernetes/get-pod-ip.go new file mode 100644 index 0000000000..905734b59a --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/get-pod-ip.go @@ -0,0 +1,26 @@ +package kubernetes + +import ( + "context" + + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +func GetPodIP(ctx context.Context, restConfig *rest.Config, namespace, podName string) (string, error) { + clientset, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return "", errors.Wrapf(err, "error creating Kubernetes clientset") + } + + pod, err := clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return "", errors.Wrapf(err, "error getting pod %s in namespace %s", podName, namespace) + } + if pod.Status.PodIP == "" { + return "", errors.Errorf("pod %s in namespace %s has no IP", podName, namespace) + } + return pod.Status.PodIP, nil +} diff --git a/test/e2ev3/pkg/kubernetes/install-hubble-helm.go 
b/test/e2ev3/pkg/kubernetes/install-hubble-helm.go new file mode 100644 index 0000000000..3d2fa0e16c --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/install-hubble-helm.go @@ -0,0 +1,169 @@ +package kubernetes + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + e2ecfg "github.com/microsoft/retina/test/e2ev3/config" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/chart/loader" + "helm.sh/helm/v3/pkg/cli" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + HubbleNamespace = "kube-system" + HubbleUIApp = "hubble-ui" + HubbleRelayApp = "hubble-relay" +) + +type InstallHubbleHelmChart struct { + Namespace string + ReleaseName string + KubeConfigFilePath string + ChartPath string + ImageTag string + ImageRegistry string + ImageNamespace string + HelmDriver string + ImageLoader e2ecfg.ClusterProvider +} + +func (v *InstallHubbleHelmChart) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, v) + ctx, cancel := context.WithTimeout(ctx, defaultTimeoutSeconds*time.Second) + defer cancel() + + settings := cli.New() + settings.KubeConfig = v.KubeConfigFilePath + actionConfig := new(action.Configuration) + + err := actionConfig.Init(settings.RESTClientGetter(), v.Namespace, v.HelmDriver, func(format string, a ...any) { log.Info(fmt.Sprintf(format, a...)) }) + if err != nil { + return fmt.Errorf("failed to initialize helm action config: %w", err) + } + + // Creating extra namespace to deploy test pods + rc, err := clientcmd.BuildConfigFromFlags("", v.KubeConfigFilePath) + if err != nil { + return fmt.Errorf("failed to build rest config: %w", err) + } + err = CreateNamespaceFn(ctx, rc, e2ecfg.TestPodNamespace) + if err != nil { + return fmt.Errorf("failed to create namespace %s: %w", v.Namespace, err) + } + + tag := v.ImageTag + if tag == "" { + return fmt.Errorf("tag is not set: %w", errEmpty) + } + imageRegistry := v.ImageRegistry + if imageRegistry == "" { + 
return fmt.Errorf("image registry is not set: %w", errEmpty) + } + + imageNamespace := v.ImageNamespace + if imageNamespace == "" { + return fmt.Errorf("image namespace is not set: %w", errEmpty) + } + + // load chart from the path + chart, err := loader.Load(v.ChartPath) + if err != nil { + return fmt.Errorf("failed to load chart from path %s: %w", v.ChartPath, err) + } + + if secrets := v.ImageLoader.ImagePullSecrets(); len(secrets) > 0 { + chart.Values["imagePullSecrets"] = secrets + } + pullPolicy := v.ImageLoader.ImagePullPolicy() + + chart.Values["operator"].(map[string]interface{})["enabled"] = true + chart.Values["operator"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-operator" + chart.Values["operator"].(map[string]interface{})["tag"] = tag + chart.Values["operator"].(map[string]interface{})["pullPolicy"] = pullPolicy + chart.Values["agent"].(map[string]interface{})["enabled"] = true + chart.Values["agent"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-agent" + chart.Values["agent"].(map[string]interface{})["tag"] = tag + chart.Values["agent"].(map[string]interface{})["pullPolicy"] = pullPolicy + chart.Values["agent"].(map[string]interface{})["init"].(map[string]interface{})["enabled"] = true + chart.Values["agent"].(map[string]interface{})["init"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-init" + chart.Values["agent"].(map[string]interface{})["init"].(map[string]interface{})["tag"] = tag + chart.Values["hubble"].(map[string]interface{})["tls"].(map[string]interface{})["enabled"] = false + chart.Values["hubble"].(map[string]interface{})["relay"].(map[string]interface{})["tls"].(map[string]interface{})["server"].(map[string]interface{})["enabled"] = false + chart.Values["hubble"].(map[string]interface{})["tls"].(map[string]interface{})["auto"].(map[string]interface{})["enabled"] = false + + getclient := 
action.NewGet(actionConfig) + release, err := getclient.Run(v.ReleaseName) + if err == nil && release != nil { + log.Info("found existing release, removing before installing", "release", release.Name) + delclient := action.NewUninstall(actionConfig) + delclient.Wait = true + delclient.Timeout = deleteTimeout + _, err = delclient.Run(v.ReleaseName) + if err != nil { + return fmt.Errorf("failed to delete existing release %s: %w", v.ReleaseName, err) + } + } else if err != nil && !strings.Contains(err.Error(), "not found") { + return fmt.Errorf("failed to get release %s: %w", v.ReleaseName, err) + } + + client := action.NewInstall(actionConfig) + client.Namespace = v.Namespace + client.ReleaseName = v.ReleaseName + client.Timeout = createTimeout + client.Wait = true + client.WaitForJobs = true + + // install the chart here + rel, err := client.RunWithContext(ctx, chart, chart.Values) + if err != nil { + return fmt.Errorf("failed to install chart: %w", err) + } + + log.Info("installed chart", "release", rel.Name, "namespace", rel.Namespace) + // this will confirm the values set during installation + log.Info("chart values", "config", rel.Config) + + // ensure all pods are running, since helm doesn't care about windows + config, err := clientcmd.BuildConfigFromFlags("", v.KubeConfigFilePath) + if err != nil { + return fmt.Errorf("error building kubeconfig: %w", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + // Validate Hubble Relay and UI pods in parallel. 
+ var relayErr, uiErr error + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + relayErr = WaitForPodReady(ctx, clientset, HubbleNamespace, "k8s-app="+HubbleRelayApp) + }() + go func() { + defer wg.Done() + uiErr = WaitForPodReady(ctx, clientset, HubbleNamespace, "k8s-app="+HubbleUIApp) + }() + wg.Wait() + + if relayErr != nil { + return fmt.Errorf("error waiting for Hubble Relay pods to be ready: %w", relayErr) + } + log.Info("Hubble Relay Pod is ready") + + if uiErr != nil { + return fmt.Errorf("error waiting for Hubble UI pods to be ready: %w", uiErr) + } + log.Info("Hubble UI Pod is ready") + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/install-retina-helm.go b/test/e2ev3/pkg/kubernetes/install-retina-helm.go new file mode 100644 index 0000000000..7849d44904 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/install-retina-helm.go @@ -0,0 +1,170 @@ +package kubernetes + +import ( + "context" + "fmt" + "os" + "strings" + "time" + + e2ecfg "github.com/microsoft/retina/test/e2ev3/config" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/chart/loader" + "helm.sh/helm/v3/pkg/cli" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + createTimeout = 20 * time.Minute // windows is slow + deleteTimeout = 5 * time.Minute +) + +var ( + errEmpty = fmt.Errorf("is empty") + errDirectoryNotFound = fmt.Errorf("directory not found") +) + +type InstallHelmChart struct { + Namespace string + ReleaseName string + KubeConfigFilePath string + ChartPath string + ImageTag string + ImageRegistry string + ImageNamespace string + HelmDriver string + ImageLoader e2ecfg.ClusterProvider + EnableHeartbeat bool +} + +func (i *InstallHelmChart) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, i) + // Prevalidation: check chart path and tag env + _, err := os.Stat(i.ChartPath) + if os.IsNotExist(err) { + cwd, err := os.Getwd() + if err != nil { + return 
fmt.Errorf("failed to get current working directory %s: %w", cwd, err) + } + log.Info("current working directory", "cwd", cwd) + return fmt.Errorf("directory not found at %s: working directory: %s: %w", i.ChartPath, cwd, errDirectoryNotFound) + } + log.Info("found chart", "path", i.ChartPath) + + if i.ImageTag == "" { + return fmt.Errorf("image tag is not set: %w", errEmpty) + } + if i.ImageRegistry == "" { + return fmt.Errorf("image registry is not set: %w", errEmpty) + } + if i.ImageNamespace == "" { + return fmt.Errorf("image namespace is not set: %w", errEmpty) + } + + tag := i.ImageTag + imageRegistry := i.ImageRegistry + imageNamespace := i.ImageNamespace + + ctx, cancel := context.WithTimeout(ctx, createTimeout) + defer cancel() + settings := cli.New() + settings.KubeConfig = i.KubeConfigFilePath + actionConfig := new(action.Configuration) + + err = actionConfig.Init(settings.RESTClientGetter(), i.Namespace, i.HelmDriver, func(format string, v ...any) { log.Info(fmt.Sprintf(format, v...)) }) + if err != nil { + return fmt.Errorf("failed to initialize helm action config: %w", err) + } + + // Creating extra namespace to deploy test pods + rc, err := clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath) + if err != nil { + return fmt.Errorf("failed to build rest config: %w", err) + } + err = CreateNamespaceFn(ctx, rc, e2ecfg.TestPodNamespace) + if err != nil { + return fmt.Errorf("failed to create namespace %s: %w", i.Namespace, err) + } + + //Download necessary CRD's + err = downloadExternalCRDs(i.ChartPath) + if err != nil { + return fmt.Errorf("failed to load external crd's: %w", err) + } + + // load chart from the path + chart, err := loader.Load(i.ChartPath) + if err != nil { + return fmt.Errorf("failed to load chart from path %s: %w", i.ChartPath, err) + } + + if secrets := i.ImageLoader.ImagePullSecrets(); len(secrets) > 0 { + chart.Values["imagePullSecrets"] = secrets + } + + if i.EnableHeartbeat { + chart.Values["enableTelemetry"] = 
i.EnableHeartbeat + chart.Values["logLevel"] = "error" + } + + chart.Values["image"].(map[string]interface{})["tag"] = tag + chart.Values["image"].(map[string]interface{})["pullPolicy"] = i.ImageLoader.ImagePullPolicy() + chart.Values["operator"].(map[string]interface{})["tag"] = tag + chart.Values["image"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-agent" + chart.Values["image"].(map[string]interface{})["initRepository"] = imageRegistry + "/" + imageNamespace + "/retina-init" + chart.Values["operator"].(map[string]interface{})["repository"] = imageRegistry + "/" + imageNamespace + "/retina-operator" + chart.Values["operator"].(map[string]interface{})["enabled"] = true + + getclient := action.NewGet(actionConfig) + release, err := getclient.Run(i.ReleaseName) + if err == nil && release != nil { + log.Info("found existing release, removing before installing", "release", release.Name) + delclient := action.NewUninstall(actionConfig) + delclient.Wait = true + delclient.Timeout = deleteTimeout + _, err = delclient.Run(i.ReleaseName) + if err != nil { + return fmt.Errorf("failed to delete existing release %s: %w", i.ReleaseName, err) + } + } else if err != nil && !strings.Contains(err.Error(), "not found") { + return fmt.Errorf("failed to get release %s: %w", i.ReleaseName, err) + } + + client := action.NewInstall(actionConfig) + client.Namespace = i.Namespace + client.ReleaseName = i.ReleaseName + client.Timeout = createTimeout + client.Wait = true + client.WaitForJobs = true + + // install the chart here + rel, err := client.RunWithContext(ctx, chart, chart.Values) + if err != nil { + return fmt.Errorf("failed to install chart: %w", err) + } + + log.Info("installed chart", "release", rel.Name, "namespace", rel.Namespace) + // this will confirm the values set during installation + log.Info("chart values", "config", rel.Config) + + // ensure all pods are running, since helm doesn't care about windows + config, err := 
clientcmd.BuildConfigFromFlags("", i.KubeConfigFilePath) + if err != nil { + return fmt.Errorf("error building kubeconfig: %w", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + labelSelector := "k8s-app=retina" + err = WaitForPodReady(ctx, clientset, "kube-system", labelSelector) + if err != nil { + return fmt.Errorf("error waiting for retina pods to be ready: %w", err) + } + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/label-nodes.go b/test/e2ev3/pkg/kubernetes/label-nodes.go new file mode 100644 index 0000000000..c96b71371f --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/label-nodes.go @@ -0,0 +1,79 @@ +package kubernetes + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + retry "github.com/microsoft/retina/test/retry" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type patchStringValue struct { + Op string `json:"op"` + Path string `json:"path"` + Value string `json:"value"` +} + +type LabelNodes struct { + RestConfig *rest.Config + Labels map[string]string +} + +func (l *LabelNodes) String() string { return "label-nodes" } + +func (l *LabelNodes) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, l) + clientset, err := kubernetes.NewForConfig(l.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + var nodes *corev1.NodeList + + retrier := retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay} + err = retrier.Do(ctx, func() error { + nodes, err = clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to get nodes: %w", err) + } + return nil + }) + if err != nil { + return fmt.Errorf("retrier failed: %w", err) + } + + patch := 
[]patchStringValue{} + for k, v := range l.Labels { + patch = append(patch, patchStringValue{ + Op: "add", + Path: "/metadata/labels/" + k, + Value: v, + }) + } + b, err := json.Marshal(patch) + if err != nil { + return fmt.Errorf("failed to marshal patch: %w", err) + } + + for i := range nodes.Items { + log.Info("labeling node", "node", nodes.Items[i].Name) + err = retrier.Do(ctx, func() error { + _, err = clientset.CoreV1().Nodes().Patch(ctx, nodes.Items[i].Name, types.JSONPatchType, b, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("failed to patch pod: %w", err) + } + return nil + }) + if err != nil { + return fmt.Errorf("retrier failed: %w", err) + } + } + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/no-crashes.go b/test/e2ev3/pkg/kubernetes/no-crashes.go new file mode 100644 index 0000000000..5cb9847639 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/no-crashes.go @@ -0,0 +1,42 @@ +package kubernetes + +import ( + "context" + "fmt" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +var ErrPodCrashed = fmt.Errorf("pod has crashes") + +type EnsureStableComponent struct { + LabelSelector string + PodNamespace string + RestConfig *rest.Config + + // Container restarts can occur for various reason, they do not necessarily mean the entire cluster + // is unstable or needs to be recreated. In some cases, container restarts are expected and acceptable. + // This flag should be set to true only in those cases and provide additional why restart restarts are acceptable. 
+ IgnoreContainerRestart bool +} + +func (n *EnsureStableComponent) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(n.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + err = WaitForPodReady(ctx, clientset, n.PodNamespace, n.LabelSelector) + if err != nil { + return fmt.Errorf("error waiting for retina pods to be ready: %w", err) + } + + if !n.IgnoreContainerRestart { + err = CheckContainerRestart(ctx, clientset, n.PodNamespace, n.LabelSelector) + if err != nil { + return fmt.Errorf("error checking pod restarts: %w", err) + } + } + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/port-forward.go b/test/e2ev3/pkg/kubernetes/port-forward.go new file mode 100644 index 0000000000..ffd83d7377 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/port-forward.go @@ -0,0 +1,174 @@ +// todo: matmerr, this is just going to remain broken until it can be validated with scenarios pr + +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "strconv" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + + retry "github.com/microsoft/retina/test/retry" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + defaultTimeoutSeconds = 300 + defaultRetryDelay = 500 * time.Millisecond + defaultRetryAttempts = 60 + defaultHTTPClientTimeout = 2 * time.Second +) + +var ( + ErrNoPodWithLabelFound = fmt.Errorf("no pod with label found with matching pod affinity") + + defaultRetrier = retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay, ExpBackoff: true} +) + +type PortForward struct { + Namespace string + LabelSelector string + LocalPort string + RemotePort string + Endpoint string + RestConfig *rest.Config + OptionalLabelAffinity string + + // local properties + pf *PortForwarder +} + +func (p *PortForward) String() string { return "port-forward" } + +func (p 
*PortForward) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, p) + lport, _ := strconv.Atoi(p.LocalPort) + rport, _ := strconv.Atoi(p.RemotePort) + + portForwardCtx, cancel := context.WithTimeout(ctx, defaultTimeoutSeconds*time.Second) + defer cancel() + + clientset, err := kubernetes.NewForConfig(p.RestConfig) + if err != nil { + return fmt.Errorf("could not create clientset: %w", err) + } + + // if we have an optional label affinity, find a pod with that label, on the same node as a pod with the label selector + targetPodName := "" + if p.OptionalLabelAffinity != "" { + // get all pods with label + log.Info("finding pod with affinity", "label", p.LabelSelector, "affinityLabel", p.OptionalLabelAffinity) + targetPodName, err = p.findPodsWithAffinity(ctx, clientset) + if err != nil { + return fmt.Errorf("could not find pod with affinity: %w", err) + } + } + + portForwardFn := func() error { + // if we have a pod name (likely from affinity above), use it, otherwise use label selector + opts := PortForwardingOpts{ + Namespace: p.Namespace, + PodName: targetPodName, + LocalPort: lport, + DestPort: rport, + } + + if targetPodName != "" { + opts.PodName = targetPodName + } + + log.Info("attempting port forward", "pod", targetPodName, "label", p.LabelSelector, "namespace", p.Namespace) + + p.pf, err = NewPortForwarder(p.RestConfig, logger{}, opts) + if err != nil { + return fmt.Errorf("could not create port forwarder: %w", err) + } + err = p.pf.Forward(ctx) + if err != nil { + return fmt.Errorf("could not start port forward: %w", err) + } + + // verify port forward succeeded + client := http.Client{ + Timeout: defaultHTTPClientTimeout, + } + resp, err := client.Get(p.pf.Address() + "/" + p.Endpoint) //nolint + if err != nil { + log.Error("port forward validation failed", "address", p.pf.Address(), "error", err) + p.pf.Stop() + return fmt.Errorf("port forward validation HTTP request to %s failed: %w", p.pf.Address(), err) + } + defer resp.Body.Close() + 
+ log.Info("port forward validation succeeded", "address", p.pf.Address(), "status", resp.Status) + + return nil + } + + if err = defaultRetrier.Do(portForwardCtx, portForwardFn); err != nil { + return fmt.Errorf("could not start port forward within %ds: %w", defaultTimeoutSeconds, err) + } + log.Info("successfully port forwarded", "address", p.pf.Address()) + return nil +} + +func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kubernetes.Clientset) (string, error) { + targetPodsAll, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{ + LabelSelector: p.LabelSelector, + FieldSelector: "status.phase=Running", + }) + if errAffinity != nil { + return "", fmt.Errorf("could not list pods in %q with label %q: %w", p.Namespace, p.LabelSelector, errAffinity) + } + + // omit windows pods because we can't port-forward to them + targetPodsLinux := make([]v1.Pod, 0) + for i := range targetPodsAll.Items { + if targetPodsAll.Items[i].Spec.NodeSelector["kubernetes.io/os"] != "windows" { + targetPodsLinux = append(targetPodsLinux, targetPodsAll.Items[i]) + } + } + + // get all pods with optional label affinity + affinityPods, errAffinity := clientset.CoreV1().Pods(metav1.NamespaceAll).List(ctx, metav1.ListOptions{ + LabelSelector: p.OptionalLabelAffinity, + FieldSelector: "status.phase=Running", + }) + if errAffinity != nil { + return "", fmt.Errorf("could not list affinity pods across all namespaces with label %q: %w", p.OptionalLabelAffinity, errAffinity) + } + + // keep track of where the affinity pods are scheduled + affinityNodes := make(map[string]bool) + for i := range affinityPods.Items { + affinityNodes[affinityPods.Items[i].Spec.NodeName] = true + } + + // if a pod is found on the same node as an affinity pod, use it + for i := range targetPodsLinux { + if affinityNodes[targetPodsLinux[i].Spec.NodeName] { + // found a pod with the specified label, on a node with the optional label affinity + return 
targetPodsLinux[i].Name, nil + } + } + + return "", fmt.Errorf("could not find a pod with label \"%s\", on a node that also has a pod with label \"%s\": %w", p.LabelSelector, p.OptionalLabelAffinity, ErrNoPodWithLabelFound) +} + +func (p *PortForward) Stop() error { + p.pf.Stop() + return nil +} + +type logger struct{} + +func (l *logger) Logf(format string, args ...interface{}) { + slog.Info(fmt.Sprintf(format, args...)) +} diff --git a/test/e2ev3/pkg/kubernetes/portforward.go b/test/e2ev3/pkg/kubernetes/portforward.go new file mode 100644 index 0000000000..3a8b3f1a2a --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/portforward.go @@ -0,0 +1,199 @@ +package kubernetes + +import ( + "context" + "fmt" + "io" + "net/http" + "sort" + "sync" + "time" + + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/portforward" + "k8s.io/client-go/transport/spdy" +) + +// PortForwarder can manage a port forwarding session. +type PortForwarder struct { + clientset *kubernetes.Clientset + transport http.RoundTripper + upgrader spdy.Upgrader + logger logger + + opts PortForwardingOpts + + stopChan chan struct{} + errChan chan error + address string + lazyAddress sync.Once +} + +type PortForwardingOpts struct { + Namespace string + LabelSelector string + PodName string + LocalPort int + DestPort int +} + +// NewPortForwarder creates a PortForwarder. 
+func NewPortForwarder(restConfig *rest.Config, logger logger, opts PortForwardingOpts) (*PortForwarder, error) { + clientset, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return nil, fmt.Errorf("could not create clientset: %w", err) + } + + transport, upgrader, err := spdy.RoundTripperFor(restConfig) + if err != nil { + return nil, fmt.Errorf("could not create spdy roundtripper: %w", err) + } + + return &PortForwarder{ + clientset: clientset, + transport: transport, + upgrader: upgrader, + logger: logger, + opts: opts, + stopChan: make(chan struct{}, 1), + }, nil +} + +// todo: can be made more flexible to allow a service to be specified + +// Forward attempts to initiate port forwarding a pod and port using the configured namespace and labels. +// An error is returned if a port forwarding session could not be started. If no error is returned, the +// Address method can be used to communicate with the pod, and the Stop and KeepAlive methods can be used +// to manage the lifetime of the port forwarding session. + +func (p *PortForwarder) Forward(ctx context.Context) error { + var podName string + if p.opts.PodName == "" { + pods, err := p.clientset.CoreV1().Pods(p.opts.Namespace).List(ctx, metav1.ListOptions{LabelSelector: p.opts.LabelSelector, FieldSelector: "status.phase=Running"}) + if err != nil { + return fmt.Errorf("could not list pods in %q with label %q: %w", p.opts.Namespace, p.opts.LabelSelector, err) + } + + if len(pods.Items) < 1 { + return fmt.Errorf("no pods found in %q with label %q", p.opts.Namespace, p.opts.LabelSelector) //nolint:goerr113 //no specific handling expected + } + + // Deterministic selection: sort by name and pick the first pod. 
+ sort.Slice(pods.Items, func(i, j int) bool { + return pods.Items[i].Name < pods.Items[j].Name + }) + podName = pods.Items[0].Name + } else { + podName = p.opts.PodName + } + + pods, err := p.clientset.CoreV1().Pods(p.opts.Namespace).List(ctx, metav1.ListOptions{LabelSelector: p.opts.LabelSelector, FieldSelector: "status.phase=Running"}) + if err != nil { + return fmt.Errorf("could not list pods in %q with label %q: %w", p.opts.Namespace, p.opts.LabelSelector, err) + } + + if len(pods.Items) < 1 { + return fmt.Errorf("no pods found in %q with label %q", p.opts.Namespace, p.opts.LabelSelector) //nolint:goerr113 //no specific handling expected + } + + portForwardURL := p.clientset.CoreV1().RESTClient().Post(). + Resource("pods"). + Namespace(p.opts.Namespace). + Name(podName). + SubResource("portforward").URL() + + readyChan := make(chan struct{}, 1) + dialer := spdy.NewDialer(p.upgrader, &http.Client{Transport: p.transport}, http.MethodPost, portForwardURL) + ports := []string{fmt.Sprintf("%d:%d", p.opts.LocalPort, p.opts.DestPort)} + pf, err := portforward.New(dialer, ports, p.stopChan, readyChan, io.Discard, io.Discard) + if err != nil { + return fmt.Errorf("could not create portforwarder: %w", err) + } + + errChan := make(chan error, 1) + go func() { + // ForwardPorts is a blocking function thus it has to be invoked in a goroutine to allow callers to do + // other things, but it can return 2 kinds of errors: initial dial errors that will be caught in the select + // block below (Ready should not fire in these cases) and later errors if the connection is dropped. + // this is why we propagate the error channel to PortForwardStreamHandle: to allow callers to handle + // cases of eventual errors. 
+ errChan <- pf.ForwardPorts() + }() + + var portForwardPort int + select { + case <-ctx.Done(): + return fmt.Errorf("portforward cancelled: %w", ctx.Err()) + case err := <-errChan: + return fmt.Errorf("portforward failed: %w", err) + case <-pf.Ready: + prts, err := pf.GetPorts() + if err != nil { + return fmt.Errorf("get portforward port: %w", err) + } + + if len(prts) < 1 { + return errors.New("no ports forwarded") + } + + portForwardPort = int(prts[0].Local) + } + + // once successful, any subsequent port forwarding sessions from keep alive would yield the same address. + // since the address could be read at the same time as the session is renewed, it's appropriate to initialize + // lazily. + p.lazyAddress.Do(func() { + p.address = fmt.Sprintf("http://localhost:%d", portForwardPort) + }) + + p.errChan = errChan + + return nil +} + +// Address returns an address for communicating with a port-forwarded pod. +func (p *PortForwarder) Address() string { + return p.address +} + +// Stop terminates a port forwarding session. +func (p *PortForwarder) Stop() { + select { + case p.stopChan <- struct{}{}: + default: + } +} + +// KeepAlive can be used to restart the port forwarding session in the background. +func (p *PortForwarder) KeepAlive(ctx context.Context) { + for { + select { + case <-ctx.Done(): + p.logger.Logf("port forwarder: keep alive cancelled: %v", ctx.Err()) + return + case pfErr := <-p.errChan: + // as of client-go v0.26.1, if the connection is successful at first but then fails, + // an error is logged but only a nil error is sent to this channel. this will be fixed + // in v0.27.x, which at the time of writing has not been released. + // + // see https://github.com/kubernetes/client-go/commit/d0842249d3b92ea67c446fe273f84fe74ebaed9f + // for the relevant change. + p.logger.Logf("port forwarder: received error signal: %v. 
restarting session", pfErr) + p.Stop() + if err := p.Forward(ctx); err != nil { + p.logger.Logf("port forwarder: could not restart session: %v. retrying", err) + + select { + case <-ctx.Done(): + p.logger.Logf("port forwarder: keep alive cancelled: %v", ctx.Err()) + return + case <-time.After(time.Second): // todo: make configurable? + continue + } + } + } + } +} diff --git a/test/e2ev3/pkg/kubernetes/uninstall-helm.go b/test/e2ev3/pkg/kubernetes/uninstall-helm.go new file mode 100644 index 0000000000..6386182956 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/uninstall-helm.go @@ -0,0 +1,41 @@ +package kubernetes + +import ( + "context" + "fmt" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/cli" +) + +type UninstallHelmChart struct { + Namespace string + ReleaseName string + KubeConfigFilePath string + HelmDriver string +} + +func (i *UninstallHelmChart) String() string { return "uninstall-helm" } + +func (i *UninstallHelmChart) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, i) + settings := cli.New() + settings.KubeConfig = i.KubeConfigFilePath + actionConfig := new(action.Configuration) + + err := actionConfig.Init(settings.RESTClientGetter(), i.Namespace, i.HelmDriver, func(format string, v ...any) { log.Info(fmt.Sprintf(format, v...)) }) + if err != nil { + return fmt.Errorf("failed to initialize helm action config: %w", err) + } + + delclient := action.NewUninstall(actionConfig) + delclient.Wait = true + delclient.Timeout = deleteTimeout + _, err = delclient.Run(i.ReleaseName) + if err != nil { + return fmt.Errorf("failed to delete existing release %s: %w", i.ReleaseName, err) + } + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/upgrade-retina-helm.go b/test/e2ev3/pkg/kubernetes/upgrade-retina-helm.go new file mode 100644 index 0000000000..3a5a8f2dfa --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/upgrade-retina-helm.go @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft 
Corporation. +// Licensed under the MIT license. +package kubernetes + +import ( + "context" + "fmt" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/cli" + helmValues "helm.sh/helm/v3/pkg/cli/values" + "helm.sh/helm/v3/pkg/getter" +) + +const upgradeTimeout = 300 * time.Second // longer timeout to accommodate slow windows node terminating and restarting. + +type UpgradeRetinaHelmChart struct { + Namespace string + ReleaseName string + KubeConfigFilePath string + ChartPath string + HelmDriver string + ValuesFile string +} + +func (u *UpgradeRetinaHelmChart) String() string { return "upgrade-retina-helm" } + +func (u *UpgradeRetinaHelmChart) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, u) + settings := cli.New() + settings.KubeConfig = u.KubeConfigFilePath + actionConfig := new(action.Configuration) + + err := actionConfig.Init(settings.RESTClientGetter(), u.Namespace, u.HelmDriver, func(format string, v ...any) { log.Info(fmt.Sprintf(format, v...)) }) + if err != nil { + return fmt.Errorf("failed to initialize helm action config: %w", err) + } + + client := action.NewUpgrade(actionConfig) + client.Wait = true + client.WaitForJobs = true + client.Timeout = upgradeTimeout + + // Create a new Get action + get := action.NewGet(actionConfig) + + // Get the current release + rel, err := get.Run(u.ReleaseName) + if err != nil { + return fmt.Errorf("failed to get release: %w", err) + } + + // Get the chart from the current release + chart := rel.Chart + + // enable advanced metrics profile + options := helmValues.Options{ + ValueFiles: []string{u.ValuesFile}, + } + provider := getter.All(settings) + values, err := options.MergeValues(provider) + if err != nil { + return fmt.Errorf("failed to merge values: %w", err) + } + // logs values to be set during upgrade + log.Info("values to be set during upgrade", "values", values) + + rel, err = client.Run(u.ReleaseName, chart, values) + if 
err != nil { + return fmt.Errorf("failed to upgrade chart: %w", err) + } + + log.Info("upgraded chart", "release", rel.Name, "namespace", rel.Namespace) + log.Info("chart values", "config", rel.Config) + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/validate-service.go b/test/e2ev3/pkg/kubernetes/validate-service.go new file mode 100644 index 0000000000..ed4c014391 --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/validate-service.go @@ -0,0 +1,70 @@ +package kubernetes + +import ( + "context" + "fmt" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +type ResourceTypes string + +const ( + ResourceTypePod = "pod" + ResourceTypeService = "service" +) + +type ValidateResource struct { + ResourceName string + ResourceNamespace string + ResourceType string + Labels string + RestConfig *rest.Config +} + +func (v *ValidateResource) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(v.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + ctx, cancel := context.WithTimeout(ctx, defaultTimeoutSeconds*time.Second) + defer cancel() + + switch v.ResourceType { + case ResourceTypePod: + err = WaitForPodReady(ctx, clientset, v.ResourceNamespace, v.Labels) + if err != nil { + return fmt.Errorf("pod not found: %w", err) + } + case ResourceTypeService: + exists, err := serviceExists(ctx, clientset, v.ResourceNamespace, v.ResourceName, v.Labels) + if err != nil || !exists { + return fmt.Errorf("service not found: %w", err) + } + + default: + return fmt.Errorf("resource type %s not supported", v.ResourceType) + } + + if err != nil { + return fmt.Errorf("error waiting for pod to be ready: %w", err) + } + return nil +} + +func serviceExists(ctx context.Context, clientset *kubernetes.Clientset, namespace, _, labels string) (bool, error) { + var serviceList *corev1.ServiceList + serviceList, err := 
clientset.CoreV1().Services(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels}) + if err != nil { + return false, fmt.Errorf("error listing Services: %w", err) + } + if len(serviceList.Items) == 0 { + return false, nil + } + return true, nil +} diff --git a/test/e2ev3/pkg/kubernetes/validateHttp.go b/test/e2ev3/pkg/kubernetes/validateHttp.go new file mode 100644 index 0000000000..585be4456f --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/validateHttp.go @@ -0,0 +1,46 @@ +package kubernetes + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +const ( + RequestTimeout = 30 * time.Second +) + +type ValidateHTTPResponse struct { + URL string + ExpectedStatus int +} + +func (v *ValidateHTTPResponse) String() string { return "validate-http-response" } + +func (v *ValidateHTTPResponse) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, v) + ctx, cancel := context.WithTimeout(ctx, RequestTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, v.URL, nil) + if err != nil { + return fmt.Errorf("error creating HTTP request: %w", err) + } + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("error making HTTP request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != v.ExpectedStatus { + return fmt.Errorf("unexpected status code: got %d, want %d", resp.StatusCode, v.ExpectedStatus) + } + log.Info("HTTP validation succeeded", "url", v.URL, "statusCode", resp.StatusCode) + + return nil +} diff --git a/test/e2ev3/pkg/kubernetes/with-port-forward.go b/test/e2ev3/pkg/kubernetes/with-port-forward.go new file mode 100644 index 0000000000..4efcddfcfa --- /dev/null +++ b/test/e2ev3/pkg/kubernetes/with-port-forward.go @@ -0,0 +1,84 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package kubernetes + +import ( + "context" + "fmt" + "log/slog" + "time" + + flow "github.com/Azure/go-workflow" + "github.com/cenkalti/backoff/v4" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +const ( + // DefaultValidationTimeout bounds total time for validation within a port-forward. + DefaultValidationTimeout = 5 * time.Minute + + // DefaultRetryAttempts for metric validation (metrics may need time to appear). + DefaultRetryAttempts = 10 + + // DefaultScenarioTimeout bounds the total setup phase of a scenario. + DefaultScenarioTimeout = 10 * time.Minute +) + +// WithPortForward is a composite step that: +// 1. Starts a Kubernetes port-forward +// 2. Runs all inner steps sequentially (as a Pipe) +// 3. Guarantees the port-forward is stopped via defer, even on error +type WithPortForward struct { + PF *PortForward + Steps []flow.Steper +} + +func (w *WithPortForward) String() string { return "with-port-forward" } + +func (w *WithPortForward) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, w) + if err := w.PF.Do(ctx); err != nil { + return fmt.Errorf("port-forward failed: %w", err) + } + defer func() { + log.Info("stopping port-forward", "local", w.PF.LocalPort, "remote", w.PF.RemotePort) + w.PF.Stop() //nolint:errcheck // best-effort cleanup + }() + + inner := new(flow.Workflow) + inner.Add(flow.Pipe(w.Steps...)) + if err := inner.Do(ctx); err != nil { + return fmt.Errorf("validation within port-forward failed: %w", err) + } + return nil +} + +// Unwrap exposes inner steps to go-workflow for visibility/debugging. +func (w *WithPortForward) Unwrap() []flow.Steper { + return w.Steps +} + +// CurlExpectFail creates a named step that runs a command expected to fail +// (e.g., curl behind a deny-all network policy). The error is intentionally swallowed. 
+func CurlExpectFail(name string, exec *ExecInPod) flow.Steper { + return flow.Func(name, func(ctx context.Context) error { + if err := exec.Do(ctx); err != nil { + slog.Info("curl failed as expected", "step", name, "error", err) + } + return nil + }) +} + +// RetryWithBackoff configures exponential backoff for metric validation. +func RetryWithBackoff(ro *flow.RetryOption) { + bo := backoff.NewExponentialBackOff() + bo.InitialInterval = 5 * time.Second + bo.MaxInterval = 30 * time.Second + bo.MaxElapsedTime = 5 * time.Minute + ro.Backoff = bo + ro.Attempts = DefaultRetryAttempts + ro.TimeoutPerTry = 30 * time.Second +} diff --git a/test/e2ev3/pkg/prometheus/prometheus.go b/test/e2ev3/pkg/prometheus/prometheus.go new file mode 100644 index 0000000000..6513eb56ef --- /dev/null +++ b/test/e2ev3/pkg/prometheus/prometheus.go @@ -0,0 +1,222 @@ +package prom + +import ( + "context" + "errors" + "fmt" + "io" + "log" + "net/http" + "reflect" + "strings" + "time" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "github.com/microsoft/retina/test/retry" + promclient "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" +) + +var ( + ErrNoMetricFound = fmt.Errorf("no metric found") + defaultTimeout = 300 * time.Second + defaultRetryDelay = 5 * time.Second + defaultRetryAttempts = 60 +) + +func CheckMetric(ctx context.Context, promAddress, metricName string, validMetric map[string]string, partial ...bool) error { + defaultRetrier := retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay} + + // Default partial to false if not provided + usePartial := len(partial) > 0 && partial[0] + + metrics := map[string]*promclient.MetricFamily{} + scrapeMetricsFn := func() error { + log.Printf("checking for metrics on %s", promAddress) + var err error + + // obtain a full dump of all metrics on the endpoint + metrics, err = getAllPrometheusMetricsFromURL(promAddress) + if err != nil { + return 
fmt.Errorf("could not scrape metrics from %s within %s: %w", promAddress, defaultTimeout, err)
		}

		// Loop through the scraped metrics looking for a match; if none is
		// found, log and return ErrNoMetricFound, which triggers a retry
		// (metrics may take a few scrape intervals to appear).
		if usePartial {
			err = verifyValidMetricPresentPartial(metricName, metrics, validMetric)
		} else {
			err = verifyValidMetricPresent(metricName, metrics, validMetric)
		}
		if err != nil {
			log.Printf("failed to find metric matching %s: %+v\n", metricName, validMetric)
			return ErrNoMetricFound
		}

		return nil
	}

	if err := defaultRetrier.Do(ctx, scrapeMetricsFn); err != nil {
		return fmt.Errorf("failed to get prometheus metrics: %w", err)
	}
	return nil
}

// CheckMetricFromBuffer parses an already-captured Prometheus text dump and
// verifies that metricName exists with exactly the labels in validMetric.
// Returns ErrNoMetricFound when no matching sample is present.
func CheckMetricFromBuffer(prometheusMetricData []byte, metricName string, validMetric map[string]string) error {
	metrics, err := getAllPrometheusMetricsFromBuffer(prometheusMetricData)
	if err != nil {
		return fmt.Errorf("failed to parse prometheus metrics: %w", err)
	}

	if err := verifyValidMetricPresent(metricName, metrics, validMetric); err != nil {
		log.Printf("failed to find metric matching %s: %+v\n", metricName, validMetric)
		return ErrNoMetricFound
	}

	return nil
}

// verifyValidMetricPresent reports nil when some sample of metricName
// carries exactly the label set validMetric (full map equality). An empty
// validMetric matches any sample that has at least one label.
func verifyValidMetricPresent(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error {
	for _, family := range data {
		if family.GetName() != metricName {
			continue
		}
		// Renamed inner loop variable: the original shadowed the outer
		// `metric` range variable.
		for _, sample := range family.GetMetric() {
			// Collect all label name/value pairs on this sample.
			metricLabels := map[string]string{}
			for _, label := range sample.GetLabel() {
				metricLabels[label.GetName()] = label.GetValue()
			}

			// An empty validMetric only requires the metric to exist with labels.
			if len(validMetric) == 0 && len(metricLabels) > 0 {
				return nil
			}

			if reflect.DeepEqual(metricLabels, validMetric) {
				return nil
			}
		}
	}

	return fmt.Errorf("failed to find metric matching: %+v: %w", validMetric, ErrNoMetricFound)
}

func
getAllPrometheusMetricsFromURL(url string) (map[string]*promclient.MetricFamily, error) { + client := http.Client{} + resp, err := client.Get(url) //nolint + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic + } + + metrics, err := ParseReaderPrometheusMetrics(resp.Body) + if err != nil { + return nil, err + } + + return metrics, nil +} + +// verifyValidMetricPresentPartial checks if a metric exists with labels that contain +// all the key-value pairs in validMetric (partial matching - the metric can have additional labels) +func verifyValidMetricPresentPartial(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error { + for _, metric := range data { + if metric.GetName() == metricName { + for _, metric := range metric.GetMetric() { + + // get all labels and values on the metric + metricLabels := map[string]string{} + for _, label := range metric.GetLabel() { + metricLabels[label.GetName()] = label.GetValue() + } + + // if valid metric is empty, then we just need to make sure the metric and value is present + if len(validMetric) == 0 && len(metricLabels) > 0 { + return nil + } + + // Check if all key-value pairs in validMetric exist in metricLabels + allMatch := true + for key, value := range validMetric { + if metricLabels[key] != value { + allMatch = false + break + } + } + + if allMatch { + return nil + } + } + } + } + + return fmt.Errorf("failed to find metric matching: %+v: %w", validMetric, ErrNoMetricFound) +} + +func getAllPrometheusMetricsFromBuffer(buf []byte) (map[string]*promclient.MetricFamily, error) { + parser := expfmt.NewTextParser(model.LegacyValidation) + reader := strings.NewReader(string(buf)) + return parser.TextToMetricFamilies(reader) //nolint +} + +func ParseReaderPrometheusMetrics(input io.Reader) 
(map[string]*promclient.MetricFamily, error) { + parser := expfmt.NewTextParser(model.LegacyValidation) + return parser.TextToMetricFamilies(input) //nolint +} + +// When capturing promethus output via curl and exect, there's a lot +// of garbage at the front +func stripExecGarbage(s string) string { + index := strings.Index(s, "#") + if index == -1 { + // If there's no `#`, return the original string + return s + } + // Slice the string up to the character before the first `#` + return s[:index] +} + +var ErrMetricFound = errors.New("unexpected metric found") + +// ValidateMetricStep validates Prometheus metrics at a given port. +// Implements flow.Steper via Do(context.Context) error. +type ValidateMetricStep struct { + ForwardedPort string + MetricName string + ValidMetrics []map[string]string + ExpectMetric bool + PartialMatch bool +} + +func (v *ValidateMetricStep) Do(ctx context.Context) error { + _, slogger := utils.StepLogger(ctx, v) + + promAddress := fmt.Sprintf("http://localhost:%s/metrics", v.ForwardedPort) + + for _, validMetric := range v.ValidMetrics { + err := CheckMetric(ctx, promAddress, v.MetricName, validMetric, v.PartialMatch) + if err != nil { + if !v.ExpectMetric && errors.Is(err, ErrNoMetricFound) { + slogger.Info("metric not found, as expected", "metric", v.MetricName) + return nil + } + return fmt.Errorf("failed to verify prometheus metrics: %w", err) + } + + if !v.ExpectMetric { + return fmt.Errorf("did not expect to find metric %s matching %+v: %w", v.MetricName, validMetric, ErrMetricFound) + } + + slogger.Info("found metric", "metric", v.MetricName, "labels", validMetric) + } + return nil +} diff --git a/test/e2ev3/pkg/utils/context_logger.go b/test/e2ev3/pkg/utils/context_logger.go new file mode 100644 index 0000000000..b5209687c6 --- /dev/null +++ b/test/e2ev3/pkg/utils/context_logger.go @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package utils + +import ( + "context" + "log/slog" + + flow "github.com/Azure/go-workflow" +) + +type prefixKey struct{} + +// StepLogger appends the step name of s to the accumulated context prefix +// and returns the enriched context + a logger tagged with the full prefix. +// +// Call this at the top of every Do(ctx) at any level: +// +// func (w *Workflow) Do(ctx context.Context) error { +// ctx, log := utils.StepLogger(ctx, w) // prefix = "basic-metrics" +// ... +// } +// func (p *PortForward) Do(ctx context.Context) error { +// _, log := utils.StepLogger(ctx, p) // prefix = "basic-metrics/drop/port-forward" +// ... +// } +func StepLogger(ctx context.Context, s any) (context.Context, *slog.Logger) { + name := StepName(s) + existing := Prefix(ctx) + var prefix string + if existing != "" { + prefix = existing + "/" + name + } else { + prefix = name + } + ctx = context.WithValue(ctx, prefixKey{}, prefix) + return ctx, slog.Default().With("prefix", prefix) +} + +// Prefix returns the accumulated log prefix stored in ctx. +func Prefix(ctx context.Context) string { + if v, ok := ctx.Value(prefixKey{}).(string); ok { + return v + } + return "" +} + +// Scenario wraps a flow.Workflow with a name that gets added to the +// context prefix when executed. Use this for test/scenario grouping: +// +// &utils.Scenario{Name: "drop", Inner: buildDropWorkflow(...)} +type Scenario struct { + Name string + Inner *flow.Workflow +} + +func (s *Scenario) String() string { return s.Name } + +func (s *Scenario) Do(ctx context.Context) error { + ctx, _ = StepLogger(ctx, s) + return s.Inner.Do(ctx) +} diff --git a/test/e2ev3/pkg/utils/slog_handler.go b/test/e2ev3/pkg/utils/slog_handler.go new file mode 100644 index 0000000000..713f3cb8f2 --- /dev/null +++ b/test/e2ev3/pkg/utils/slog_handler.go @@ -0,0 +1,291 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +package utils + +import ( + "bytes" + "context" + "fmt" + "io" + "log/slog" + "path" + "runtime" + "slices" + "strings" + "sync" + "unicode" + + "golang.org/x/term" +) + +// StepHandler is an slog.Handler that produces structured log lines with +// workflow/test/step context rendered as a bracketed prefix. +// +// Output format: +// +// 15:04:05 INFO [workflow/test/step] message key=value ... +// +// The "workflow", "test", and "step" attributes are absorbed into the prefix +// and not printed as key=value pairs. When no prefix parts are set the +// brackets are omitted entirely. +type StepHandler struct { + w io.Writer + level slog.Level + workflow string + test string + step string + prefix string + color bool + attrs []slog.Attr + mu *sync.Mutex +} + +func NewStepHandler(w io.Writer, level slog.Level) *StepHandler { + c := false + if f, ok := w.(interface{ Fd() uintptr }); ok { + c = isTerminal(f.Fd()) + } + return &StepHandler{w: w, level: level, color: c, mu: &sync.Mutex{}} +} + +// NewStepHandlerWithColor creates a handler with explicit color control (for tests). +func NewStepHandlerWithColor(w io.Writer, level slog.Level, color bool) *StepHandler { + return &StepHandler{w: w, level: level, color: color, mu: &sync.Mutex{}} +} + +func (h *StepHandler) Enabled(_ context.Context, level slog.Level) bool { + return level >= h.level +} + +func (h *StepHandler) Handle(ctx context.Context, r slog.Record) error { + var buf bytes.Buffer + + // Start with any prefix from context (set by StepLogger), + // then check handler-level prefix (from WithAttrs). + prefix := Prefix(ctx) + if prefix == "" { + prefix = h.prefix + } + + // Also check handler-level and record-level attrs for prefix/workflow/test/step. + // "prefix" overrides everything; legacy workflow/test/step build a prefix if no "prefix" attr. 
+ workflow, test, step := h.workflow, h.test, h.step + + var extra []slog.Attr + r.Attrs(func(a slog.Attr) bool { + switch a.Key { + case "prefix": + prefix = a.Value.String() + case "workflow": + workflow = a.Value.String() + case "test": + test = a.Value.String() + case "step": + step = a.Value.String() + default: + extra = append(extra, a) + } + return true + }) + + // If no explicit prefix, build from workflow/test/step parts. + if prefix == "" { + prefix = buildPrefix(workflow, test, step) + } + + // If still empty, try caller detection from stack. + if prefix == "" { + cw, _, cs := callerPrefix() + prefix = buildPrefix(cw, "", cs) + } + + // Timestamp and level always come first. + fmt.Fprintf(&buf, "%s %s ", + r.Time.Format("15:04:05"), + r.Level.String()) + + // Render the [prefix] bracket. + if prefix != "" { + if h.color { + code := colorForPrefix(prefix) + fmt.Fprintf(&buf, "\033[%dm[%s]\033[0m ", code, prefix) + } else { + fmt.Fprintf(&buf, "[%s] ", prefix) + } + } + + buf.WriteString(r.Message) + + // Pre-attached attrs (from WithAttrs), skipping prefix keys. + for _, a := range h.attrs { + fmt.Fprintf(&buf, " %s=%s", a.Key, a.Value) + } + // Record-level attrs (prefix keys already absorbed above). 
+ for _, a := range extra { + fmt.Fprintf(&buf, " %s=%s", a.Key, a.Value) + } + + buf.WriteByte('\n') + + h.mu.Lock() + defer h.mu.Unlock() + _, err := h.w.Write(buf.Bytes()) + return err +} + +func (h *StepHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + workflow, test, step, prefix := h.workflow, h.test, h.step, h.prefix + var remaining []slog.Attr + for _, a := range attrs { + switch a.Key { + case "prefix": + prefix = a.Value.String() + case "workflow": + workflow = a.Value.String() + case "test": + test = a.Value.String() + case "step": + step = a.Value.String() + default: + remaining = append(remaining, a) + } + } + return &StepHandler{ + w: h.w, + level: h.level, + workflow: workflow, + test: test, + step: step, + prefix: prefix, + color: h.color, + attrs: append(slices.Clone(h.attrs), remaining...), + mu: h.mu, + } +} + +func (h *StepHandler) WithGroup(name string) slog.Handler { + return h +} + +// buildPrefix joins non-empty parts with "/". +func buildPrefix(parts ...string) string { + var buf bytes.Buffer + for _, p := range parts { + if p == "" { + continue + } + if buf.Len() > 0 { + buf.WriteByte('/') + } + buf.WriteString(p) + } + return buf.String() +} + +// colorForPrefix returns a deterministic ANSI color code for the given prefix string. +func colorForPrefix(prefix string) int { + codes := []int{31, 32, 33, 34, 35, 36, 91, 92, 93, 94, 95, 96} + h := fnv32a(prefix) + return codes[h%uint32(len(codes))] +} + +func fnv32a(s string) uint32 { + h := uint32(2166136261) + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} + +// isTerminal checks if the given file descriptor is a terminal. +func isTerminal(fd uintptr) bool { + return term.IsTerminal(int(fd)) +} + +const e2ev3Prefix = "retina/test/e2ev3/" + +// callerPrefix scans the call stack for e2ev3 types and returns +// (workflow, test, step). 
It identifies the outermost Workflow receiver +// as the workflow name and the innermost non-Workflow receiver as the step. +func callerPrefix() (workflow, test, step string) { + var pcs [32]uintptr + n := runtime.Callers(3, pcs[:]) + frames := runtime.CallersFrames(pcs[:n]) + for { + frame, more := frames.Next() + if !strings.Contains(frame.Function, e2ev3Prefix) { + if !more { + break + } + continue + } + typeName, pkgName := extractCallerInfo(frame.Function) + if typeName == "" { + if !more { + break + } + continue + } + kebab := toKebabCase(typeName) + if kebab == "workflow" || kebab == "step" { + // Generic type — use package name as the workflow identifier. + workflow = toKebabCase(pkgName) + } else if kebab == "slog-writer" { + // io.Writer adapter — not a real step, skip it. + if !more { + break + } + continue + } else if step == "" { + step = kebab + } + if !more { + break + } + } + return workflow, test, step +} + +// extractCallerInfo extracts the type name and package name from a fully +// qualified function name like "github.com/.../pkg/kubernetes.(*PortForward).Do". +func extractCallerInfo(funcName string) (typeName, pkgName string) { + // Get last path component: "kubernetes.(*PortForward).Do" + base := path.Base(funcName) + // Split on ".": ["kubernetes", "(*PortForward)", "Do"] + parts := strings.SplitN(base, ".", 3) + if len(parts) < 2 { + return "", "" + } + pkgName = parts[0] + receiver := parts[1] + // Strip pointer/paren: "(*PortForward)" → "PortForward" + receiver = strings.TrimPrefix(receiver, "(*") + receiver = strings.TrimSuffix(receiver, ")") + receiver = strings.TrimPrefix(receiver, "*") + return receiver, pkgName +} + +// toKebabCase converts PascalCase to kebab-case, keeping consecutive +// uppercase letters together (e.g. "InstallNPM" → "install-npm"). 
+func toKebabCase(s string) string { + var buf bytes.Buffer + runes := []rune(s) + for i, r := range runes { + if unicode.IsUpper(r) { + if i > 0 { + prev := runes[i-1] + if unicode.IsLower(prev) { + buf.WriteByte('-') + } else if unicode.IsUpper(prev) && i+1 < len(runes) && unicode.IsLower(runes[i+1]) { + buf.WriteByte('-') + } + } + buf.WriteRune(unicode.ToLower(r)) + } else { + buf.WriteRune(r) + } + } + return buf.String() +} diff --git a/test/e2ev3/pkg/utils/slog_handler_test.go b/test/e2ev3/pkg/utils/slog_handler_test.go new file mode 100644 index 0000000000..348357447e --- /dev/null +++ b/test/e2ev3/pkg/utils/slog_handler_test.go @@ -0,0 +1,366 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package utils + +import ( + "bytes" + "log/slog" + "regexp" + "strings" + "testing" + +) + +// These mock types simulate the real e2e call stack: +// +// Workflow.Do() → sets "workflow" attr, passes logger down +// └─ addScenario(log) → sets "test" attr, passes logger down +// └─ WithPortForward.Do() → calls PortForward.Do() +// └─ PortForward.Do() → sets "step" attr, logs messages + +// Workflow mirrors *basicmetrics.Workflow. +// StepName should resolve to the package name ("utils" here) since the +// type name "Workflow" is generic. +type Workflow struct { + // bareStep, if set, is called instead of the normal scenario chain. + // Used by TestHandlerFormat_WorkflowPrefixFromStack to test stack-based + // workflow detection when steps don't receive an explicit logger. + bareStep func() +} + +func (w *Workflow) Do() { + if w.bareStep != nil { + w.bareStep() + return + } + // Real workflows create a logger and pass it to scenarios — they + // don't log directly. This matches basicmetrics.Workflow.Do(). + log := slog.Default().With("workflow", StepName(w)) + + // Simulate passing logger to scenario. + s := &MockScenario{log: log} + s.Do() +} + +// MockScenario mirrors addDropScenario / addTCPScenario. 
+type MockScenario struct { + log *slog.Logger +} + +func (s *MockScenario) Do() { + // Real scenarios add "test" attr and pass the logger to steps. + log := s.log.With("test", "drop") + + // Simulate passing logger to PortForward via WithPortForward. + pf := &MockPortForward{Log: log} + pf.Do() +} + +// MockPortForward mirrors *k8s.PortForward. +type MockPortForward struct { + Log *slog.Logger +} + +func (pf *MockPortForward) Do() { + log := pf.Log + if log == nil { + log = slog.Default() + } + log = log.With("step", StepName(pf)) + log.Info("finding pod with affinity", "label", "k8s-app=retina") + log.Info("attempting port forward", "pod", "retina-agent-abc", "namespace", "kube-system") + log.Info("port forward validation succeeded", "address", "http://localhost:10093") +} + +// MockBareStep simulates a step that does NOT receive a logger from the +// workflow (e.g., CreateAgnhostStatefulSet, CreateDenyAllNetworkPolicy). +// It uses slog.Default() — the handler must detect the workflow from the stack. +type MockBareStep struct{} + +func (s *MockBareStep) Do() { + slog.Info("creating resource", "name", "agnhost") +} + +// WorkflowWithBareStep simulates a Workflow that calls a step without passing +// a logger. Note: type name is NOT "Workflow", so the handler won't detect it +// as a workflow — only as a step. +type WorkflowWithBareStep struct{} + +func (w *WorkflowWithBareStep) Do() { + step := &MockBareStep{} + step.Do() +} + +// MockCallerDetected is used when NO explicit "step" attribute is set. +// The handler should auto-detect the type name via runtime stack inspection. +type MockCallerDetected struct{} + +func (m *MockCallerDetected) Do() { + // No log.With("step", ...) — handler must detect "mock-caller-detected" from the stack. + slog.Info("this should auto-detect step name") +} + +// stripANSI removes ANSI escape codes for easier assertion. 
+func stripANSI(s string) string { + re := regexp.MustCompile(`\x1b\[[0-9;]*m`) + return re.ReplaceAllString(s, "") +} + +// hasANSI checks that the bracketed prefix contains ANSI color codes. +func hasANSI(s string) bool { + re := regexp.MustCompile(`\x1b\[\d+m\[`) + return re.MatchString(s) +} + +func TestHandlerFormat_ExplicitAttributes(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandler(&buf, slog.LevelInfo) + slog.SetDefault(slog.New(handler)) + + // Replicate: Workflow.Do() → addScenario(log) → PortForward.Do() + w := &Workflow{} + w.Do() + + output := buf.String() + lines := strings.Split(strings.TrimSpace(output), "\n") + if len(lines) < 3 { + t.Fatalf("expected at least 3 log lines, got %d:\n%s", len(lines), output) + } + + // Verify each line format: "HH:MM:SS LEVEL [prefix] message key=value" + timeLevel := regexp.MustCompile(`^\d{2}:\d{2}:\d{2} (INFO|ERROR|WARN|DEBUG) `) + for i, line := range lines { + if !timeLevel.MatchString(line) { + t.Errorf("line %d: expected timestamp+level first, got: %s", i, line) + } + } + + // All 3 lines come from MockPortForward.Do() which sets step explicitly. + // Prefix should be [utils/drop/mock-port-forward]: + // workflow = "utils" (StepName resolves generic Workflow → package name) + // test = "drop" (set in MockScenario.Do) + // step = "mock-port-forward" (set explicitly via log.With) + expectedPrefix := "[utils/drop/mock-port-forward]" + for i, line := range lines { + if !strings.Contains(line, expectedPrefix) { + t.Errorf("line %d: expected %s prefix, got: %s", i, expectedPrefix, line) + } + } + + // Buffer is not a TTY → no ANSI codes should be present. + for i, line := range lines { + if hasANSI(line) { + t.Errorf("line %d: unexpected ANSI codes in non-TTY output", i) + } + } + + // Verify key=value pairs propagate. 
+ if !strings.Contains(lines[0], "label=k8s-app=retina") { + t.Errorf("line 0: expected label=k8s-app=retina, got: %s", lines[0]) + } +} + +func TestHandlerFormat_ColorOnTTY(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandlerWithColor(&buf, slog.LevelInfo, true) + slog.SetDefault(slog.New(handler)) + + w := &Workflow{} + w.Do() + + lines := strings.Split(strings.TrimSpace(buf.String()), "\n") + if len(lines) < 3 { + t.Fatalf("expected at least 3 log lines, got %d", len(lines)) + } + + // With color forced on, ANSI codes should wrap the prefix. + for i, line := range lines { + if !hasANSI(line) { + t.Errorf("line %d: expected ANSI color on bracketed prefix", i) + } + } + + // Stripping ANSI should still show the correct prefix. + expectedPrefix := "[utils/drop/mock-port-forward]" + for i, line := range lines { + plain := stripANSI(line) + if !strings.Contains(plain, expectedPrefix) { + t.Errorf("line %d: expected %s prefix, got: %s", i, expectedPrefix, plain) + } + } +} + +func TestHandlerFormat_ColorDeterminism(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandlerWithColor(&buf, slog.LevelInfo, true) + slog.SetDefault(slog.New(handler)) + + w := &Workflow{} + w.Do() + + lines := strings.Split(strings.TrimSpace(buf.String()), "\n") + if len(lines) < 2 { + t.Fatalf("expected at least 2 lines, got %d", len(lines)) + } + + // All lines share the same prefix → same color. + ansiRe := regexp.MustCompile(`(\x1b\[\d+m)\[`) + first := ansiRe.FindStringSubmatch(lines[0]) + if first == nil { + t.Fatal("no ANSI color code found in first line") + } + for i, line := range lines[1:] { + match := ansiRe.FindStringSubmatch(line) + if match == nil { + t.Errorf("line %d: no ANSI color code found", i+1) + continue + } + if match[1] != first[1] { + t.Errorf("line %d: color %q differs from first line %q", i+1, match[1], first[1]) + } + } + + // Log with a DIFFERENT prefix and verify it also gets a valid color. 
+ buf.Reset() + slog.SetDefault(slog.New(NewStepHandlerWithColor(&buf, slog.LevelInfo, true))) + m := &MockCallerDetected{} + m.Do() + diffLine := buf.String() + diffMatch := ansiRe.FindStringSubmatch(diffLine) + if diffMatch == nil { + t.Fatal("no ANSI color code found in caller-detected line") + } + validAnsi := regexp.MustCompile(`^\x1b\[\d+m$`) + if !validAnsi.MatchString(diffMatch[1]) { + t.Errorf("invalid ANSI escape for different prefix: %q", diffMatch[1]) + } +} + +func TestHandlerFormat_CallerAutoDetection(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandler(&buf, slog.LevelInfo) + slog.SetDefault(slog.New(handler)) + + // No explicit "step" attribute — handler should detect from call stack. + m := &MockCallerDetected{} + m.Do() + + output := stripANSI(buf.String()) + // The handler should detect "mock-caller-detected" from the receiver type. + if !strings.Contains(output, "[mock-caller-detected]") { + t.Errorf("expected auto-detected [mock-caller-detected] prefix, got: %s", output) + } +} + +func TestHandlerFormat_WorkflowAutoDetection(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandler(&buf, slog.LevelInfo) + slog.SetDefault(slog.New(handler)) + + // Simulate a step called from inside a Workflow.Do() that does NOT + // receive a logger. The handler should detect both the workflow + // ("utils" — package name of WorkflowWithBareStep) and the step + // ("mock-bare-step") from the call stack. + w := &WorkflowWithBareStep{} + w.Do() + + output := buf.String() + t.Logf("output: %s", output) + + // Should detect workflow from (*WorkflowWithBareStep).Do on the stack. + // WorkflowWithBareStep → type name ends in "...BareStep" — not "Workflow", + // so it won't be detected as a workflow. Let me use the real Workflow type. + // Actually, the type is WorkflowWithBareStep, not Workflow — the handler + // only recognizes types named exactly "Workflow". This test verifies the + // step is detected. 
+ if !strings.Contains(output, "[mock-bare-step]") { + t.Errorf("expected [mock-bare-step] in output, got: %s", output) + } +} + +func TestHandlerFormat_WorkflowPrefixFromStack(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandler(&buf, slog.LevelInfo) + slog.SetDefault(slog.New(handler)) + + // Simulate real e2e: Workflow.Do() → step.Do() → slog.Info(). + // The step uses slog.Default() (no explicit logger/attributes). + // The handler should detect: + // step = "mock-bare-step" (from (*MockBareStep).Do) + // workflow = "utils" (from (*Workflow).Do higher on the stack) + bare := &MockBareStep{} + w := &Workflow{bareStep: bare.Do} + w.Do() + + output := buf.String() + t.Logf("output: %s", output) + + // Verify callerPrefix detects workflow from stack. + // Note: in the real e2e, go-workflow sits between Workflow.Do and Step.Do. + // Our stack walker skips non-e2ev3 frames, so it should still find both. + if !strings.Contains(output, "[utils/mock-bare-step]") { + t.Errorf("expected [utils/mock-bare-step] prefix, got: %s", output) + } +} + +func TestHandlerFormat_NoPrefix(t *testing.T) { + var buf bytes.Buffer + handler := NewStepHandler(&buf, slog.LevelInfo) + slog.SetDefault(slog.New(handler)) + + // Plain slog.Info from a non-method (no receiver to detect). + slog.Info("bare log line") + + output := buf.String() + plain := stripANSI(output) + // Should still have timestamp+level, but no bracketed prefix (or auto-detected). + if !regexp.MustCompile(`^\d{2}:\d{2}:\d{2} INFO `).MatchString(plain) { + t.Errorf("expected timestamp+level first, got: %s", plain) + } +} + +func TestStepName_GenericTypes(t *testing.T) { + // Verify that generic type "Workflow" resolves to package name, not "workflow". + w := &Workflow{} + name := StepName(w) + // In this test file (package utils), it should be "utils". + if name != "utils" { + t.Errorf("StepName(*Workflow) = %q, want %q", name, "utils") + } + + // Non-generic types keep their own name. 
+ pf := &MockPortForward{} + name = StepName(pf) + if name != "mock-port-forward" { + t.Errorf("StepName(*MockPortForward) = %q, want %q", name, "mock-port-forward") + } + + mcd := &MockCallerDetected{} + name = StepName(mcd) + if name != "mock-caller-detected" { + t.Errorf("StepName(*MockCallerDetected) = %q, want %q", name, "mock-caller-detected") + } +} + +func TestColorForPrefix_Deterministic(t *testing.T) { + // Same input always produces the same color code. + for _, prefix := range []string{ + "basic-metrics/drop/port-forward", + "hubble-metrics/flow-intra/curl-pod", + "advanced-metrics/dns/validate", + "slog-writer", + } { + c1 := colorForPrefix(prefix) + c2 := colorForPrefix(prefix) + if c1 != c2 { + t.Errorf("colorForPrefix(%q) not deterministic: %d vs %d", prefix, c1, c2) + } + // Verify it's a valid ANSI color code (31-36 or 91-96). + if !((c1 >= 31 && c1 <= 36) || (c1 >= 91 && c1 <= 96)) { + t.Errorf("colorForPrefix(%q) = %d, not a valid ANSI color code", prefix, c1) + } + } +} diff --git a/test/e2ev3/pkg/utils/slog_writer.go b/test/e2ev3/pkg/utils/slog_writer.go new file mode 100644 index 0000000000..08ab5661d3 --- /dev/null +++ b/test/e2ev3/pkg/utils/slog_writer.go @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +package utils + +import ( + "bytes" + "context" + "log/slog" +) + +// SlogWriter is an io.Writer that logs each complete line through slog at the given level. +// Partial lines are buffered until a newline is received. +type SlogWriter struct { + Level slog.Level + Source string + buf []byte +} + +func (w *SlogWriter) Write(p []byte) (int, error) { + w.buf = append(w.buf, p...) 
+ for { + idx := bytes.IndexByte(w.buf, '\n') + if idx < 0 { + break + } + line := string(bytes.TrimRight(w.buf[:idx], "\r")) + w.buf = w.buf[idx+1:] + if line != "" { + slog.Log(context.Background(), w.Level, line, "source", w.Source) + } + } + return len(p), nil +} + +// Flush logs any remaining buffered content not terminated by a newline. +func (w *SlogWriter) Flush() { + if len(w.buf) > 0 { + line := string(bytes.TrimRight(w.buf, "\r\n")) + if line != "" { + slog.Log(context.Background(), w.Level, line, "source", w.Source) + } + w.buf = nil + } +} diff --git a/test/e2ev3/pkg/utils/stepname.go b/test/e2ev3/pkg/utils/stepname.go new file mode 100644 index 0000000000..828e6c20e4 --- /dev/null +++ b/test/e2ev3/pkg/utils/stepname.go @@ -0,0 +1,30 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package utils + +import ( + "reflect" + "strings" +) + +// StepName derives a kebab-case step name from the concrete type of s. +// For example, *k8s.CreateNamespace → "create-namespace". +// Generic names like "Workflow" or "Step" are replaced by the package name: +// *basicmetrics.Workflow → "basic-metrics", *config.Step → "config". +func StepName(s any) string { + t := reflect.TypeOf(s) + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + name := toKebabCase(t.Name()) + if name == "workflow" || name == "step" { + pkg := t.PkgPath() + if idx := strings.LastIndex(pkg, "/"); idx != -1 { + return toKebabCase(pkg[idx+1:]) + } + } + return name +} diff --git a/test/e2ev3/retina_e2e_test.go b/test/e2ev3/retina_e2e_test.go new file mode 100644 index 0000000000..ec4b061bfb --- /dev/null +++ b/test/e2ev3/retina_e2e_test.go @@ -0,0 +1,62 @@ +//go:build e2e + +// Package retina contains the e2e test entry point. +// +// A single test function drives three phases — image build, infrastructure +// provisioning, and workflow tests — so that `go test -tags=e2e -provider=kind` +// is all you need for a full local run. 
+package retina
+
+import (
+	"log/slog"
+	"os"
+	"testing"
+	"time"
+
+	flow "github.com/Azure/go-workflow"
+	"github.com/microsoft/retina/test/e2ev3/config"
+	"github.com/microsoft/retina/test/e2ev3/pkg/images"
+	"github.com/microsoft/retina/test/e2ev3/pkg/images/build"
+	"github.com/microsoft/retina/test/e2ev3/pkg/infra"
+	"github.com/microsoft/retina/test/e2ev3/pkg/utils"
+	"github.com/microsoft/retina/test/e2ev3/workflows/advancedmetrics"
+	advexp "github.com/microsoft/retina/test/e2ev3/workflows/advancedmetrics/experimental"
+	"github.com/microsoft/retina/test/e2ev3/workflows/basicmetrics"
+	basicexp "github.com/microsoft/retina/test/e2ev3/workflows/basicmetrics/experimental"
+	"github.com/microsoft/retina/test/e2ev3/workflows/capture"
+	"github.com/microsoft/retina/test/e2ev3/workflows/hubblemetrics"
+	"github.com/stretchr/testify/require"
+)
+
+// TestE2ERetina drives image build, cluster provisioning, and all Retina
+// workflow tests in sequence against a single cluster.
+func TestE2ERetina(t *testing.T) {
+	slog.SetDefault(slog.New(utils.NewStepHandler(os.Stderr, slog.LevelInfo)))
+
+	ctx, cancel := config.TestContext(t)
+	defer cancel()
+
+	c := &config.E2EConfig{}
+
+	loadConfig := &config.Step{Cfg: c}
+	buildImages := &build.Step{Cfg: c}
+	setupInfra := &infra.Workflow{Cfg: c, T: t}
+	loadImages := &images.Step{Cfg: c}
+
+	basic := &basicmetrics.Workflow{Cfg: c}
+	advanced := &advancedmetrics.Workflow{Cfg: c}
+	hubble := &hubblemetrics.Workflow{Cfg: c}
+	basicExp := &basicexp.Workflow{Cfg: c}
+	advExp := &advexp.Workflow{Cfg: c}
+	// Renamed from `cap`: that name shadows the builtin cap() and trips
+	// the predeclared/gocritic linters.
+	captureWf := &capture.Workflow{Cfg: c}
+
+	wf := &flow.Workflow{DontPanic: true}
+	wf.Add(flow.BatchPipe(
+		flow.Steps(loadConfig).Timeout(1*time.Minute),
+		flow.Steps(buildImages, setupInfra).Timeout(30*time.Minute),
+		flow.Steps(loadImages).Timeout(10*time.Minute),
+		flow.Pipe(basic, advanced, hubble, basicExp, advExp, captureWf),
+	))
+
+	require.NoError(t, wf.Do(ctx), "e2e workflow failed")
+}
diff --git 
a/test/e2ev3/workflows/advancedmetrics/dns.go b/test/e2ev3/workflows/advancedmetrics/dns.go new file mode 100644 index 0000000000..7e6879b016 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/dns.go @@ -0,0 +1,185 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package advancedmetrics + +import ( + "context" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" +) + +func addAdvancedDNSScenario(restConfig *rest.Config, namespace, arch, variant string, + command string, expectError bool, + reqQuery, reqQueryType, workloadKind string, + respNumResponse, respQuery, respQueryType, respReturnCode, respResponse string, +) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-adv-dns-" + variant + "-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + // Generate traffic inside the validation loop so packetparser captures it. 
+ execTraffic := flow.Func("adv-dns-"+variant+"-traffic-"+arch, func(ctx context.Context) error { + exec := &k8s.ExecInPod{PodName: podName, PodNamespace: namespace, Command: command, RestConfig: restConfig} + for i := 0; i < 2; i++ { + if err := exec.Do(ctx); err != nil && !expectError { + return err + } + } + return nil + }) + validateReq := &ValidateAdvancedDNSRequestStep{ + PodNamespace: namespace, PodName: podName, Query: reqQuery, QueryType: reqQueryType, + WorkloadKind: workloadKind, WorkloadName: agnhostName, RestConfig: restConfig, + } + validateResp := &ValidateAdvancedDNSResponseStep{ + PodNamespace: namespace, NumResponse: respNumResponse, PodName: podName, + Query: respQuery, QueryType: respQueryType, Response: respResponse, ReturnCode: respReturnCode, + WorkloadKind: workloadKind, WorkloadName: agnhostName, RestConfig: restConfig, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: "metrics", RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{execTraffic, validateReq, validateResp}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision the agnhost pod. + flow.Steps(createAgnhost).Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic + check metrics, retrying with backoff. + flow.Steps(validateWithPF).Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost).When(flow.Always), + ), + ) + return wf +} + +// EmptyResponse is a sentinel value that gets converted to an empty string +// for metric label matching. 
+const EmptyResponse = "emptyResponse" + +// KubeServiceIP is a sentinel value that gets resolved at runtime to the +// ClusterIP of the kubernetes.default service. +const KubeServiceIP = "kubeServiceIP" + +var ( + dnsAdvRequestCountMetricName = "networkobservability_adv_dns_request_count" + dnsAdvResponseCountMetricName = "networkobservability_adv_dns_response_count" +) + +// ValidateAdvancedDNSRequestStep checks the advanced DNS request count metric +// with labels including pod IP, namespace, pod name, query info, and workload info. +type ValidateAdvancedDNSRequestStep struct { + PodNamespace string + PodName string + Query string + QueryType string + WorkloadKind string + WorkloadName string + RestConfig *rest.Config +} + +func (v *ValidateAdvancedDNSRequestStep) Do(ctx context.Context) error { + metricsEndpoint := fmt.Sprintf("http://localhost:%s/metrics", config.RetinaMetricsPort) + + podIP, err := k8s.GetPodIP(ctx, v.RestConfig, v.PodNamespace, v.PodName) + if err != nil { + return fmt.Errorf("failed to get pod IP address: %w", err) + } + + validateAdvancedDNSRequestMetrics := map[string]string{ + "ip": podIP, + "namespace": v.PodNamespace, + "podname": v.PodName, + "query": v.Query, + "query_type": v.QueryType, + "workload_kind": v.WorkloadKind, + "workload_name": v.WorkloadName, + } + + err = prom.CheckMetric(ctx, metricsEndpoint, dnsAdvRequestCountMetricName, validateAdvancedDNSRequestMetrics) + if err != nil { + return fmt.Errorf("failed to verify advance dns request metrics %s: %w", dnsAdvRequestCountMetricName, err) + } + return nil +} + +// ValidateAdvancedDNSResponseStep checks the advanced DNS response count metric +// with labels including pod IP, namespace, pod name, num_response, query info, +// response, return_code, and workload info. 
+type ValidateAdvancedDNSResponseStep struct { + PodNamespace string + NumResponse string + PodName string + Query string + QueryType string + Response string + ReturnCode string + WorkloadKind string + WorkloadName string + RestConfig *rest.Config +} + +func (v *ValidateAdvancedDNSResponseStep) Do(ctx context.Context) error { + metricsEndpoint := fmt.Sprintf("http://localhost:%s/metrics", config.RetinaMetricsPort) + + podIP, err := k8s.GetPodIP(ctx, v.RestConfig, v.PodNamespace, v.PodName) + if err != nil { + return fmt.Errorf("failed to get pod IP address: %w", err) + } + + if v.Response == EmptyResponse { + v.Response = "" + } + if v.Response == KubeServiceIP { + clientset, err := kubernetes.NewForConfig(v.RestConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + svc, err := clientset.CoreV1().Services("default").Get(ctx, "kubernetes", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get kubernetes service ClusterIP: %w", err) + } + v.Response = svc.Spec.ClusterIP + } + + validateAdvanceDNSResponseMetrics := map[string]string{ + "ip": podIP, + "namespace": v.PodNamespace, + "num_response": v.NumResponse, + "podname": v.PodName, + "query": v.Query, + "query_type": v.QueryType, + "response": v.Response, + "return_code": v.ReturnCode, + "workload_kind": v.WorkloadKind, + "workload_name": v.WorkloadName, + } + + err = prom.CheckMetric(ctx, metricsEndpoint, dnsAdvResponseCountMetricName, validateAdvanceDNSResponseMetrics) + if err != nil { + return fmt.Errorf("failed to verify advance dns response metrics %s: %w", dnsAdvResponseCountMetricName, err) + } + return nil +} diff --git a/test/e2ev3/workflows/advancedmetrics/experimental/drop.go b/test/e2ev3/workflows/advancedmetrics/experimental/drop.go new file mode 100644 index 0000000000..e02e83ce88 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/experimental/drop.go @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addAdvancedDropScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-adv-drop-" + arch + podName := agnhostName + "-0" + + createNetPol := &k8s.CreateDenyAllNetworkPolicy{ + NetworkPolicyNamespace: namespace, RestConfig: restConfig, DenyAllLabelSelector: "app=" + agnhostName, + } + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl := k8s.CurlExpectFail("adv-drop-curl-"+arch, &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, + Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + }) + validateDropCount := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_drop_count", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateDropBytes := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_drop_bytes", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateDropCount, validateDropBytes}, + } + deleteNetPol := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.NetworkPolicy), 
ResourceName: "deny-all", + ResourceNamespace: namespace, RestConfig: restConfig, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, + ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. + flow.Pipe(createNetPol, createAgnhost, execCurl).Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF).Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteNetPol, deleteAgnhost).When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/advancedmetrics/experimental/forward.go b/test/e2ev3/workflows/advancedmetrics/experimental/forward.go new file mode 100644 index 0000000000..763f08ad68 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/experimental/forward.go @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package experimental + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addAdvancedForwardScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-adv-fwd-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, + Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + } + validateForwardCount := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_forward_count", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateForwardBytes := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_forward_bytes", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateForwardCount, validateForwardBytes}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, + ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. 
+ flow.Pipe(createAgnhost, execCurl).Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF).Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost).When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/advancedmetrics/experimental/latency.go b/test/e2ev3/workflows/advancedmetrics/experimental/latency.go new file mode 100644 index 0000000000..2b7c8466d8 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/experimental/latency.go @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + "k8s.io/client-go/rest" + flow "github.com/Azure/go-workflow" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" +) + +func addAPIServerLatencyScenario(restConfig *rest.Config) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + validateLatency := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_node_apiserver_latency", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateNoResponse := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_node_apiserver_no_response", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "k8s-app=retina", + }, + Steps: []flow.Steper{validateLatency, validateNoResponse}, + } + + // Validate: retry with 
exponential backoff until metrics appear. + wf.Add( + flow.Step(validateWithPF). + Retry(k8s.RetryWithBackoff), + ) + return wf +} diff --git a/test/e2ev3/workflows/advancedmetrics/experimental/tcp.go b/test/e2ev3/workflows/advancedmetrics/experimental/tcp.go new file mode 100644 index 0000000000..a5558d4e42 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/experimental/tcp.go @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addAdvancedTCPScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-adv-tcp-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, + Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + } + validateTCPFlags := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_tcpflags_count", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateTCPRetrans := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: "networkobservability_adv_tcpretrans_count", + ValidMetrics: []map[string]string{{}}, ExpectMetric: true, PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: 
config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateTCPFlags, validateTCPRetrans}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, + ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. + flow.Pipe(createAgnhost, execCurl).Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF).Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost).When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/advancedmetrics/experimental/workflow.go b/test/e2ev3/workflows/advancedmetrics/experimental/workflow.go new file mode 100644 index 0000000000..ad095ccda4 --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/experimental/workflow.go @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + "context" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +// Workflow runs the experimental advanced metrics workflow. +type Workflow struct { + Cfg *config.E2EConfig +} + +func (w *Workflow) String() string { return "advanced-metrics-experimental" } + +func (w *Workflow) Do(ctx context.Context) error { + ctx, _ = utils.StepLogger(ctx, w) + p := w.Cfg + restConfig := p.Cluster.RestConfig() + chartPath := p.Paths.RetinaChart + valuesFilePath := p.Paths.AdvancedProfile + testPodNamespace := config.TestPodNamespace + helmCfg := &p.Helm + + // Construct steps. 
+ upgradeRetina := &k8s.UpgradeRetinaHelmChart{ + Namespace: config.KubeSystemNamespace, + ReleaseName: "retina", + KubeConfigFilePath: p.Cluster.KubeConfigPath(), + ChartPath: chartPath, + HelmDriver: helmCfg.Driver, + ValuesFile: valuesFilePath, + } + + + var scenarios []flow.Steper + for _, arch := range config.Architectures { + scenarios = append(scenarios, + addAdvancedDropScenario(restConfig, testPodNamespace, arch), + addAdvancedForwardScenario(restConfig, testPodNamespace, arch), + addAdvancedTCPScenario(restConfig, testPodNamespace, arch), + ) + } + scenarios = append(scenarios, addAPIServerLatencyScenario(restConfig)) + + ensureStable := &k8s.EnsureStableComponent{ + PodNamespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + RestConfig: restConfig, + IgnoreContainerRestart: false, + } + + debug := &k8s.DebugOnFailure{ + RestConfig: restConfig, + Namespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + } + + // Wire dependencies and register. + // Scenarios run sequentially because they share the same port-forward port. + wf := &flow.Workflow{DontPanic: true} + wf.Add(flow.Step(upgradeRetina)) + prev := flow.Steper(upgradeRetina) + for _, s := range scenarios { + wf.Add(flow.Step(s).DependsOn(prev)) + prev = s + } + wf.Add(flow.Step(ensureStable).DependsOn(prev)) + wf.Add(flow.Step(debug).DependsOn(ensureStable).When(flow.AnyFailed)) + + return wf.Do(ctx) +} diff --git a/test/e2ev3/workflows/advancedmetrics/latency.go b/test/e2ev3/workflows/advancedmetrics/latency.go new file mode 100644 index 0000000000..e705cef91e --- /dev/null +++ b/test/e2ev3/workflows/advancedmetrics/latency.go @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+
+//go:build e2e
+
+package advancedmetrics
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/client-go/rest"
+
+	flow "github.com/Azure/go-workflow"
+	"github.com/microsoft/retina/test/e2ev3/config"
+	k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes"
+	prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus"
+)
+
+func addLatencyScenario(restConfig *rest.Config) *flow.Workflow {
+	wf := &flow.Workflow{DontPanic: true}
+	validateLatency := &ValidateAPIServerLatencyStep{}
+	validateWithPF := &k8s.WithPortForward{
+		PF: &k8s.PortForward{
+			Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina",
+			// Use the shared constants so the forwarded port always matches the
+			// port ValidateAPIServerLatencyStep scrapes (previously hard-coded "10093").
+			LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, Endpoint: config.MetricsEndpoint,
+			RestConfig: restConfig, OptionalLabelAffinity: "k8s-app=retina",
+		},
+		Steps: []flow.Steper{validateLatency},
+	}
+
+	// Validate: retry with exponential backoff until metrics appear.
+	wf.Add(
+		flow.Step(validateWithPF).
+			Retry(k8s.RetryWithBackoff),
+	)
+	return wf
+}
+
+var latencyBucketMetricName = "networkobservability_adv_node_apiserver_tcp_handshake_latency"
+
+// ValidateAPIServerLatencyStep checks that the API server TCP handshake
+// latency metric is present.
+type ValidateAPIServerLatencyStep struct{}
+
+func (v *ValidateAPIServerLatencyStep) Do(ctx context.Context) error {
+	promAddress := fmt.Sprintf("http://localhost:%s/metrics", config.RetinaMetricsPort)
+
+	metric := map[string]string{}
+	err := prom.CheckMetric(ctx, promAddress, latencyBucketMetricName, metric)
+	if err != nil {
+		return fmt.Errorf("failed to verify latency metrics %s: %w", latencyBucketMetricName, err)
+	}
+	return nil
+}
diff --git a/test/e2ev3/workflows/advancedmetrics/workflow.go b/test/e2ev3/workflows/advancedmetrics/workflow.go
new file mode 100644
index 0000000000..652219b10b
--- /dev/null
+++ b/test/e2ev3/workflows/advancedmetrics/workflow.go
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+ +//go:build e2e + +package advancedmetrics + +import ( + "context" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +// Workflow runs the advanced metrics workflow. +type Workflow struct { + Cfg *config.E2EConfig +} + +func (w *Workflow) String() string { return "advanced-metrics" } + +func (w *Workflow) Do(ctx context.Context) error { + ctx, _ = utils.StepLogger(ctx, w) + p := w.Cfg + restConfig := p.Cluster.RestConfig() + chartPath := p.Paths.RetinaChart + valuesFilePath := p.Paths.AdvancedProfile + testPodNamespace := config.TestPodNamespace + helmCfg := &p.Helm + + // Construct steps. + upgradeRetina := &k8s.UpgradeRetinaHelmChart{ + Namespace: config.KubeSystemNamespace, + ReleaseName: "retina", + KubeConfigFilePath: p.Cluster.KubeConfigPath(), + ChartPath: chartPath, + HelmDriver: helmCfg.Driver, + ValuesFile: valuesFilePath, + } + + var scenarios []flow.Steper + for _, arch := range config.Architectures { + scenarios = append(scenarios, + addAdvancedDNSScenario(restConfig, testPodNamespace, arch, + "valid", "nslookup kubernetes.default", false, + "kubernetes.default.svc.cluster.local.", "A", "StatefulSet", + "1", "kubernetes.default.svc.cluster.local.", "A", "NOERROR", KubeServiceIP, + ), + addAdvancedDNSScenario(restConfig, testPodNamespace, arch, + "nxdomain", "nslookup some.non.existent.domain.", true, + "some.non.existent.domain.", "A", "StatefulSet", + "0", "some.non.existent.domain.", "A", "NXDOMAIN", EmptyResponse, + ), + ) + } + scenarios = append(scenarios, addLatencyScenario(restConfig)) + + ensureStable := &k8s.EnsureStableComponent{ + PodNamespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + RestConfig: restConfig, + IgnoreContainerRestart: false, + } + + debug := &k8s.DebugOnFailure{ + RestConfig: restConfig, + Namespace: config.KubeSystemNamespace, + 
LabelSelector: "k8s-app=retina", + } + + // Wire dependencies and register. + // Scenarios run sequentially because they share the same port-forward port. + wf := &flow.Workflow{DontPanic: true} + wf.Add(flow.Step(upgradeRetina)) + prev := flow.Steper(upgradeRetina) + for _, s := range scenarios { + wf.Add(flow.Step(s).DependsOn(prev)) + prev = s + } + wf.Add(flow.Step(ensureStable).DependsOn(prev)) + wf.Add(flow.Step(debug).DependsOn(ensureStable).When(flow.AnyFailed)) + + return wf.Do(ctx) +} diff --git a/test/e2ev3/workflows/basicmetrics/dns.go b/test/e2ev3/workflows/basicmetrics/dns.go new file mode 100644 index 0000000000..d2d5589b83 --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/dns.go @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package basicmetrics + +import ( + "context" + "fmt" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addBasicDNSScenario(restConfig *rest.Config, namespace, arch, variant, command string, expectError bool) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-dns-basic-" + variant + "-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCmd1 := flow.Func("basic-dns-"+variant+"-1-"+arch, func(ctx context.Context) error { + err := (&k8s.ExecInPod{PodName: podName, PodNamespace: namespace, Command: command, RestConfig: restConfig}).Do(ctx) + if expectError { + return nil + } + return err + }) + execCmd2 := flow.Func("basic-dns-"+variant+"-2-"+arch, func(ctx context.Context) error { + err := (&k8s.ExecInPod{PodName: podName, PodNamespace: namespace, Command: command, RestConfig: 
restConfig}).Do(ctx) + if expectError { + return nil + } + return err + }) + validateReq := &ValidateBasicDNSRequestStep{Variant: variant + "-" + arch} + validateResp := &ValidateBasicDNSResponseStep{Variant: variant + "-" + arch} + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: "metrics", RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateReq, validateResp}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + flow.Pipe(createAgnhost, execCmd1, execCmd2). + Timeout(k8s.DefaultScenarioTimeout), + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} + +var ( + dnsBasicRequestCountMetricName = "networkobservability_dns_request_count" + dnsBasicResponseCountMetricName = "networkobservability_dns_response_count" +) + +// ValidateBasicDNSRequestStep checks that the basic DNS request count metric exists. +type ValidateBasicDNSRequestStep struct { + Variant string // distinguishes instances in the DAG (e.g. "valid-domain-amd64") +} + +func (v *ValidateBasicDNSRequestStep) Do(ctx context.Context) error { + metricsEndpoint := fmt.Sprintf("http://localhost:%s/metrics", config.RetinaMetricsPort) + + validBasicDNSRequestMetricLabels := map[string]string{} + + err := prom.CheckMetric(ctx, metricsEndpoint, dnsBasicRequestCountMetricName, validBasicDNSRequestMetricLabels) + if err != nil { + return fmt.Errorf("failed to verify basic dns request metrics %s: %w", dnsBasicRequestCountMetricName, err) + } + return nil +} + +// ValidateBasicDNSResponseStep checks that the basic DNS response count metric exists. 
+type ValidateBasicDNSResponseStep struct { + Variant string // distinguishes instances in the DAG + NumResponse string + Query string + QueryType string + ReturnCode string + Response string +} + +func (v *ValidateBasicDNSResponseStep) Do(ctx context.Context) error { + metricsEndpoint := fmt.Sprintf("http://localhost:%s/metrics", config.RetinaMetricsPort) + + if v.Response == emptyResponse { + v.Response = "" + } + + validBasicDNSResponseMetricLabels := map[string]string{} + + err := prom.CheckMetric(ctx, metricsEndpoint, dnsBasicResponseCountMetricName, validBasicDNSResponseMetricLabels) + if err != nil { + return fmt.Errorf("failed to verify basic dns response metrics %s: %w", dnsBasicResponseCountMetricName, err) + } + return nil +} + +// emptyResponse is a sentinel value that gets converted to an empty string +// for metric label matching. +const emptyResponse = "emptyResponse" diff --git a/test/e2ev3/workflows/basicmetrics/drop.go b/test/e2ev3/workflows/basicmetrics/drop.go new file mode 100644 index 0000000000..e7fc83adb5 --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/drop.go @@ -0,0 +1,103 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package basicmetrics + +import ( + "context" + "fmt" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addDropScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-drop-" + arch + podName := agnhostName + "-0" + + createNetPol := &k8s.CreateDenyAllNetworkPolicy{ + NetworkPolicyNamespace: namespace, RestConfig: restConfig, DenyAllLabelSelector: "app=" + agnhostName, + } + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostNamespace: namespace, AgnhostName: agnhostName, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl1 := k8s.CurlExpectFail("drop-curl-1-"+arch, &k8s.ExecInPod{ + PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + }) + execCurl2 := k8s.CurlExpectFail("drop-curl-2-"+arch, &k8s.ExecInPod{ + PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + }) + validateDrop := &ValidateRetinaDropMetricStep{PortForwardedRetinaPort: "10093", Direction: "unknown", Reason: IPTableRuleDrop} + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: "10093", RemotePort: "10093", Endpoint: "metrics", + RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateDrop}, + } + deleteNetPol := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.NetworkPolicy), ResourceName: "deny-all", ResourceNamespace: namespace, RestConfig: restConfig, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: 
restConfig,
+	}
+
+	wf.Add(
+		flow.BatchPipe(
+			flow.Pipe(createNetPol, createAgnhost, execCurl1, execCurl2).
+				Timeout(k8s.DefaultScenarioTimeout),
+			flow.Steps(validateWithPF).
+				Retry(k8s.RetryWithBackoff),
+			flow.Pipe(deleteNetPol, deleteAgnhost).
+				When(flow.Always),
+		),
+	)
+	return wf
+}
+
+var (
+	dropCountMetricName = "networkobservability_drop_count"
+	dropBytesMetricName = "networkobservability_drop_bytes"
+)
+
+const (
+	IPTableRuleDrop = "IPTABLE_RULE_DROP"
+
+	directionKey = "direction"
+	reasonKey = "reason"
+)
+
+// ValidateRetinaDropMetricStep checks that drop count and drop bytes metrics
+// are present with the expected direction and reason labels.
+type ValidateRetinaDropMetricStep struct {
+	PortForwardedRetinaPort string
+	Direction string
+	Reason string
+}
+
+func (v *ValidateRetinaDropMetricStep) Do(ctx context.Context) error {
+	promAddress := fmt.Sprintf("http://localhost:%s/metrics", v.PortForwardedRetinaPort)
+
+	metric := map[string]string{
+		directionKey: v.Direction,
+		reasonKey: v.Reason,
+	}
+
+	err := prom.CheckMetric(ctx, promAddress, dropCountMetricName, metric)
+	if err != nil {
+		return fmt.Errorf("failed to verify prometheus metrics %s: %w", dropCountMetricName, err)
+	}
+
+	err = prom.CheckMetric(ctx, promAddress, dropBytesMetricName, metric)
+	if err != nil {
+		return fmt.Errorf("failed to verify prometheus metrics %s: %w", dropBytesMetricName, err)
+	}
+	return nil
+}
diff --git a/test/e2ev3/workflows/basicmetrics/experimental/conntrack.go b/test/e2ev3/workflows/basicmetrics/experimental/conntrack.go
new file mode 100644
index 0000000000..7449c18f22
--- /dev/null
+++ b/test/e2ev3/workflows/basicmetrics/experimental/conntrack.go
@@ -0,0 +1,78 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+ +//go:build e2e + +package experimental + +import ( + "context" + "k8s.io/client-go/rest" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" +) + +func addConntrackScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-ct-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl1 := flow.Func("ct-curl-1-"+arch, func(ctx context.Context) error { + return (&k8s.ExecInPod{PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig}).Do(ctx) + }) + execCurl2 := flow.Func("ct-curl-2-"+arch, func(ctx context.Context) error { + return (&k8s.ExecInPod{PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig}).Do(ctx) + }) + conntrackMetrics := []string{ + "networkobservability_conntrack_packets_tx", + "networkobservability_conntrack_packets_rx", + "networkobservability_conntrack_bytes_tx", + "networkobservability_conntrack_bytes_rx", + "networkobservability_conntrack_total_connections", + } + + validateSteps := make([]flow.Steper, 0, len(conntrackMetrics)) + for _, metric := range conntrackMetrics { + validateSteps = append(validateSteps, &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: metric, + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + }) + } + + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, + 
OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: validateSteps, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. + flow.Pipe(createAgnhost, execCurl1, execCurl2). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/basicmetrics/experimental/forward.go b/test/e2ev3/workflows/basicmetrics/experimental/forward.go new file mode 100644 index 0000000000..b37e81898e --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/experimental/forward.go @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package experimental + +import ( + "context" + "k8s.io/client-go/rest" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" +) + +func addForwardScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-fwd-" + arch + podName := agnhostName + "-0" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + execCurl1 := flow.Func("fwd-curl-1-"+arch, func(ctx context.Context) error { + return (&k8s.ExecInPod{PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig}).Do(ctx) + }) + execCurl2 := flow.Func("fwd-curl-2-"+arch, func(ctx context.Context) error { + return (&k8s.ExecInPod{PodNamespace: namespace, PodName: podName, Command: "curl -s -m 5 bing.com", RestConfig: restConfig}).Do(ctx) + }) + validateFwdCount := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_forward_count", + ValidMetrics: []map[string]string{{"direction": "egress"}}, + ExpectMetric: true, + PartialMatch: true, + } + validateFwdBytes := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_forward_bytes", + ValidMetrics: []map[string]string{{"direction": "egress"}}, + ExpectMetric: true, + PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, + OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateFwdCount, 
validateFwdBytes}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. + flow.Pipe(createAgnhost, execCurl1, execCurl2). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/basicmetrics/experimental/network_stats.go b/test/e2ev3/workflows/basicmetrics/experimental/network_stats.go new file mode 100644 index 0000000000..31ce94b2dd --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/experimental/network_stats.go @@ -0,0 +1,54 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package experimental + +import ( + "k8s.io/client-go/rest" + flow "github.com/Azure/go-workflow" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" +) + +func addNetworkStatsScenario(restConfig *rest.Config) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + validateIPStats := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_ip_connection_stats", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateUDPStats := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_udp_connection_stats", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateIfaceStats := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_interface_stats", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, + }, + Steps: []flow.Steper{validateIPStats, validateUDPStats, validateIfaceStats}, + } + + // Validate: retry with exponential backoff until metrics appear. + wf.Add( + flow.Step(validateWithPF). + Retry(k8s.RetryWithBackoff), + ) + return wf +} diff --git a/test/e2ev3/workflows/basicmetrics/experimental/node_connectivity.go b/test/e2ev3/workflows/basicmetrics/experimental/node_connectivity.go new file mode 100644 index 0000000000..2d59196186 --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/experimental/node_connectivity.go @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. 
+// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + "k8s.io/client-go/rest" + flow "github.com/Azure/go-workflow" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" +) + +func addNodeConnectivityScenario(restConfig *rest.Config) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + validateStatus := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_node_connectivity_status", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateLatency := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_node_connectivity_latency_seconds", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, + }, + Steps: []flow.Steper{validateStatus, validateLatency}, + } + + // Validate: retry with exponential backoff until metrics appear. + wf.Add( + flow.Step(validateWithPF). + Retry(k8s.RetryWithBackoff), + ) + return wf +} diff --git a/test/e2ev3/workflows/basicmetrics/experimental/tcp_stats.go b/test/e2ev3/workflows/basicmetrics/experimental/tcp_stats.go new file mode 100644 index 0000000000..f39b2df43c --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/experimental/tcp_stats.go @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package experimental + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addTCPStatsScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-tcpstats-" + arch + podName := agnhostName + "-0" + + createKapinger := &k8s.CreateKapingerDeployment{ + KapingerNamespace: namespace, KapingerReplicas: "1", RestConfig: restConfig, + } + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + waitKapinger := &k8s.WaitPodsReady{ + RestConfig: restConfig, + Namespace: namespace, + LabelSelector: "app=kapinger", + } + execCurl1 := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, Command: "curl -s -m 5 kapinger:80", RestConfig: restConfig, + } + execCurl2 := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, Command: "curl -s -m 5 kapinger:80", RestConfig: restConfig, + } + validateConnStats := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_tcp_connection_stats", + ValidMetrics: []map[string]string{{}}, + ExpectMetric: true, + PartialMatch: true, + } + validateFlagGauges := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, + MetricName: "networkobservability_tcp_flag_gauges", + ValidMetrics: []map[string]string{{"flag": config.SYN}}, + ExpectMetric: true, + PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Endpoint: config.MetricsEndpoint, RestConfig: restConfig, + OptionalLabelAffinity: 
"app=" + agnhostName, + }, + Steps: []flow.Steper{validateConnStats, validateFlagGauges}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, ResourceNamespace: namespace, RestConfig: restConfig, + } + deleteKapinger := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.Deployment), ResourceName: "kapinger", ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources and generate traffic. + flow.Pipe(createKapinger, createAgnhost, waitKapinger, execCurl1, execCurl2). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: retry with exponential backoff until metrics appear. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost, deleteKapinger). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/basicmetrics/experimental/workflow.go b/test/e2ev3/workflows/basicmetrics/experimental/workflow.go new file mode 100644 index 0000000000..61127aaed7 --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/experimental/workflow.go @@ -0,0 +1,86 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package experimental + +import ( + "context" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +// Workflow runs the experimental basic metrics workflow. 
+type Workflow struct { + Cfg *config.E2EConfig +} + +func (w *Workflow) String() string { return "basic-metrics-experimental" } + +func (w *Workflow) Do(ctx context.Context) error { + ctx, _ = utils.StepLogger(ctx, w) + p := w.Cfg + restConfig := p.Cluster.RestConfig() + chartPath := p.Paths.RetinaChart + testPodNamespace := config.TestPodNamespace + imgCfg := &p.Image + helmCfg := &p.Helm + + // Construct steps. + installRetina := &k8s.InstallHelmChart{ + Namespace: config.KubeSystemNamespace, + ReleaseName: "retina", + KubeConfigFilePath: p.Cluster.KubeConfigPath(), + ChartPath: chartPath, + ImageTag: imgCfg.Tag, + ImageRegistry: imgCfg.Registry, + ImageNamespace: imgCfg.Namespace, + HelmDriver: helmCfg.Driver, + ImageLoader: p.Cluster, + } + + + var scenarios []flow.Steper + for _, arch := range config.Architectures { + scenarios = append(scenarios, + addForwardScenario(restConfig, testPodNamespace, arch), + addConntrackScenario(restConfig, testPodNamespace, arch), + addTCPStatsScenario(restConfig, testPodNamespace, arch), + ) + } + scenarios = append(scenarios, + addNetworkStatsScenario(restConfig), + addNodeConnectivityScenario(restConfig), + ) + + ensureStable := &k8s.EnsureStableComponent{ + PodNamespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + RestConfig: restConfig, + IgnoreContainerRestart: false, + } + + debug := &k8s.DebugOnFailure{ + RestConfig: restConfig, + Namespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + } + + // Wire dependencies and register. + // Scenarios run sequentially because they share the same port-forward port. 
+ wf := &flow.Workflow{DontPanic: true} + wf.Add(flow.Step(installRetina)) + prev := flow.Steper(installRetina) + for _, s := range scenarios { + wf.Add(flow.Step(s).DependsOn(prev)) + prev = s + } + wf.Add(flow.Step(ensureStable).DependsOn(prev)) + wf.Add(flow.Step(debug).DependsOn(ensureStable).When(flow.AnyFailed)) + + return wf.Do(ctx) +} diff --git a/test/e2ev3/workflows/basicmetrics/hns.go b/test/e2ev3/workflows/basicmetrics/hns.go new file mode 100644 index 0000000000..4700d34641 --- /dev/null +++ b/test/e2ev3/workflows/basicmetrics/hns.go @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package basicmetrics + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "github.com/microsoft/retina/test/retry" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + defaultRetryDelay = 5 * time.Second + defaultRetryAttempts = 5 +) + +var ( + ErrorNoWindowsPod = errors.New("no windows retina pod found") + ErrNoMetricFound = fmt.Errorf("no metric found") + + hnsMetricName = "networkobservability_windows_hns_stats" + defaultRetrier = retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay, ExpBackoff: true} +) + +// ValidateHNSMetricStep finds a Windows retina pod, curls the metrics endpoint +// inside it, and checks for the HNS stats metric with retry logic. 
+type ValidateHNSMetricStep struct { + RestConfig *rest.Config + RetinaDaemonSetNamespace string + RetinaDaemonSetName string +} + +func (v *ValidateHNSMetricStep) String() string { return "validate-hns-metrics" } + +func (v *ValidateHNSMetricStep) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, v) + clientset, err := kubernetes.NewForConfig(v.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + pods, err := clientset.CoreV1().Pods(v.RetinaDaemonSetNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: "k8s-app=retina", + }) + if err != nil { + return fmt.Errorf("error listing pods: %w", err) + } + + var windowsRetinaPod *v1.Pod + for i := range pods.Items { + if pods.Items[i].Spec.NodeSelector["kubernetes.io/os"] == "windows" { + windowsRetinaPod = &pods.Items[i] + } + } + if windowsRetinaPod == nil { + return ErrorNoWindowsPod + } + + labels := map[string]string{ + "direction": "win_packets_sent_count", + } + + log.Info("checking for metric", "metric", hnsMetricName, "labels", labels) + + err = defaultRetrier.Do(ctx, func() error { + output, execErr := k8s.ExecPod(ctx, clientset, v.RestConfig, windowsRetinaPod.Namespace, windowsRetinaPod.Name, fmt.Sprintf("curl -s http://localhost:%s/metrics", config.RetinaMetricsPort)) + if execErr != nil { + return fmt.Errorf("error executing command in windows retina pod: %w", execErr) + } + if len(output) == 0 { + return ErrNoMetricFound + } + + checkErr := prom.CheckMetricFromBuffer(output, hnsMetricName, labels) + if checkErr != nil { + return fmt.Errorf("failed to verify prometheus metrics: %w", checkErr) + } + + return nil + }) + if err != nil { + return err + } + + log.Info("found matching metric", "metric", hnsMetricName, "labels", labels) + return nil +} diff --git a/test/e2ev3/workflows/basicmetrics/tcp.go b/test/e2ev3/workflows/basicmetrics/tcp.go new file mode 100644 index 0000000000..114c64cb1e --- /dev/null +++ 
b/test/e2ev3/workflows/basicmetrics/tcp.go @@ -0,0 +1,126 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package basicmetrics + +import ( + "context" + "fmt" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addTCPScenario(restConfig *rest.Config, namespace, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-tcp-" + arch + podName := agnhostName + "-0" + + createKapinger := &k8s.CreateKapingerDeployment{ + KapingerNamespace: namespace, KapingerReplicas: "1", RestConfig: restConfig, + } + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: namespace, AgnhostArch: arch, RestConfig: restConfig, + } + waitKapinger := &k8s.WaitPodsReady{ + RestConfig: restConfig, + Namespace: namespace, + LabelSelector: "app=kapinger", + } + execCurl1 := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + } + execCurl2 := &k8s.ExecInPod{ + PodName: podName, PodNamespace: namespace, Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + } + validateState := &ValidateRetinaTCPStateStep{PortForwardedRetinaPort: "10093"} + validateRemote := &ValidateRetinaTCPConnectionRemoteStep{PortForwardedRetinaPort: "10093"} + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + Namespace: config.KubeSystemNamespace, LabelSelector: "k8s-app=retina", + LocalPort: "10093", RemotePort: "10093", Endpoint: "metrics", + RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateState, validateRemote}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, 
ResourceNamespace: namespace, RestConfig: restConfig, + } + deleteKapinger := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.Deployment), ResourceName: "kapinger", ResourceNamespace: namespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + flow.Pipe(createKapinger, createAgnhost, waitKapinger, execCurl1, execCurl2). + Timeout(k8s.DefaultScenarioTimeout), + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + flow.Pipe(deleteAgnhost, deleteKapinger). + When(flow.Always), + ), + ) + return wf +} + +var ( + tcpStateMetricName = "networkobservability_tcp_state" + tcpConnectionRemoteMetricName = "networkobservability_tcp_connection_remote" +) + +const ( + stateKey = "state" + + established = "ESTABLISHED" + listen = "LISTEN" + timewait = "TIME_WAIT" +) + +// ValidateRetinaTCPStateStep checks that the TCP state metric exists +// for ESTABLISHED, LISTEN, and TIME_WAIT states. +type ValidateRetinaTCPStateStep struct { + PortForwardedRetinaPort string +} + +func (v *ValidateRetinaTCPStateStep) Do(ctx context.Context) error { + promAddress := fmt.Sprintf("http://localhost:%s/metrics", v.PortForwardedRetinaPort) + + validMetrics := []map[string]string{ + {stateKey: established}, + {stateKey: listen}, + {stateKey: timewait}, + } + + for _, metric := range validMetrics { + err := prom.CheckMetric(ctx, promAddress, tcpStateMetricName, metric) + if err != nil { + return fmt.Errorf("failed to verify prometheus metrics: %w", err) + } + } + return nil +} + +// ValidateRetinaTCPConnectionRemoteStep checks the TCP connection remote metric. +// Currently performs empty validation (no specific labels checked). 
+type ValidateRetinaTCPConnectionRemoteStep struct {
+	PortForwardedRetinaPort string
+}
+
+func (v *ValidateRetinaTCPConnectionRemoteStep) Do(ctx context.Context) error {
+	promAddress := fmt.Sprintf("http://localhost:%s/metrics", v.PortForwardedRetinaPort)
+
+	validMetrics := []map[string]string{{}}
+
+	for _, metric := range validMetrics {
+		err := prom.CheckMetric(ctx, promAddress, tcpConnectionRemoteMetricName, metric)
+		if err != nil {
+			return fmt.Errorf("failed to verify prometheus metrics: %w", err)
+		}
+	}
+	return nil
+}
diff --git a/test/e2ev3/workflows/basicmetrics/workflow.go b/test/e2ev3/workflows/basicmetrics/workflow.go
new file mode 100644
index 0000000000..2a35972ff1
--- /dev/null
+++ b/test/e2ev3/workflows/basicmetrics/workflow.go
@@ -0,0 +1,93 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+//go:build e2e
+
+package basicmetrics
+
+import (
+	"context"
+
+	flow "github.com/Azure/go-workflow"
+	"github.com/microsoft/retina/test/e2ev3/config"
+	k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes"
+	"github.com/microsoft/retina/test/e2ev3/pkg/utils"
+)
+
+// Workflow runs the basic metrics workflow.
+type Workflow struct {
+	Cfg *config.E2EConfig
+}
+
+func (w *Workflow) String() string { return "basic-metrics" }
+
+func (w *Workflow) Do(ctx context.Context) error {
+	ctx, _ = utils.StepLogger(ctx, w)
+	p := w.Cfg
+	kubeConfigFilePath := p.Cluster.KubeConfigPath()
+	restConfig := p.Cluster.RestConfig()
+	chartPath := p.Paths.RetinaChart
+	testPodNamespace := config.TestPodNamespace
+	imgCfg := &p.Image
+	helmCfg := &p.Helm
+
+	// Construct steps.
+ installRetina := &k8s.InstallHelmChart{ + Namespace: config.KubeSystemNamespace, + ReleaseName: "retina", + KubeConfigFilePath: kubeConfigFilePath, + ChartPath: chartPath, + ImageTag: imgCfg.Tag, + ImageRegistry: imgCfg.Registry, + ImageNamespace: imgCfg.Namespace, + HelmDriver: helmCfg.Driver, + ImageLoader: p.Cluster, + } + + var scenarios []flow.Steper + for _, arch := range config.Architectures { + scenarios = append(scenarios, + addDropScenario(restConfig, testPodNamespace, arch), + addTCPScenario(restConfig, testPodNamespace, arch), + addBasicDNSScenario(restConfig, testPodNamespace, arch, + "valid-domain", "nslookup kubernetes.default", false), + addBasicDNSScenario(restConfig, testPodNamespace, arch, + "nxdomain", "nslookup some.non.existent.domain", true), + ) + } + + if *config.Provider != "kind" { + scenarios = append(scenarios, &ValidateHNSMetricStep{ + RestConfig: restConfig, + RetinaDaemonSetNamespace: config.KubeSystemNamespace, + RetinaDaemonSetName: "retina-agent-win", + }) + } + + ensureStable := &k8s.EnsureStableComponent{ + PodNamespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + RestConfig: restConfig, + IgnoreContainerRestart: false, + } + + debug := &k8s.DebugOnFailure{ + RestConfig: restConfig, + Namespace: config.KubeSystemNamespace, + LabelSelector: "k8s-app=retina", + } + + // Wire dependencies and register. + // Scenarios run sequentially because they share the same port-forward port. 
+ wf := &flow.Workflow{DontPanic: true} + wf.Add(flow.Step(installRetina)) + prev := flow.Steper(installRetina) + for _, s := range scenarios { + wf.Add(flow.Step(s).DependsOn(prev)) + prev = s + } + wf.Add(flow.Step(ensureStable).DependsOn(prev)) + wf.Add(flow.Step(debug).DependsOn(ensureStable).When(flow.AnyFailed)) + + return wf.Do(ctx) +} diff --git a/test/e2ev3/workflows/capture/install_plugin.go b/test/e2ev3/workflows/capture/install_plugin.go new file mode 100644 index 0000000000..9cc7b1f961 --- /dev/null +++ b/test/e2ev3/workflows/capture/install_plugin.go @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package capture + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +const ( + // InstallRetinaBinaryDir is the directory where the kubectl-retina binary will be installed. + InstallRetinaBinaryDir = "/tmp/retina-bin" +) + +// InstallRetinaPluginStep builds and installs the kubectl-retina plugin +// to allow e2e tests to run kubectl retina commands. 
+type InstallRetinaPluginStep struct{} + +func (i *InstallRetinaPluginStep) String() string { return "install-retina-plugin" } + +func (i *InstallRetinaPluginStep) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, i) + log.Info("building kubectl-retina plugin") + + if err := os.MkdirAll(InstallRetinaBinaryDir, 0o755); err != nil { + return fmt.Errorf("failed to create binary directory: %w", err) + } + + binaryName := "kubectl-retina" + + cmd := exec.Command("git", "rev-parse", "--show-toplevel") // #nosec + output, err := cmd.Output() + if err != nil { + return fmt.Errorf("failed to detect git repository root: %w", err) + } + retinaRepoRoot := strings.TrimSpace(string(output)) + log.Info("auto-detected repository root", "path", retinaRepoRoot) + + if _, err := os.Stat(retinaRepoRoot); err != nil { + return fmt.Errorf("invalid RetinaRepoRoot path: %w", err) + } + + if _, err := os.Stat(filepath.Join(retinaRepoRoot, "cli", "main.go")); err != nil { + return fmt.Errorf("cli/main.go not found in repository root: %w", err) + } + + buildCmd := exec.Command("go", "build", "-o", + filepath.Join(InstallRetinaBinaryDir, binaryName), + filepath.Join(retinaRepoRoot, "cli", "main.go")) // #nosec + buildCmd.Dir = retinaRepoRoot + buildOutput, err := buildCmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to build kubectl-retina: %s: %w", buildOutput, err) + } + log.Info("successfully built kubectl-retina", "output", string(buildOutput)) + + currentPath := os.Getenv("PATH") + if !strings.Contains(currentPath, InstallRetinaBinaryDir) { + newPath := fmt.Sprintf("%s:%s", InstallRetinaBinaryDir, currentPath) + if err := os.Setenv("PATH", newPath); err != nil { + return fmt.Errorf("failed to update PATH environment variable: %w", err) + } + log.Info("added directory to PATH", "dir", InstallRetinaBinaryDir) + } + + verifyCmd := exec.Command("kubectl", "plugin", "list") // #nosec + verifyOutput, err := verifyCmd.CombinedOutput() + if err != nil { + 
log.Warn("kubectl plugin list command failed", "error", err, "output", string(verifyOutput)) + } else { + log.Info("kubectl plugin list", "output", string(verifyOutput)) + if !strings.Contains(string(verifyOutput), "retina") { + log.Warn("retina plugin not found in kubectl plugin list output") + } + } + + return nil +} diff --git a/test/e2ev3/workflows/capture/validate_capture.go b/test/e2ev3/workflows/capture/validate_capture.go new file mode 100644 index 0000000000..6bbec46289 --- /dev/null +++ b/test/e2ev3/workflows/capture/validate_capture.go @@ -0,0 +1,261 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package capture + +import ( + "context" + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + captureConstants "github.com/microsoft/retina/pkg/capture/constants" + "github.com/microsoft/retina/pkg/label" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "github.com/microsoft/retina/test/retry" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +var ( + ErrNoCaptureJobsFound = fmt.Errorf("no capture jobs found") + ErrFoundNonZeroCaptureJobs = fmt.Errorf("found non-zero amount of capture jobs when expecting zero after deletion") + ErrMissingEventOnCaptureJob = fmt.Errorf("missing SuccessfulCreate or Completed event on capture job") + ErrCaptureJobFailed = fmt.Errorf("capture job failed") +) + +// ValidateCaptureStep runs the full kubectl retina capture lifecycle: +// create, verify jobs, download, validate files, and delete. 
+type ValidateCaptureStep struct { + CaptureName string + CaptureNamespace string + Duration string + KubeConfigPath string + RestConfig *rest.Config + ImageTag string + ImageRegistry string + ImageNamespace string +} + +func (v *ValidateCaptureStep) String() string { return "validate-capture" } + +func (v *ValidateCaptureStep) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, v) + log.Info("running retina capture create") + + imageRegistry := v.ImageRegistry + imageNamespace := v.ImageNamespace + imageTag := v.ImageTag + + os.Setenv("KUBECONFIG", v.KubeConfigPath) //nolint:errcheck // best effort + log.Info("KUBECONFIG set", "path", os.Getenv("KUBECONFIG")) + + cmd := exec.CommandContext(ctx, "kubectl", "retina", "capture", "create", "--namespace", v.CaptureNamespace, "--name", v.CaptureName, "--duration", v.Duration, "--debug") //#nosec + cmd.Env = append(os.Environ(), "RETINA_AGENT_IMAGE="+filepath.Join(imageRegistry, imageNamespace, "retina-agent:"+imageTag)) + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to execute create capture command: %s: %w", string(output), err) + } + log.Info("create capture command completed", "output", string(output)) + + clientset, err := kubernetes.NewForConfig(v.RestConfig) + if err != nil { + return fmt.Errorf("failed to create kubernetes clientset: %w", err) + } + + retrier := retry.Retrier{Attempts: 5, Delay: 10 * time.Second, ExpBackoff: true} + err = retrier.Do(ctx, func() error { + e := v.verifyJobs(ctx, log, clientset) + if e != nil { + log.Warn("failed to verify capture jobs, retrying", "error", e) + return e + } + return nil + }) + if err != nil { + return fmt.Errorf("failed to verify capture jobs were created: %w", err) + } + + if err := v.downloadCapture(ctx, log); err != nil { + return fmt.Errorf("failed to download and validate capture files: %w", err) + } + defer func() { + outputDir := filepath.Join(".", v.CaptureName) + if err := os.RemoveAll(outputDir); err 
!= nil { + log.Warn("failed to clean up capture files", "dir", outputDir, "error", err) + } + }() + + if err := v.deleteJobs(ctx, log, clientset); err != nil { + return fmt.Errorf("failed to delete capture jobs: %w", err) + } + + return nil +} + +func (v *ValidateCaptureStep) verifyJobs(ctx context.Context, log *slog.Logger, clientset *kubernetes.Clientset) error { + captureJobSelector := &metav1.LabelSelector{ + MatchLabels: map[string]string{ + label.CaptureNameLabel: v.CaptureName, + label.AppLabel: captureConstants.CaptureAppname, + }, + } + labelSelector, err := labels.Parse(metav1.FormatLabelSelector(captureJobSelector)) + if err != nil { + return fmt.Errorf("failed to parse label selector: %w", err) + } + + jobList, err := clientset.BatchV1().Jobs(v.CaptureNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector.String(), + }) + if err != nil { + return fmt.Errorf("failed to list capture jobs: %w", err) + } + + if len(jobList.Items) == 0 { + return fmt.Errorf("with labels %s=%s and %s=%s: %w", + label.CaptureNameLabel, v.CaptureName, + label.AppLabel, captureConstants.CaptureAppname, ErrNoCaptureJobsFound) + } + + log.Info("found capture jobs", "count", len(jobList.Items)) + + for i := range jobList.Items { + for _, condition := range jobList.Items[i].Status.Conditions { + if condition.Type == "Complete" && condition.Status == "True" { + log.Info("job completed", "job", jobList.Items[i].Name) + } + if condition.Type == "Failed" && condition.Status == "True" { + return fmt.Errorf("%s: %w", jobList.Items[i].Name, ErrCaptureJobFailed) + } + } + } + + events, err := clientset.CoreV1().Events(v.CaptureNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list events: %w", err) + } + for i := range jobList.Items { + if err := v.checkJobEvents(jobList.Items[i].Name, events); err != nil { + return fmt.Errorf("failed to verify events for job %s: %w", jobList.Items[i].Name, err) + } + log.Info("job has required 
events", "job", jobList.Items[i].Name) + } + + return nil +} + +func (v *ValidateCaptureStep) checkJobEvents(jobName string, events *v1.EventList) error { + var created, completed bool + for i := range events.Items { + if events.Items[i].InvolvedObject.Kind == "Job" && events.Items[i].InvolvedObject.Name == jobName { + switch events.Items[i].Reason { + case "SuccessfulCreate": + created = true + case "Completed": + completed = true + } + } + } + + if !created || !completed { + return fmt.Errorf("%s: %w", jobName, ErrMissingEventOnCaptureJob) + } + + return nil +} + +func (v *ValidateCaptureStep) deleteJobs(ctx context.Context, log *slog.Logger, clientset *kubernetes.Clientset) error { + log.Info("running retina capture delete") + cmd := exec.CommandContext(ctx, "kubectl", "retina", "capture", "delete", "--namespace", v.CaptureNamespace, "--name", v.CaptureName) //#nosec + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to execute delete command: %w", err) + } + log.Info("delete command completed", "output", string(output)) + + captureJobSelector := &metav1.LabelSelector{ + MatchLabels: map[string]string{ + label.CaptureNameLabel: v.CaptureName, + label.AppLabel: captureConstants.CaptureAppname, + }, + } + labelSelector, err := labels.Parse(metav1.FormatLabelSelector(captureJobSelector)) + if err != nil { + return fmt.Errorf("failed to parse label selector: %w", err) + } + + pollRetrier := retry.Retrier{Attempts: 10, Delay: 1 * time.Second, ExpBackoff: true} + err = pollRetrier.Do(ctx, func() error { + jobList, listErr := clientset.BatchV1().Jobs(v.CaptureNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector.String(), + }) + if listErr != nil { + return fmt.Errorf("failed to list jobs during delete verification: %w", listErr) + } + if len(jobList.Items) > 0 { + return ErrFoundNonZeroCaptureJobs + } + return nil + }) + if err != nil { + return err + } + + log.Info("all relevant capture jobs deleted") + return nil +} 
+ +func (v *ValidateCaptureStep) downloadCapture(ctx context.Context, log *slog.Logger) error { + log.Info("downloading capture files") + + outputDir := filepath.Join(".", v.CaptureName) + + cmd := exec.CommandContext(ctx, "kubectl", "retina", "capture", "download", "--namespace", v.CaptureNamespace, "--name", v.CaptureName) // #nosec + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to execute download capture command: %s: %w", string(output), err) + } + log.Info("download capture command completed", "output", string(output)) + + files, err := os.ReadDir(outputDir) + if err != nil { + return fmt.Errorf("failed to list files in output directory %s: %w", outputDir, err) + } + + if len(files) == 0 { + return fmt.Errorf("no capture files were downloaded") + } + log.Info("downloaded capture files", "count", len(files)) + + for _, file := range files { + filePath := filepath.Join(outputDir, file.Name()) + + if !strings.HasSuffix(file.Name(), ".tar.gz") { + return fmt.Errorf("downloaded file %s does not have the expected .tar.gz extension", file.Name()) + } + + fileInfo, err := os.Stat(filePath) + if err != nil { + return fmt.Errorf("failed to get file info for %s: %w", filePath, err) + } + + if fileInfo.Size() == 0 { + return fmt.Errorf("downloaded file %s is empty", filePath) + } + + log.Info("validated file", "file", file.Name(), "size", fileInfo.Size()) + } + + return nil +} diff --git a/test/e2ev3/workflows/capture/workflow.go b/test/e2ev3/workflows/capture/workflow.go new file mode 100644 index 0000000000..a367710142 --- /dev/null +++ b/test/e2ev3/workflows/capture/workflow.go @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package capture + +import ( + "context" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" + "k8s.io/apimachinery/pkg/util/rand" +) + +// Workflow runs the capture validation workflow. +type Workflow struct { + Cfg *config.E2EConfig +} + +func (w *Workflow) String() string { return "capture" } + +func (w *Workflow) Do(ctx context.Context) error { + ctx, _ = utils.StepLogger(ctx, w) + p := w.Cfg + kubeConfigFilePath := p.Cluster.KubeConfigPath() + testPodNamespace := "default" + imgCfg := &p.Image + + wf := new(flow.Workflow) + + captureName := "retina-capture-e2e-" + rand.String(5) + + installPlugin := &InstallRetinaPluginStep{} + validateCap := &ValidateCaptureStep{ + CaptureName: captureName, + CaptureNamespace: testPodNamespace, + Duration: "5s", + KubeConfigPath: kubeConfigFilePath, + RestConfig: p.Cluster.RestConfig(), + ImageTag: imgCfg.Tag, + ImageRegistry: imgCfg.Registry, + ImageNamespace: imgCfg.Namespace, + } + + wf.Add(flow.Pipe(installPlugin, validateCap)) + + return wf.Do(ctx) +} diff --git a/test/e2ev3/workflows/hubblemetrics/curl_pod.go b/test/e2ev3/workflows/hubblemetrics/curl_pod.go new file mode 100644 index 0000000000..9e1b5f34f6 --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/curl_pod.go @@ -0,0 +1,44 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package hubblemetrics + +import ( + "context" + "fmt" + + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// CurlPodStep executes a curl command from a source pod to a destination pod +// for flow testing. It resolves the destination pod's IP and runs the command. 
+type CurlPodStep struct { + SrcPodName string + SrcPodNamespace string + DstPodName string + DstPodNamespace string + RestConfig *rest.Config +} + +func (c *CurlPodStep) Do(ctx context.Context) error { + clientset, err := kubernetes.NewForConfig(c.RestConfig) + if err != nil { + return fmt.Errorf("error creating Kubernetes client: %w", err) + } + + dstPodIP, err := k8s.GetPodIP(ctx, c.RestConfig, c.DstPodNamespace, c.DstPodName) + if err != nil { + return fmt.Errorf("error getting pod IP: %w", err) + } + + cmd := fmt.Sprintf("curl -s -m 5 %s:80", dstPodIP) + _, err = k8s.ExecPod(ctx, clientset, c.RestConfig, c.SrcPodNamespace, c.SrcPodName, cmd) + if err != nil { + return fmt.Errorf("error executing command: %w", err) + } + return nil +} diff --git a/test/e2ev3/workflows/hubblemetrics/dns.go b/test/e2ev3/workflows/hubblemetrics/dns.go new file mode 100644 index 0000000000..47c800eabf --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/dns.go @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package hubblemetrics + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addHubbleDNSScenario(restConfig *rest.Config, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := "agnhost-dns" + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, + AgnhostNamespace: config.TestPodNamespace, + AgnhostArch: arch, + RestConfig: restConfig, + } + execNslookup := &k8s.ExecInPod{ + PodName: agnhostName + "-0", + PodNamespace: config.TestPodNamespace, + Command: "nslookup -type=a one.one.one.one", + RestConfig: restConfig, + } + validateQuery := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, + MetricName: config.HubbleDNSQueryMetricName, + ValidMetrics: []map[string]string{ValidHubbleDNSQueryMetricLabels}, + ExpectMetric: true, + } + validateResponse := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, + MetricName: config.HubbleDNSResponseMetricName, + ValidMetrics: []map[string]string{ValidHubbleDNSResponseMetricLabels}, + ExpectMetric: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", + LocalPort: config.HubbleMetricsPort, + RemotePort: config.HubbleMetricsPort, + Namespace: config.KubeSystemNamespace, + Endpoint: "metrics", + RestConfig: restConfig, + OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{execNslookup, validateQuery, validateResponse}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), + ResourceName: agnhostName, + ResourceNamespace: config.TestPodNamespace, + RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources. + flow.Pipe(createAgnhost). 
+ Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic and check metrics, retry with backoff. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/hubblemetrics/dns_labels.go b/test/e2ev3/workflows/hubblemetrics/dns_labels.go new file mode 100644 index 0000000000..a2941d266b --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/dns_labels.go @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package hubblemetrics + +import ( + "github.com/microsoft/retina/test/e2ev3/config" +) + +// Hubble DNS test fixtures: pod name and expected metric labels. +var ( + HubbleDNSPodName = "agnhost-dns-0" + + ValidHubbleDNSQueryMetricLabels = map[string]string{ + config.HubbleDestinationLabel: "", + config.HubbleSourceLabel: config.TestPodNamespace + "/" + HubbleDNSPodName, + config.HubbleIPsRetunedLabel: "0", + config.HubbleQTypesLabel: "A", + config.HubbleRCodeLabel: "", + config.HubbleQueryLabel: "one.one.one.one.", + } + + ValidHubbleDNSResponseMetricLabels = map[string]string{ + config.HubbleDestinationLabel: config.TestPodNamespace + "/" + HubbleDNSPodName, + config.HubbleSourceLabel: "", + config.HubbleIPsRetunedLabel: "2", + config.HubbleQTypesLabel: "A", + config.HubbleRCodeLabel: "No Error", + config.HubbleQueryLabel: "one.one.one.one.", + } +) diff --git a/test/e2ev3/workflows/hubblemetrics/drop.go b/test/e2ev3/workflows/hubblemetrics/drop.go new file mode 100644 index 0000000000..a62c47a14f --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/drop.go @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package hubblemetrics + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addHubbleDropScenario(restConfig *rest.Config, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + agnhostName := HubbleDropAgnhostName + podName := HubbleDropPodName + + createNetPol := &k8s.CreateDenyAllNetworkPolicy{ + NetworkPolicyNamespace: config.TestPodNamespace, + RestConfig: restConfig, + DenyAllLabelSelector: "app=" + agnhostName, + } + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: agnhostName, AgnhostNamespace: config.TestPodNamespace, + AgnhostArch: arch, RestConfig: restConfig, + } + execCurl := k8s.CurlExpectFail("hubble-drop-curl-"+arch, &k8s.ExecInPod{ + PodName: podName, PodNamespace: config.TestPodNamespace, + Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + }) + validateRetinaDrop := &prom.ValidateMetricStep{ + ForwardedPort: config.RetinaMetricsPort, MetricName: config.RetinaDropMetricName, + ValidMetrics: []map[string]string{ValidRetinaDropMetricLabels}, ExpectMetric: true, + } + validateHubbleDrop := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, MetricName: config.HubbleDropMetricName, + ValidMetrics: []map[string]string{ValidHubbleDropMetricLabels}, ExpectMetric: true, PartialMatch: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: config.RetinaMetricsPort, RemotePort: config.RetinaMetricsPort, + Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{ + execCurl, + validateRetinaDrop, + &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: 
config.HubbleMetricsPort, RemotePort: config.HubbleMetricsPort, + Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName, + }, + Steps: []flow.Steper{validateHubbleDrop}, + }, + }, + } + deleteNetPol := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.NetworkPolicy), ResourceName: "deny-all", + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName, + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources. + flow.Pipe(createNetPol, createAgnhost). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic and check metrics, retry with backoff. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteNetPol, deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/hubblemetrics/drop_labels.go b/test/e2ev3/workflows/hubblemetrics/drop_labels.go new file mode 100644 index 0000000000..37449c8493 --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/drop_labels.go @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package hubblemetrics + +import ( + "github.com/microsoft/retina/test/e2ev3/config" +) + +// Hubble drop test fixtures: pod names and expected metric labels. +var ( + HubbleDropPodName = "agnhost-drop-0" + HubbleDropAgnhostName = "agnhost-drop" + + ValidRetinaDropMetricLabels = map[string]string{ + config.RetinaReasonLabel: config.IPTableRuleDrop, + config.RetinaDirectionLabel: "unknown", + } + + // Note: When the agnhost pod (with deny-all network policy) tries to curl bing.com, + // it triggers a DNS lookup to CoreDNS. 
The network policy blocks this egress traffic, + // but Cilium/Hubble records the drop at the destination (CoreDNS) ingress side rather + // than the source (agnhost) egress side. + // We partially validate this metric. + ValidHubbleDropMetricLabels = map[string]string{ + config.HubbleSourceLabel: "", + config.HubbleProtocolLabel: config.UDP, + config.HubbleReasonLabel: "POLICY_DENIED", + } +) diff --git a/test/e2ev3/workflows/hubblemetrics/flow_inter.go b/test/e2ev3/workflows/hubblemetrics/flow_inter.go new file mode 100644 index 0000000000..b3b80afc1e --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/flow_inter.go @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package hubblemetrics + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addHubbleFlowInterNodeScenario(restConfig *rest.Config, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + podnameSrc := "agnhost-flow-inter-src" + podnameDst := "agnhost-flow-inter-dst" + validSrcLabels := []map[string]string{ + {"source": config.TestPodNamespace + "/" + podnameSrc + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podnameDst + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-endpoint", "type": "Trace", "verdict": "FORWARDED"}, + } + // Validate from dst pod's perspective using source-based labels. + // With sourceEgressContext=pod, flow metrics always populate 'source' with the local pod + // and leave 'destination' empty — so we check dst-0 appears as source for both directions. 
+ validDstLabels := []map[string]string{ + {"source": config.TestPodNamespace + "/" + podnameDst + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podnameDst + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-endpoint", "type": "Trace", "verdict": "FORWARDED"}, + } + + createSrc := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: podnameSrc, AgnhostNamespace: config.TestPodNamespace, + AgnhostArch: arch, RestConfig: restConfig, + } + createDst := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: podnameDst, AgnhostNamespace: config.TestPodNamespace, + AgnhostArch: arch, RestConfig: restConfig, + } + curlPod := &CurlPodStep{ + SrcPodName: podnameSrc + "-0", SrcPodNamespace: config.TestPodNamespace, + DstPodName: podnameDst + "-0", DstPodNamespace: config.TestPodNamespace, + RestConfig: restConfig, + } + validateSrc := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, MetricName: config.HubbleFlowMetricName, + ValidMetrics: validSrcLabels, ExpectMetric: true, + } + validateDst := &prom.ValidateMetricStep{ + ForwardedPort: "9966", MetricName: config.HubbleFlowMetricName, + ValidMetrics: validDstLabels, ExpectMetric: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: config.HubbleMetricsPort, RemotePort: config.HubbleMetricsPort, + Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + podnameSrc, + }, + Steps: []flow.Steper{ + curlPod, + validateSrc, + &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: "9966", RemotePort: config.HubbleMetricsPort, + Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + podnameDst, + }, + Steps: []flow.Steper{validateDst}, + }, + }, + } + 
deleteSrc := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: podnameSrc, + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + deleteDst := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: podnameDst, + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources. + flow.Pipe(createSrc, createDst). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic and check metrics, retry with backoff. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteSrc, deleteDst). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/hubblemetrics/flow_intra.go b/test/e2ev3/workflows/hubblemetrics/flow_intra.go new file mode 100644 index 0000000000..328591cf78 --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/flow_intra.go @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package hubblemetrics + +import ( + "k8s.io/client-go/rest" + + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" +) + +func addHubbleFlowIntraNodeScenario(restConfig *rest.Config, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + podname := "agnhost-flow-intra" + replicas := 2 + validLabels := []map[string]string{ + {"source": config.TestPodNamespace + "/" + podname + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podname + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-endpoint", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podname + "-1", "destination": "", "protocol": config.TCP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podname + "-1", "destination": "", "protocol": config.TCP, "subtype": "to-endpoint", "type": "Trace", "verdict": "FORWARDED"}, + } + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: podname, AgnhostNamespace: config.TestPodNamespace, + ScheduleOnSameNode: true, AgnhostReplicas: &replicas, + AgnhostArch: arch, RestConfig: restConfig, + } + curlPod := &CurlPodStep{ + SrcPodName: podname + "-0", SrcPodNamespace: config.TestPodNamespace, + DstPodName: podname + "-1", DstPodNamespace: config.TestPodNamespace, + RestConfig: restConfig, + } + validateFlow := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, MetricName: config.HubbleFlowMetricName, + ValidMetrics: validLabels, ExpectMetric: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: config.HubbleMetricsPort, RemotePort: config.HubbleMetricsPort, + 
Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + podname, + }, + Steps: []flow.Steper{curlPod, validateFlow}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: podname, + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources. + flow.Pipe(createAgnhost). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic and check metrics, retry with backoff. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/hubblemetrics/flow_world.go b/test/e2ev3/workflows/hubblemetrics/flow_world.go new file mode 100644 index 0000000000..cd6a8ded7b --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/flow_world.go @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build e2e + +package hubblemetrics + +import ( + flow "github.com/Azure/go-workflow" + "github.com/microsoft/retina/test/e2ev3/config" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus" + "k8s.io/client-go/rest" +) + +func addHubbleFlowToWorldScenario(restConfig *rest.Config, arch string) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + podname := "agnhost-flow-world" + validLabels := []map[string]string{ + {"source": config.TestPodNamespace + "/" + podname + "-0", "destination": "", "protocol": config.TCP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + {"source": config.TestPodNamespace + "/" + podname + "-0", "destination": "", "protocol": config.UDP, "subtype": "to-stack", "type": "Trace", "verdict": "FORWARDED"}, + } + + createAgnhost := &k8s.CreateAgnhostStatefulSet{ + AgnhostName: podname, AgnhostNamespace: config.TestPodNamespace, + AgnhostArch: arch, RestConfig: restConfig, + } + execCurl := &k8s.ExecInPod{ + PodName: podname + "-0", PodNamespace: config.TestPodNamespace, + Command: "curl -s -m 5 bing.com", RestConfig: restConfig, + } + validateFlow := &prom.ValidateMetricStep{ + ForwardedPort: config.HubbleMetricsPort, MetricName: config.HubbleFlowMetricName, + ValidMetrics: validLabels, ExpectMetric: true, + } + validateWithPF := &k8s.WithPortForward{ + PF: &k8s.PortForward{ + LabelSelector: "k8s-app=retina", LocalPort: config.HubbleMetricsPort, RemotePort: config.HubbleMetricsPort, + Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint, RestConfig: restConfig, OptionalLabelAffinity: "app=" + podname, + }, + Steps: []flow.Steper{execCurl, validateFlow}, + } + deleteAgnhost := &k8s.DeleteKubernetesResource{ + ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: podname, + ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig, + } + + wf.Add( + flow.BatchPipe( + // Setup: provision resources. 
+ flow.Pipe(createAgnhost). + Timeout(k8s.DefaultScenarioTimeout), + // Validate: generate traffic and check metrics, retry with backoff. + flow.Steps(validateWithPF). + Retry(k8s.RetryWithBackoff), + // Cleanup: always runs, even if validation fails. + flow.Pipe(deleteAgnhost). + When(flow.Always), + ), + ) + return wf +} diff --git a/test/e2ev3/workflows/hubblemetrics/service.go b/test/e2ev3/workflows/hubblemetrics/service.go new file mode 100644 index 0000000000..a0ce348431 --- /dev/null +++ b/test/e2ev3/workflows/hubblemetrics/service.go @@ -0,0 +1,119 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +//go:build e2e + +package hubblemetrics + +import ( + "context" + "fmt" + "net/http" + "time" + + "k8s.io/client-go/rest" + + flow "github.com/Azure/go-workflow" + k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes" + "github.com/microsoft/retina/test/e2ev3/pkg/utils" +) + +func addHubbleRelayValidation(restConfig *rest.Config) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + validateRelay := &ValidateHubbleRelayServiceStep{RestConfig: restConfig} + wf.Add(flow.Step(validateRelay)) + return wf +} + +func addHubbleUIValidation(restConfig *rest.Config) *flow.Workflow { + wf := &flow.Workflow{DontPanic: true} + validateUI := &ValidateHubbleUIServiceStep{RestConfig: restConfig} + wf.Add(flow.Step(validateUI)) + return wf +} + + + +// ValidateHubbleRelayServiceStep validates that the hubble-relay-service +// exists in the cluster. 
+type ValidateHubbleRelayServiceStep struct { + RestConfig *rest.Config +} + +func (v *ValidateHubbleRelayServiceStep) String() string { return "validate-hubble-relay-service" } + +func (v *ValidateHubbleRelayServiceStep) Do(ctx context.Context) error { + step := &k8s.ValidateResource{ + ResourceName: "hubble-relay-service", + ResourceNamespace: k8s.HubbleNamespace, + ResourceType: k8s.ResourceTypeService, + Labels: "k8s-app=" + k8s.HubbleRelayApp, + RestConfig: v.RestConfig, + } + return step.Do(ctx) +} + +// ValidateHubbleUIServiceStep validates that the hubble-ui service exists +// and that it responds with HTTP 200. +type ValidateHubbleUIServiceStep struct { + RestConfig *rest.Config +} + +func (v *ValidateHubbleUIServiceStep) String() string { return "validate-hubble-ui-service" } + +func (v *ValidateHubbleUIServiceStep) Do(ctx context.Context) error { + ctx, log := utils.StepLogger(ctx, v) + validateStep := &k8s.ValidateResource{ + ResourceName: k8s.HubbleUIApp, + ResourceNamespace: k8s.HubbleNamespace, + ResourceType: k8s.ResourceTypeService, + Labels: "k8s-app=" + k8s.HubbleUIApp, + RestConfig: v.RestConfig, + } + if err := validateStep.Do(ctx); err != nil { + return fmt.Errorf("failed to validate hubble-ui service: %w", err) + } + + // Port forward and validate HTTP response + pf := &k8s.PortForward{ + LabelSelector: "k8s-app=hubble-ui", + LocalPort: "8080", + RemotePort: "8081", + OptionalLabelAffinity: "k8s-app=hubble-ui", + Endpoint: "?namespace=default", + RestConfig: v.RestConfig, + } + if err := pf.Do(ctx); err != nil { + return fmt.Errorf("failed to port forward to hubble-ui: %w", err) + } + defer pf.Stop() //nolint:errcheck // best effort cleanup + + httpStep := &k8s.ValidateHTTPResponse{ + URL: "http://localhost:8080", + ExpectedStatus: http.StatusOK, + } + if err := httpStep.Do(ctx); err != nil { + return fmt.Errorf("failed to validate hubble-ui HTTP response: %w", err) + } + + log.Info("Hubble UI service validation succeeded") + return nil +} + 
+const hubbleUIRequestTimeout = 30 * time.Second
+
+// ValidateHTTPResponseStep wraps the old ValidateHTTPResponse step.
+type ValidateHTTPResponseStep struct {
+	URL            string
+	ExpectedStatus int
+}
+
+func (v *ValidateHTTPResponseStep) String() string { return "validate-http-response" }
+
+func (v *ValidateHTTPResponseStep) Do(ctx context.Context) error {
+	step := &k8s.ValidateHTTPResponse{
+		URL:            v.URL,
+		ExpectedStatus: v.ExpectedStatus,
+	}
+	return step.Do(ctx)
+}
diff --git a/test/e2ev3/workflows/hubblemetrics/tcp.go b/test/e2ev3/workflows/hubblemetrics/tcp.go
new file mode 100644
index 0000000000..01e81d1c49
--- /dev/null
+++ b/test/e2ev3/workflows/hubblemetrics/tcp.go
@@ -0,0 +1,67 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+//go:build e2e
+
+package hubblemetrics
+
+import (
+	flow "github.com/Azure/go-workflow"
+	"github.com/microsoft/retina/test/e2ev3/config"
+	k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes"
+	prom "github.com/microsoft/retina/test/e2ev3/pkg/prometheus"
+	"k8s.io/client-go/rest"
+)
+
+// addHubbleTCPScenario builds a per-architecture workflow that deploys an
+// agnhost pod, generates TCP traffic with curl, and validates the Hubble
+// TCP-flags metric before deleting the pod.
+func addHubbleTCPScenario(restConfig *rest.Config, arch string) *flow.Workflow {
+	wf := &flow.Workflow{DontPanic: true}
+	agnhostName := "agnhost-tcp"
+	podName := agnhostName + "-0"
+
+	// Traffic source: a StatefulSet whose first pod is <agnhostName>-0.
+	createAgnhost := &k8s.CreateAgnhostStatefulSet{
+		AgnhostName: agnhostName, AgnhostNamespace: config.TestPodNamespace,
+		AgnhostArch: arch, RestConfig: restConfig,
+	}
+	// Outbound curl from the pod generates the TCP traffic being measured.
+	execCurl := &k8s.ExecInPod{
+		PodName: podName, PodNamespace: config.TestPodNamespace,
+		Command: "curl -s -m 5 bing.com", RestConfig: restConfig,
+	}
+	validateTCP := &prom.ValidateMetricStep{
+		ForwardedPort: config.HubbleMetricsPort, MetricName: config.HubbleTCPFlagsMetricName,
+		ValidMetrics: ValidHubbleTCPMetricsLabels, ExpectMetric: true,
+	}
+	// Forward to a retina agent; OptionalLabelAffinity prefers the agent
+	// co-located with the agnhost pod — presumably so its traffic is scraped.
+	validateWithPF := &k8s.WithPortForward{
+		PF: &k8s.PortForward{
+			LabelSelector: "k8s-app=retina", LocalPort: config.HubbleMetricsPort, RemotePort: config.HubbleMetricsPort,
+			Namespace: config.KubeSystemNamespace, Endpoint: config.MetricsEndpoint,
+			RestConfig: restConfig, OptionalLabelAffinity: "app=" + agnhostName,
+		},
+		Steps: []flow.Steper{execCurl, validateTCP},
+	}
+	deleteAgnhost := &k8s.DeleteKubernetesResource{
+		ResourceType: k8s.TypeString(k8s.StatefulSet), ResourceName: agnhostName,
+		ResourceNamespace: config.TestPodNamespace, RestConfig: restConfig,
+	}
+
+	wf.Add(
+		flow.BatchPipe(
+			// Setup: provision resources.
+			flow.Pipe(createAgnhost).
+				Timeout(k8s.DefaultScenarioTimeout),
+			// Validate: generate traffic and check metrics, retry with backoff.
+			flow.Steps(validateWithPF).
+				Retry(k8s.RetryWithBackoff),
+			// Cleanup: always runs, even if validation fails.
+			flow.Pipe(deleteAgnhost).
+				When(flow.Always),
+		),
+	)
+	return wf
+}
diff --git a/test/e2ev3/workflows/hubblemetrics/tcp_labels.go b/test/e2ev3/workflows/hubblemetrics/tcp_labels.go
new file mode 100644
index 0000000000..be15bbeb9b
--- /dev/null
+++ b/test/e2ev3/workflows/hubblemetrics/tcp_labels.go
@@ -0,0 +1,34 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+//go:build e2e
+
+package hubblemetrics
+
+import (
+	"github.com/microsoft/retina/test/e2ev3/config"
+)
+
+// Hubble TCP test fixtures: pod name and expected metric labels.
+var (
+	HubbleTCPPodName = "agnhost-tcp-0"
+
+	ValidHubbleTCPSYNFlag = map[string]string{
+		config.HubbleSourceLabel:      config.TestPodNamespace + "/" + HubbleTCPPodName,
+		config.HubbleDestinationLabel: "",
+		config.HubbleFamilyLabel:      config.IPV4,
+		config.HubbleFlagLabel:        config.SYN,
+	}
+
+	ValidHubbleTCPFINFlag = map[string]string{
+		config.HubbleSourceLabel:      config.TestPodNamespace + "/" + HubbleTCPPodName,
+		config.HubbleDestinationLabel: "",
+		config.HubbleFamilyLabel:      config.IPV4,
+		config.HubbleFlagLabel:        config.FIN,
+	}
+
+	ValidHubbleTCPMetricsLabels = []map[string]string{
+		ValidHubbleTCPSYNFlag,
+		ValidHubbleTCPFINFlag,
+	}
+)
diff --git a/test/e2ev3/workflows/hubblemetrics/workflow.go b/test/e2ev3/workflows/hubblemetrics/workflow.go
new file mode 100644
index 0000000000..029c1c7490
--- /dev/null
+++ b/test/e2ev3/workflows/hubblemetrics/workflow.go
@@ -0,0 +1,90 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+//go:build e2e
+
+package hubblemetrics
+
+import (
+	"context"
+
+	flow "github.com/Azure/go-workflow"
+	"github.com/microsoft/retina/test/e2ev3/config"
+	k8s "github.com/microsoft/retina/test/e2ev3/pkg/kubernetes"
+	"github.com/microsoft/retina/test/e2ev3/pkg/utils"
+)
+
+// Workflow runs the hubble metrics workflow.
+type Workflow struct {
+	Cfg *config.E2EConfig
+}
+
+// String returns the workflow name used in step logs.
+func (w *Workflow) String() string { return "hubble-metrics" }
+
+// Do installs the Hubble-enabled Retina Helm chart, runs each metrics
+// scenario in sequence, verifies the retina agents stayed stable, and
+// collects debug information if any step failed.
+func (w *Workflow) Do(ctx context.Context) error {
+	ctx, _ = utils.StepLogger(ctx, w)
+	p := w.Cfg
+	restConfig := p.Cluster.RestConfig()
+	chartPath := p.Paths.HubbleChart
+	imgCfg := &p.Image
+	helmCfg := &p.Helm
+
+	// Construct steps.
+	installHubble := &k8s.InstallHubbleHelmChart{
+		Namespace:          config.KubeSystemNamespace,
+		ReleaseName:        "retina",
+		KubeConfigFilePath: p.Cluster.KubeConfigPath(),
+		ChartPath:          chartPath,
+		ImageTag:           imgCfg.Tag,
+		ImageRegistry:      imgCfg.Registry,
+		ImageNamespace:     imgCfg.Namespace,
+		HelmDriver:         helmCfg.Driver,
+		ImageLoader:        p.Cluster,
+	}
+
+	scenarios := []flow.Steper{
+		addHubbleRelayValidation(restConfig),
+		addHubbleUIValidation(restConfig),
+	}
+	for _, arch := range config.Architectures {
+		scenarios = append(scenarios,
+			addHubbleDNSScenario(restConfig, arch),
+			addHubbleFlowIntraNodeScenario(restConfig, arch),
+			addHubbleFlowInterNodeScenario(restConfig, arch),
+			addHubbleFlowToWorldScenario(restConfig, arch),
+			addHubbleDropScenario(restConfig, arch),
+			addHubbleTCPScenario(restConfig, arch),
+		)
+	}
+
+	ensureStable := &k8s.EnsureStableComponent{
+		PodNamespace:           config.KubeSystemNamespace,
+		LabelSelector:          "k8s-app=retina",
+		RestConfig:             restConfig,
+		IgnoreContainerRestart: false,
+	}
+
+	debug := &k8s.DebugOnFailure{
+		RestConfig:    restConfig,
+		Namespace:     config.KubeSystemNamespace,
+		LabelSelector: "k8s-app=retina",
+	}
+
+	// Wire dependencies and register.
+	// Scenarios run sequentially because they share the same port-forward port.
+	wf := &flow.Workflow{DontPanic: true}
+	wf.Add(flow.Step(installHubble))
+	prev := flow.Steper(installHubble)
+	for _, s := range scenarios {
+		wf.Add(flow.Step(s).DependsOn(prev))
+		prev = s
+	}
+	wf.Add(flow.Step(ensureStable).DependsOn(prev))
+	wf.Add(flow.Step(debug).DependsOn(ensureStable).When(flow.AnyFailed))
+
+	return wf.Do(ctx)
+}
diff --git a/test/profiles/advanced/values.yaml b/test/profiles/advanced/values.yaml
index 5c07e6eaaf..bbc361c241 100644
--- a/test/profiles/advanced/values.yaml
+++ b/test/profiles/advanced/values.yaml
@@ -1,6 +1,7 @@
 enablePodLevel: true
 enableAnnotations: true
 packetParserRingBuffer: "enabled"
+enabledPlugin_linux: '["dropreason","packetforward","linuxutil","dns","packetparser"]'
 operator:
   enabled: true
   enableRetinaEndpoint: true