Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
56e507b
Initial plan
Copilot Mar 17, 2026
4843de3
Implement Experiment reconciler with ConfigMap, TestWorkflow, TestTri…
Copilot Mar 17, 2026
23e6701
Final verification and documentation complete
Copilot Mar 17, 2026
8c1c97d
feat: update Experiment reconciler to enhance resource reconciliation…
fmallmann Mar 24, 2026
1d9ece6
fix: add default value to evaluate-template openApiBasePath config
fmallmann Mar 25, 2026
deb60fa
build: add agent-runtime-operator module dependency
fmallmann Mar 25, 2026
138b320
feat: add AiGatewayRef field to ExperimentSpec CRD
fmallmann Mar 25, 2026
c80ae49
build: register AiGateway types in scheme and add CRD to testdata
fmallmann Mar 25, 2026
cccfb66
feat: implement AiGateway resolution and wire into TestWorkflow
fmallmann Mar 25, 2026
6c9cd6c
build: regenerate RBAC with AiGateway permissions
fmallmann Mar 25, 2026
22d2536
docs: add aiGatewayRef to sample Experiment manifest
fmallmann Mar 25, 2026
9e63236
build: go mod tidy to mark agent-runtime-operator as direct dependency
fmallmann Mar 25, 2026
fe90565
fix: update resource selector to use matchLabels for deployment in ex…
fmallmann Mar 26, 2026
2e60046
refactor: restructure DatasetSource with InlineDataset type
fmallmann Mar 26, 2026
33aa45d
refactor: update experimentJSON and controller to use Dataset.Inline
fmallmann Mar 26, 2026
b66e0fe
docs: update sample Experiment YAML for new dataset structure
fmallmann Mar 26, 2026
d53f407
test: update all fixtures for DatasetSource restructuring
fmallmann Mar 26, 2026
ec18800
feat: Add operator release
fmallmann Mar 31, 2026
1abacb6
feat(operator): add resourceName helper with length-safe naming
fmallmann Apr 1, 2026
9c03e4a
feat(operator): add label constants and builder for managed resources
fmallmann Apr 1, 2026
0d91d2d
feat(operator): implement reconcileAnchor for anchor ConfigMap creation
fmallmann Apr 1, 2026
932d9d2
feat(operator): wire anchor ownerRef to all child resources with new …
fmallmann Apr 1, 2026
cc34c8d
feat(operator): replace finalizers with reactive anchor deletion
fmallmann Apr 1, 2026
601372a
feat(operator): implement garbage collector for orphaned anchor cleanup
fmallmann Apr 1, 2026
4e72eb4
feat(operator): add secondary anchor watch, self-healing, and registe…
fmallmann Apr 1, 2026
676be93
feat(operator): add Kubernetes Event recording for anchor lifecycle
fmallmann Apr 1, 2026
d00230f
feat(operator): update RBAC, remove generatedResources tracking
fmallmann Apr 1, 2026
2054880
chore(operator): fix gofmt import ordering in experiment_controller
fmallmann Apr 1, 2026
0e279a8
fix(operator): address review findings - labels on update, event nois…
fmallmann Apr 1, 2026
23ea01b
refactor(operator): remove legacy finalizer migration code
fmallmann Apr 1, 2026
e9f4fd7
chore: update namespace to testbench-operator-system and adjust valid…
fmallmann Apr 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,46 @@ jobs:
if: startsWith(github.ref, 'refs/tags/v')
run: |
helm push *.tgz oci://ghcr.io/${{ github.repository_owner }}/charts

operator:
runs-on: ubuntu-latest
permissions:
packages: write
contents: write

steps:
- name: Checkout
uses: 'actions/checkout@v6'
with:
fetch-depth: 0

- name: Set VERSION from tag
run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV

- name: Login to GitHub Container Registry
uses: docker/login-action@v4
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Docker Build & Push
working-directory: operator
run: |
make docker-buildx

- name: Build Installer
working-directory: operator
run: |
make build-installer

- name: Create Release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
tag: ${{ github.ref_name }}
run: |
gh release create "$tag" \
--repo="$GITHUB_REPOSITORY" \
--title="${tag#v}" \
--generate-notes \
operator/dist/install.yaml
4 changes: 2 additions & 2 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ update_settings(max_parallel_updates=10, k8s_upsert_timeout_secs=600)
load('ext://dotenv', 'dotenv')
dotenv()

v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.6.0')
v1alpha1.extension_repo(name='agentic-layer', url='https://github.com/agentic-layer/tilt-extensions', ref='v0.13.0')

v1alpha1.extension(name='cert-manager', repo_name='agentic-layer', repo_path='cert-manager')
load('ext://cert-manager', 'cert_manager_install')
Expand Down Expand Up @@ -56,7 +56,7 @@ k8s_yaml(helm(
# Apply local development manifests
k8s_yaml(kustomize('deploy/local'))

k8s_resource('ai-gateway-litellm', port_forwards=['11001:4000'])
k8s_resource('ai-gateway', port_forwards=['11001:4000'])
k8s_resource('weather-agent', port_forwards='11010:8000', labels=['agents'], resource_deps=['agent-runtime'])
k8s_resource('lgtm', port_forwards=['11000:3000', '4318:4318'])

Expand Down
1 change: 1 addition & 0 deletions chart/templates/evaluate-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ spec:
openApiBasePath:
type: string
description: "Base path for OpenAI API"
default: ""

# Steps to execute
steps:
Expand Down
3 changes: 0 additions & 3 deletions deploy/local/testkube/values.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
global:
testWorkflows:
createOfficialTemplates: false
testkube-operator:
## deploy Operator chart
enabled: enable
39 changes: 20 additions & 19 deletions operator/.golangci.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,20 @@
version: "2"

run:
timeout: 5m
allow-parallel-runners: true
# go version is set to 1.25 for compatibility with golangci-lint v2.10.1
# which was built with go1.25; update when a newer linter release is available.
go: "1.25"

issues:
# don't skip warning about doc comments
# don't exclude the default set of lint
exclude-use-default: false
# restore some of the defaults
# (fill in the rest as needed)
exclude-rules:
- path: "api/*"
linters:
- lll
- path: "internal/*"
linters:
- dupl
- lll
linters:
disable-all: true
enable:
- dupl
- errcheck
- exportloopref
- ginkgolinter
- goconst
- gocyclo
- gofmt
- goimports
- gosimple
- govet
- ineffassign
- lll
Expand All @@ -36,10 +23,24 @@ linters:
- prealloc
- revive
- staticcheck
- typecheck
- unconvert
- unparam
- unused
exclusions:
rules:
- path: "^api/"
linters:
- lll
- path: "^internal/"
linters:
- dupl
- lll
- path: "(^internal/|^test/|^cmd/)"
linters:
- revive
- path: "^test/"
linters:
- staticcheck

linters-settings:
revive:
Expand Down
2 changes: 1 addition & 1 deletion operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM golang:1.22 AS builder
FROM golang:1.26 AS builder
ARG TARGETOS
ARG TARGETARCH

Expand Down
17 changes: 10 additions & 7 deletions operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ BUNDLE_METADATA_OPTS ?= $(BUNDLE_CHANNELS) $(BUNDLE_DEFAULT_CHANNEL)
#
# For example, running 'make bundle-build bundle-push catalog-build catalog-push' will build and push both
# ghcr.io/agentic-layer/testbench/operator-bundle:$VERSION and ghcr.io/agentic-layer/testbench/operator-catalog:$VERSION.
IMAGE_TAG_BASE ?= agentic-layer.ai/operator
IMAGE_TAG_BASE ?= ghcr.io/agentic-layer/testbench/operator

# BUNDLE_IMG defines the image:tag used for the bundle.
# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=<some-registry>/<project-name-bundle>:<tag>)
Expand All @@ -50,7 +50,7 @@ endif
# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit.
OPERATOR_SDK_VERSION ?= v1.41.1
# Image URL to use all building/pushing image targets
IMG ?= controller:latest
IMG ?= $(IMAGE_TAG_BASE):$(VERSION)

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
Expand Down Expand Up @@ -152,15 +152,18 @@ docker-push: ## Push docker image with the manager.
# - have enabled BuildKit. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
# - be able to push the image to your registry (i.e. if you do not set a valid value via IMG=<myregistry>/<image>:<tag> then the export will fail)
# To adequately provide solutions that are compatible with multiple platforms, you should consider using this option.
PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le
PLATFORMS ?= linux/arm64,linux/amd64
.PHONY: docker-buildx
docker-buildx: ## Build and push docker image for the manager for cross-platform support
# copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile
sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross
- $(CONTAINER_TOOL) buildx create --name operator-builder
$(CONTAINER_TOOL) buildx use operator-builder
- $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross .
- $(CONTAINER_TOOL) buildx rm operator-builder
- $(CONTAINER_TOOL) buildx create --name testbench-operator-builder
$(CONTAINER_TOOL) buildx use testbench-operator-builder
$(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) \
--tag $(IMG) \
--tag $(IMAGE_TAG_BASE):latest \
-f Dockerfile.cross .
- $(CONTAINER_TOOL) buildx rm testbench-operator-builder
rm Dockerfile.cross

.PHONY: build-installer
Expand Down
50 changes: 34 additions & 16 deletions operator/api/v1alpha1/experiment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
Expand Down Expand Up @@ -46,7 +47,27 @@ type S3Source struct {
Key string `json:"key"`
}

// DatasetSource defines where to load the test dataset from
// InlineDataset defines an experiment dataset embedded directly in the
// resource: the test scenarios plus an optional judge model and an optional
// default metric threshold.
type InlineDataset struct {
// LLMAsAJudgeModel is the LLM used for evaluation (e.g., "gemini-2.5-flash-lite").
// +optional
LLMAsAJudgeModel string `json:"llmAsAJudgeModel,omitempty"`

// DefaultThreshold is the default threshold for all metrics (0.0-1.0).
// It is a pointer, so nil (unset) is distinct from an explicit 0.0.
// +optional
// +kubebuilder:validation:Minimum=0.0
// +kubebuilder:validation:Maximum=1.0
DefaultThreshold *float64 `json:"defaultThreshold,omitempty"`

// Scenarios lists the test scenarios; at least one entry is required.
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinItems=1
Scenarios []Scenario `json:"scenarios"`
}

// DatasetSource defines where to load the test dataset from.
// Exactly one of s3, url, or inline must be set.
// +kubebuilder:validation:XValidation:rule="(has(self.s3) ? 1 : 0) + (has(self.url) ? 1 : 0) + (has(self.inline) ? 1 : 0) == 1",message="exactly one of s3, url, or inline must be set"
type DatasetSource struct {
// S3 source configuration
// +optional
Expand All @@ -55,6 +76,10 @@ type DatasetSource struct {
// URL source (HTTP/HTTPS)
// +optional
URL string `json:"url,omitempty"`

// Inline dataset with scenarios
// +optional
Inline *InlineDataset `json:"inline,omitempty"`
}

// ToolCall represents an expected tool invocation
Expand Down Expand Up @@ -154,30 +179,23 @@ type TriggerSpec struct {
}

// ExperimentSpec defines the desired state of Experiment
// +kubebuilder:validation:XValidation:rule="!(has(self.dataset) && has(self.scenarios))",message="dataset and scenarios are mutually exclusive"
type ExperimentSpec struct {
// Reference to the Agent to evaluate
// +kubebuilder:validation:Required
AgentRef AgentRef `json:"agentRef"`

// Source of the test dataset (mutually exclusive with scenarios)
// AiGatewayRef references an AiGateway resource for LLM access during evaluation.
// Only Name and Namespace fields are used.
// +optional
Dataset *DatasetSource `json:"dataset,omitempty"`

// LLM model used for evaluation (e.g., "gemini-2.5-flash-lite", "gpt-4o")
// +optional
LLMAsAJudgeModel string `json:"llmAsAJudgeModel,omitempty"`
AiGatewayRef *corev1.ObjectReference `json:"aiGatewayRef,omitempty"`

// Default threshold for all metrics (0.0-1.0)
// +optional
// +kubebuilder:validation:Minimum=0.0
// +kubebuilder:validation:Maximum=1.0
// +kubebuilder:default=0.9
DefaultThreshold float64 `json:"defaultThreshold,omitempty"`
// Source of the test dataset
// +kubebuilder:validation:Required
Dataset DatasetSource `json:"dataset"`

// Inline test scenarios (mutually exclusive with dataset)
// OTLP endpoint URL for publishing metrics (e.g., "http://lgtm.monitoring.svc.cluster.local:4318")
// +optional
Scenarios []Scenario `json:"scenarios,omitempty"`
OTLPEndpoint string `json:"otlpEndpoint,omitempty"`

// Trigger configuration
// +optional
Expand Down
53 changes: 40 additions & 13 deletions operator/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading