diff --git a/deploy/helm/spur-cloud/.helmignore b/deploy/helm/spur-cloud/.helmignore
new file mode 100644
index 0000000..c089379
--- /dev/null
+++ b/deploy/helm/spur-cloud/.helmignore
@@ -0,0 +1,16 @@
+# Patterns to ignore when building Helm packages.
+.DS_Store
+.git/
+.gitignore
+.bzr/
+.hg/
+.svn/
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/deploy/helm/spur-cloud/Chart.yaml b/deploy/helm/spur-cloud/Chart.yaml
new file mode 100644
index 0000000..a748092
--- /dev/null
+++ b/deploy/helm/spur-cloud/Chart.yaml
@@ -0,0 +1,18 @@
+apiVersion: v2
+name: spur-cloud
+description: GPU as a Service platform built on Spur. Deploys the API, frontend, optional in-cluster Postgres, RBAC, and ingress.
+type: application
+version: 0.1.0
+appVersion: "0.1.0"
+kubeVersion: ">=1.24.0-0"
+home: https://github.com/ROCm/spur-cloud
+sources:
+  - https://github.com/ROCm/spur-cloud
+maintainers:
+  - name: ROCm
+    url: https://github.com/ROCm
+keywords:
+  - gpu
+  - hpc
+  - spur
+  - scheduler
diff --git a/deploy/helm/spur-cloud/README.md b/deploy/helm/spur-cloud/README.md
new file mode 100644
index 0000000..5e07c20
--- /dev/null
+++ b/deploy/helm/spur-cloud/README.md
@@ -0,0 +1,97 @@
+# spur-cloud Helm chart
+
+Deploys the Spur Cloud control plane (API + frontend), optional in-cluster
+Postgres, RBAC, and ingress. Does **not** deploy `spurctld` or the
+`spur-k8s` operator — those live in [ROCm/spur](https://github.com/ROCm/spur).
+
+## TL;DR
+
+```bash
+# 1. Generate a JWT signing key + DB password
+JWT=$(openssl rand -hex 32)
+DBPW=$(openssl rand -hex 16)
+
+# 2. Install
+helm install spur-cloud ./deploy/helm/spur-cloud \
+  --namespace spur-cloud --create-namespace \
+  --set secrets.jwtSecret="$JWT" \
+  --set secrets.dbPassword="$DBPW" \
+  --set ingress.host=gpu.example.com \
+  --set config.publicUrl=https://gpu.example.com
+```
+
+## What gets installed
+
+| Resource | Default | Toggle |
+|----------|---------|--------|
+| `Deployment` spur-cloud-api (2 replicas) | on | `api.enabled` |
+| `Deployment` spur-cloud-frontend (2 replicas) | on | `frontend.enabled` |
+| `Ingress` (host + `/` → frontend, `/api` → api) | on | `ingress.enabled` |
+| `Secret` (spur-cloud.toml + db-password) | on | `secrets.create` |
+| `ServiceAccount` + `ClusterRole`/`Binding` | on | `serviceAccount.create`, `rbac.create` |
+| `StatefulSet` postgres (1 replica) | on | `postgres.enabled` |
+| `Namespace` for session pods | on | `createSessionNamespace` |
+
+## Required secrets
+
+The chart fails to render unless these are set (or you provide an
+`existingSecret` and turn `secrets.create=false`):
+
+- `secrets.jwtSecret` — JWT signing key (`openssl rand -hex 32`)
+- `secrets.dbPassword` — when `postgres.enabled=true`
+- `secrets.githubClientSecret` — when `config.auth.github.enabled=true`
+- `secrets.oktaClientSecret` — when `config.auth.okta.enabled=true`
+
+## External Postgres
+
+```yaml
+postgres:
+  enabled: false
+database:
+  url: "postgresql://user:pass@rds.example.com:5432/spur_cloud"
+```
+
+When using ExternalSecrets / sealed-secrets:
+
+```yaml
+secrets:
+  create: false
+  existingSecret: my-existing-secret
+```
+
+The existing secret must contain key `spur-cloud.toml` (full rendered
+config) and, if using in-cluster Postgres, key `db-password`.
+
+## Image references
+
+Defaults point at `ghcr.io/rocm/spur-cloud-{api,frontend}`. Override:
+
+```yaml
+api:
+  image:
+    repository: my-registry.example.com/spur-cloud-api
+    tag: v0.2.0
+frontend:
+  image:
+    repository: my-registry.example.com/spur-cloud-frontend
+    tag: v0.2.0
+image:
+  pullSecrets:
+    - name: my-registry-creds
+```
+
+## Verify
+
+```bash
+helm lint ./deploy/helm/spur-cloud \
+  --set secrets.jwtSecret=test --set secrets.dbPassword=test
+helm template spur-cloud ./deploy/helm/spur-cloud \
+  --set secrets.jwtSecret=test --set secrets.dbPassword=test
+```
+
+## Limitations / TODO
+
+- No HPA, NetworkPolicy, PodDisruptionBudget yet.
+- Postgres is single-replica with no backup. Production should use managed
+  Postgres or a real operator (CNPG, Zalando).
+- No HA story for spurctld here — see the spur chart.
diff --git a/deploy/helm/spur-cloud/templates/NOTES.txt b/deploy/helm/spur-cloud/templates/NOTES.txt
new file mode 100644
index 0000000..7be4858
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/NOTES.txt
@@ -0,0 +1,32 @@
+Spur Cloud is installed as release "{{ .Release.Name }}" in namespace "{{ .Release.Namespace }}".
+
+1. Reach the UI:
+{{- if .Values.ingress.enabled }}
+   https://{{ .Values.ingress.host }}
+   Make sure DNS for {{ .Values.ingress.host }} points at your ingress controller{{- if .Values.ingress.tls.enabled }} and that TLS secret "{{ .Values.ingress.tls.secretName }}" exists in this namespace{{- end }}.
+{{- else }}
+   Ingress is disabled. Port-forward instead:
+     kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "spur-cloud.frontend.fullname" . }} 8080:{{ .Values.frontend.service.port }}
+{{- end }}
+
+2. Check pods:
+   kubectl -n {{ .Release.Namespace }} get pods -l app.kubernetes.io/instance={{ .Release.Name }}
+
+3. Session pods land in namespace "{{ .Values.sessionNamespace }}" (created by this chart: {{ .Values.createSessionNamespace }}).
+
+{{- if not .Values.postgres.enabled }}
+
+NOTE: in-cluster Postgres is disabled. The API is configured to use:
+{{- /* Mask the userinfo part of the DSN so credentials never reach the terminal or CI logs. */}}
+  {{ regexReplaceAll "://[^/@]*@" .Values.database.url "://****@" }}
+{{- end }}
+
+{{- if .Values.secrets.existingSecret }}
+
+NOTE: using existing secret "{{ .Values.secrets.existingSecret }}" — it must contain key "spur-cloud.toml"{{- if .Values.postgres.enabled }} and key "db-password"{{- end }}.
+{{- end }}
+
+Prerequisites (NOT installed by this chart):
+  - Spur controller (spurctld) reachable at: {{ .Values.config.spur.controllerAddr }}
+  - spur-k8s operator watching the {{ .Values.sessionNamespace }} namespace
+  - GPU nodes labeled spur.ai/managed=true and spur.ai/gpu-type=<type>
diff --git a/deploy/helm/spur-cloud/templates/_helpers.tpl b/deploy/helm/spur-cloud/templates/_helpers.tpl
new file mode 100644
index 0000000..b900a24
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/_helpers.tpl
@@ -0,0 +1,89 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "spur-cloud.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Fully qualified app name.
+*/}}
+{{- define "spur-cloud.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "spur-cloud.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{- define "spur-cloud.labels" -}}
+helm.sh/chart: {{ include "spur-cloud.chart" . }}
+app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{- define "spur-cloud.api.fullname" -}}
+{{ include "spur-cloud.fullname" . }}-api
+{{- end -}}
+
+{{- define "spur-cloud.frontend.fullname" -}}
+{{ include "spur-cloud.fullname" . }}-frontend
+{{- end -}}
+
+{{- define "spur-cloud.postgres.fullname" -}}
+{{ include "spur-cloud.fullname" . }}-postgres
+{{- end -}}
+
+{{- define "spur-cloud.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create -}}
+{{- default (include "spur-cloud.fullname" .) .Values.serviceAccount.name -}}
+{{- else -}}
+{{- default "default" .Values.serviceAccount.name -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "spur-cloud.secretName" -}}
+{{- if .Values.secrets.existingSecret -}}
+{{- .Values.secrets.existingSecret -}}
+{{- else -}}
+{{ include "spur-cloud.fullname" . }}-secrets
+{{- end -}}
+{{- end -}}
+
+{{- define "spur-cloud.api.image" -}}
+{{- $tag := default .Chart.AppVersion .Values.api.image.tag -}}
+{{- printf "%s:%s" .Values.api.image.repository $tag -}}
+{{- end -}}
+
+{{- define "spur-cloud.frontend.image" -}}
+{{- $tag := default .Chart.AppVersion .Values.frontend.image.tag -}}
+{{- printf "%s:%s" .Values.frontend.image.repository $tag -}}
+{{- end -}}
+
+{{/*
+Resolve the database URL: explicit override wins, otherwise build from
+in-cluster Postgres service if enabled.
+*/}}
+{{- define "spur-cloud.databaseUrl" -}}
+{{- if .Values.database.url -}}
+{{- .Values.database.url -}}
+{{- else if .Values.postgres.enabled -}}
+{{- if not .Values.secrets.dbPassword -}}
+{{- fail "secrets.dbPassword must be set when postgres.enabled is true and secrets.existingSecret is unused" -}}
+{{- end -}}
+{{- printf "postgresql://%s:%s@%s:5432/%s" .Values.postgres.user .Values.secrets.dbPassword (include "spur-cloud.postgres.fullname" .) .Values.postgres.database -}}
+{{- else -}}
+{{- fail "Either postgres.enabled must be true or database.url must be set" -}}
+{{- end -}}
+{{- end -}}
diff --git a/deploy/helm/spur-cloud/templates/api.yaml b/deploy/helm/spur-cloud/templates/api.yaml
new file mode 100644
index 0000000..62d1094
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/api.yaml
@@ -0,0 +1,101 @@
+{{- if .Values.api.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "spur-cloud.api.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: api
+spec:
+  type: {{ .Values.api.service.type }}
+  selector:
+    app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/component: api
+  ports:
+    - name: http
+      port: {{ .Values.api.service.port }}
+      targetPort: http
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "spur-cloud.api.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: api
+spec:
+  replicas: {{ .Values.api.replicas }}
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name }}
+      app.kubernetes.io/component: api
+  template:
+    metadata:
+      labels:
+        {{- include "spur-cloud.labels" . | nindent 8 }}
+        app.kubernetes.io/component: api
+      annotations:
+        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+        {{- with .Values.api.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      serviceAccountName: {{ include "spur-cloud.serviceAccountName" . }}
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: api
+          image: {{ include "spur-cloud.api.image" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          args:
+            - --config=/etc/spur-cloud/spur-cloud.toml
+          ports:
+            - name: http
+              containerPort: 8080
+          env:
+            {{- range $k, $v := .Values.api.env }}
+            - name: {{ $k }}
+              value: {{ $v | quote }}
+            {{- end }}
+          volumeMounts:
+            - name: config
+              mountPath: /etc/spur-cloud
+              readOnly: true
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 20
+          resources:
+            {{- toYaml .Values.api.resources | nindent 12 }}
+      volumes:
+        - name: config
+          secret:
+            secretName: {{ include "spur-cloud.secretName" . }}
+            items:
+              - key: spur-cloud.toml
+                path: spur-cloud.toml
+      {{- with .Values.api.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.api.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.api.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
diff --git a/deploy/helm/spur-cloud/templates/frontend.yaml b/deploy/helm/spur-cloud/templates/frontend.yaml
new file mode 100644
index 0000000..d7c26d2
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/frontend.yaml
@@ -0,0 +1,69 @@
+{{- if .Values.frontend.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "spur-cloud.frontend.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: frontend
+spec:
+  type: {{ .Values.frontend.service.type }}
+  selector:
+    app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/component: frontend
+  ports:
+    - name: http
+      port: {{ .Values.frontend.service.port }}
+      targetPort: http
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "spur-cloud.frontend.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: frontend
+spec:
+  replicas: {{ .Values.frontend.replicas }}
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name }}
+      app.kubernetes.io/component: frontend
+  template:
+    metadata:
+      labels:
+        {{- include "spur-cloud.labels" . | nindent 8 }}
+        app.kubernetes.io/component: frontend
+      {{- with .Values.frontend.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+    spec:
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: nginx
+          image: {{ include "spur-cloud.frontend.image" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 80
+          resources:
+            {{- toYaml .Values.frontend.resources | nindent 12 }}
+      {{- with .Values.frontend.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.frontend.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.frontend.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
diff --git a/deploy/helm/spur-cloud/templates/ingress.yaml b/deploy/helm/spur-cloud/templates/ingress.yaml
new file mode 100644
index 0000000..587ad70
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/ingress.yaml
@@ -0,0 +1,40 @@
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "spur-cloud.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls.enabled }}
+  tls:
+    - hosts:
+        - {{ .Values.ingress.host | quote }}
+      secretName: {{ required "ingress.tls.secretName is required when ingress.tls.enabled" .Values.ingress.tls.secretName }}
+  {{- end }}
+  rules:
+    - host: {{ .Values.ingress.host | quote }}
+      http:
+        paths:
+          - path: /api
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ include "spur-cloud.api.fullname" . }}
+                port:
+                  number: {{ .Values.api.service.port }}
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ include "spur-cloud.frontend.fullname" . }}
+                port:
+                  number: {{ .Values.frontend.service.port }}
+{{- end }}
diff --git a/deploy/helm/spur-cloud/templates/postgres.yaml b/deploy/helm/spur-cloud/templates/postgres.yaml
new file mode 100644
index 0000000..f446d4d
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/postgres.yaml
@@ -0,0 +1,81 @@
+{{- if .Values.postgres.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "spur-cloud.postgres.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: postgres
+spec:
+  clusterIP: None
+  selector:
+    app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/component: postgres
+  ports:
+    - name: postgres
+      port: 5432
+      targetPort: 5432
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: {{ include "spur-cloud.postgres.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: postgres
+spec:
+  serviceName: {{ include "spur-cloud.postgres.fullname" . }}
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "spur-cloud.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name }}
+      app.kubernetes.io/component: postgres
+  template:
+    metadata:
+      labels:
+        {{- include "spur-cloud.labels" . | nindent 8 }}
+        app.kubernetes.io/component: postgres
+    spec:
+      containers:
+        - name: postgres
+          image: "{{ .Values.postgres.image.repository }}:{{ .Values.postgres.image.tag }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          env:
+            - name: POSTGRES_DB
+              value: {{ .Values.postgres.database | quote }}
+            - name: POSTGRES_USER
+              value: {{ .Values.postgres.user | quote }}
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "spur-cloud.secretName" . }}
+                  key: db-password
+            - name: PGDATA
+              value: /var/lib/postgresql/data/pgdata
+          ports:
+            - containerPort: 5432
+              name: postgres
+          volumeMounts:
+            - name: data
+              mountPath: /var/lib/postgresql/data
+          resources:
+            {{- toYaml .Values.postgres.resources | nindent 12 }}
+          readinessProbe:
+            exec:
+              command: ["pg_isready", "-U", {{ .Values.postgres.user | quote }}, "-d", {{ .Values.postgres.database | quote }}]
+            initialDelaySeconds: 5
+            periodSeconds: 10
+  volumeClaimTemplates:
+    - metadata:
+        name: data
+      spec:
+        accessModes: [ReadWriteOnce]
+        {{- with .Values.postgres.storage.storageClassName }}
+        storageClassName: {{ . | quote }}
+        {{- end }}
+        resources:
+          requests:
+            storage: {{ .Values.postgres.storage.size | quote }}
+{{- end }}
diff --git a/deploy/helm/spur-cloud/templates/rbac.yaml b/deploy/helm/spur-cloud/templates/rbac.yaml
new file mode 100644
index 0000000..336575e
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/rbac.yaml
@@ -0,0 +1,49 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "spur-cloud.serviceAccountName" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
+{{- if .Values.rbac.create }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ include "spur-cloud.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+rules:
+  # Terminal (kube exec) and pod lifecycle
+  - apiGroups: [""]
+    resources: ["pods", "pods/exec", "pods/log"]
+    verbs: ["get", "list", "watch", "create", "delete"]
+  # SSH NodePort services
+  - apiGroups: [""]
+    resources: ["services"]
+    verbs: ["get", "list", "create", "delete"]
+  # SpurJob CRs
+  - apiGroups: ["spur.ai"]
+    resources: ["spurjobs"]
+    verbs: ["get", "list", "watch", "create", "patch", "delete"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ include "spur-cloud.fullname" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "spur-cloud.serviceAccountName" . }}
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  kind: ClusterRole
+  name: {{ include "spur-cloud.fullname" . }}
+  apiGroup: rbac.authorization.k8s.io
+{{- end }}
diff --git a/deploy/helm/spur-cloud/templates/secret.yaml b/deploy/helm/spur-cloud/templates/secret.yaml
new file mode 100644
index 0000000..a98dd9f
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/secret.yaml
@@ -0,0 +1,62 @@
+{{- if .Values.secrets.create -}}
+{{- if not .Values.secrets.jwtSecret -}}
+{{- fail "secrets.jwtSecret is required (generate with: openssl rand -hex 32) — or set secrets.existingSecret and secrets.create=false" -}}
+{{- end -}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "spur-cloud.secretName" . }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+type: Opaque
+stringData:
+  spur-cloud.toml: |
+    public_url = {{ .Values.config.publicUrl | quote }}
+
+    [server]
+    listen_addr = {{ .Values.config.server.listenAddr | quote }}
+    session_namespace = {{ .Values.sessionNamespace | quote }}
+
+    [database]
+    url = {{ include "spur-cloud.databaseUrl" . | quote }}
+
+    [spur]
+    controller_addr = {{ .Values.config.spur.controllerAddr | quote }}
+
+    [auth]
+    jwt_secret = {{ .Values.secrets.jwtSecret | quote }}
+    jwt_expiry_hours = {{ .Values.config.auth.jwtExpiryHours }}
+    {{- if .Values.config.auth.github.enabled }}
+    {{- if not .Values.secrets.githubClientSecret }}
+    {{- fail "secrets.githubClientSecret is required when config.auth.github.enabled" }}
+    {{- end }}
+
+    [auth.github]
+    enabled = true
+    client_id = {{ .Values.config.auth.github.clientId | quote }}
+    client_secret = {{ .Values.secrets.githubClientSecret | quote }}
+    {{- end }}
+    {{- if .Values.config.auth.okta.enabled }}
+    {{- if not .Values.secrets.oktaClientSecret }}
+    {{- fail "secrets.oktaClientSecret is required when config.auth.okta.enabled" }}
+    {{- end }}
+
+    [auth.okta]
+    enabled = true
+    issuer = {{ .Values.config.auth.okta.issuer | quote }}
+    client_id = {{ .Values.config.auth.okta.clientId | quote }}
+    client_secret = {{ .Values.secrets.oktaClientSecret | quote }}
+    admin_groups = [{{- range $i, $g := .Values.config.auth.okta.adminGroups }}{{ if $i }}, {{ end }}{{ $g | quote }}{{- end }}]
+    {{- end }}
+    {{- with .Values.config.extraToml }}
+
+    {{ . | nindent 4 }}
+    {{- end }}
+  {{- /*
+  The Postgres StatefulSet always reads the db-password key, so emit it
+  whenever in-cluster Postgres is enabled, even if database.url is overridden.
+  */}}
+  {{- if .Values.postgres.enabled }}
+  db-password: {{ required "secrets.dbPassword is required when postgres.enabled" .Values.secrets.dbPassword | quote }}
+  {{- end }}
+{{- end -}}
diff --git a/deploy/helm/spur-cloud/templates/session-namespace.yaml b/deploy/helm/spur-cloud/templates/session-namespace.yaml
new file mode 100644
index 0000000..228b8c7
--- /dev/null
+++ b/deploy/helm/spur-cloud/templates/session-namespace.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.createSessionNamespace -}}
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ .Values.sessionNamespace }}
+  labels:
+    {{- include "spur-cloud.labels" . | nindent 4 }}
+    app.kubernetes.io/component: session-namespace
+  annotations:
+    # Keep the namespace if the release is uninstalled — user pods may still
+    # be running there and namespace deletion is destructive.
+    helm.sh/resource-policy: keep
+{{- end }}
diff --git a/deploy/helm/spur-cloud/values.yaml b/deploy/helm/spur-cloud/values.yaml
new file mode 100644
index 0000000..b00bc2f
--- /dev/null
+++ b/deploy/helm/spur-cloud/values.yaml
@@ -0,0 +1,135 @@
+# Default values for spur-cloud.
+# Override with `-f my-values.yaml` or `--set key=value`.
+
+nameOverride: ""
+fullnameOverride: ""
+
+# Namespace for the user GPU session pods. Created if `createSessionNamespace` is true.
+sessionNamespace: spur-sessions
+createSessionNamespace: true
+
+image:
+  pullPolicy: IfNotPresent
+  pullSecrets: []
+
+api:
+  enabled: true
+  replicas: 2
+  image:
+    repository: ghcr.io/rocm/spur-cloud-api
+    tag: ""  # Defaults to .Chart.AppVersion
+  service:
+    type: ClusterIP
+    port: 8080
+  resources:
+    requests:
+      cpu: 500m
+      memory: 256Mi
+    limits:
+      cpu: "2"
+      memory: 1Gi
+  env:
+    RUST_LOG: info
+  podAnnotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+frontend:
+  enabled: true
+  replicas: 2
+  image:
+    repository: ghcr.io/rocm/spur-cloud-frontend
+    tag: ""  # Defaults to .Chart.AppVersion
+  service:
+    type: ClusterIP
+    port: 80
+  resources:
+    requests:
+      cpu: 100m
+      memory: 64Mi
+    limits:
+      cpu: 500m
+      memory: 256Mi
+  podAnnotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+ingress:
+  enabled: true
+  className: nginx
+  host: gpu.example.com
+  annotations:
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
+  tls:
+    enabled: false
+    secretName: ""
+
+# Application config rendered into the chart-managed Secret as spur-cloud.toml.
+config:
+  publicUrl: https://gpu.example.com
+  server:
+    listenAddr: "0.0.0.0:8080"
+  spur:
+    controllerAddr: http://spurctld.spur:6817
+  auth:
+    jwtExpiryHours: 24
+    github:
+      enabled: false
+      clientId: ""
+    okta:
+      enabled: false
+      issuer: ""
+      clientId: ""
+      adminGroups: []
+  # Free-form TOML appended verbatim to spur-cloud.toml. Use for fields the
+  # chart does not yet template (e.g. billing, feature flags).
+  extraToml: ""
+
+# Secret material. Either provide values here (chart creates the Secret) or
+# set `existingSecret` to bring your own (managed by ExternalSecrets, sealed-secrets, etc.).
+secrets:
+  create: true
+  existingSecret: ""
+  jwtSecret: ""  # Required. Generate with: openssl rand -hex 32
+  dbPassword: ""  # Required when postgres.enabled. For an external DB, put credentials in database.url.
+  githubClientSecret: ""  # Only if config.auth.github.enabled
+  oktaClientSecret: ""  # Only if config.auth.okta.enabled
+
+# In-cluster Postgres for dev / single-node deploys. For production, disable
+# this and point `database.url` at managed Postgres (RDS, CloudSQL, Crunchy).
+postgres:
+  enabled: true
+  image:
+    repository: postgres
+    tag: "16-alpine"
+  database: spur_cloud
+  user: spur_cloud
+  storage:
+    size: 10Gi
+    storageClassName: ""  # Empty = cluster default
+  resources:
+    requests:
+      cpu: 250m
+      memory: 256Mi
+    limits:
+      cpu: "1"
+      memory: 1Gi
+
+# External database. Used when postgres.enabled = false.
+# If both are off, you must set database.url explicitly.
+database:
+  # Full DSN. When empty and postgres.enabled, the chart builds one pointing
+  # at the in-cluster Postgres service.
+  url: ""
+
+rbac:
+  create: true
+
+serviceAccount:
+  create: true
+  name: ""
+  annotations: {}