Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ KUBECONFIG ?= $(HOME)/.kube/config

CLEANER_NAMESPACE ?= $(NAMESPACE)
CLEANER_SCHEDULE ?= 0 * * * *
CLEANER_LABEL_SELECTOR ?= hyperfleet.io/cluster-id
CLEANER_LABEL_SELECTOR ?= hyperfleet.io/cluster-id hyperfleet.io/test-run
CLEANER_AGE_MINUTES ?= 180
CLEANER_MAESTRO_URL ?= http://maestro.$(MAESTRO_NAMESPACE).svc.cluster.local:8000

Expand Down Expand Up @@ -211,7 +211,7 @@ install-repos: check-helmfile-env ## Add all hyperfleet helm repos
$(call add-helm-repo,adapter,$(ADAPTER_CHART_REF))

.PHONY: install-hyperfleet
install-hyperfleet: check-helmfile-env ## Install all HyperFleet components
install-hyperfleet: check-helmfile-env check-hyperfleet-namespace ## Install all HyperFleet components
helmfile -f helmfile/helmfile.yaml.gotmpl -e $(HELMFILE_ENV) apply

.PHONY: install-api
Expand Down Expand Up @@ -334,8 +334,12 @@ define check-namespace
endef

.PHONY: check-hyperfleet-namespace
check-hyperfleet-namespace: ## Create Hyperfleet namespace if it doesn't exist
check-hyperfleet-namespace: ## Create Hyperfleet namespace if it doesn't exist and label it
@printf '%s' "$(NAMESPACE)" | grep -qE '^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$$' \
|| { echo "ERROR: NAMESPACE '$(NAMESPACE)' is not a valid DNS label (lowercase alphanumeric and hyphens, 1-63 chars)"; exit 1; }
$(call check-namespace,$(NAMESPACE))
@kubectl label namespace "$(NAMESPACE)" "hyperfleet.io/test-run=$(NAMESPACE)" --overwrite >/dev/null
@echo "OK: namespace $(NAMESPACE) labeled with hyperfleet.io/test-run=$(NAMESPACE)"
Comment thread
coderabbitai[bot] marked this conversation as resolved.

.PHONY: check-maestro-namespace
check-maestro-namespace: ## Create Maestro namespace if it doesn't exist
Expand Down
43 changes: 24 additions & 19 deletions helm/namespace-cleaner/scripts/namespace-cleaner.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
set -eo pipefail

LABEL_SELECTOR="${LABEL_SELECTOR:-hyperfleet.io/cluster-id}"
LABEL_SELECTOR="${LABEL_SELECTOR:-hyperfleet.io/cluster-id hyperfleet.io/test-run}"
AGE_MINUTES="${AGE_MINUTES:-180}"
MAESTRO_URL="${MAESTRO_URL:-http://maestro.maestro.svc.cluster.local:8000}"
DRY_RUN="${DRY_RUN:-false}"
Expand Down Expand Up @@ -72,26 +72,31 @@ else
fi

# --- Step 2: delete stale namespaces (non-blocking) ---
kubectl get namespaces -l "${LABEL_SELECTOR}" \
-o go-template='{{range .items}}{{.metadata.name}}|{{.metadata.creationTimestamp}}|{{.status.phase}}{{"\n"}}{{end}}' \
| while IFS='|' read -r ns_name created_at phase; do
[ -z "${ns_name}" ] && continue
[ "${phase}" != "Active" ] && continue
# LABEL_SELECTOR may contain multiple space-separated selectors; each is matched
# independently (OR semantics). Namespaces already Terminating are skipped.
IFS=' ' read -ra _selectors <<< "${LABEL_SELECTOR}"
for selector in "${_selectors[@]}"; do
kubectl get namespaces -l "${selector}" \
-o go-template='{{range .items}}{{.metadata.name}}|{{.metadata.creationTimestamp}}|{{.status.phase}}{{"\n"}}{{end}}' \
| while IFS='|' read -r ns_name created_at phase; do
[ -z "${ns_name}" ] && continue
[ "${phase}" != "Active" ] && continue

created_seconds=$(parse_timestamp "${created_at}") || continue
age=$((NOW - created_seconds))
created_seconds=$(parse_timestamp "${created_at}") || continue
age=$((NOW - created_seconds))

if [ "${age}" -gt "${AGE_SECONDS}" ]; then
age_m=$((age / 60))
if [ "${DRY_RUN}" = "true" ]; then
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [DRY-RUN] Would delete namespace '${ns_name}' (age=${age_m}m)"
else
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [INFO] Deleting namespace '${ns_name}' (age=${age_m}m)"
kubectl delete namespace "${ns_name}" --wait=false \
&& echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [INFO] Delete requested for namespace '${ns_name}'" \
|| echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [WARN] Failed to delete namespace '${ns_name}'"
if [ "${age}" -gt "${AGE_SECONDS}" ]; then
age_m=$((age / 60))
if [ "${DRY_RUN}" = "true" ]; then
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [DRY-RUN] Would delete namespace '${ns_name}' (age=${age_m}m)"
else
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [INFO] Deleting namespace '${ns_name}' (age=${age_m}m)"
kubectl delete namespace "${ns_name}" --wait=false \

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a non-blocking call, it won't error out if the namespace fails to be deleted. With --wait=false, kubectl will almost never fail, because it only validates that the DELETE request was accepted by the API server.

I'm not sure what the alternative is, because we don't want to block on the deletion. But the namespace could hang in the Terminating state forever, and the only log line from this script would be "Delete requested for namespace".

&& echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [INFO] Delete requested for namespace '${ns_name}'" \
|| echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [WARN] Failed to delete namespace '${ns_name}'"
fi
fi
fi
done
done
done
Comment thread
rh-amarin marked this conversation as resolved.

echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [INFO] Namespace cleaner run complete"
8 changes: 8 additions & 0 deletions helm/namespace-cleaner/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Name for cluster-scoped resources (ClusterRole, ClusterRoleBinding).
Includes the release namespace so multiple installs don't collide.
*/}}
{{- define "namespace-cleaner.clusterResourceName" -}}
{{- printf "%s-%s" (include "namespace-cleaner.fullname" .) .Release.Namespace | trunc 63 | trimSuffix "-" }}
Comment on lines +47 to +48

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major | ⚡ Quick win

Preserve namespace uniqueness after truncation.

This helper can still generate colliding cluster-scoped names when namespace-cleaner.fullname already consumes most of the 63-character budget, because .Release.Namespace gets truncated off. That reintroduces ClusterRole/ClusterRoleBinding conflicts between installs in different namespaces and breaks the isolation this PR is adding. Keep a fixed-length namespace-derived suffix instead of truncating the entire combined string.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@helm/namespace-cleaner/templates/_helpers.tpl` around lines 47 - 48, The
cluster-scoped name helper can still collide after truncation because
`namespace-cleaner.clusterResourceName` truncates the entire combined fullname
and namespace string, which can drop the namespace identifier entirely. Update
this helper so the namespace contribution is preserved as a fixed-length suffix
derived from `.Release.Namespace` while keeping the base
`namespace-cleaner.fullname` truncated to fit within the 63-character limit.
Ensure the resulting name remains unique per namespace even when the fullname is
long.

{{- end }}

{{/*
Selector labels
*/}}
Expand Down
2 changes: 1 addition & 1 deletion helm/namespace-cleaner/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "namespace-cleaner.fullname" . }}
name: {{ include "namespace-cleaner.clusterResourceName" . }}
labels:
{{- include "namespace-cleaner.labels" . | nindent 4 }}
rules:
Expand Down
4 changes: 2 additions & 2 deletions helm/namespace-cleaner/templates/clusterrolebinding.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ include "namespace-cleaner.fullname" . }}
name: {{ include "namespace-cleaner.clusterResourceName" . }}
labels:
{{- include "namespace-cleaner.labels" . | nindent 4 }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ include "namespace-cleaner.fullname" . }}
name: {{ include "namespace-cleaner.clusterResourceName" . }}
subjects:
- kind: ServiceAccount
name: {{ include "namespace-cleaner.fullname" . }}
Expand Down
5 changes: 3 additions & 2 deletions helm/namespace-cleaner/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ image:
# Cron schedule for the cleaner job (default: every hour)
schedule: "0 * * * *"

# Kubernetes label selector used to identify namespaces eligible for deletion
labelSelector: "hyperfleet.io/cluster-id"
# Space-separated label selectors used to identify namespaces eligible for deletion.
# Each selector is matched independently (OR semantics).
labelSelector: "hyperfleet.io/cluster-id hyperfleet.io/test-run"

# Delete namespaces older than this many minutes
ageMinutes: 180
Expand Down