From e4cb82f764c692c95824675df885773f52694675 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 13:06:27 -0300 Subject: [PATCH 01/11] handling GKE cluster with pullsecret-adapter SA --- k8s/README.md | 163 ++++++++++++++++++++++++++++++++++++++++ k8s/deploy.sh | 59 +++++++++++++++ k8s/job.yaml | 46 ++++++++++++ k8s/serviceaccount.yaml | 7 ++ 4 files changed, 275 insertions(+) create mode 100644 k8s/README.md create mode 100755 k8s/deploy.sh create mode 100644 k8s/job.yaml create mode 100644 k8s/serviceaccount.yaml diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 0000000..44fb446 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,163 @@ +# Kubernetes Deployment for Pull Secret Job + +This directory contains Kubernetes manifests to deploy the Pull Secret Job on GKE cluster `hyperfleet-dev-croche`. + +## Prerequisites + +1. **GKE Auth Plugin installed:** + ```bash + sudo dnf install google-cloud-sdk-gke-gcloud-auth-plugin + ``` + +2. **kubectl configured:** + ```bash + gcloud container clusters get-credentials hyperfleet-dev-croche \ + --zone=us-central1-a \ + --project=sda-ccs-3 + ``` + +3. **Workload Identity configured:** + - Service Account: `pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com` + - Workload Pool: `sda-ccs-3.svc.id.goog` + +## Quick Start + +### Option 1: Using the deploy script (Recommended) + +```bash +./k8s/deploy.sh +``` + +### Option 2: Manual deployment + +```bash +# 1. Create ServiceAccount with Workload Identity annotation +kubectl apply -f k8s/serviceaccount.yaml + +# 2. Create the Job +kubectl apply -f k8s/job.yaml + +# 3. Monitor the job +kubectl get job pullsecret-test-job -w + +# 4. View logs +POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') +kubectl logs -f $POD_NAME +``` + +## Files + +- **serviceaccount.yaml** - Kubernetes ServiceAccount with Workload Identity binding +- **job.yaml** - Kubernetes Job that runs the pull-secret container +- **deploy.sh** - Automated deployment script + +## Workload Identity Setup + +The ServiceAccount is annotated to use GCP Workload Identity: + +```yaml +annotations: + iam.gke.io/gcp-service-account: pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com +``` + +This allows the pod to authenticate to GCP services without service account keys. + +## Environment Variables + +The Job is configured with the following environment variables: + +| Variable | Value | Description | +|----------|-------|-------------| +| `GCP_PROJECT_ID` | `sda-ccs-3` | GCP project where secrets are stored | +| `CLUSTER_ID` | `cls-test-123` | Cluster identifier | +| `SECRET_NAME` | `hyperfleet-cls-test-123-pull-secret` | Secret name in GCP | +| `PULL_SECRET_DATA` | `{...}` | Pull secret JSON data | + +## Monitoring + +### Check job status +```bash +kubectl get job pullsecret-test-job +kubectl describe job pullsecret-test-job +``` + +### View pod logs +```bash +kubectl logs -f $(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') +``` + +### Check events +```bash +kubectl get events --sort-by='.lastTimestamp' +``` + +## Cleanup + +```bash +# Delete the job (keeps pods for 1 hour for debugging) +kubectl delete job pullsecret-test-job + +# Force delete pods immediately +kubectl delete pods -l app=pullsecret-adapter +``` + +## Troubleshooting + +### Pod is not starting + +Check pod events: +```bash +kubectl describe pod $(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') +``` + +### Authentication errors + +Verify Workload Identity binding: +```bash +# Check if SA exists +kubectl get sa pullsecret-adapter-job -o yaml + +# Check GCP IAM binding +gcloud iam service-accounts get-iam-policy \ + pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com \ + --project=sda-ccs-3 +``` + +### Image pull errors + +Verify the image exists and is accessible: +```bash +podman pull quay.io/ldornele/pull-secret:dev-f1bf914 +``` + +## Customizing the Job + +Edit `k8s/job.yaml` to: +- Change environment variables (CLUSTER_ID, SECRET_NAME, etc.) +- Update image tag +- Modify resource limits +- Change security settings + +After editing, reapply: +```bash +kubectl delete job pullsecret-test-job +kubectl apply -f k8s/job.yaml +``` + +## Production Considerations + +For production deployment: + +1. Use a dedicated namespace (e.g., `hyperfleet-system`) +2. Add resource quotas +3. Configure network policies +4. Set up pod disruption budgets +5. Add monitoring and alerting +6. Use ConfigMaps/Secrets for sensitive data +7. Configure proper RBAC + +## References + +- [GKE Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) +- [Kubernetes Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) +- [Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) diff --git a/k8s/deploy.sh b/k8s/deploy.sh new file mode 100755 index 0000000..a371b3a --- /dev/null +++ b/k8s/deploy.sh @@ -0,0 +1,59 @@ +#!/bin/bash +set -e + +echo "====================================" +echo "Deploying Pull Secret Job to GKE" +echo "====================================" +echo "" + +# Check if kubectl is configured +echo "Checking cluster connection..." +kubectl cluster-info | head -1 +echo "" + +# Apply ServiceAccount +echo "Creating ServiceAccount with Workload Identity..." +kubectl apply -f k8s/serviceaccount.yaml +echo "" + +# Wait a bit for SA to be created +sleep 2 + +# Apply Job +echo "Creating Job..." +kubectl apply -f k8s/job.yaml +echo "" + +# Wait for job to start +echo "Waiting for job to start..." +sleep 3 + +# Show job status +echo "Job status:" +kubectl get job pullsecret-test-job +echo "" + +# Show pod status +echo "Pod status:" +kubectl get pods -l app=pullsecret-adapter +echo "" + +# Get pod name +POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') + +echo "====================================" +echo "Job deployed successfully!" +echo "====================================" +echo "" +echo "Monitor job progress:" +echo " kubectl get job pullsecret-test-job -w" +echo "" +echo "View logs:" +echo " kubectl logs -f $POD_NAME" +echo "" +echo "Check job status:" +echo " kubectl describe job pullsecret-test-job" +echo "" +echo "Delete job when done:" +echo " kubectl delete job pullsecret-test-job" +echo "" diff --git a/k8s/job.yaml b/k8s/job.yaml new file mode 100644 index 0000000..691ea10 --- /dev/null +++ b/k8s/job.yaml @@ -0,0 +1,46 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: pullsecret-test-job + namespace: default + labels: + app: pullsecret-adapter + job-type: test +spec: + backoffLimit: 3 + ttlSecondsAfterFinished: 3600 # Keep job for 1 hour after completion + template: + metadata: + labels: + app: pullsecret-adapter + spec: + serviceAccountName: pullsecret-adapter + restartPolicy: Never + containers: + - name: pull-secret + image: quay.io/ldornele/pull-secret:dev-f1bf914 + imagePullPolicy: Always + env: + - name: GCP_PROJECT_ID + value: "sda-ccs-3" + - name: CLUSTER_ID + value: "cls-test-123" + - name: SECRET_NAME + value: "hyperfleet-cls-test-123-pull-secret" + - name: PULL_SECRET_DATA + value: '{"auths":{"registry.redhat.io":{"auth":"dGVzdDp0ZXN0","email":"test@example.com"}}}' + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL diff --git a/k8s/serviceaccount.yaml b/k8s/serviceaccount.yaml new file mode 100644 index 0000000..b2bbfd9 --- /dev/null +++ b/k8s/serviceaccount.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pullsecret-adapter + namespace: default + annotations: + iam.gke.io/gcp-service-account: pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com From 317565b0ca31a4a3997a39793cfc87422e79dc18 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 16:40:07 -0300 Subject: [PATCH 02/11] Fix CI workflow --- .github/workflows/ci.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1a324cb..02e534d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,16 +24,21 @@ jobs: with: go-version: '1.23.9' cache: true + cache-dependency-path: adapter-pull-secret/go.sum - name: Run golangci-lint uses: golangci/golangci-lint-action@v6 with: version: latest args: --timeout=5m + working-directory: adapter-pull-secret build: name: Build runs-on: ubuntu-latest + defaults: + run: + working-directory: adapter-pull-secret steps: - name: Checkout code uses: actions/checkout@v4 @@ -43,6 +48,7 @@ jobs: with: go-version: '1.23.9' cache: true + cache-dependency-path: adapter-pull-secret/go.sum - name: Download dependencies run: go mod download @@ -58,6 +64,9 @@ jobs: test: name: Test runs-on: ubuntu-latest + defaults: + run: + working-directory: adapter-pull-secret steps: - name: Checkout code uses: actions/checkout@v4 @@ -67,6 +76,7 @@ jobs: with: go-version: '1.23.9' cache: true + cache-dependency-path: adapter-pull-secret/go.sum - name: Download dependencies run: go mod download @@ -78,6 +88,6 @@ jobs: uses: codecov/codecov-action@v4 if: success() with: - files: ./coverage.txt + files: ./adapter-pull-secret/coverage.txt flags: unittests fail_ci_if_error: false From 44698c9a513539dd6affe92e18c8186d4da94035 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 16:57:06 -0300 Subject: [PATCH 03/11] Fix CI workflow --- .github/workflows/ci.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02e534d..1a324cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,21 +24,16 @@ jobs: with: go-version: '1.23.9' cache: true - cache-dependency-path: adapter-pull-secret/go.sum - name: Run golangci-lint uses: golangci/golangci-lint-action@v6 with: version: latest args: --timeout=5m - working-directory: adapter-pull-secret build: name: Build runs-on: ubuntu-latest - defaults: - run: - working-directory: adapter-pull-secret steps: - name: Checkout code uses: actions/checkout@v4 @@ -48,7 +43,6 @@ jobs: with: go-version: '1.23.9' cache: true - cache-dependency-path: adapter-pull-secret/go.sum - name: Download dependencies run: go mod download @@ -64,9 +58,6 @@ jobs: test: name: Test runs-on: ubuntu-latest - defaults: - run: - working-directory: adapter-pull-secret steps: - name: Checkout code uses: actions/checkout@v4 @@ -76,7 +67,6 @@ jobs: with: go-version: '1.23.9' cache: true - cache-dependency-path: adapter-pull-secret/go.sum - name: Download dependencies run: go mod download @@ -88,6 +78,6 @@ jobs: uses: codecov/codecov-action@v4 if: success() with: - files: ./adapter-pull-secret/coverage.txt + files: ./coverage.txt flags: unittests fail_ci_if_error: false From 0c1b4dee6d0f6f2cb8e17f40eb98ef6b98465cfc Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 17:23:04 -0300 Subject: [PATCH 04/11] Fix - Add cmd folder --- cmd/pull-secret/jobs/README_TESTS.md | 158 +++++++ cmd/pull-secret/jobs/cmd.go | 23 + cmd/pull-secret/jobs/pull_secret.go | 356 +++++++++++++++ cmd/pull-secret/jobs/pull_secret_test.go | 533 +++++++++++++++++++++++ cmd/pull-secret/main.go | 52 +++ 5 files changed, 1122 insertions(+) create mode 100644 cmd/pull-secret/jobs/README_TESTS.md create mode 100644 cmd/pull-secret/jobs/cmd.go create mode 100644 cmd/pull-secret/jobs/pull_secret.go create mode 100644 cmd/pull-secret/jobs/pull_secret_test.go create mode 100644 cmd/pull-secret/main.go diff --git a/cmd/pull-secret/jobs/README_TESTS.md b/cmd/pull-secret/jobs/README_TESTS.md new file mode 100644 index 0000000..c893c19 --- /dev/null +++ b/cmd/pull-secret/jobs/README_TESTS.md @@ -0,0 +1,158 @@ +# Pull Secret Job - Test Documentation + +## Test Coverage Summary + +Current test coverage: **42.3%** of statements + +## Test Files + +- `pull_secret_test.go` - Unit tests for Pull Secret Job + +## Tests Included + +### 1. Pull Secret Validation (`TestValidatePullSecret`) +Tests the validation of pull secret JSON format: +- ✅ Valid pull secret with single registry +- ✅ Valid pull secret with multiple registries +- ✅ Invalid JSON format +- ✅ Missing 'auths' key +- ✅ Empty auths object +- ✅ Auths is not an object +- ✅ Empty pull secret string + +### 2. Configuration Validation (`TestPullSecretTask_validateConfig`) +Tests the validation of required environment variables: +- ✅ Valid configuration with all fields +- ✅ Missing GCP_PROJECT_ID +- ✅ Missing CLUSTER_ID +- ✅ Missing SECRET_NAME +- ✅ Missing PULL_SECRET_DATA +- ✅ All fields empty + +### 3. Task Metadata (`TestPullSecretTask_TaskName`) +Tests the task name: +- ✅ Returns "pull-secret-mvp" + +### 4. Job Metadata (`TestPullSecretJob_GetMetadata`) +Tests job metadata: +- ✅ Returns correct Use value +- ✅ Returns non-empty Description + +### 5. Worker Count (`TestPullSecretJob_GetWorkerCount`) +Tests worker count configuration: +- ✅ Returns 1 worker + +### 6. Task Creation (`TestPullSecretJob_GetTasks`) +Tests task creation from environment variables: +- ✅ All environment variables set +- ✅ Auto-generate secret name from cluster ID +- ✅ Use fake pull secret when not provided + +### 7. Retry Logic (`TestIsRetryable`) +Tests retry decision for different gRPC error codes: +- ✅ Retryable errors: Unavailable, DeadlineExceeded, Internal, ResourceExhausted +- ✅ Non-retryable errors: PermissionDenied, NotFound, AlreadyExists, InvalidArgument + +### 8. Retry with Backoff (`TestRetryWithBackoff`) +Tests the exponential backoff retry mechanism: +- ✅ Success on first try +- ✅ Success after multiple retries +- ✅ Max retries exceeded +- ✅ Non-retryable error stops immediately +- ✅ Context cancellation + +### 9. Structured Logging (`TestLogStructured`) +Tests structured JSON logging: +- ✅ Complete log entry with all fields +- ✅ Log entry without duration +- ✅ Log entry without version + +## Functions Tested + +✅ Fully tested: +- `validatePullSecret()` - Pull secret validation +- `PullSecretTask.validateConfig()` - Configuration validation +- `PullSecretTask.TaskName()` - Task name retrieval +- `PullSecretJob.GetMetadata()` - Job metadata +- `PullSecretJob.GetWorkerCount()` - Worker count +- `PullSecretJob.GetTasks()` - Task creation +- `isRetryable()` - Retry logic +- `retryWithBackoff()` - Exponential backoff +- `logStructured()` - Structured logging + +⚠️ Not tested (require GCP Secret Manager mock): +- `PullSecretTask.Process()` - Main processing logic +- `PullSecretTask.secretExists()` - Check if secret exists +- `PullSecretTask.createSecret()` - Create secret in GCP +- `PullSecretTask.addSecretVersion()` - Add secret version +- `PullSecretTask.verifySecret()` - Verify secret accessibility +- `PullSecretTask.createOrUpdateSecret()` - Create/update orchestration + +## Running Tests + +### Run all tests +```bash +make test +``` + +### Run tests with verbose output +```bash +go test -v ./cmd/pull-secret/jobs/ +``` + +### Run specific test +```bash +go test -v -run TestValidatePullSecret ./cmd/pull-secret/jobs/ +``` + +### Generate coverage report +```bash +make test +go tool cover -html=coverage.txt +``` + +### Run tests with race detector +```bash +go test -race ./cmd/pull-secret/jobs/ +``` + +## Test Principles + +1. **Table-Driven Tests**: All tests use table-driven approach for comprehensive coverage +2. **Isolation**: Tests use environment variable manipulation with proper cleanup +3. **Race Detection**: Tests run with `-race` flag to detect concurrency issues +4. **Coverage**: Target is 80%+ coverage for testable code +5. **Fast**: Unit tests complete in < 10 seconds + +## Future Test Improvements + +To increase coverage to 80%+, consider: + +1. **Mock GCP Secret Manager Client** + - Use interface abstraction for Secret Manager client + - Create mock implementation for testing + - Test Process(), secretExists(), createSecret(), etc. + +2. **Integration Tests** + - Test with real GCP Secret Manager (separate test suite) + - Use test GCP project with cleanup + - Validate end-to-end workflow + +3. **Error Scenarios** + - Test all GCP API error responses + - Test network failures and timeouts + - Test partial failures in retry logic + +4. **Performance Tests** + - Benchmark retry backoff timing + - Measure memory allocation + - Test concurrent task execution + +## CI/CD Integration + +Tests are automatically run in GitHub Actions on every pull request: +- Lint check +- Build check +- Test check with coverage report + +See `.github/workflows/ci.yml` for CI configuration. diff --git a/cmd/pull-secret/jobs/cmd.go b/cmd/pull-secret/jobs/cmd.go new file mode 100644 index 0000000..c3cc819 --- /dev/null +++ b/cmd/pull-secret/jobs/cmd.go @@ -0,0 +1,23 @@ +package jobs + +import ( + "context" + + "github.com/spf13/cobra" + "gitlab.cee.redhat.com/service/hyperfleet/mvp/pkg/job" +) + +func NewJobCommand(ctx context.Context) *cobra.Command { + + var jobRegistry = job.NewJobRegistry() + + // Register only the Pull Secret Job + jobRegistry.AddJob(&PullSecretJob{}) + + builder := &job.CommandBuilder{} + builder.SetContext(ctx) + builder.SetRegistry(*jobRegistry) + cmd := builder.Build() + + return cmd +} diff --git a/cmd/pull-secret/jobs/pull_secret.go b/cmd/pull-secret/jobs/pull_secret.go new file mode 100644 index 0000000..1843bd7 --- /dev/null +++ b/cmd/pull-secret/jobs/pull_secret.go @@ -0,0 +1,356 @@ +package jobs + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "time" + + secretmanager "cloud.google.com/go/secretmanager/apiv1" + "cloud.google.com/go/secretmanager/apiv1/secretmanagerpb" + "github.com/spf13/pflag" + "gitlab.cee.redhat.com/service/hyperfleet/mvp/pkg/job" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +type PullSecretTask struct { + PullSecret string + GCPProjectID string + ClusterID string + SecretName string +} + +var PullSecretJobArgs struct { + name string +} + +type PullSecretJob struct { +} + +func (e PullSecretTask) TaskName() string { + return "pull-secret-mvp" +} + +func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { + + var tasks []job.Task + + // Read configuration from environment variables + gcpProjectID := os.Getenv("GCP_PROJECT_ID") + clusterID := os.Getenv("CLUSTER_ID") + secretName := os.Getenv("SECRET_NAME") + pullSecretData := os.Getenv("PULL_SECRET_DATA") + + // Use fake pull secret for testing if PULL_SECRET_DATA is not provided + if pullSecretData == "" { + pullSecretData = `{"auths":{"cloud.openshift.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"quay.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.connect.redhat.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.redhat.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"}}}` + } + + // Auto-derive secret name if not provided + if secretName == "" && clusterID != "" { + secretName = fmt.Sprintf("hyperfleet-%s-pull-secret", clusterID) + } + + tasks = append(tasks, PullSecretTask{ + PullSecret: pullSecretData, + GCPProjectID: gcpProjectID, + ClusterID: clusterID, + SecretName: secretName, + }) + + return tasks, nil +} + +func (pullsecretJob *PullSecretJob) GetMetadata() job.Metadata { + return job.Metadata{ + Use: "pull-secret", + Description: "Pull Secret Job Execution - Stores pull secret in GCP Secret Manager", + } +} + +func (pullsecretJob *PullSecretJob) AddFlags(flags *pflag.FlagSet) {} + +func (pullsecretJob *PullSecretJob) GetWorkerCount() int { + return 1 +} + +func (e PullSecretTask) Process(ctx context.Context) error { + + // Validate required environment variables + if err := e.validateConfig(); err != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "validate-config", 0, err.Error(), "") + return err + } + + // Validate pull secret JSON format + if err := validatePullSecret(e.PullSecret); err != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "validate-pull-secret", 0, fmt.Sprintf("Invalid pull secret format: %v", err), "") + return fmt.Errorf("invalid pull secret format: %w", err) + } + + logStructured("info", e.ClusterID, e.GCPProjectID, "start", 0, "Starting pull secret storage operation", "") + + // Initialize Secret Manager client + client, err := secretmanager.NewClient(ctx) + if err != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "init-client", 0, fmt.Sprintf("Failed to create secretmanager client: %v", err), "") + return fmt.Errorf("failed to create secretmanager client: %w", err) + } + defer func() { + if closeErr := client.Close(); closeErr != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "close-client", 0, fmt.Sprintf("Failed to close client: %v", closeErr), "") + } + }() + + logStructured("info", e.ClusterID, e.GCPProjectID, "client-initialized", 0, "Successfully initialized Secret Manager client", "") + + // Create or update secret with retry logic + if err := retryWithBackoff(ctx, func() error { + return e.createOrUpdateSecret(ctx, client) + }, 3); err != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "create-update-secret", 0, fmt.Sprintf("Failed to create/update secret: %v", err), "") + return err + } + + // Verify secret is accessible + if err := retryWithBackoff(ctx, func() error { + return e.verifySecret(ctx, client) + }, 3); err != nil { + logStructured("error", e.ClusterID, e.GCPProjectID, "verify-secret", 0, fmt.Sprintf("Failed to verify secret: %v", err), "") + return err + } + + logStructured("info", e.ClusterID, e.GCPProjectID, "completed", 0, "Successfully created/updated pull secret", "") + + return nil +} + +// validateConfig validates required environment variables +func (e PullSecretTask) validateConfig() error { + if e.GCPProjectID == "" { + return fmt.Errorf("missing required environment variable: GCP_PROJECT_ID") + } + if e.ClusterID == "" { + return fmt.Errorf("missing required environment variable: CLUSTER_ID") + } + if e.SecretName == "" { + return fmt.Errorf("missing required environment variable: SECRET_NAME") + } + if e.PullSecret == "" { + return fmt.Errorf("missing required environment variable: PULL_SECRET_DATA") + } + return nil +} + +// createOrUpdateSecret creates or updates the secret in GCP Secret Manager +func (e PullSecretTask) createOrUpdateSecret(ctx context.Context, client *secretmanager.Client) error { + startTime := time.Now() + + // Check if secret exists + exists, err := e.secretExists(ctx, client) + if err != nil { + return err + } + + if !exists { + // Create new secret + logStructured("info", e.ClusterID, e.GCPProjectID, "create-secret", 0, fmt.Sprintf("Creating new secret: %s", e.SecretName), "") + if createErr := e.createSecret(ctx, client); createErr != nil { + return fmt.Errorf("failed to create secret: %w", createErr) + } + duration := time.Since(startTime).Milliseconds() + logStructured("info", e.ClusterID, e.GCPProjectID, "create-secret", duration, "Successfully created secret", "") + } else { + logStructured("info", e.ClusterID, e.GCPProjectID, "secret-exists", 0, fmt.Sprintf("Secret already exists: %s", e.SecretName), "") + } + + // Add secret version with data + startTime = time.Now() + logStructured("info", e.ClusterID, e.GCPProjectID, "add-secret-version", 0, "Adding secret version with pull secret data", "") + version, err := e.addSecretVersion(ctx, client) + if err != nil { + return fmt.Errorf("failed to add secret version: %w", err) + } + duration := time.Since(startTime).Milliseconds() + logStructured("info", e.ClusterID, e.GCPProjectID, "add-secret-version", duration, "Successfully created secret version", version) + + return nil +} + +// secretExists checks if a secret exists in GCP Secret Manager +func (e PullSecretTask) secretExists(ctx context.Context, client *secretmanager.Client) (bool, error) { + name := fmt.Sprintf("projects/%s/secrets/%s", e.GCPProjectID, e.SecretName) + + req := &secretmanagerpb.GetSecretRequest{ + Name: name, + } + + _, err := client.GetSecret(ctx, req) + if err != nil { + if status.Code(err) == codes.NotFound { + return false, nil + } + return false, fmt.Errorf("failed to check secret existence: %w", err) + } + + return true, nil +} + +// createSecret creates a new secret in GCP Secret Manager +func (e PullSecretTask) createSecret(ctx context.Context, client *secretmanager.Client) error { + req := &secretmanagerpb.CreateSecretRequest{ + Parent: fmt.Sprintf("projects/%s", e.GCPProjectID), + SecretId: e.SecretName, + Secret: &secretmanagerpb.Secret{ + Replication: &secretmanagerpb.Replication{ + Replication: &secretmanagerpb.Replication_Automatic_{ + Automatic: &secretmanagerpb.Replication_Automatic{}, + }, + }, + Labels: map[string]string{ + "managed-by": "hyperfleet", + "adapter": "pullsecret", + "cluster-id": e.ClusterID, + "resource-type": "pull-secret", + "hyperfleet-version": "v1", + }, + }, + } + + _, err := client.CreateSecret(ctx, req) + if err != nil { + return fmt.Errorf("failed to create secret: %w", err) + } + + return nil +} + +// addSecretVersion adds a new version with pull secret data +func (e PullSecretTask) addSecretVersion(ctx context.Context, client *secretmanager.Client) (string, error) { + parent := fmt.Sprintf("projects/%s/secrets/%s", e.GCPProjectID, e.SecretName) + + req := &secretmanagerpb.AddSecretVersionRequest{ + Parent: parent, + Payload: &secretmanagerpb.SecretPayload{ + Data: []byte(e.PullSecret), + }, + } + + version, err := client.AddSecretVersion(ctx, req) + if err != nil { + return "", fmt.Errorf("failed to add secret version: %w", err) + } + + return version.Name, nil +} + +// verifySecret verifies that the secret is accessible +func (e PullSecretTask) verifySecret(ctx context.Context, client *secretmanager.Client) error { + startTime := time.Now() + name := fmt.Sprintf("projects/%s/secrets/%s/versions/latest", e.GCPProjectID, e.SecretName) + + req := &secretmanagerpb.AccessSecretVersionRequest{ + Name: name, + } + + result, err := client.AccessSecretVersion(ctx, req) + if err != nil { + return fmt.Errorf("failed to access secret version: %w", err) + } + + duration := time.Since(startTime).Milliseconds() + logStructured("info", e.ClusterID, e.GCPProjectID, "verify-secret", duration, fmt.Sprintf("Verified secret (%d bytes)", len(result.Payload.Data)), "") + + return nil +} + +// validatePullSecret validates the pull secret JSON format +func validatePullSecret(pullSecretJSON string) error { + var pullSecret map[string]interface{} + if err := json.Unmarshal([]byte(pullSecretJSON), &pullSecret); err != nil { + return fmt.Errorf("invalid JSON: %w", err) + } + + auths, ok := pullSecret["auths"] + if !ok { + return fmt.Errorf("missing 'auths' key") + } + + authsMap, ok := auths.(map[string]interface{}) + if !ok || len(authsMap) == 0 { + return fmt.Errorf("'auths' must be a non-empty object") + } + + return nil +} + +// retryWithBackoff retries a function with exponential backoff +func retryWithBackoff(ctx context.Context, fn func() error, maxRetries int) error { + var err error + for i := 0; i < maxRetries; i++ { + err = fn() + if err == nil { + return nil + } + + if !isRetryable(err) { + return err + } + + if i < maxRetries-1 { + // Calculate backoff with jitter (±20%) + baseBackoff := time.Duration(1< 0 { + logEntry["duration_ms"] = durationMs + } + + if version != "" { + logEntry["version"] = version + } + + jsonLog, err := json.Marshal(logEntry) + if err != nil { + log.Printf("Failed to marshal log entry: %v", err) + return + } + + fmt.Println(string(jsonLog)) +} diff --git a/cmd/pull-secret/jobs/pull_secret_test.go b/cmd/pull-secret/jobs/pull_secret_test.go new file mode 100644 index 0000000..8780d0e --- /dev/null +++ b/cmd/pull-secret/jobs/pull_secret_test.go @@ -0,0 +1,533 @@ +package jobs + +import ( + "context" + "os" + "testing" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// TestValidatePullSecret tests the pull secret validation function +func TestValidatePullSecret(t *testing.T) { + tests := []struct { + name string + pullSecret string + expectError bool + errorMsg string + }{ + { + name: "valid pull secret", + pullSecret: `{ + "auths": { + "registry.redhat.io": { + "auth": "dGVzdDp0ZXN0", + "email": "test@example.com" + } + } + }`, + expectError: false, + }, + { + name: "valid pull secret with multiple registries", + pullSecret: `{ + "auths": { + "registry.redhat.io": { + "auth": "dGVzdDp0ZXN0", + "email": "test@example.com" + }, + "quay.io": { + "auth": "dGVzdDp0ZXN0", + "email": "test@example.com" + } + } + }`, + expectError: false, + }, + { + name: "invalid JSON", + pullSecret: `{invalid json`, + expectError: true, + errorMsg: "invalid JSON", + }, + { + name: "missing auths key", + pullSecret: `{"registries": {}}`, + expectError: true, + errorMsg: "missing 'auths' key", + }, + { + name: "empty auths object", + pullSecret: `{"auths": {}}`, + expectError: true, + errorMsg: "'auths' must be a non-empty object", + }, + { + name: "auths is not an object", + pullSecret: `{"auths": "string"}`, + expectError: true, + errorMsg: "'auths' must be a non-empty object", + }, + { + name: "empty pull secret", + pullSecret: ``, + expectError: true, + errorMsg: "invalid JSON", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validatePullSecret(tt.pullSecret) + if tt.expectError { + if err == nil { + t.Errorf("expected error containing '%s', got nil", tt.errorMsg) + } + } else { + if err != nil { + t.Errorf("expected no error, got: %v", err) + } + } + }) + } +} + +// TestPullSecretTask_validateConfig tests configuration validation +func TestPullSecretTask_validateConfig(t *testing.T) { + tests := []struct { + name string + task PullSecretTask + expectError bool + errorMsg string + }{ + { + name: "valid configuration", + task: PullSecretTask{ + GCPProjectID: "test-project", + ClusterID: "cls-123", + SecretName: "test-secret", + PullSecret: "test-data", + }, + expectError: false, + }, + { + name: "missing GCP project ID", + task: PullSecretTask{ + ClusterID: "cls-123", + SecretName: "test-secret", + PullSecret: "test-data", + }, + expectError: true, + errorMsg: "GCP_PROJECT_ID", + }, + { + name: "missing cluster ID", + task: PullSecretTask{ + GCPProjectID: "test-project", + SecretName: "test-secret", + PullSecret: "test-data", + }, + expectError: true, + errorMsg: "CLUSTER_ID", + }, + { + name: "missing secret name", + task: PullSecretTask{ + GCPProjectID: "test-project", + ClusterID: "cls-123", + PullSecret: "test-data", + }, + expectError: true, + errorMsg: "SECRET_NAME", + }, + { + name: "missing pull secret data", + task: PullSecretTask{ + GCPProjectID: "test-project", + ClusterID: "cls-123", + SecretName: "test-secret", + }, + expectError: true, + errorMsg: "PULL_SECRET_DATA", + }, + { + name: "all fields empty", + task: PullSecretTask{}, + expectError: true, + errorMsg: "GCP_PROJECT_ID", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.task.validateConfig() + if tt.expectError { + if err == nil { + t.Errorf("expected error, got nil") + } + } else { + if err != nil { + t.Errorf("expected no error, got: %v", err) + } + } + }) + } +} + +// TestPullSecretTask_TaskName tests the TaskName method +func TestPullSecretTask_TaskName(t *testing.T) { + task := PullSecretTask{} + expected := "pull-secret-mvp" + got := task.TaskName() + + if got != expected { + t.Errorf("expected task name '%s', got '%s'", expected, got) + } +} + +// TestPullSecretJob_GetMetadata tests the GetMetadata method +func TestPullSecretJob_GetMetadata(t *testing.T) { + job := &PullSecretJob{} + metadata := job.GetMetadata() + + if metadata.Use != "pull-secret" { + t.Errorf("expected Use 'pull-secret', got '%s'", metadata.Use) + } + + if metadata.Description == "" { + t.Error("expected non-empty Description") + } +} + +// TestPullSecretJob_GetWorkerCount tests the GetWorkerCount method +func TestPullSecretJob_GetWorkerCount(t *testing.T) { + job := &PullSecretJob{} + expected := 1 + got := job.GetWorkerCount() + + if got != expected { + t.Errorf("expected worker count %d, got %d", expected, got) + } +} + +// TestPullSecretJob_GetTasks tests the GetTasks method with environment variables +func TestPullSecretJob_GetTasks(t *testing.T) { + tests := []struct { + name string + envVars map[string]string + expectError bool + checkTask func(*testing.T, PullSecretTask) + }{ + { + name: "all environment variables set", + envVars: map[string]string{ + "GCP_PROJECT_ID": "test-project", + "CLUSTER_ID": "cls-123", + "SECRET_NAME": "custom-secret", + "PULL_SECRET_DATA": `{"auths":{"registry.io":{"auth":"dGVzdA=="}}}`, + }, + expectError: false, + checkTask: func(t *testing.T, task PullSecretTask) { + if task.GCPProjectID != "test-project" { + t.Errorf("expected GCPProjectID 'test-project', got '%s'", task.GCPProjectID) + } + if task.ClusterID != "cls-123" { + t.Errorf("expected ClusterID 'cls-123', got '%s'", task.ClusterID) + } + if task.SecretName != "custom-secret" { + t.Errorf("expected SecretName 'custom-secret', got '%s'", task.SecretName) + } + }, + }, + { + name: "auto-generate secret name", + envVars: map[string]string{ + "GCP_PROJECT_ID": "test-project", + "CLUSTER_ID": "cls-456", + "PULL_SECRET_DATA": `{"auths":{"registry.io":{"auth":"dGVzdA=="}}}`, + }, + expectError: false, + checkTask: func(t *testing.T, task PullSecretTask) { + expected := "hyperfleet-cls-456-pull-secret" + if task.SecretName != expected { + t.Errorf("expected auto-generated SecretName '%s', got '%s'", expected, task.SecretName) + } + }, + }, + { + name: "use fake pull secret when not provided", + envVars: map[string]string{ + "GCP_PROJECT_ID": "test-project", + "CLUSTER_ID": "cls-789", + }, + expectError: false, + checkTask: func(t *testing.T, task PullSecretTask) { + if task.PullSecret == "" { + t.Error("expected fake pull secret data, got empty string") + } + // Verify it's valid JSON + if err := validatePullSecret(task.PullSecret); err != nil { + t.Errorf("fake pull secret is invalid: %v", err) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Clear and set environment variables + os.Clearenv() + for k, v := range tt.envVars { + os.Setenv(k, v) + } + defer os.Clearenv() + + job := &PullSecretJob{} + tasks, err := job.GetTasks() + + if tt.expectError { + if err == nil { + t.Error("expected error, got nil") + } + return + } + + if err != nil { + t.Errorf("expected no error, got: %v", err) + return + } + + if len(tasks) != 1 { + t.Errorf("expected 1 task, got %d", len(tasks)) + return + } + + task, ok := tasks[0].(PullSecretTask) + if !ok { + t.Error("expected task to be PullSecretTask") + return + } + + if tt.checkTask != nil { + tt.checkTask(t, task) + } + }) + } +} + +// TestIsRetryable tests the retry logic for different error codes +func TestIsRetryable(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + { + name: "retryable - Unavailable", + err: status.Error(codes.Unavailable, "service unavailable"), + expected: true, + }, + { + name: "retryable - DeadlineExceeded", + err: status.Error(codes.DeadlineExceeded, "deadline exceeded"), + expected: true, + }, + { + name: "retryable - Internal", + err: status.Error(codes.Internal, "internal error"), + expected: true, + }, + { + name: "retryable - ResourceExhausted", + err: status.Error(codes.ResourceExhausted, "rate limit exceeded"), + expected: true, + }, + { + name: "not retryable - PermissionDenied", + err: status.Error(codes.PermissionDenied, "permission denied"), + expected: false, + }, + { + name: "not retryable - NotFound", + err: status.Error(codes.NotFound, "not found"), + expected: false, + }, + { + name: "not retryable - AlreadyExists", + err: status.Error(codes.AlreadyExists, "already exists"), + expected: false, + }, + { + name: "not retryable - InvalidArgument", + err: status.Error(codes.InvalidArgument, "invalid argument"), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isRetryable(tt.err) + if result != tt.expected { + t.Errorf("expected isRetryable(%v) = %v, got %v", tt.err, tt.expected, result) + } + }) + } +} + +// TestRetryWithBackoff tests the retry mechanism +func TestRetryWithBackoff(t *testing.T) { + t.Run("success on first try", func(t *testing.T) { + attempts := 0 + fn := func() error { + attempts++ + return nil + } + + ctx := context.Background() + err := retryWithBackoff(ctx, fn, 3) + + if err != nil { + t.Errorf("expected no error, got: %v", err) + } + + if attempts != 1 { + t.Errorf("expected 1 attempt, got %d", attempts) + } + }) + + t.Run("success after retries", func(t *testing.T) { + attempts := 0 + fn := func() error { + attempts++ + if attempts < 3 { + return status.Error(codes.Unavailable, "unavailable") + } + return nil + } + + ctx := context.Background() + err := retryWithBackoff(ctx, fn, 5) + + if err != nil { + t.Errorf("expected no error, got: %v", err) + } + + if attempts != 3 { + t.Errorf("expected 3 attempts, got %d", attempts) + } + }) + + t.Run("max retries exceeded", func(t *testing.T) { + attempts := 0 + fn := func() error { + attempts++ + return status.Error(codes.Unavailable, "unavailable") + } + + ctx := context.Background() + err := retryWithBackoff(ctx, fn, 3) + + if err == nil { + t.Error("expected error, got nil") + } + + if attempts != 3 { + t.Errorf("expected 3 attempts, got %d", attempts) + } + }) + + t.Run("non-retryable error stops immediately", func(t *testing.T) { + attempts := 0 + fn := func() error { + attempts++ + return status.Error(codes.PermissionDenied, "permission denied") + } + + ctx := context.Background() + err := retryWithBackoff(ctx, fn, 3) + + if err == nil { + t.Error("expected error, got nil") + } + + if attempts != 1 { + t.Errorf("expected 1 attempt (no retries), got %d", attempts) + } + }) + + t.Run("context cancellation", func(t *testing.T) { + attempts := 0 + fn := func() error { + attempts++ + return status.Error(codes.Unavailable, "unavailable") + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // Cancel immediately + + err := retryWithBackoff(ctx, fn, 3) + + if err != context.Canceled { + t.Errorf("expected context.Canceled error, got: %v", err) + } + }) +} + +// TestLogStructured verifies that logStructured doesn't panic +func TestLogStructured(t *testing.T) { + tests := []struct { + name string + level string + clusterID string + gcpProject string + operation string + durationMs int64 + message string + version string + }{ + { + name: "complete log entry", + level: "info", + clusterID: "cls-123", + gcpProject: "test-project", + operation: "test-operation", + durationMs: 100, + message: "test message", + version: "v1", + }, + { + name: "log entry without duration", + level: "error", + clusterID: "cls-456", + gcpProject: "test-project", + operation: "test-operation", + durationMs: 0, + message: "error message", + version: "", + }, + { + name: "log entry without version", + level: "info", + clusterID: "cls-789", + gcpProject: "test-project", + operation: "test-operation", + durationMs: 200, + message: "test message", + version: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test should not panic + defer func() { + if r := recover(); r != nil { + t.Errorf("logStructured panicked: %v", r) + } + }() + + logStructured(tt.level, tt.clusterID, tt.gcpProject, tt.operation, tt.durationMs, tt.message, tt.version) + }) + } +} diff --git a/cmd/pull-secret/main.go b/cmd/pull-secret/main.go new file mode 100644 index 0000000..3dfeb0b --- /dev/null +++ b/cmd/pull-secret/main.go @@ -0,0 +1,52 @@ +package main + +import ( + "context" + "flag" + + logger "github.com/openshift-online/ocm-service-common/pkg/ocmlogger" + "github.com/spf13/cobra" + "gitlab.cee.redhat.com/service/hyperfleet/mvp/cmd/pull-secret/jobs" + + // This package is used to make the runtime maxprocs cGroup aware rather than + // using the number of available machine cores. This is necessary for containerized + // applications so the Go scheduler doesnt overcommit compute and cause the container + // to be throttled: https://github.com/golang/go/issues/33803 + // + // This will be a noop in non-containerized environments and also obeys the GOMAXPROCS + // env override. + _ "go.uber.org/automaxprocs" +) + +func init() { + logger.SetTrimList([]string{"pull-secret", "pkg"}) + _ = logger.SetLogLevel(logger.OCM_LOG_LEVEL_DEFAULT) //nolint:errcheck // Default log level, error can be safely ignored +} + +func main() { + ctx := context.Background() + ulog := logger.NewOCMLogger(ctx) + + // This is needed to make `glog` believe that the flags have already been parsed, otherwise + // every log messages is prefixed by an error message stating the the flags haven't been + // parsed. + _ = flag.CommandLine.Parse([]string{}) //nolint:errcheck // Parse flags, error can be safely ignored + + // Always log to stderr by default + if err := flag.Set("logtostderr", "true"); err != nil { + ulog.Contextual().Info("Unable to set logtostderr to true") + } + + rootCmd := &cobra.Command{ + Use: "pull-secret", + Long: "pull-secret job runner", + } + + // Add job command + jobCmd := jobs.NewJobCommand(ctx) + rootCmd.AddCommand(jobCmd) + + if err := rootCmd.Execute(); err != nil { + ulog.Contextual().Fatal(err, "error running command") + } +} From 2f78ce2f1acacb1751b266e723ff3e18178b14e9 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 18:19:47 -0300 Subject: [PATCH 05/11] Fix golangci-lint configuration validation error --- .golangci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 980570c..0b0bbe2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -28,8 +28,6 @@ linters-settings: govet: enable: - shadow - disable: - - check-shadowing errcheck: check-type-assertions: true From 54af59be45304fd5d18c4193df5408647d8a6495 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 19:07:38 -0300 Subject: [PATCH 06/11] apply additional improvements suggested by CodeRabbit --- cmd/pull-secret/jobs/README_TESTS.md | 2 +- k8s/README.md | 2 +- k8s/deploy.sh | 17 +++++++++++------ k8s/job.yaml | 2 +- k8s/serviceaccount.yaml | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cmd/pull-secret/jobs/README_TESTS.md b/cmd/pull-secret/jobs/README_TESTS.md index c893c19..f6f2bf9 100644 --- a/cmd/pull-secret/jobs/README_TESTS.md +++ b/cmd/pull-secret/jobs/README_TESTS.md @@ -155,4 +155,4 @@ Tests are automatically run in GitHub Actions on every pull request: - Build check - Test check with coverage report -See `.github/workflows/ci.yml` for CI configuration. +See `.github/workflows/ci.yml` for CI configuration on GitHub. diff --git a/k8s/README.md b/k8s/README.md index 44fb446..3a92ccc 100644 --- a/k8s/README.md +++ b/k8s/README.md @@ -115,7 +115,7 @@ kubectl describe pod $(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{ Verify Workload Identity binding: ```bash # Check if SA exists -kubectl get sa pullsecret-adapter-job -o yaml +kubectl get sa pullsecret-adapter -o yaml # Check GCP IAM binding gcloud iam service-accounts get-iam-policy \ diff --git a/k8s/deploy.sh b/k8s/deploy.sh index a371b3a..3a91a28 100755 --- a/k8s/deploy.sh +++ b/k8s/deploy.sh @@ -16,17 +16,17 @@ echo "Creating ServiceAccount with Workload Identity..." kubectl apply -f k8s/serviceaccount.yaml echo "" -# Wait a bit for SA to be created -sleep 2 +# Wait for ServiceAccount to be created +kubectl wait --for=jsonpath='{.metadata.name}'=pullsecret-adapter sa/pullsecret-adapter --timeout=30s # Apply Job echo "Creating Job..." kubectl apply -f k8s/job.yaml echo "" -# Wait for job to start -echo "Waiting for job to start..." -sleep 3 +# Wait for job to be created +echo "Waiting for job to be ready..." +kubectl wait --for=condition=ready pod -l app=pullsecret-adapter --timeout=60s || true # Show job status echo "Job status:" @@ -39,7 +39,12 @@ kubectl get pods -l app=pullsecret-adapter echo "" # Get pod name -POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') +POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") + +if [ -z "$POD_NAME" ]; then + echo "Warning: No pod found yet. Use 'kubectl get pods -l app=pullsecret-adapter' to check status." + POD_NAME="" +fi echo "====================================" echo "Job deployed successfully!" diff --git a/k8s/job.yaml b/k8s/job.yaml index 691ea10..2e5ff5f 100644 --- a/k8s/job.yaml +++ b/k8s/job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: name: pullsecret-test-job - namespace: default + namespace: hyperfleet-system labels: app: pullsecret-adapter job-type: test diff --git a/k8s/serviceaccount.yaml b/k8s/serviceaccount.yaml index b2bbfd9..074d6de 100644 --- a/k8s/serviceaccount.yaml +++ b/k8s/serviceaccount.yaml @@ -2,6 +2,6 @@ apiVersion: v1 kind: ServiceAccount metadata: name: pullsecret-adapter - namespace: default + namespace: hyperfleet-system annotations: iam.gke.io/gcp-service-account: pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com From 7a2da97eeafdb294036343f3cde87be726cb1888 Mon Sep 17 00:00:00 2001 From: ldornele Date: Tue, 16 Dec 2025 19:55:42 -0300 Subject: [PATCH 07/11] Fix deployment script namespace and job wait conditions --- k8s/deploy.sh | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/k8s/deploy.sh b/k8s/deploy.sh index 3a91a28..84e2a46 100755 --- a/k8s/deploy.sh +++ b/k8s/deploy.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e +NAMESPACE="hyperfleet-system" + echo "====================================" echo "Deploying Pull Secret Job to GKE" echo "====================================" @@ -11,38 +13,49 @@ echo "Checking cluster connection..." kubectl cluster-info | head -1 echo "" +# Create namespace if it doesn't exist +echo "Creating namespace $NAMESPACE if it doesn't exist..." +kubectl create namespace $NAMESPACE --dry-run=client -o yaml | kubectl apply -f - +echo "" + # Apply ServiceAccount echo "Creating ServiceAccount with Workload Identity..." kubectl apply -f k8s/serviceaccount.yaml echo "" # Wait for ServiceAccount to be created -kubectl wait --for=jsonpath='{.metadata.name}'=pullsecret-adapter sa/pullsecret-adapter --timeout=30s +kubectl wait --for=jsonpath='{.metadata.name}'=pullsecret-adapter sa/pullsecret-adapter -n $NAMESPACE --timeout=30s # Apply Job echo "Creating Job..." kubectl apply -f k8s/job.yaml echo "" -# Wait for job to be created -echo "Waiting for job to be ready..." -kubectl wait --for=condition=ready pod -l app=pullsecret-adapter --timeout=60s || true +# Wait a moment for pod to be created +echo "Waiting for pod to be created..." +sleep 2 # Show job status echo "Job status:" -kubectl get job pullsecret-test-job +kubectl get job pullsecret-test-job -n $NAMESPACE echo "" # Show pod status echo "Pod status:" -kubectl get pods -l app=pullsecret-adapter +kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE +echo "" + +# Wait for job to complete (optional - can comment out if you want to just deploy and check manually) +echo "Waiting for job to complete (this may take a minute)..." +kubectl wait --for=condition=complete --timeout=120s job/pullsecret-test-job -n $NAMESPACE || \ +kubectl wait --for=condition=failed --timeout=10s job/pullsecret-test-job -n $NAMESPACE || true echo "" # Get pod name -POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") +POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") if [ -z "$POD_NAME" ]; then - echo "Warning: No pod found yet. Use 'kubectl get pods -l app=pullsecret-adapter' to check status." + echo "Warning: No pod found yet. Use 'kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE' to check status." POD_NAME="" fi @@ -51,14 +64,14 @@ echo "Job deployed successfully!" echo "====================================" echo "" echo "Monitor job progress:" -echo " kubectl get job pullsecret-test-job -w" +echo " kubectl get job pullsecret-test-job -n $NAMESPACE -w" echo "" echo "View logs:" -echo " kubectl logs -f $POD_NAME" +echo " kubectl logs -f $POD_NAME -n $NAMESPACE" echo "" echo "Check job status:" -echo " kubectl describe job pullsecret-test-job" +echo " kubectl describe job pullsecret-test-job -n $NAMESPACE" echo "" echo "Delete job when done:" -echo " kubectl delete job pullsecret-test-job" +echo " kubectl delete job pullsecret-test-job -n $NAMESPACE" echo "" From 4340c17451f100e4838f76c1cb5b8616557acff0 Mon Sep 17 00:00:00 2001 From: ldornele Date: Fri, 19 Dec 2025 16:31:15 -0300 Subject: [PATCH 08/11] Add dry-run mode and constants --- cmd/pull-secret/jobs/pull_secret.go | 32 +++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/cmd/pull-secret/jobs/pull_secret.go b/cmd/pull-secret/jobs/pull_secret.go index 1843bd7..a4a1311 100644 --- a/cmd/pull-secret/jobs/pull_secret.go +++ b/cmd/pull-secret/jobs/pull_secret.go @@ -16,11 +16,19 @@ import ( "google.golang.org/grpc/status" ) +const ( + pullSecretTaskName = "pull-secret-mvp" + + // defaultPullSecretData is a fake pull secret used for testing when PULL_SECRET_DATA is not provided + defaultPullSecretData = `{"auths":{"cloud.openshift.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"quay.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.connect.redhat.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.redhat.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"}}}` +) + type PullSecretTask struct { PullSecret string GCPProjectID string ClusterID string SecretName string + DryRun bool } var PullSecretJobArgs struct { @@ -28,10 +36,11 @@ var PullSecretJobArgs struct { } type PullSecretJob struct { + DryRun bool } func (e PullSecretTask) TaskName() string { - return "pull-secret-mvp" + return pullSecretTaskName } func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { @@ -46,7 +55,7 @@ func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { // Use fake pull secret for testing if PULL_SECRET_DATA is not provided if pullSecretData == "" { - pullSecretData = `{"auths":{"cloud.openshift.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"quay.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.connect.redhat.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.redhat.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"}}}` + pullSecretData = defaultPullSecretData } // Auto-derive secret name if not provided @@ -59,6 +68,7 @@ func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { GCPProjectID: gcpProjectID, ClusterID: clusterID, SecretName: secretName, + DryRun: pullsecretJob.DryRun, }) return tasks, nil @@ -71,7 +81,9 @@ func (pullsecretJob *PullSecretJob) GetMetadata() job.Metadata { } } -func (pullsecretJob *PullSecretJob) AddFlags(flags *pflag.FlagSet) {} +func (pullsecretJob *PullSecretJob) AddFlags(flags *pflag.FlagSet) { + flags.BoolVar(&pullsecretJob.DryRun, "dry-run", false, "Dry run mode - validate authentication and configuration without creating/updating secrets") +} func (pullsecretJob *PullSecretJob) GetWorkerCount() int { return 1 @@ -91,7 +103,11 @@ func (e PullSecretTask) Process(ctx context.Context) error { return fmt.Errorf("invalid pull secret format: %w", err) } - logStructured("info", e.ClusterID, e.GCPProjectID, "start", 0, "Starting pull secret storage operation", "") + if e.DryRun { + logStructured("info", e.ClusterID, e.GCPProjectID, "start", 0, "Starting pull secret storage operation (DRY RUN MODE)", "") + } else { + logStructured("info", e.ClusterID, e.GCPProjectID, "start", 0, "Starting pull secret storage operation", "") + } // Initialize Secret Manager client client, err := secretmanager.NewClient(ctx) @@ -107,6 +123,14 @@ func (e PullSecretTask) Process(ctx context.Context) error { logStructured("info", e.ClusterID, e.GCPProjectID, "client-initialized", 0, "Successfully initialized Secret Manager client", "") + // In dry-run mode, skip actual secret operations + if e.DryRun { + logStructured("info", e.ClusterID, e.GCPProjectID, "dry-run", 0, "DRY RUN: Skipping secret creation/update operations", "") + logStructured("info", e.ClusterID, e.GCPProjectID, "dry-run", 0, fmt.Sprintf("DRY RUN: Would create/update secret: %s", e.SecretName), "") + logStructured("info", e.ClusterID, e.GCPProjectID, "completed", 0, "DRY RUN completed successfully - authentication validated", "") + return nil + } + // Create or update secret with retry logic if err := retryWithBackoff(ctx, func() error { return e.createOrUpdateSecret(ctx, client) From 3455f691811a93523b8b13b10b35411243ba636d Mon Sep 17 00:00:00 2001 From: ldornele Date: Fri, 19 Dec 2025 17:17:45 -0300 Subject: [PATCH 09/11] apply additional improvements suggested by CodeRabbit --- cmd/pull-secret/jobs/pull_secret.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cmd/pull-secret/jobs/pull_secret.go b/cmd/pull-secret/jobs/pull_secret.go index a4a1311..1bbd56e 100644 --- a/cmd/pull-secret/jobs/pull_secret.go +++ b/cmd/pull-secret/jobs/pull_secret.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "log" + "math/rand" "os" "time" @@ -31,10 +32,6 @@ type PullSecretTask struct { DryRun bool } -var PullSecretJobArgs struct { - name string -} - type PullSecretJob struct { DryRun bool } @@ -327,7 +324,9 @@ func retryWithBackoff(ctx context.Context, fn func() error, maxRetries int) erro if i < maxRetries-1 { // Calculate backoff with jitter (±20%) baseBackoff := time.Duration(1< Date: Wed, 7 Jan 2026 17:08:11 -0300 Subject: [PATCH 10/11] [HYPERFLEET-273]: refactoring after code review feedback --- charts/pull-secret/.helmignore | 23 +++ charts/pull-secret/Chart.yaml | 13 ++ charts/pull-secret/README.md | 193 ++++++++++++++++++ charts/pull-secret/templates/_helpers.tpl | 57 ++++++ charts/pull-secret/templates/job.yaml | 35 ++++ .../pull-secret/templates/serviceaccount.yaml | 9 + charts/pull-secret/values.yaml | 76 +++++++ cmd/pull-secret/jobs/pull_secret.go | 35 ++-- k8s/README.md | 163 --------------- k8s/deploy.sh | 77 ------- k8s/job.yaml | 46 ----- k8s/serviceaccount.yaml | 7 - 12 files changed, 427 insertions(+), 307 deletions(-) create mode 100644 charts/pull-secret/.helmignore create mode 100644 charts/pull-secret/Chart.yaml create mode 100644 charts/pull-secret/README.md create mode 100644 charts/pull-secret/templates/_helpers.tpl create mode 100644 charts/pull-secret/templates/job.yaml create mode 100644 charts/pull-secret/templates/serviceaccount.yaml create mode 100644 charts/pull-secret/values.yaml delete mode 100644 k8s/README.md delete mode 100755 k8s/deploy.sh delete mode 100644 k8s/job.yaml delete mode 100644 k8s/serviceaccount.yaml diff --git a/charts/pull-secret/.helmignore b/charts/pull-secret/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/pull-secret/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/pull-secret/Chart.yaml b/charts/pull-secret/Chart.yaml new file mode 100644 index 0000000..ab540dc --- /dev/null +++ b/charts/pull-secret/Chart.yaml @@ -0,0 +1,13 @@ +apiVersion: v2 +name: pull-secret +description: HyperFleet Pull Secret Adapter - Manages pull secrets in GCP Secret Manager +type: application +version: 0.1.0 +appVersion: "1.0" +keywords: + - hyperfleet + - pull-secret + - gcp + - secret-manager +maintainers: + - name: HyperFleet Team diff --git a/charts/pull-secret/README.md b/charts/pull-secret/README.md new file mode 100644 index 0000000..f89a0af --- /dev/null +++ b/charts/pull-secret/README.md @@ -0,0 +1,193 @@ +# Pull Secret Adapter Helm Chart + +This Helm chart deploys the HyperFleet Pull Secret Adapter as a Kubernetes Job on GKE. + +## Prerequisites + +1. **Helm 3.x installed** + ```bash + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + ``` + +2. **kubectl configured for your GKE cluster** + ```bash + gcloud container clusters get-credentials YOUR_CLUSTER_NAME \ + --zone=YOUR_ZONE \ + --project=YOUR_PROJECT_ID + ``` + +3. **Workload Identity configured** + - Service Account: `your-service-account@your-project.iam.gserviceaccount.com` + - Workload Pool: `your-project.svc.id.goog` + +## Installation + +### Quick Start + +Deploy with default values: + +```bash +helm install pullsecret-job ./charts/pull-secret \ + --namespace hyperfleet-system \ + --create-namespace +``` + +### Custom Values + +Deploy with custom configuration: + +```bash +helm install pullsecret-job ./charts/pull-secret \ + --namespace hyperfleet-system \ + --create-namespace \ + --set env.gcpProjectId=my-project \ + --set env.clusterId=my-cluster-123 \ + --set env.pullSecretData='{"auths":{...}}' \ + --set image.tag=latest +``` + +### Using a Values File + +Create a custom values file (`my-values.yaml`): + +```yaml +env: + gcpProjectId: "my-gcp-project" + clusterId: "my-cluster-123" + secretName: "hyperfleet-my-cluster-123-pull-secret" + pullSecretData: '{"auths":{"registry.example.com":{"auth":"...","email":"user@example.com"}}}' + +serviceAccount: + gcpServiceAccount: "my-service-account@my-project.iam.gserviceaccount.com" + +image: + tag: "v1.0.0" +``` + +Then install: + +```bash +helm install pullsecret-job ./charts/pull-secret \ + --namespace hyperfleet-system \ + --create-namespace \ + -f my-values.yaml +``` + +## Configuration + +The following table lists the configurable parameters: + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `namespace` | Kubernetes namespace | `hyperfleet-system` | +| `job.name` | Job name | `pullsecret-job` | +| `job.backoffLimit` | Number of retries on failure | `3` | +| `job.ttlSecondsAfterFinished` | Cleanup delay after completion | `3600` (1 hour) | +| `image.repository` | Container image repository | `quay.io/hyperfleet/pull-secret` | +| `image.tag` | Container image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `Always` | +| `serviceAccount.name` | Kubernetes ServiceAccount name | `pullsecret-adapter` | +| `serviceAccount.gcpServiceAccount` | GCP service account for Workload Identity | `your-service-account@your-project.iam.gserviceaccount.com` | +| `env.gcpProjectId` | GCP project ID | `your-gcp-project` | +| `env.clusterId` | Cluster identifier | `your-cluster-id` | +| `env.secretName` | Secret name in GCP Secret Manager | `hyperfleet-your-cluster-id-pull-secret` | +| `env.pullSecretData` | Pull secret JSON data (required) | `{"auths":{...}}` | +| `resources.requests.cpu` | CPU request | `100m` | +| `resources.requests.memory` | Memory request | `128Mi` | +| `resources.limits.cpu` | CPU limit | `500m` | +| `resources.limits.memory` | Memory limit | `512Mi` | + +## Usage + +### Monitoring + +Check job status: +```bash +helm status pullsecret-job -n hyperfleet-system +kubectl get job pullsecret-job -n hyperfleet-system +``` + +View logs: +```bash +kubectl logs -f job/pullsecret-job -n hyperfleet-system +``` + +### Upgrading + +Upgrade the deployment with new values: +```bash +helm upgrade pullsecret-job ./charts/pull-secret \ + --namespace hyperfleet-system \ + --set image.tag=v1.1.0 +``` + +### Uninstalling + +Remove the job: +```bash +helm uninstall pullsecret-job -n hyperfleet-system +``` + +## Dry Run Mode + +Test without creating secrets: +```bash +helm install pullsecret-job ./charts/pull-secret \ + --namespace hyperfleet-system \ + --dry-run --debug +``` + +## Troubleshooting + +### View rendered templates +```bash +helm template pullsecret-job ./charts/pull-secret +``` + +### Check deployment issues +```bash +kubectl describe job pullsecret-job -n hyperfleet-system +kubectl get events -n hyperfleet-system --sort-by='.lastTimestamp' +``` + +### Authentication errors + +Verify Workload Identity binding: +```bash +# Check ServiceAccount +kubectl get sa pullsecret-adapter -n hyperfleet-system -o yaml + +# Check GCP IAM binding +gcloud iam service-accounts get-iam-policy \ + your-service-account@your-project.iam.gserviceaccount.com \ + --project=your-project +``` + +## Development + +### Linting + +Lint the chart: +```bash +helm lint ./charts/pull-secret +``` + +### Testing + +Test template rendering: +```bash +helm template test-release ./charts/pull-secret --debug +``` + +### Packaging + +Package the chart: +```bash +helm package ./charts/pull-secret +``` + +## References + +- [Helm Documentation](https://helm.sh/docs/) +- [GKE Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) +- [Kubernetes Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) diff --git a/charts/pull-secret/templates/_helpers.tpl b/charts/pull-secret/templates/_helpers.tpl new file mode 100644 index 0000000..f64b21c --- /dev/null +++ b/charts/pull-secret/templates/_helpers.tpl @@ -0,0 +1,57 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "pull-secret.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "pull-secret.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "pull-secret.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "pull-secret.labels" -}} +helm.sh/chart: {{ include "pull-secret.chart" . }} +{{ include "pull-secret.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "pull-secret.selectorLabels" -}} +app.kubernetes.io/name: {{ include "pull-secret.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app: {{ .Values.labels.app }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "pull-secret.serviceAccountName" -}} +{{- default "pullsecret-adapter" .Values.serviceAccount.name }} +{{- end }} diff --git a/charts/pull-secret/templates/job.yaml b/charts/pull-secret/templates/job.yaml new file mode 100644 index 0000000..4c19d49 --- /dev/null +++ b/charts/pull-secret/templates/job.yaml @@ -0,0 +1,35 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Values.job.name }} + namespace: {{ .Values.namespace }} + labels: + {{- include "pull-secret.labels" . | nindent 4 }} + job-type: {{ .Values.labels.jobType }} +spec: + backoffLimit: {{ .Values.job.backoffLimit }} + ttlSecondsAfterFinished: {{ .Values.job.ttlSecondsAfterFinished }} + template: + metadata: + labels: + {{- include "pull-secret.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "pull-secret.serviceAccountName" . }} + restartPolicy: {{ .Values.job.restartPolicy }} + containers: + - name: pull-secret + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: GCP_PROJECT_ID + value: {{ .Values.env.gcpProjectId | quote }} + - name: CLUSTER_ID + value: {{ .Values.env.clusterId | quote }} + - name: SECRET_NAME + value: {{ .Values.env.secretName | quote }} + - name: PULL_SECRET_DATA + value: {{ .Values.env.pullSecretData | quote }} + resources: + {{- toYaml .Values.resources | nindent 10 }} + securityContext: + {{- toYaml .Values.securityContext | nindent 10 }} diff --git a/charts/pull-secret/templates/serviceaccount.yaml b/charts/pull-secret/templates/serviceaccount.yaml new file mode 100644 index 0000000..70a85aa --- /dev/null +++ b/charts/pull-secret/templates/serviceaccount.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "pull-secret.serviceAccountName" . }} + namespace: {{ .Values.namespace }} + labels: + {{- include "pull-secret.labels" . | nindent 4 }} + annotations: + iam.gke.io/gcp-service-account: {{ .Values.serviceAccount.gcpServiceAccount }} diff --git a/charts/pull-secret/values.yaml b/charts/pull-secret/values.yaml new file mode 100644 index 0000000..326db90 --- /dev/null +++ b/charts/pull-secret/values.yaml @@ -0,0 +1,76 @@ +# Default values for pull-secret adapter + +# Namespace where the job will be deployed +namespace: hyperfleet-system + +# Job configuration +job: + # Name of the Kubernetes Job + name: pullsecret-job + + # Number of times to retry the job on failure + backoffLimit: 3 + + # Time in seconds after job completion before cleanup (1 hour) + ttlSecondsAfterFinished: 3600 + + # Job restart policy + restartPolicy: Never + +# Image configuration +image: + repository: quay.io/hyperfleet/pull-secret + tag: latest + pullPolicy: Always + +# ServiceAccount configuration +serviceAccount: + # Name of the Kubernetes ServiceAccount + name: pullsecret-adapter + + # GCP service account email for Workload Identity + # IMPORTANT: Replace with your actual GCP service account + gcpServiceAccount: your-service-account@your-project.iam.gserviceaccount.com + +# Environment variables +env: + # GCP project ID where secrets will be stored + # IMPORTANT: Replace with your actual GCP project ID + gcpProjectId: "your-gcp-project" + + # Cluster identifier + # IMPORTANT: Replace with your actual cluster ID + clusterId: "your-cluster-id" + + # Secret name in GCP Secret Manager (auto-derived if not provided) + # This will be auto-generated as: hyperfleet-{clusterId}-pull-secret + secretName: "hyperfleet-your-cluster-id-pull-secret" + + # Pull secret JSON data (must be provided) + # IMPORTANT: Replace with your actual pull secret data + # This is a dummy example - use your real OpenShift pull secret + pullSecretData: '{"auths":{"registry.example.com":{"auth":"ZXhhbXBsZTpleGFtcGxl","email":"user@example.com"}}}' + +# Resource limits +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + +# Security context +securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + +# Labels +labels: + app: pullsecret-adapter + jobType: pull-secret diff --git a/cmd/pull-secret/jobs/pull_secret.go b/cmd/pull-secret/jobs/pull_secret.go index 1bbd56e..1a6d16e 100644 --- a/cmd/pull-secret/jobs/pull_secret.go +++ b/cmd/pull-secret/jobs/pull_secret.go @@ -19,9 +19,6 @@ import ( const ( pullSecretTaskName = "pull-secret-mvp" - - // defaultPullSecretData is a fake pull secret used for testing when PULL_SECRET_DATA is not provided - defaultPullSecretData = `{"auths":{"cloud.openshift.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"quay.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.connect.redhat.com":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"},"registry.redhat.io":{"auth":"ZmFrZXVzZXI6ZmFrZXBhc3N3b3Jk","email":"user@example.com"}}}` ) type PullSecretTask struct { @@ -40,6 +37,21 @@ func (e PullSecretTask) TaskName() string { return pullSecretTaskName } +// projectPath returns the GCP project path +func (e PullSecretTask) projectPath() string { + return fmt.Sprintf("projects/%s", e.GCPProjectID) +} + +// secretPath returns the full path to the secret +func (e PullSecretTask) secretPath() string { + return fmt.Sprintf("projects/%s/secrets/%s", e.GCPProjectID, e.SecretName) +} + +// secretVersionPath returns the path to a specific secret version +func (e PullSecretTask) secretVersionPath(version string) string { + return fmt.Sprintf("projects/%s/secrets/%s/versions/%s", e.GCPProjectID, e.SecretName, version) +} + func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { var tasks []job.Task @@ -50,9 +62,9 @@ func (pullsecretJob *PullSecretJob) GetTasks() ([]job.Task, error) { secretName := os.Getenv("SECRET_NAME") pullSecretData := os.Getenv("PULL_SECRET_DATA") - // Use fake pull secret for testing if PULL_SECRET_DATA is not provided + // Fail fast if PULL_SECRET_DATA is not provided if pullSecretData == "" { - pullSecretData = defaultPullSecretData + return nil, fmt.Errorf("PULL_SECRET_DATA environment variable is required but not set") } // Auto-derive secret name if not provided @@ -203,10 +215,8 @@ func (e PullSecretTask) createOrUpdateSecret(ctx context.Context, client *secret // secretExists checks if a secret exists in GCP Secret Manager func (e PullSecretTask) secretExists(ctx context.Context, client *secretmanager.Client) (bool, error) { - name := fmt.Sprintf("projects/%s/secrets/%s", e.GCPProjectID, e.SecretName) - req := &secretmanagerpb.GetSecretRequest{ - Name: name, + Name: e.secretPath(), } _, err := client.GetSecret(ctx, req) @@ -223,7 +233,7 @@ func (e PullSecretTask) secretExists(ctx context.Context, client *secretmanager. // createSecret creates a new secret in GCP Secret Manager func (e PullSecretTask) createSecret(ctx context.Context, client *secretmanager.Client) error { req := &secretmanagerpb.CreateSecretRequest{ - Parent: fmt.Sprintf("projects/%s", e.GCPProjectID), + Parent: e.projectPath(), SecretId: e.SecretName, Secret: &secretmanagerpb.Secret{ Replication: &secretmanagerpb.Replication{ @@ -251,10 +261,8 @@ func (e PullSecretTask) createSecret(ctx context.Context, client *secretmanager. // addSecretVersion adds a new version with pull secret data func (e PullSecretTask) addSecretVersion(ctx context.Context, client *secretmanager.Client) (string, error) { - parent := fmt.Sprintf("projects/%s/secrets/%s", e.GCPProjectID, e.SecretName) - req := &secretmanagerpb.AddSecretVersionRequest{ - Parent: parent, + Parent: e.secretPath(), Payload: &secretmanagerpb.SecretPayload{ Data: []byte(e.PullSecret), }, @@ -271,10 +279,9 @@ func (e PullSecretTask) addSecretVersion(ctx context.Context, client *secretmana // verifySecret verifies that the secret is accessible func (e PullSecretTask) verifySecret(ctx context.Context, client *secretmanager.Client) error { startTime := time.Now() - name := fmt.Sprintf("projects/%s/secrets/%s/versions/latest", e.GCPProjectID, e.SecretName) req := &secretmanagerpb.AccessSecretVersionRequest{ - Name: name, + Name: e.secretVersionPath("latest"), } result, err := client.AccessSecretVersion(ctx, req) diff --git a/k8s/README.md b/k8s/README.md deleted file mode 100644 index 3a92ccc..0000000 --- a/k8s/README.md +++ /dev/null @@ -1,163 +0,0 @@ -# Kubernetes Deployment for Pull Secret Job - -This directory contains Kubernetes manifests to deploy the Pull Secret Job on GKE cluster `hyperfleet-dev-croche`. - -## Prerequisites - -1. **GKE Auth Plugin installed:** - ```bash - sudo dnf install google-cloud-sdk-gke-gcloud-auth-plugin - ``` - -2. **kubectl configured:** - ```bash - gcloud container clusters get-credentials hyperfleet-dev-croche \ - --zone=us-central1-a \ - --project=sda-ccs-3 - ``` - -3. **Workload Identity configured:** - - Service Account: `pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com` - - Workload Pool: `sda-ccs-3.svc.id.goog` - -## Quick Start - -### Option 1: Using the deploy script (Recommended) - -```bash -./k8s/deploy.sh -``` - -### Option 2: Manual deployment - -```bash -# 1. Create ServiceAccount with Workload Identity annotation -kubectl apply -f k8s/serviceaccount.yaml - -# 2. Create the Job -kubectl apply -f k8s/job.yaml - -# 3. Monitor the job -kubectl get job pullsecret-test-job -w - -# 4. View logs -POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') -kubectl logs -f $POD_NAME -``` - -## Files - -- **serviceaccount.yaml** - Kubernetes ServiceAccount with Workload Identity binding -- **job.yaml** - Kubernetes Job that runs the pull-secret container -- **deploy.sh** - Automated deployment script - -## Workload Identity Setup - -The ServiceAccount is annotated to use GCP Workload Identity: - -```yaml -annotations: - iam.gke.io/gcp-service-account: pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com -``` - -This allows the pod to authenticate to GCP services without service account keys. - -## Environment Variables - -The Job is configured with the following environment variables: - -| Variable | Value | Description | -|----------|-------|-------------| -| `GCP_PROJECT_ID` | `sda-ccs-3` | GCP project where secrets are stored | -| `CLUSTER_ID` | `cls-test-123` | Cluster identifier | -| `SECRET_NAME` | `hyperfleet-cls-test-123-pull-secret` | Secret name in GCP | -| `PULL_SECRET_DATA` | `{...}` | Pull secret JSON data | - -## Monitoring - -### Check job status -```bash -kubectl get job pullsecret-test-job -kubectl describe job pullsecret-test-job -``` - -### View pod logs -```bash -kubectl logs -f $(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') -``` - -### Check events -```bash -kubectl get events --sort-by='.lastTimestamp' -``` - -## Cleanup - -```bash -# Delete the job (keeps pods for 1 hour for debugging) -kubectl delete job pullsecret-test-job - -# Force delete pods immediately -kubectl delete pods -l app=pullsecret-adapter -``` - -## Troubleshooting - -### Pod is not starting - -Check pod events: -```bash -kubectl describe pod $(kubectl get pods -l app=pullsecret-adapter -o jsonpath='{.items[0].metadata.name}') -``` - -### Authentication errors - -Verify Workload Identity binding: -```bash -# Check if SA exists -kubectl get sa pullsecret-adapter -o yaml - -# Check GCP IAM binding -gcloud iam service-accounts get-iam-policy \ - pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com \ - --project=sda-ccs-3 -``` - -### Image pull errors - -Verify the image exists and is accessible: -```bash -podman pull quay.io/ldornele/pull-secret:dev-f1bf914 -``` - -## Customizing the Job - -Edit `k8s/job.yaml` to: -- Change environment variables (CLUSTER_ID, SECRET_NAME, etc.) -- Update image tag -- Modify resource limits -- Change security settings - -After editing, reapply: -```bash -kubectl delete job pullsecret-test-job -kubectl apply -f k8s/job.yaml -``` - -## Production Considerations - -For production deployment: - -1. Use a dedicated namespace (e.g., `hyperfleet-system`) -2. Add resource quotas -3. Configure network policies -4. Set up pod disruption budgets -5. Add monitoring and alerting -6. Use ConfigMaps/Secrets for sensitive data -7. Configure proper RBAC - -## References - -- [GKE Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) -- [Kubernetes Jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) -- [Pod Security Standards](https://kubernetes.io/docs/concepts/security/pod-security-standards/) diff --git a/k8s/deploy.sh b/k8s/deploy.sh deleted file mode 100755 index 84e2a46..0000000 --- a/k8s/deploy.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -set -e - -NAMESPACE="hyperfleet-system" - -echo "====================================" -echo "Deploying Pull Secret Job to GKE" -echo "====================================" -echo "" - -# Check if kubectl is configured -echo "Checking cluster connection..." -kubectl cluster-info | head -1 -echo "" - -# Create namespace if it doesn't exist -echo "Creating namespace $NAMESPACE if it doesn't exist..." -kubectl create namespace $NAMESPACE --dry-run=client -o yaml | kubectl apply -f - -echo "" - -# Apply ServiceAccount -echo "Creating ServiceAccount with Workload Identity..." -kubectl apply -f k8s/serviceaccount.yaml -echo "" - -# Wait for ServiceAccount to be created -kubectl wait --for=jsonpath='{.metadata.name}'=pullsecret-adapter sa/pullsecret-adapter -n $NAMESPACE --timeout=30s - -# Apply Job -echo "Creating Job..." -kubectl apply -f k8s/job.yaml -echo "" - -# Wait a moment for pod to be created -echo "Waiting for pod to be created..." -sleep 2 - -# Show job status -echo "Job status:" -kubectl get job pullsecret-test-job -n $NAMESPACE -echo "" - -# Show pod status -echo "Pod status:" -kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE -echo "" - -# Wait for job to complete (optional - can comment out if you want to just deploy and check manually) -echo "Waiting for job to complete (this may take a minute)..." -kubectl wait --for=condition=complete --timeout=120s job/pullsecret-test-job -n $NAMESPACE || \ -kubectl wait --for=condition=failed --timeout=10s job/pullsecret-test-job -n $NAMESPACE || true -echo "" - -# Get pod name -POD_NAME=$(kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") - -if [ -z "$POD_NAME" ]; then - echo "Warning: No pod found yet. Use 'kubectl get pods -l app=pullsecret-adapter -n $NAMESPACE' to check status." - POD_NAME="" -fi - -echo "====================================" -echo "Job deployed successfully!" -echo "====================================" -echo "" -echo "Monitor job progress:" -echo " kubectl get job pullsecret-test-job -n $NAMESPACE -w" -echo "" -echo "View logs:" -echo " kubectl logs -f $POD_NAME -n $NAMESPACE" -echo "" -echo "Check job status:" -echo " kubectl describe job pullsecret-test-job -n $NAMESPACE" -echo "" -echo "Delete job when done:" -echo " kubectl delete job pullsecret-test-job -n $NAMESPACE" -echo "" diff --git a/k8s/job.yaml b/k8s/job.yaml deleted file mode 100644 index 2e5ff5f..0000000 --- a/k8s/job.yaml +++ /dev/null @@ -1,46 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: pullsecret-test-job - namespace: hyperfleet-system - labels: - app: pullsecret-adapter - job-type: test -spec: - backoffLimit: 3 - ttlSecondsAfterFinished: 3600 # Keep job for 1 hour after completion - template: - metadata: - labels: - app: pullsecret-adapter - spec: - serviceAccountName: pullsecret-adapter - restartPolicy: Never - containers: - - name: pull-secret - image: quay.io/ldornele/pull-secret:dev-f1bf914 - imagePullPolicy: Always - env: - - name: GCP_PROJECT_ID - value: "sda-ccs-3" - - name: CLUSTER_ID - value: "cls-test-123" - - name: SECRET_NAME - value: "hyperfleet-cls-test-123-pull-secret" - - name: PULL_SECRET_DATA - value: '{"auths":{"registry.redhat.io":{"auth":"dGVzdDp0ZXN0","email":"test@example.com"}}}' - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 500m - memory: 512Mi - securityContext: - runAsNonRoot: true - runAsUser: 1000 - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: - drop: - - ALL diff --git a/k8s/serviceaccount.yaml b/k8s/serviceaccount.yaml deleted file mode 100644 index 074d6de..0000000 --- a/k8s/serviceaccount.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: pullsecret-adapter - namespace: hyperfleet-system - annotations: - iam.gke.io/gcp-service-account: pullsecret-adapter@sda-ccs-3.iam.gserviceaccount.com From 1c20e8ccf5e2849c33e94c598e90f62c187f414b Mon Sep 17 00:00:00 2001 From: ldornele Date: Wed, 7 Jan 2026 17:59:51 -0300 Subject: [PATCH 11/11] [HYPERFLEET-273]: Tests updated and passing --- cmd/pull-secret/jobs/pull_secret_test.go | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/cmd/pull-secret/jobs/pull_secret_test.go b/cmd/pull-secret/jobs/pull_secret_test.go index 8780d0e..92b20e3 100644 --- a/cmd/pull-secret/jobs/pull_secret_test.go +++ b/cmd/pull-secret/jobs/pull_secret_test.go @@ -256,21 +256,12 @@ func TestPullSecretJob_GetTasks(t *testing.T) { }, }, { - name: "use fake pull secret when not provided", + name: "missing PULL_SECRET_DATA returns error", envVars: map[string]string{ "GCP_PROJECT_ID": "test-project", "CLUSTER_ID": "cls-789", }, - expectError: false, - checkTask: func(t *testing.T, task PullSecretTask) { - if task.PullSecret == "" { - t.Error("expected fake pull secret data, got empty string") - } - // Verify it's valid JSON - if err := validatePullSecret(task.PullSecret); err != nil { - t.Errorf("fake pull secret is invalid: %v", err) - } - }, + expectError: true, }, }