diff --git a/e2e-tests/ab-test-config-bundle.test.ts b/e2e-tests/ab-test-config-bundle.test.ts index 9c18b2f31..cec0a9cc0 100644 --- a/e2e-tests/ab-test-config-bundle.test.ts +++ b/e2e-tests/ab-test-config-bundle.test.ts @@ -105,10 +105,6 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => { await retry( async () => { const result = await run(['deploy', '--yes', '--json']); - if (result.exitCode !== 0) { - console.log('Initial deploy stdout:', result.stdout); - console.log('Initial deploy stderr:', result.stderr); - } expect(result.exitCode, `Initial deploy failed`).toBe(0); const json = parseJsonOutput(result.stdout) as { success: boolean }; expect(json.success).toBe(true); @@ -123,10 +119,12 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => { it.skipIf(!canRun)( 'adds config-bundle AB test with 90/10 split', async () => { - // Config bundles reference ARNs from deployed resources. - // Use placeholder bundle ARNs — the deploy step will validate or create them. - const controlBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/control-v1`; - const treatmentBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/treatment-v1`; + // Use placeholder bundle ARNs that satisfy the service format constraints. + // Real config bundles would be created separately; these test the AB test wiring. + const region = process.env.AWS_REGION ?? 'us-east-1'; + const account = process.env.AWS_ACCOUNT_ID ?? 
'000000000000'; + const controlBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/control-bundle-AbCdEfGhIj`; + const treatmentBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/treatment-bundle-AbCdEfGhIj`; const result = await run([ 'add', @@ -140,11 +138,11 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => { '--control-bundle', controlBundle, '--control-version', - 'v1', + '00000000-0000-0000-0000-000000000001', '--treatment-bundle', treatmentBundle, '--treatment-version', - 'v1', + '00000000-0000-0000-0000-000000000002', '--control-weight', '90', '--treatment-weight', diff --git a/e2e-tests/ab-test-target-based.test.ts b/e2e-tests/ab-test-target-based.test.ts index ac687e4fb..274ee447a 100644 --- a/e2e-tests/ab-test-target-based.test.ts +++ b/e2e-tests/ab-test-target-based.test.ts @@ -195,10 +195,6 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { await retry( async () => { const result = await run(['deploy', '--yes', '--json']); - if (result.exitCode !== 0) { - console.log('Deploy stdout:', result.stdout); - console.log('Deploy stderr:', result.stderr); - } expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0); const json = parseJsonOutput(result.stdout) as { success: boolean }; expect(json.success).toBe(true); @@ -210,6 +206,23 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { 600000 ); + it.skipIf(!canRun)( + 'AB test reaches RUNNING status after deploy', + async () => { + await retry( + async () => { + const result = await run(['ab-test', abTestName, '--json']); + expect(result.exitCode, `ab-test lookup failed: ${result.stdout} ${result.stderr}`).toBe(0); + const json = parseJsonOutput(result.stdout) as { executionStatus: string }; + expect(json.executionStatus, 'AB test should be RUNNING after deploy').toBe('RUNNING'); + }, + 12, + 15000 + ); + }, + 300000 + ); + it.skipIf(!canRun)( 'status shows all resources deployed', 
async () => { @@ -220,7 +233,7 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { const json = parseJsonOutput(result.stdout) as { success: boolean; - resources: { resourceType: string; name: string; deploymentState: string }[]; + resources: { resourceType: string; name: string; deploymentState: string; invocationUrl?: string }[]; }; expect(json.success).toBe(true); @@ -229,9 +242,12 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); expect(agent!.deploymentState).toBe('deployed'); - // Gateway should be deployed - const gateway = json.resources.find(r => r.resourceType === 'http-gateway' && r.name === `${abTestName}-gw`); - expect(gateway, 'HTTP gateway should appear in status').toBeDefined(); + // AB test should be deployed (HTTP gateways are not surfaced as top-level status resources) + const abTest = json.resources.find(r => r.resourceType === 'ab-test' && r.name === abTestName); + expect(abTest, `AB test "${abTestName}" should appear in status`).toBeDefined(); + expect(abTest!.deploymentState).toBe('deployed'); + // invocationUrl proves the HTTP gateway was deployed and wired up correctly + expect(abTest!.invocationUrl, 'AB test should have a gateway invocation URL').toBeTruthy(); }, 3, 15000 @@ -280,7 +296,7 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { 'promotes AB test (updates agentcore.json)', async () => { const result = await run(['promote', 'ab-test', abTestName, '--json']); - expect(result.exitCode, `Promote failed: ${result.stderr}`).toBe(0); + expect(result.exitCode, `Promote failed: ${result.stdout} ${result.stderr}`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); expect(json).toHaveProperty('promoted', true); diff --git a/scripts/run-e2e-local.sh b/scripts/run-e2e-local.sh new file mode 100755 index 000000000..81b9af3c1 --- /dev/null +++ 
b/scripts/run-e2e-local.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# Run E2E tests locally, replicating the GitHub Actions e2e-tests.yml workflow.
+#
+# Required env vars:
+#   E2E_ROLE_ARN — IAM role ARN to assume (grants access to the test account)
+#   E2E_SECRET_ARN — Secrets Manager ARN containing ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY
+#
+# Optional env vars:
+#   AWS_REGION — defaults to us-east-1
+#
+# Usage:
+#   export E2E_ROLE_ARN=arn:aws:iam::<ACCOUNT_ID>:role/<ROLE_NAME>
+#   export E2E_SECRET_ARN=arn:aws:secretsmanager:<REGION>:<ACCOUNT_ID>:secret:<SECRET_NAME>
+#   ./scripts/run-e2e-local.sh # runs strands-bedrock.test.ts (CI default)
+#   ./scripts/run-e2e-local.sh --all # runs the full e2e suite
+#   ./scripts/run-e2e-local.sh e2e-tests/foo.test.ts # runs a specific test file
+#
+# Side effects: installs the packed CLI globally (npm install -g) and, only when no
+# global git identity is configured, sets a placeholder user.email / user.name.
+#
+# Prerequisites: aws CLI, node >=20.19, npm, git, uv, jq
+
+set -euo pipefail
+
+ROLE_ARN="${E2E_ROLE_ARN:-}"
+SECRET_ARN="${E2E_SECRET_ARN:-}"
+AWS_REGION="${AWS_REGION:-us-east-1}"
+
+if [[ -z "$ROLE_ARN" ]]; then
+  echo "❌ E2E_ROLE_ARN is not set. Export it before running this script:" >&2
+  echo " export E2E_ROLE_ARN=arn:aws:iam::<ACCOUNT_ID>:role/<ROLE_NAME>" >&2
+  exit 1
+fi
+
+if [[ -z "$SECRET_ARN" ]]; then
+  echo "❌ E2E_SECRET_ARN is not set. Export it before running this script:" >&2
+  echo " export E2E_SECRET_ARN=arn:aws:secretsmanager:<REGION>:<ACCOUNT_ID>:secret:<SECRET_NAME>" >&2
+  exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# ── Parse arguments ────────────────────────────────────────────────────────────
+RUN_ALL=false
+TEST_FILES=()
+for arg in "$@"; do
+  if [[ "$arg" == "--all" ]]; then
+    RUN_ALL=true
+  else
+    TEST_FILES+=("$arg")
+  fi
+done
+
+echo "=== Assuming IAM role ==="
+CREDS=$(aws sts assume-role \
+  --role-arn "$ROLE_ARN" \
+  --role-session-name "local-e2e-$(date +%s)" \
+  --duration-seconds 3600 \
+  --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \
+  --output text)
+
+# Assign first, export second: `export VAR=$(cmd)` hides cmd's exit status from `set -e`.
+read -r AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN <<<"$CREDS"
+export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN AWS_REGION
+
+echo "✅ Assumed role successfully"
+
+echo "=== Fetching API keys from Secrets Manager ==="
+SECRET_JSON=$(aws secretsmanager get-secret-value \
+  --secret-id "$SECRET_ARN" \
+  --region "$AWS_REGION" \
+  --query SecretString \
+  --output text)
+
+# Mirror the GitHub workflow: parse-json-secrets maps keys to E2E_<KEY> then
+# the workflow maps them to the bare names the tests expect.
+ANTHROPIC_API_KEY=$(jq -r '.ANTHROPIC_API_KEY // empty' <<<"$SECRET_JSON")
+OPENAI_API_KEY=$(jq -r '.OPENAI_API_KEY // empty' <<<"$SECRET_JSON")
+GEMINI_API_KEY=$(jq -r '.GEMINI_API_KEY // empty' <<<"$SECRET_JSON")
+export ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY
+
+echo "✅ Secrets loaded (keys present: $(jq -r 'keys | join(", ")' <<<"$SECRET_JSON"))"
+
+echo "=== Setting AWS account env var ==="
+AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
+export AWS_ACCOUNT_ID
+echo "✅ AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID AWS_REGION=$AWS_REGION"
+
+echo "=== Configuring git (required for agentcore create) ==="
+# Only fill in a placeholder identity when none is configured, so a developer's
+# existing global git identity is never overwritten by a local run.
+git config --global --get user.email >/dev/null 2>&1 || git config --global user.email "ci@local" 2>/dev/null || true
+git config --global --get user.name >/dev/null 2>&1 || git config --global user.name "Local E2E" 2>/dev/null || true
+
+cd "$REPO_ROOT"
+
+echo "=== Installing dependencies ==="
+npm ci
+
+echo "=== Building CLI ==="
+npm run build
+
+echo "=== Installing CLI globally ==="
+TARBALL=$(npm pack | tail -1)
+npm install -g "$TARBALL"
+echo "✅ Installed: $(agentcore --version)"
+
+echo "=== Running E2E tests ==="
+if [[ "$RUN_ALL" == "true" ]]; then
+  echo "Running full e2e suite"
+  npx vitest run --project e2e
+elif [[ ${#TEST_FILES[@]} -gt 0 ]]; then
+  echo "Running: ${TEST_FILES[*]}"
+  npx vitest run --project e2e "${TEST_FILES[@]}"
+else
+  echo "Running default: e2e-tests/strands-bedrock.test.ts"
+  npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts
+fi
diff --git a/src/cli/commands/pause/__tests__/promote.test.ts b/src/cli/commands/pause/__tests__/promote.test.ts new file mode 100644 index 000000000..4b1ae200b --- /dev/null +++ b/src/cli/commands/pause/__tests__/promote.test.ts @@ -0,0 +1,59 @@ +import { waitForRunningThenStop } from '../promote-utils.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockGetABTest = vi.fn(); +const mockUpdateABTest = vi.fn(); + +vi.mock('../../../aws/agentcore-ab-tests', () => ({ + getABTest: (...args: unknown[]) => mockGetABTest(...args), + updateABTest: (...args: unknown[]) => 
mockUpdateABTest(...args), +})); + +describe('waitForRunningThenStop', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockUpdateABTest.mockResolvedValue({ executionStatus: 'STOPPED' }); + }); + + it('stops immediately when already RUNNING', async () => { + mockGetABTest.mockResolvedValue({ executionStatus: 'RUNNING' }); + + await waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 3, 0); + + expect(mockGetABTest).toHaveBeenCalledTimes(1); + expect(mockUpdateABTest).toHaveBeenCalledWith({ + region: 'us-east-1', + abTestId: 'abt-123', + executionStatus: 'STOPPED', + }); + }); + + it('polls until RUNNING then stops', async () => { + mockGetABTest + .mockResolvedValueOnce({ executionStatus: 'UPDATING' }) + .mockResolvedValueOnce({ executionStatus: 'UPDATING' }) + .mockResolvedValueOnce({ executionStatus: 'RUNNING' }); + + await waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 5, 0); + + expect(mockGetABTest).toHaveBeenCalledTimes(3); + expect(mockUpdateABTest).toHaveBeenCalledOnce(); + }); + + it('throws if AB test never reaches RUNNING', async () => { + mockGetABTest.mockResolvedValue({ executionStatus: 'UPDATING' }); + + await expect(waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 3, 0)).rejects.toThrow( + 'did not reach RUNNING state' + ); + + expect(mockGetABTest).toHaveBeenCalledTimes(3); + expect(mockUpdateABTest).not.toHaveBeenCalled(); + }); + + it('includes current status in the error message', async () => { + mockGetABTest.mockResolvedValue({ executionStatus: 'STOPPED' }); + + await expect(waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 2, 0)).rejects.toThrow('current: STOPPED'); + }); +}); diff --git a/src/cli/commands/pause/command.tsx b/src/cli/commands/pause/command.tsx index 4ad1cc0dc..e99e266aa 100644 --- a/src/cli/commands/pause/command.tsx +++ b/src/cli/commands/pause/command.tsx @@ -7,6 +7,7 @@ import type { OnlineEvalActionOptions } from '../../operations/eval'; import { COMMAND_DESCRIPTIONS } from 
'../../tui/copy'; import { requireProject } from '../../tui/guards'; import { getRegion } from '../shared/region-utils'; +import { waitForRunningThenStop } from './promote-utils'; import type { Command } from '@commander-js/extra-typings'; import { Text, render } from 'ink'; import React from 'react'; @@ -274,12 +275,7 @@ export const registerPromote = (program: Command) => { process.exit(1); } - // Stop the AB test - const result = await updateABTest({ - region, - abTestId, - executionStatus: 'STOPPED', - }); + const result = await waitForRunningThenStop(region, abTestId, name); // Apply promotion to agentcore.json const { promoteABTestConfig } = await import('../../operations/ab-test/promote'); diff --git a/src/cli/commands/pause/promote-utils.ts b/src/cli/commands/pause/promote-utils.ts new file mode 100644 index 000000000..9bca03f8a --- /dev/null +++ b/src/cli/commands/pause/promote-utils.ts
@@ -0,0 +1,38 @@
+import { getABTest, updateABTest } from '../../aws/agentcore-ab-tests';
+import type { UpdateABTestResult } from '../../aws/agentcore-ab-tests';
+
+/**
+ * Poll until the AB test reaches RUNNING status, then stop it.
+ *
+ * @param region - AWS region forwarded to `getABTest` and `updateABTest`.
+ * @param abTestId - Identifier of the AB test to poll and then stop.
+ * @param name - Display name of the test; used only in the error message.
+ * @param maxAttempts - Maximum number of status polls before giving up.
+ * @param delayMs - Pause between consecutive polls, in milliseconds.
+ * @returns The result of the `updateABTest` call that sets the status to STOPPED.
+ * @throws Error if the test is not RUNNING after `maxAttempts` polls.
+ */
+export async function waitForRunningThenStop(
+  region: string,
+  abTestId: string,
+  name: string,
+  maxAttempts = 12,
+  delayMs = 10_000
+): Promise<UpdateABTestResult> {
+  let currentStatus: string | undefined;
+  for (let attempt = 0; attempt < maxAttempts; attempt++) {
+    const current = await getABTest({ region, abTestId });
+    currentStatus = current.executionStatus;
+    if (currentStatus === 'RUNNING') break;
+    // Sleep only between polls: after the final attempt there is nothing left to wait for.
+    if (attempt < maxAttempts - 1) {
+      await new Promise(resolve => setTimeout(resolve, delayMs));
+    }
+  }
+  if (currentStatus !== 'RUNNING') {
+    throw new Error(
+      `AB test "${name}" did not reach RUNNING state after waiting (current: ${currentStatus}). Cannot promote.`
+    );
+  }
+  return updateABTest({ region, abTestId, executionStatus: 'STOPPED' });
+}
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts index b220cd99f..39a32d20f 100644 --- a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts +++ b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts @@ -516,22 +516,22 @@ describe('setupABTests', () => { const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument); expect(trustPolicy.Statement).toHaveLength(1); expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com'); + expect(trustPolicy.Statement[0].Condition.StringEquals['aws:SourceAccount']).toBeDefined(); + expect(trustPolicy.Statement[0].Condition.ArnLike['aws:SourceArn']).toContain('ab-test/*'); // Second call: PutRolePolicyCommand with inline policy const putPolicyCall = mockIAMSend.mock.calls[1]![0]; const policy = JSON.parse(putPolicyCall.input.PolicyDocument); const sids = policy.Statement.map((s: { Sid: string }) => s.Sid); - expect(sids).toContain('GatewayRuleStatement'); - expect(sids).toContain('GatewayReadStatement'); - expect(sids).toContain('GatewayListStatement'); - expect(sids).toContain('OnlineEvaluationConfigStatement'); - expect(sids).toContain('ConfigurationBundleReadStatement'); - expect(sids).toContain('CloudWatchLogReadStatement'); - expect(sids).toContain('CloudWatchIndexPolicyStatement'); - - // ListGateways must use wildcard resource (can't be scoped) - const listGatewayStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'GatewayListStatement'); - expect(listGatewayStmt.Resource).toEqual(['*']); + expect(sids).toContain('AgentCoreResources'); + expect(sids).toContain('CloudWatchLogs'); + + // AgentCoreResources must include all required actions + const agentCoreStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'AgentCoreResources'); 
expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetEvaluator'); + expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetGateway'); + expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetOnlineEvaluationConfig'); + expect(agentCoreStmt.Condition.StringEquals['aws:ResourceAccount']).toBeDefined(); }); }); diff --git a/src/cli/operations/deploy/post-deploy-ab-tests.ts b/src/cli/operations/deploy/post-deploy-ab-tests.ts index 4678d16f8..38353b5d3 100644 --- a/src/cli/operations/deploy/post-deploy-ab-tests.ts +++ b/src/cli/operations/deploy/post-deploy-ab-tests.ts @@ -153,10 +153,6 @@ export async function setupABTests(options: SetupABTestsOptions): Promise pv.onlineEvaluationConfigArn); if (testSpec.roleArn) { resolvedRoleArn = testSpec.roleArn; } else { @@ -165,7 +161,6 @@ export async function setupABTests(options: SetupABTestsOptions): Promise { - const { region, projectName, testName, gatewayArn, onlineEvalConfigArns } = options; + const { region, projectName, testName, gatewayArn } = options; const credentials = getCredentialProvider(); const iamClient = new IAMClient({ region, credentials }); // Extract account ID from gateway ARN (arn:aws:bedrock-agentcore:REGION:ACCOUNT:gateway/ID) const accountId = gatewayArn.split(':')[4] ?? '*'; - // Extract gateway ID for resource scoping - const gatewayId = gatewayArn.split('/').pop() ?? 
'*'; const roleName = generateRoleName(projectName, testName); @@ -578,6 +570,10 @@ async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise< Effect: 'Allow', Principal: { Service: 'bedrock-agentcore.amazonaws.com' }, Action: 'sts:AssumeRole', + Condition: { + StringEquals: { 'aws:SourceAccount': accountId }, + ArnLike: { 'aws:SourceArn': `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:ab-test/*` }, + }, }, ], }); @@ -623,51 +619,38 @@ async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise< Version: '2012-10-17', Statement: [ { - Sid: 'GatewayRuleStatement', + Sid: 'AgentCoreResources', Effect: 'Allow', Action: [ + 'bedrock-agentcore:GetGateway', + 'bedrock-agentcore:GetGatewayTarget', + 'bedrock-agentcore:ListGatewayTargets', 'bedrock-agentcore:CreateGatewayRule', 'bedrock-agentcore:UpdateGatewayRule', 'bedrock-agentcore:GetGatewayRule', 'bedrock-agentcore:DeleteGatewayRule', 'bedrock-agentcore:ListGatewayRules', + 'bedrock-agentcore:GetOnlineEvaluationConfig', + 'bedrock-agentcore:GetEvaluator', + 'bedrock-agentcore:GetConfigurationBundle', + 'bedrock-agentcore:GetConfigurationBundleVersion', + 'bedrock-agentcore:ListConfigurationBundleVersions', ], - Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:gateway/${gatewayId}`], - }, - { - Sid: 'GatewayReadStatement', - Effect: 'Allow', - Action: ['bedrock-agentcore:GetGateway'], - Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:gateway/${gatewayId}`], - }, - { - Sid: 'GatewayListStatement', - Effect: 'Allow', - Action: ['bedrock-agentcore:ListGateways'], - Resource: ['*'], - }, - { - Sid: 'OnlineEvaluationConfigStatement', - Effect: 'Allow', - Action: ['bedrock-agentcore:GetOnlineEvaluationConfig', 'bedrock-agentcore:UpdateOnlineEvaluationConfig'], - Resource: onlineEvalConfigArns, + Resource: `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:*`, + Condition: { StringEquals: { 'aws:ResourceAccount': accountId 
} }, }, { - Sid: 'ConfigurationBundleReadStatement', - Effect: 'Allow', - Action: ['bedrock-agentcore:GetConfigurationBundle', 'bedrock-agentcore:GetConfigurationBundleVersion'], - Resource: [`${arnPrefix(region)}:bedrock-agentcore:${region}:${accountId}:configuration-bundle/*`], - }, - { - Sid: 'CloudWatchDescribeLogGroups', + Sid: 'CloudWatchLogsDescribe', Effect: 'Allow', Action: ['logs:DescribeLogGroups'], - Resource: ['*'], + Resource: '*', }, { - Sid: 'CloudWatchLogReadStatement', + Sid: 'CloudWatchLogs', Effect: 'Allow', Action: [ + 'logs:DescribeIndexPolicies', + 'logs:PutIndexPolicy', 'logs:StartQuery', 'logs:GetQueryResults', 'logs:StopQuery', @@ -675,19 +658,9 @@ async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise< 'logs:GetLogEvents', ], Resource: [ - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*`, - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*:*`, - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans`, - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans:*`, - ], - }, - { - Sid: 'CloudWatchIndexPolicyStatement', - Effect: 'Allow', - Action: ['logs:DescribeIndexPolicies', 'logs:PutIndexPolicy'], - Resource: [ - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans`, - `${arnPrefix(region)}:logs:${region}:${accountId}:log-group:aws/spans:*`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans:*`, ], }, ],