From 38c45f45dfa6f3eb37cf1808b9ca912c6818cd82 Mon Sep 17 00:00:00 2001 From: "T.J Ariyawansa" Date: Mon, 4 May 2026 18:54:17 +0000 Subject: [PATCH 1/2] docs: clarify integration vs e2e test boundaries and add e2e README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rewrite integ-tests/README.md to accurately describe what integration tests do (local file/stdout assertions, no AWS required) — the old README described e2e behavior (CloudFormation, credentials, costs) - Add e2e-tests/README.md documenting the AWS boundary, prerequisites, createE2ESuite() pattern, key patterns, and cleanup requirements - Update docs/TESTING.md to add an E2E Tests section with a link to the new README, fix the stale integration tests section that incorrectly listed AWS credentials as a prerequisite, and add link to integ README --- docs/TESTING.md | 24 ++++++-- e2e-tests/README.md | 130 ++++++++++++++++++++++++++++++++++++++++++ integ-tests/README.md | 123 +++++++++++++++++++++++++++------------ 3 files changed, 236 insertions(+), 41 deletions(-) create mode 100644 e2e-tests/README.md diff --git a/docs/TESTING.md b/docs/TESTING.md index 9c70af6b3..d7c7870ef 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -39,6 +39,21 @@ integ-tests/ See [integ-tests/README.md](../integ-tests/README.md) for integration test details. +### E2E Tests + +E2E tests live in `e2e-tests/` and verify the full user journey across the AWS boundary +— deploy, invoke, status, logs, traces, and control plane API calls. + +``` +e2e-tests/ +├── e2e-helper.ts # Shared utilities and createE2ESuite() factory +├── strands-bedrock.test.ts +├── langgraph-openai.test.ts +└── ... +``` + +See [e2e-tests/README.md](../e2e-tests/README.md) for e2e test details. 
+ ## Writing Tests ### Imports @@ -435,14 +450,13 @@ npx playwright install chromium ## Integration Tests -Integration tests require: - -- AWS credentials configured -- IAM permissions for CloudFormation operations -- Dedicated test AWS account (recommended) +Integration tests require no AWS credentials. They run the real CLI binary and assert +on local files and stdout only. Run integration tests: ```bash npm run test:integ ``` + +See [integ-tests/README.md](../integ-tests/README.md) for full details. diff --git a/e2e-tests/README.md b/e2e-tests/README.md new file mode 100644 index 000000000..87e5a04de --- /dev/null +++ b/e2e-tests/README.md @@ -0,0 +1,130 @@ +# E2E Tests + +This directory contains end-to-end tests that verify the full user journey across the +AWS boundary. They create, deploy, invoke, and destroy real AWS resources. + +## What E2E Tests Cover + +E2E tests verify behaviors that require AWS to confirm they happened: + +- **Deployment** — `agentcore deploy` creates a real CloudFormation stack +- **`deployed-state.json`** — after deploy, `agentcore/.cli/deployed-state.json` contains + the correct ARNs and IDs for each deployed resource +- **Live AWS state** — `agentcore status` returns a real resource ARN and + `deploymentState: 'deployed'` +- **Live agent behavior** — `agentcore invoke` succeeds against a running agent +- **Observability** — `agentcore logs` returns real CloudWatch entries, + `agentcore traces list` returns real trace data +- **Direct control plane API calls** — `pause`, `resume`, and `promote` on AB tests + return live execution state from AWS + +They do **not** verify config file mutations or CLI input validation. Those belong +in `integ-tests/`. 
+ +## Prerequisites + +- AWS credentials configured (`aws sts get-caller-identity` must succeed) +- `npm`, `git`, and `uv` on PATH +- Sufficient IAM permissions to create/delete CloudFormation stacks +- A dedicated test AWS account (recommended to avoid cost surprises) +- Model-specific API keys set as env vars for non-Bedrock providers + (e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`) + +## Running + +```bash +# Run all e2e tests (requires AWS credentials) +npm run test:e2e + +# Run a specific file +npx vitest run e2e-tests/strands-bedrock.test.ts +``` + +E2E tests are not run automatically on every PR. They run on a schedule and before +releases. + +## Writing E2E Tests + +Most framework/model combination tests are a single call to `createE2ESuite()`: + +```typescript +import { createE2ESuite } from './e2e-helper.js'; + +createE2ESuite({ + framework: 'Strands', + modelProvider: 'Bedrock', +}); +``` + +`createE2ESuite()` generates the full lifecycle suite: +`create → deploy → invoke → status → logs → traces → destroy`. 
+ +For feature-specific lifecycle tests (AB tests, evals, config bundles), write the +suite directly using helpers from `e2e-helper.ts`: + +```typescript +import { + baseCanRun, + hasAws, + runAgentCoreCLI, + teardownE2EProject, + writeAwsTargets, +} from './e2e-helper.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const canRun = baseCanRun && hasAws; + +describe.sequential('e2e: my feature lifecycle', () => { + let projectPath: string; + const agentName = `E2eMyFeat${String(Date.now()).slice(-8)}`; + + beforeAll(async () => { + if (!canRun) return; + // create the project and assign projectPath, then write AWS targets + await writeAwsTargets(projectPath); + }, 300000); + + // Always destroy AWS resources — never skip this + afterAll(async () => { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + }, 600000); + + it.skipIf(!canRun)('deploys to AWS successfully', async () => { + const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath); + expect(result.exitCode).toBe(0); + expect(JSON.parse(result.stdout).success).toBe(true); + }, 600000); +}); +``` + +### Key patterns + +| Pattern | Why | +|---------|-----| +| `describe.sequential` | Tests depend on each other — deploy must succeed before invoke | +| `it.skipIf(!canRun)` | Gracefully skips when credentials or prerequisites are missing | +| `afterAll(() => teardownE2EProject(...))` | Always destroy AWS resources to avoid cost and leakage | +| `retry(fn, 3, 15000)` | AWS operations are eventually consistent — retries handle cold starts | +| `hasAwsCredentials()` | Gate the entire suite — skip all if no credentials | +| Long timeouts (600000ms) | CloudFormation deploys take minutes, not seconds | + +### File naming + +Framework/model combination tests: `{framework}-{model}.test.ts` +- `strands-bedrock.test.ts` +- `langgraph-openai.test.ts` + +Feature lifecycle tests: describe what the test exercises end-to-end +- `ab-test-target-based.test.ts` +- 
`dev-lifecycle.test.ts` +- `evals-lifecycle.test.ts` + +## Important Notes + +- E2E tests create real AWS resources and **will incur costs** +- Always include `teardownE2EProject()` in `afterAll` — never skip cleanup +- Use unique agent names (timestamp suffix) to avoid conflicts with parallel runs +- Stale credential providers older than 30 minutes are cleaned up automatically + in `beforeAll` via `cleanupStaleCredentialProviders()` diff --git a/integ-tests/README.md b/integ-tests/README.md index 729977988..4f79ad2c1 100644 --- a/integ-tests/README.md +++ b/integ-tests/README.md @@ -1,64 +1,115 @@ # Integration Tests -This directory contains real AWS integration tests that actually deploy resources. +This directory contains integration tests that run the real CLI binary and assert on +what it produces locally — no AWS credentials, no network access, no deployed resources. + +## What Integration Tests Cover + +Integration tests verify that CLI commands behave correctly by checking: + +- **Exit code and stdout** — the command exits `0` on success, non-zero on failure, + and `--json` output has the correct shape +- **`agentcore/agentcore.json`** — the project config was mutated correctly after + `add`, `remove`, or `create` commands +- **Scaffolded files** — `app/{agent}/pyproject.toml` contains the right framework + dependencies, `app/{agent}/main.py` exists, `.git/` was initialized +- **Validation behavior** — the CLI rejects invalid input with the right error message + before making any network call + +They do **not** verify deployments, live AWS state, or agent invocation. Those belong +in `e2e-tests/`. 
## Prerequisites -- AWS credentials configured -- Sufficient IAM permissions to create/delete CloudFormation stacks -- A dedicated test AWS account (recommended) +- `npm` and `git` on PATH (some tests are skipped automatically via `describe.skipIf` if either is missing) +- `uv` on PATH (required for tests that scaffold Python agents) +- No AWS credentials needed -## Running Integration Tests +## Running ```bash # Run all integration tests npm run test:integ -# Run a specific test -npx vitest run integ-tests/deploy.test.ts --testTimeout=300000 +# Run a specific file +npx vitest run integ-tests/add-remove-gateway.test.ts ``` -## Test Naming Convention +## Writing Integration Tests -All integration test files should be prefixed with `integ.`: +```typescript +import { createTestProject, runCLI, readProjectConfig } from '../src/test-utils/index.js'; +import type { TestProject } from '../src/test-utils/index.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; -- `integ.deploy.ts` - Tests actual deployment -- `integ.invoke.ts` - Tests invoking deployed agents -- `integ.destroy.ts` - Tests stack destruction -- `integ.e2e.ts` - Full end-to-end lifecycle test +describe('integration: add and remove a gateway', () => { + let project: TestProject; -## CI/CD + beforeAll(async () => { + project = await createTestProject({ noAgent: true }); + }); -Integration tests are NOT run automatically on every PR. They can be triggered: + afterAll(async () => { + await project.cleanup(); + }); -1. Manually via GitHub Actions workflow_dispatch -2. On a schedule (if configured) -3. 
Before releases + it('adds a gateway', async () => { + const result = await runCLI( + ['add', 'gateway', '--name', 'MyGateway', '--json'], + project.projectPath + ); -## Writing Integration Tests + expect(result.exitCode).toBe(0); + expect(JSON.parse(result.stdout).success).toBe(true); -```typescript -import { runCLI } from '../src/test-utils/cli-runner'; -import { afterAll, describe, expect, it } from 'vitest'; + const config = await readProjectConfig(project.projectPath); + const gateway = config.agentCoreGateways?.find(g => g.name === 'MyGateway'); + expect(gateway).toBeTruthy(); + }); -describe('integ: deploy', () => { - // Use unique stack names to avoid conflicts - const stackName = `test-${Date.now()}`; + it('removes the gateway', async () => { + const result = await runCLI( + ['remove', 'gateway', '--name', 'MyGateway', '--json'], + project.projectPath + ); - afterAll(async () => { - // ALWAYS clean up - destroy the stack - await runCLI(['destroy', '--target', stackName, '--force'], projectDir); - }); + expect(result.exitCode).toBe(0); - it('deploys successfully', async () => { - // Test implementation + const config = await readProjectConfig(project.projectPath); + expect(config.agentCoreGateways?.find(g => g.name === 'MyGateway')).toBeFalsy(); }); }); ``` -## Important Notes +### Key patterns + +| Pattern | Why | +|---------|-----| +| `createTestProject()` | Fast temp project setup — no npm/uv install | +| `runCLI([...args], projectPath)` | Runs the real built CLI binary, not a mock | +| `readProjectConfig(path)` | Reads and parses `agentcore/agentcore.json` | +| `afterAll(() => project.cleanup())` | Always delete the temp directory | +| `--json` flag | Makes stdout machine-readable for assertions | +| Assert exit code first | Fail fast with a useful message before asserting output | + +### File naming + +Name files after the feature area, not the command: + +- `add-remove-gateway.test.ts` — not `add.test.ts` +- `create-frameworks.test.ts` — not 
`create.test.ts` +- `lifecycle-config.test.ts` — not `flags.test.ts` -- Integration tests create real AWS resources and may incur costs -- Always include cleanup in `after()` hooks -- Use unique names to avoid conflicts with parallel runs -- Set appropriate timeouts (5-15 minutes for deploy operations) +### No mocking + +Integration tests contain zero mocks. The CLI commands tested here make no network +calls, so there is nothing to intercept. The real binary runs against the real +filesystem. + +## CI/CD + +Integration tests are not run automatically on every PR. They can be triggered: + +1. Manually via GitHub Actions `workflow_dispatch` +2. On a schedule (if configured) +3. Before releases From 2863378830d144e71105b377c98e98ef6bccdfd4 Mon Sep 17 00:00:00 2001 From: "T.J Ariyawansa" Date: Mon, 4 May 2026 18:59:16 +0000 Subject: [PATCH 2/2] chore: fix prettier formatting in docs and READMEs --- docs/TESTING.md | 7 ++-- e2e-tests/README.md | 78 ++++++++++++++++++++----------------------- integ-tests/README.md | 54 ++++++++++++------------------ 3 files changed, 61 insertions(+), 78 deletions(-) diff --git a/docs/TESTING.md b/docs/TESTING.md index d7c7870ef..601cf258f 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -41,8 +41,8 @@ See [integ-tests/README.md](../integ-tests/README.md) for integration test detai ### E2E Tests -E2E tests live in `e2e-tests/` and verify the full user journey across the AWS boundary -— deploy, invoke, status, logs, traces, and control plane API calls. +E2E tests live in `e2e-tests/` and verify the full user journey across the AWS boundary — deploy, invoke, status, logs, +traces, and control plane API calls. ``` e2e-tests/ @@ -450,8 +450,7 @@ npx playwright install chromium ## Integration Tests -Integration tests require no AWS credentials. They run the real CLI binary and assert -on local files and stdout only. +Integration tests require no AWS credentials. They run the real CLI binary and assert on local files and stdout only. 
Run integration tests: diff --git a/e2e-tests/README.md b/e2e-tests/README.md index 87e5a04de..cdaa0066d 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -1,25 +1,21 @@ # E2E Tests -This directory contains end-to-end tests that verify the full user journey across the -AWS boundary. They create, deploy, invoke, and destroy real AWS resources. +This directory contains end-to-end tests that verify the full user journey across the AWS boundary. They create, deploy, +invoke, and destroy real AWS resources. ## What E2E Tests Cover E2E tests verify behaviors that require AWS to confirm they happened: - **Deployment** — `agentcore deploy` creates a real CloudFormation stack -- **`deployed-state.json`** — after deploy, `agentcore/.cli/deployed-state.json` contains - the correct ARNs and IDs for each deployed resource -- **Live AWS state** — `agentcore status` returns a real resource ARN and - `deploymentState: 'deployed'` +- **`deployed-state.json`** — after deploy, `agentcore/.cli/deployed-state.json` contains the correct ARNs and IDs for + each deployed resource +- **Live AWS state** — `agentcore status` returns a real resource ARN and `deploymentState: 'deployed'` - **Live agent behavior** — `agentcore invoke` succeeds against a running agent -- **Observability** — `agentcore logs` returns real CloudWatch entries, - `agentcore traces list` returns real trace data -- **Direct control plane API calls** — `pause`, `resume`, and `promote` on AB tests - return live execution state from AWS +- **Observability** — `agentcore logs` returns real CloudWatch entries, `agentcore traces list` returns real trace data +- **Direct control plane API calls** — `pause`, `resume`, and `promote` on AB tests return live execution state from AWS -They do **not** verify config file mutations or CLI input validation. Those belong -in `integ-tests/`. +They do **not** verify config file mutations or CLI input validation. Those belong in `integ-tests/`. 
## Prerequisites @@ -27,8 +23,8 @@ in `integ-tests/`. - `npm`, `git`, and `uv` on PATH - Sufficient IAM permissions to create/delete CloudFormation stacks - A dedicated test AWS account (recommended to avoid cost surprises) -- Model-specific API keys set as env vars for non-Bedrock providers - (e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`) +- Model-specific API keys set as env vars for non-Bedrock providers (e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, + `GOOGLE_API_KEY`) ## Running @@ -40,8 +36,7 @@ npm run test:e2e npx vitest run e2e-tests/strands-bedrock.test.ts ``` -E2E tests are not run automatically on every PR. They run on a schedule and before -releases. +E2E tests are not run automatically on every PR. They run on a schedule and before releases. ## Writing E2E Tests @@ -56,20 +51,13 @@ createE2ESuite({ }); ``` -`createE2ESuite()` generates the full lifecycle suite: -`create → deploy → invoke → status → logs → traces → destroy`. +`createE2ESuite()` generates the full lifecycle suite: `create → deploy → invoke → status → logs → traces → destroy`. 
-For feature-specific lifecycle tests (AB tests, evals, config bundles), write the -suite directly using helpers from `e2e-helper.ts`: +For feature-specific lifecycle tests (AB tests, evals, config bundles), write the suite directly using helpers from +`e2e-helper.ts`: ```typescript -import { - baseCanRun, - hasAws, - runAgentCoreCLI, - teardownE2EProject, - writeAwsTargets, -} from './e2e-helper.js'; +import { baseCanRun, hasAws, runAgentCoreCLI, teardownE2EProject, writeAwsTargets } from './e2e-helper.js'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; const canRun = baseCanRun && hasAws; @@ -91,32 +79,38 @@ describe.sequential('e2e: my feature lifecycle', () => { } }, 600000); - it.skipIf(!canRun)('deploys to AWS successfully', async () => { - const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath); - expect(result.exitCode).toBe(0); - expect(JSON.parse(result.stdout).success).toBe(true); - }, 600000); + it.skipIf(!canRun)( + 'deploys to AWS successfully', + async () => { + const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath); + expect(result.exitCode).toBe(0); + expect(JSON.parse(result.stdout).success).toBe(true); + }, + 600000 + ); }); ``` ### Key patterns -| Pattern | Why | -|---------|-----| -| `describe.sequential` | Tests depend on each other — deploy must succeed before invoke | -| `it.skipIf(!canRun)` | Gracefully skips when credentials or prerequisites are missing | -| `afterAll(() => teardownE2EProject(...))` | Always destroy AWS resources to avoid cost and leakage | -| `retry(fn, 3, 15000)` | AWS operations are eventually consistent — retries handle cold starts | -| `hasAwsCredentials()` | Gate the entire suite — skip all if no credentials | -| Long timeouts (600000ms) | CloudFormation deploys take minutes, not seconds | +| Pattern | Why | +| ----------------------------------------- | --------------------------------------------------------------------- | +| 
`describe.sequential` | Tests depend on each other — deploy must succeed before invoke | +| `it.skipIf(!canRun)` | Gracefully skips when credentials or prerequisites are missing | +| `afterAll(() => teardownE2EProject(...))` | Always destroy AWS resources to avoid cost and leakage | +| `retry(fn, 3, 15000)` | AWS operations are eventually consistent — retries handle cold starts | +| `hasAwsCredentials()` | Gate the entire suite — skip all if no credentials | +| Long timeouts (600000ms) | CloudFormation deploys take minutes, not seconds | ### File naming Framework/model combination tests: `{framework}-{model}.test.ts` + - `strands-bedrock.test.ts` - `langgraph-openai.test.ts` Feature lifecycle tests: describe what the test exercises end-to-end + - `ab-test-target-based.test.ts` - `dev-lifecycle.test.ts` - `evals-lifecycle.test.ts` @@ -126,5 +120,5 @@ Feature lifecycle tests: describe what the test exercises end-to-end - E2E tests create real AWS resources and **will incur costs** - Always include `teardownE2EProject()` in `afterAll` — never skip cleanup - Use unique agent names (timestamp suffix) to avoid conflicts with parallel runs -- Stale credential providers older than 30 minutes are cleaned up automatically - in `beforeAll` via `cleanupStaleCredentialProviders()` +- Stale credential providers older than 30 minutes are cleaned up automatically in `beforeAll` via + `cleanupStaleCredentialProviders()` diff --git a/integ-tests/README.md b/integ-tests/README.md index 4f79ad2c1..66a117d12 100644 --- a/integ-tests/README.md +++ b/integ-tests/README.md @@ -1,23 +1,20 @@ # Integration Tests -This directory contains integration tests that run the real CLI binary and assert on -what it produces locally — no AWS credentials, no network access, no deployed resources. +This directory contains integration tests that run the real CLI binary and assert on what it produces locally — no AWS +credentials, no network access, no deployed resources. 
## What Integration Tests Cover Integration tests verify that CLI commands behave correctly by checking: -- **Exit code and stdout** — the command exits `0` on success, non-zero on failure, - and `--json` output has the correct shape -- **`agentcore/agentcore.json`** — the project config was mutated correctly after - `add`, `remove`, or `create` commands -- **Scaffolded files** — `app/{agent}/pyproject.toml` contains the right framework - dependencies, `app/{agent}/main.py` exists, `.git/` was initialized -- **Validation behavior** — the CLI rejects invalid input with the right error message - before making any network call +- **Exit code and stdout** — the command exits `0` on success, non-zero on failure, and `--json` output has the correct + shape +- **`agentcore/agentcore.json`** — the project config was mutated correctly after `add`, `remove`, or `create` commands +- **Scaffolded files** — `app/{agent}/pyproject.toml` contains the right framework dependencies, `app/{agent}/main.py` + exists, `.git/` was initialized +- **Validation behavior** — the CLI rejects invalid input with the right error message before making any network call -They do **not** verify deployments, live AWS state, or agent invocation. Those belong -in `e2e-tests/`. +They do **not** verify deployments, live AWS state, or agent invocation. Those belong in `e2e-tests/`. 
## Prerequisites @@ -38,7 +35,7 @@ npx vitest run integ-tests/add-remove-gateway.test.ts ## Writing Integration Tests ```typescript -import { createTestProject, runCLI, readProjectConfig } from '../src/test-utils/index.js'; +import { createTestProject, readProjectConfig, runCLI } from '../src/test-utils/index.js'; import type { TestProject } from '../src/test-utils/index.js'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; @@ -54,10 +51,7 @@ describe('integration: add and remove a gateway', () => { }); it('adds a gateway', async () => { - const result = await runCLI( - ['add', 'gateway', '--name', 'MyGateway', '--json'], - project.projectPath - ); + const result = await runCLI(['add', 'gateway', '--name', 'MyGateway', '--json'], project.projectPath); expect(result.exitCode).toBe(0); expect(JSON.parse(result.stdout).success).toBe(true); @@ -68,10 +62,7 @@ describe('integration: add and remove a gateway', () => { }); it('removes the gateway', async () => { - const result = await runCLI( - ['remove', 'gateway', '--name', 'MyGateway', '--json'], - project.projectPath - ); + const result = await runCLI(['remove', 'gateway', '--name', 'MyGateway', '--json'], project.projectPath); expect(result.exitCode).toBe(0); @@ -83,14 +74,14 @@ describe('integration: add and remove a gateway', () => { ### Key patterns -| Pattern | Why | -|---------|-----| -| `createTestProject()` | Fast temp project setup — no npm/uv install | -| `runCLI([...args], projectPath)` | Runs the real built CLI binary, not a mock | -| `readProjectConfig(path)` | Reads and parses `agentcore/agentcore.json` | -| `afterAll(() => project.cleanup())` | Always delete the temp directory | -| `--json` flag | Makes stdout machine-readable for assertions | -| Assert exit code first | Fail fast with a useful message before asserting output | +| Pattern | Why | +| ----------------------------------- | ------------------------------------------------------- | +| `createTestProject()` | Fast temp 
project setup — no npm/uv install | +| `runCLI([...args], projectPath)` | Runs the real built CLI binary, not a mock | +| `readProjectConfig(path)` | Reads and parses `agentcore/agentcore.json` | +| `afterAll(() => project.cleanup())` | Always delete the temp directory | +| `--json` flag | Makes stdout machine-readable for assertions | +| Assert exit code first | Fail fast with a useful message before asserting output | ### File naming @@ -102,9 +93,8 @@ Name files after the feature area, not the command: ### No mocking -Integration tests contain zero mocks. The CLI commands tested here make no network -calls, so there is nothing to intercept. The real binary runs against the real -filesystem. +Integration tests contain zero mocks. The CLI commands tested here make no network calls, so there is nothing to +intercept. The real binary runs against the real filesystem. ## CI/CD